greenmining-1.0.6-py3-none-any.whl → greenmining-1.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
greenmining/__init__.py CHANGED
@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
     is_green_aware,
 )
 
-__version__ = "1.0.6"
+__version__ = "1.0.8"
 
 
 def fetch_repositories(
@@ -18,6 +18,10 @@ def fetch_repositories(
     min_stars: int = None,
     languages: list = None,
     keywords: str = None,
+    created_after: str = None,
+    created_before: str = None,
+    pushed_after: str = None,
+    pushed_before: str = None,
 ):
     # Fetch repositories from GitHub with custom search keywords.
     config = Config()
@@ -29,6 +33,10 @@ def fetch_repositories(
         min_stars=min_stars,
         languages=languages,
         keywords=keywords,
+        created_after=created_after,
+        created_before=created_before,
+        pushed_after=pushed_after,
+        pushed_before=pushed_before,
     )
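The new temporal parameters pass straight through to the controller's GraphQL search. A minimal usage sketch (assuming `Config` supplies a `GITHUB_TOKEN` and that dates are ISO `YYYY-MM-DD` strings, the format GitHub's `created:`/`pushed:` search qualifiers expect):

```python
from greenmining import fetch_repositories

# Repositories created during 2022 that still received pushes in 2024.
repos = fetch_repositories(
    max_repos=50,
    min_stars=100,
    keywords="microservices",
    created_after="2022-01-01",
    created_before="2022-12-31",
    pushed_after="2024-01-01",
)
```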
greenmining/controllers/repository_controller.py CHANGED
@@ -1,26 +1,5 @@
 # Repository Controller - Handles repository fetching operations.
 
-# ============================================================================
-# OLD REST API IMPLEMENTATION (DEADCODE - REPLACED WITH GRAPHQL)
-# ============================================================================
-# from github import Github, GithubException
-# from tqdm import tqdm
-#
-# from greenmining.config import Config
-# from greenmining.models.repository import Repository
-# from greenmining.utils import colored_print, load_json_file, save_json_file
-#
-#
-# class RepositoryController:
-#     # Controller for GitHub repository operations.
-#
-#     def __init__(self, config: Config):
-#         # Initialize controller with configuration.
-#         self.config = config
-#         self.github = Github(config.GITHUB_TOKEN)
-# ============================================================================
-
-# NEW GRAPHQL IMPLEMENTATION (5-10x faster)
 from tqdm import tqdm
 
 from greenmining.config import Config
@@ -37,80 +16,6 @@ class RepositoryController:
         self.config = config
         self.graphql_fetcher = GitHubGraphQLFetcher(config.GITHUB_TOKEN)
 
-    # ============================================================================
-    # OLD REST API METHOD (DEADCODE - 10x slower, high rate limit cost)
-    # ============================================================================
-    # def fetch_repositories(
-    #     self,
-    #     max_repos: int = None,
-    #     min_stars: int = None,
-    #     languages: list[str] = None,
-    #     keywords: str = None,
-    #     created_after: str = None,
-    #     created_before: str = None,
-    #     pushed_after: str = None,
-    #     pushed_before: str = None,
-    # ) -> list[Repository]:
-    #     # Fetch repositories from GitHub using REST API (slow).
-    #     max_repos = max_repos or self.config.MAX_REPOS
-    #     min_stars = min_stars or self.config.MIN_STARS
-    #     languages = languages or self.config.SUPPORTED_LANGUAGES
-    #     keywords = keywords or "microservices"
-    #
-    #     colored_print(f" Fetching up to {max_repos} repositories...", "cyan")
-    #     colored_print(f" Keywords: {keywords}", "cyan")
-    #     colored_print(f" Filters: min_stars={min_stars}", "cyan")
-    #
-    #     if created_after or created_before:
-    #         colored_print(
-    #             f" Created: {created_after or 'any'} to {created_before or 'any'}", "cyan"
-    #         )
-    #     if pushed_after or pushed_before:
-    #         colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
-    #
-    #     # Build search query with temporal filters
-    #     query = self._build_temporal_query(
-    #         keywords, min_stars, created_after, created_before, pushed_after, pushed_before
-    #     )
-    #
-    #     try:
-    #         # Execute search (REST API - many requests)
-    #         search_results = self.github.search_repositories(
-    #             query=query, sort="stars", order="desc"
-    #         )
-    #
-    #         total_found = search_results.totalCount
-    #         colored_print(f" Found {total_found} repositories", "green")
-    #
-    #         # Fetch repositories (1 request per repo = slow)
-    #         repositories = []
-    #         with tqdm(total=min(max_repos, total_found), desc="Fetching", unit="repo") as pbar:
-    #             for idx, repo in enumerate(search_results):
-    #                 if idx >= max_repos:
-    #                     break
-    #
-    #                 try:
-    #                     repo_model = Repository.from_github_repo(repo, idx + 1)
-    #                     repositories.append(repo_model)
-    #                     pbar.update(1)
-    #                 except GithubException as e:
-    #                     colored_print(f" Error: {repo.full_name}: {e}", "yellow")
-    #                     continue
-    #
-    #         # Save to file
-    #         repo_dicts = [r.to_dict() for r in repositories]
-    #         save_json_file(repo_dicts, self.config.REPOS_FILE)
-    #
-    #         colored_print(f" Fetched {len(repositories)} repositories", "green")
-    #         colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
-    #
-    #         return repositories
-    #
-    #     except Exception as e:
-    #         colored_print(f" Error fetching repositories: {e}", "red")
-    #         raise
-    # ============================================================================
-
     def fetch_repositories(
         self,
         max_repos: int = None,
@@ -122,13 +27,13 @@ class RepositoryController:
         pushed_after: str = None,
         pushed_before: str = None,
     ) -> list[Repository]:
-        # Fetch repositories from GitHub using GraphQL API (5-10x faster).
+        # Fetch repositories from GitHub using GraphQL API.
         max_repos = max_repos or self.config.MAX_REPOS
         min_stars = min_stars or self.config.MIN_STARS
         languages = languages or self.config.SUPPORTED_LANGUAGES
         keywords = keywords or "microservices"
 
-        colored_print(f"🚀 Fetching up to {max_repos} repositories (GraphQL API)...", "cyan")
+        colored_print(f"Fetching up to {max_repos} repositories...", "cyan")
         colored_print(f" Keywords: {keywords}", "cyan")
         colored_print(f" Filters: min_stars={min_stars}", "cyan")
 
@@ -140,7 +45,7 @@ class RepositoryController:
             colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
 
         try:
-            # Use GraphQL API (much faster!)
+            # Execute GraphQL search
             repositories = self.graphql_fetcher.search_repositories(
                 keywords=keywords,
                 max_repos=max_repos,
@@ -156,51 +61,15 @@ class RepositoryController:
             repo_dicts = [r.to_dict() for r in repositories]
             save_json_file(repo_dicts, self.config.REPOS_FILE)
 
-            colored_print(f"Fetched {len(repositories)} repositories", "green")
+            colored_print(f"Fetched {len(repositories)} repositories", "green")
             colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
-            colored_print(f" API: GraphQL (5-10x faster than REST)", "green")
 
             return repositories
 
         except Exception as e:
-            colored_print(f"Error fetching repositories: {e}", "red")
+            colored_print(f"Error fetching repositories: {e}", "red")
             raise
 
-    # ============================================================================
-    # OLD REST API HELPER (DEADCODE - handled by GraphQL fetcher now)
-    # ============================================================================
-    # def _build_temporal_query(
-    #     self,
-    #     keywords: str,
-    #     min_stars: int,
-    #     created_after: str = None,
-    #     created_before: str = None,
-    #     pushed_after: str = None,
-    #     pushed_before: str = None,
-    # ) -> str:
-    #     # Build GitHub search query with temporal constraints.
-    #     query_parts = [keywords, f"stars:>={min_stars}"]
-    #
-    #     # Temporal filters
-    #     if created_after and created_before:
-    #         query_parts.append(f"created:{created_after}..{created_before}")
-    #     elif created_after:
-    #         query_parts.append(f"created:>={created_after}")
-    #     elif created_before:
-    #         query_parts.append(f"created:<={created_before}")
-    #
-    #     if pushed_after and pushed_before:
-    #         query_parts.append(f"pushed:{pushed_after}..{pushed_before}")
-    #     elif pushed_after:
-    #         query_parts.append(f"pushed:>={pushed_after}")
-    #     elif pushed_before:
-    #         query_parts.append(f"pushed:<={pushed_before}")
-    #
-    #     query = " ".join(query_parts)
-    #     colored_print(f" Query: {query}", "cyan")
-    #     return query
-    # ============================================================================
-
     def load_repositories(self) -> list[Repository]:
         # Load repositories from file.
         if not self.config.REPOS_FILE.exists():
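With the dead REST path gone, the controller's public surface is just the GraphQL-backed `fetch_repositories` plus `load_repositories`. A minimal sketch of the remaining API (assuming `Config` resolves `GITHUB_TOKEN` from the environment):

```python
from greenmining.config import Config
from greenmining.controllers.repository_controller import RepositoryController

controller = RepositoryController(Config())

# Fetch, persist to config.REPOS_FILE, then reload the cached results.
repos = controller.fetch_repositories(max_repos=25, keywords="green software")
cached = controller.load_repositories()
```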
greenmining/presenters/console_presenter.py CHANGED
@@ -113,7 +113,7 @@ class ConsolePresenter:
 
         table_data = []
         for phase, info in status.items():
-            status_icon = "" if info.get("completed") else ""
+            status_icon = "done" if info.get("completed") else "pending"
             table_data.append(
                 [status_icon, phase, info.get("file", "N/A"), info.get("size", "N/A")]
             )
greenmining/services/github_fetcher.py CHANGED
@@ -1,210 +1,2 @@
-# ================================================================================
-# DEADCODE - OLD REST API IMPLEMENTATION
-# ================================================================================
-#
-# This file contains the OLD GitHub REST API implementation.
-# It has been REPLACED by GitHubGraphQLFetcher for better performance.
-#
-# Performance comparison:
-#   REST API: 10+ requests for 100 repos, ~2 minutes
-#   GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
-#
-# USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
-#
-# This file is kept for reference only. Do not use in production.
-#
-# ================================================================================
-
-# GitHub repository fetcher for green microservices mining.
-
-# from __future__ import annotations
-#
-# from datetime import datetime
-# from pathlib import Path
-# from typing import Any, Dict, List, Optional
-#
-# from github import Github, GithubException, RateLimitExceededException
-# from tqdm import tqdm
-#
-# from greenmining.config import get_config
-# from greenmining.utils import (
-#     colored_print,
-#     format_timestamp,
-#     print_banner,
-#     save_json_file,
-# )
-#
-#
-# class GitHubFetcher:
-#     # Fetches microservice repositories from GitHub using REST API (SLOW).
-#
-#     def __init__(
-#         self,
-#         token: str,
-#         max_repos: int = 100,
-#         min_stars: int = 100,
-#         languages: Optional[list[str]] = None,
-#         created_after: Optional[str] = None,
-#         created_before: Optional[str] = None,
-#         pushed_after: Optional[str] = None,
-#         pushed_before: Optional[str] = None,
-#     ):
-#         # Initialize GitHub fetcher.
-#         self.github = Github(token)
-#         self.max_repos = max_repos
-#         self.min_stars = min_stars
-#         self.languages = languages or [
-#             "Java",
-#             "Python",
-#             "Go",
-#             "JavaScript",
-#             "TypeScript",
-#             "C#",
-#             "Rust",
-#         ]
-#         self.created_after = created_after
-#         self.created_before = created_before
-#         self.pushed_after = pushed_after
-#         self.pushed_before = pushed_before
-#
-#     def search_repositories(self) -> list[dict[str, Any]]:
-#         # Search for microservice repositories (REST API - many requests).
-#         repositories = []
-#         keywords = ["microservices", "microservice-architecture", "cloud-native"]
-#
-#         colored_print(f"Searching for repositories with keywords: {', '.join(keywords)}", "cyan")
-#         colored_print(
-#             f"Filters: min_stars={self.min_stars}, languages={', '.join(self.languages)}", "cyan"
-#         )
-#
-#         # Build search query with temporal filters
-#         query = self._build_temporal_query(keywords)
-#
-#         try:
-#             # Execute search (1 request)
-#             search_results = self.github.search_repositories(
-#                 query=query, sort="stars", order="desc"
-#             )
-#
-#             total_found = search_results.totalCount
-#             colored_print(f"Found {total_found} repositories matching criteria", "green")
-#
-#             # Fetch repository details with progress bar (1 request per repo = SLOW)
-#             with tqdm(
-#                 total=min(self.max_repos, total_found), desc="Fetching repositories", unit="repo"
-#             ) as pbar:
-#                 for idx, repo in enumerate(search_results):
-#                     if idx >= self.max_repos:
-#                         break
-#
-#                     try:
-#                         repo_data = self._extract_repo_metadata(repo, idx + 1)
-#                         repositories.append(repo_data)
-#                         pbar.update(1)
-#                     except GithubException as e:
-#                         colored_print(f"Error fetching {repo.full_name}: {e}", "yellow")
-#                         continue
-#                     except RateLimitExceededException:
-#                         colored_print("Rate limit exceeded. Waiting...", "red")
-#                         self._handle_rate_limit()
-#                         continue
-#
-#             return repositories
-#
-#         except GithubException as e:
-#             colored_print(f"GitHub API error: {e}", "red")
-#             raise
-#         except Exception as e:
-#             colored_print(f"Unexpected error: {e}", "red")
-#             raise
-#
-#     def _extract_repo_metadata(self, repo, repo_id: int) -> dict[str, Any]:
-#         # Extract metadata from repository object.
-#         return {
-#             "repo_id": repo_id,
-#             "name": repo.name,
-#             "owner": repo.owner.login,
-#             "full_name": repo.full_name,
-#             "url": repo.html_url,
-#             "clone_url": repo.clone_url,
-#             "language": repo.language,
-#             "stars": repo.stargazers_count,
-#             "forks": repo.forks_count,
-#             "watchers": repo.watchers_count,
-#             "open_issues": repo.open_issues_count,
-#             "last_updated": repo.updated_at.isoformat() if repo.updated_at else None,
-#             "created_at": repo.created_at.isoformat() if repo.created_at else None,
-#             "description": repo.description or "",
-#             "main_branch": repo.default_branch,
-#             "topics": repo.get_topics() if hasattr(repo, "get_topics") else [],
-#             "size": repo.size,
-#             "has_issues": repo.has_issues,
-#             "has_wiki": repo.has_wiki,
-#             "archived": repo.archived,
-#             "license": repo.license.name if repo.license else None,
-#         }
-#
-#     def _build_temporal_query(self, keywords: list[str]) -> str:
-#         # Build GitHub search query with temporal constraints.
-#         query_parts = []
-#
-#         # Keywords
-#         keyword_query = " OR ".join(keywords)
-#         query_parts.append(f"({keyword_query})")
-#
-#         # Languages
-#         language_query = " OR ".join([f"language:{lang}" for lang in self.languages])
-#         query_parts.append(f"({language_query})")
-#
-#         # Stars
-#         query_parts.append(f"stars:>={self.min_stars}")
-#
-#         # Archived filter
-#         query_parts.append("archived:false")
-#
-#         # Temporal filters
-#         if self.created_after and self.created_before:
-#             query_parts.append(f"created:{self.created_after}..{self.created_before}")
-#         elif self.created_after:
-#             query_parts.append(f"created:>={self.created_after}")
-#         elif self.created_before:
-#             query_parts.append(f"created:<={self.created_before}")
-#
-#         if self.pushed_after and self.pushed_before:
-#             query_parts.append(f"pushed:{self.pushed_after}..{self.pushed_before}")
-#         elif self.pushed_after:
-#             query_parts.append(f"pushed:>={self.pushed_after}")
-#         elif self.pushed_before:
-#             query_parts.append(f"pushed:<={self.pushed_before}")
-#
-#         query = " ".join(query_parts)
-#         colored_print(f"Query: {query}", "cyan")
-#         return query
-#
-#     def _handle_rate_limit(self):
-#         # Handle GitHub API rate limiting.
-#         rate_limit = self.github.get_rate_limit()
-#         reset_time = rate_limit.core.reset
-#         wait_seconds = (reset_time - datetime.now()).total_seconds()
-#
-#         if wait_seconds > 0:
-#             colored_print(f"Rate limit will reset in {wait_seconds:.0f} seconds", "yellow")
-#             import time
-#
-#             time.sleep(min(wait_seconds + 10, 60))  # Wait with max 60 seconds
-#
-#     def save_results(self, repositories: list[dict[str, Any]], output_file: Path):
-#         # Save fetched repositories to JSON file.
-#         data = {
-#             "metadata": {
-#                 "fetched_at": format_timestamp(),
-#                 "total_repos": len(repositories),
-#                 "min_stars": self.min_stars,
-#                 "languages": self.languages,
-#                 "search_keywords": ["microservices", "microservice-architecture", "cloud-native"],
-#             },
-#             "repositories": repositories,
-#         }
-#
-#         save_json_file(data, output_file)
-#         colored_print(f"Saved {len(repositories)} repositories to {output_file}", "green")
+# Legacy GitHub REST API fetcher (deprecated).
+# Use github_graphql_fetcher.GitHubGraphQLFetcher instead.
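Any caller still importing `GitHubFetcher` from this module has to move to the GraphQL fetcher. A minimal migration sketch (the constructor and the `keywords`/`max_repos` arguments are taken from the controller diff above; the method accepts further filter arguments not shown here):

```python
import os

from greenmining.services.github_graphql_fetcher import GitHubGraphQLFetcher

# One GraphQL search replaces the per-repository REST calls of GitHubFetcher.
fetcher = GitHubGraphQLFetcher(os.environ["GITHUB_TOKEN"])
repos = fetcher.search_repositories(keywords="microservices", max_repos=100)
```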
greenmining/services/github_graphql_fetcher.py CHANGED
@@ -1,7 +1,4 @@
-# GitHub GraphQL API fetcher for faster and more efficient repository fetching.
-#
-# GraphQL allows fetching exactly the data you need in a single request,
-# reducing API calls and improving rate limit efficiency.
+# GitHub GraphQL API fetcher for repository search and data retrieval.
 
 import json
 import time
@@ -14,12 +11,6 @@ from greenmining.models.repository import Repository
 
 class GitHubGraphQLFetcher:
     # Fetch GitHub repositories using GraphQL API v4.
-    #
-    # Benefits over REST API:
-    # - Fetch repos + commits in 1 request instead of 100+ REST calls
-    # - Get exactly the fields you need (no over-fetching)
-    # - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
-    # - More powerful search capabilities
 
     GRAPHQL_ENDPOINT = "https://api.github.com/graphql"
 
@@ -193,10 +184,10 @@ class GitHubGraphQLFetcher:
         # Star count
         query_parts.append(f"stars:>={min_stars}")
 
-        # Languages
-        if languages:
-            lang_query = " OR ".join([f"language:{lang}" for lang in languages])
-            query_parts.append(f"({lang_query})")
+        # Languages - skip filter if more than 5 to avoid exceeding GitHub query limits
+        if languages and len(languages) <= 5:
+            lang_query = " ".join([f"language:{lang}" for lang in languages])
+            query_parts.append(lang_query)
 
         # Date filters
         if created_after:
@@ -259,9 +250,6 @@ class GitHubGraphQLFetcher:
     ) -> List[Dict[str, Any]]:
         # Fetch commits for a specific repository using GraphQL.
         #
-        # This is much faster than REST API as it gets all commits in 1-2 requests
-        # instead of paginating through 100 individual REST calls.
-        #
         # Args:
         #     owner: Repository owner
         #     name: Repository name
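The language-filter change above is the one behavioral change in this file: the old builder OR-joined every language into one group, which can push a long language list past GitHub's search limits (queries are capped at 256 characters and five AND/OR/NOT operators). A standalone sketch of the new logic (the function name is illustrative; in the package this lives inside the fetcher's query builder):

```python
def build_language_filter(languages):
    # 1.0.8 behavior: drop the language filter entirely when more than five
    # languages are requested, instead of emitting an OR chain that would
    # exceed GitHub's search-query limits.
    if not languages or len(languages) > 5:
        return ""
    return " ".join(f"language:{lang}" for lang in languages)


print(build_language_filter(["Python", "Go"]))  # language:Python language:Go
print(build_language_filter(["Java"] * 6))      # "" (filter skipped)
```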
{greenmining-1.0.6.dist-info → greenmining-1.0.8.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: greenmining
-Version: 1.0.6
+Version: 1.0.8
 Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
 Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
 License: MIT
@@ -330,7 +330,137 @@ print(f"Top patterns: {stats['top_patterns'][:5]}")
 aggregator.export_to_csv(results, "output.csv")
 ```
 
-#### Batch Analysis
+#### URL-Based Repository Analysis
+
+```python
+from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
+
+analyzer = LocalRepoAnalyzer(
+    max_commits=200,
+    cleanup_after=True,
+)
+
+result = analyzer.analyze_repository("https://github.com/pallets/flask")
+
+print(f"Repository: {result.name}")
+print(f"Commits analyzed: {result.total_commits}")
+print(f"Green-aware: {result.green_commits} ({result.green_commit_rate:.1%})")
+
+for commit in result.commits[:5]:
+    if commit.green_aware:
+        print(f"  {commit.message[:60]}...")
+```
+
+#### Batch Analysis with Parallelism
+
+```python
+from greenmining import analyze_repositories
+
+results = analyze_repositories(
+    urls=[
+        "https://github.com/kubernetes/kubernetes",
+        "https://github.com/istio/istio",
+        "https://github.com/envoyproxy/envoy",
+    ],
+    max_commits=100,
+    parallel_workers=3,
+    energy_tracking=True,
+    energy_backend="auto",
+)
+
+for result in results:
+    print(f"{result.name}: {result.green_commit_rate:.1%} green")
+```
+
+#### Private Repository Analysis
+
+```python
+from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
+
+# HTTPS with token
+analyzer = LocalRepoAnalyzer(github_token="ghp_xxxx")
+result = analyzer.analyze_repository("https://github.com/company/private-repo")
+
+# SSH with key
+analyzer = LocalRepoAnalyzer(ssh_key_path="~/.ssh/id_rsa")
+result = analyzer.analyze_repository("git@github.com:company/private-repo.git")
+```
+
+#### Power Regression Detection
+
+```python
+from greenmining.analyzers import PowerRegressionDetector
+
+detector = PowerRegressionDetector(
+    test_command="pytest tests/ -x",
+    energy_backend="rapl",
+    threshold_percent=5.0,
+    iterations=5,
+)
+
+regressions = detector.detect(
+    repo_path="/path/to/repo",
+    baseline_commit="v1.0.0",
+    target_commit="HEAD",
+)
+
+for regression in regressions:
+    print(f"Commit {regression.sha[:8]}: +{regression.power_increase:.1f}%")
+```
+
+#### Version Power Comparison
+
+```python
+from greenmining.analyzers import VersionPowerAnalyzer
+
+analyzer = VersionPowerAnalyzer(
+    test_command="pytest tests/",
+    energy_backend="rapl",
+    iterations=10,
+    warmup_iterations=2,
+)
+
+report = analyzer.analyze_versions(
+    repo_path="/path/to/repo",
+    versions=["v1.0", "v1.1", "v1.2", "v2.0"],
+)
+
+print(report.summary())
+print(f"Trend: {report.trend}")
+print(f"Most efficient: {report.most_efficient}")
+```
+
+#### Metrics-to-Power Correlation
+
+```python
+from greenmining.analyzers import MetricsPowerCorrelator
+
+correlator = MetricsPowerCorrelator()
+correlator.fit(
+    metrics=["complexity", "nloc", "code_churn"],
+    metrics_values={
+        "complexity": [10, 20, 30, 40],
+        "nloc": [100, 200, 300, 400],
+        "code_churn": [50, 100, 150, 200],
+    },
+    power_measurements=[5.0, 8.0, 12.0, 15.0],
+)
+
+print(f"Pearson: {correlator.pearson}")
+print(f"Spearman: {correlator.spearman}")
+print(f"Feature importance: {correlator.feature_importance}")
+```
+
+#### Web Dashboard
+
+```python
+from greenmining.dashboard import run_dashboard
+
+# Launch interactive dashboard (requires pip install greenmining[dashboard])
+run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
+```
+
+#### Pipeline Batch Analysis
 
 ```python
 from greenmining.controllers.repository_controller import RepositoryController
@@ -551,17 +681,24 @@ config = Config(
 
 ### Core Capabilities
 
-- **Pattern Detection**: Automatically identifies 122 sustainability patterns across 15 categories
-- **Keyword Analysis**: Scans commit messages using 321 green software keywords
-- **Custom Repository Fetching**: Fetch repositories with custom search keywords (not limited to microservices)
-- **Repository Analysis**: Analyzes repositories from GitHub with flexible filtering
-- **Batch Processing**: Analyze hundreds of repositories and thousands of commits
-- **Multi-format Output**: Generates Markdown reports, CSV exports, and JSON data
-- **Statistical Analysis**: Calculates green-awareness metrics, pattern distribution, and trends
+- **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
+- **Keyword Analysis**: 321 green software detection keywords
+- **Repository Fetching**: GraphQL API with date, star, and language filters
+- **URL-Based Analysis**: Direct PyDriller analysis from GitHub URLs (HTTPS and SSH)
+- **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
+- **Private Repository Support**: Authentication via SSH keys or GitHub tokens
+- **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
+- **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles and cloud region support (AWS, GCP, Azure)
+- **Power Regression Detection**: Identify commits that increased energy consumption
+- **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
+- **Version Power Comparison**: Compare power consumption across software versions with trend detection
+- **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
+- **Source Code Access**: Before/after source code for refactoring detection
+- **Full Process Metrics**: All 8 PyDriller process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
+- **Statistical Analysis**: Correlations, effect sizes, and temporal trends
+- **Multi-format Output**: Markdown reports, CSV exports, JSON data
+- **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
 - **Docker Support**: Pre-built images for containerized analysis
-- **Programmatic API**: Full Python API for custom workflows and integrations
-- **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
-- **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
 
 ### Energy Measurement
 
@@ -712,8 +849,15 @@ ruff check greenmining/ tests/
 - Python 3.9+
 - PyGithub >= 2.1.1
 - PyDriller >= 2.5
-- pandas >= 2.2.0
-- codecarbon >= 2.0.0 (optional, for cross-platform energy measurement)
+- pandas >= 2.2.0
+
+**Optional dependencies:**
+
+```bash
+pip install greenmining[energy]     # psutil, codecarbon (energy measurement)
+pip install greenmining[dashboard]  # flask (web dashboard)
+pip install greenmining[dev]        # pytest, black, ruff, mypy (development)
+```
 
 ## License
 
{greenmining-1.0.6.dist-info → greenmining-1.0.8.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
-greenmining/__init__.py,sha256=wbMwJYwC1HKIPn4w5N6Ux8GV5fAohevW7iO_BYuFuA4,2637
+greenmining/__init__.py,sha256=cilA0cKdT3R8t-yFdkkcCvsSGzQ0PLoPMY2OQ4Irytg,2909
 greenmining/__main__.py,sha256=NYOVS7D4w2XDLn6SyXHXPKE5GrNGOeoWSTb_KazgK5c,590
 greenmining/__version__.py,sha256=xZc02a8bS3vUJlzh8k9RoxemB1irQmq_SpVVj6Cg5M0,62
 greenmining/config.py,sha256=M4a7AwM1ErCmOY0n5Vmyoo9HPblSkTZ-HD3k2YHzs4A,8340
@@ -13,7 +13,7 @@ greenmining/analyzers/statistical_analyzer.py,sha256=DzWAcCyw42Ig3FIxTwPPBikgt2u
 greenmining/analyzers/temporal_analyzer.py,sha256=JfTcAoI20oCFMehGrSRnDqhJTXI-RUbdCTMwDOTW9-g,14259
 greenmining/analyzers/version_power_analyzer.py,sha256=2P6zOqBg-ButtIhF-4cutiwD2Q1geMY49VFUghHXXoI,8119
 greenmining/controllers/__init__.py,sha256=UiAT6zBvC1z_9cJWfzq1cLA0I4r9b2vURHipj8oDczI,180
-greenmining/controllers/repository_controller.py,sha256=fyL6Y8xpoixDplP4_rKWiwak42M2DaIihzyKVaBlivA,9680
+greenmining/controllers/repository_controller.py,sha256=DM9BabUAwZJARGngCk_4wEYPw2adn8iESCiFQ7Um4LQ,3880
 greenmining/dashboard/__init__.py,sha256=Ig_291-hLrH9k3rV0whhQ1EkhiaRR8ciHiJ5s5OCBf4,141
 greenmining/dashboard/app.py,sha256=Hk6_i2qmcg6SGW7UzxglEIvUBJiloRA-hMYI-YSORcA,8604
 greenmining/energy/__init__.py,sha256=GoCYh7hitWBoPMtan1HF1yezCHi7o4sa_YUJgGkeJc8,558
@@ -28,17 +28,17 @@ greenmining/models/analysis_result.py,sha256=YICTCEcrJxZ1R8Xaio3AZOjCGwMzC_62BMA
 greenmining/models/commit.py,sha256=mnRDWSiIyGtJeGXI8sav9hukWUyVFpoNe6GixRlZjY4,2439
 greenmining/models/repository.py,sha256=SKjS01onOptpMioumtAPZxKpKheHAeVXnXyvatl7CfM,2856
 greenmining/presenters/__init__.py,sha256=d1CMtqtUAHYHYNzigPyjtGOUtnH1drtUwf7-bFQq2B8,138
-greenmining/presenters/console_presenter.py,sha256=XOahvlcr4qLbUdhk8cGq1ZWagvemEd3Wgriu8T5EI3s,4896
+greenmining/presenters/console_presenter.py,sha256=qagn2c2aOym0WNKV8n175MQ-BTheLjrXzW8c1OafzAQ,4904
 greenmining/services/__init__.py,sha256=ZEMOVut0KRdume_vz58beSNps3YgeoGBXmUjEqNgIhc,690
 greenmining/services/commit_extractor.py,sha256=Fz2WTWjIZ_vQhSfkJKnWpJnBpI2nm0KacA4qYAvCpSE,8451
 greenmining/services/data_aggregator.py,sha256=TsFT0oGOnnHk0QGZ1tT6ZhKGc5X1H1D1u7-7OpiPo7Y,19566
 greenmining/services/data_analyzer.py,sha256=f0nlJkPAclHHCzzTyQW5bjhYrgE0XXiR1x7_o3fJaDs,9732
-greenmining/services/github_fetcher.py,sha256=mUcmQevhdDRYX72O-M7Vi-s3y4ZwNyKewleti838cqU,8285
-greenmining/services/github_graphql_fetcher.py,sha256=p76vp5EgStzkmTcws__jb90za8m61toW0CBrwrm5Ew4,11972
+greenmining/services/github_fetcher.py,sha256=sdkS-LhHmX7mgMdlClCwEUVnZrItc0Pt6FVtlWk5iLU,106
+greenmining/services/github_graphql_fetcher.py,sha256=HvADlXGqrqfzqnsI9xJQifhy8rQ5fQzosdlCBdNxjsU,11467
 greenmining/services/local_repo_analyzer.py,sha256=5DMN9RIyGXNdsOlIDV4Mp0fPavbB69oBA9us17P5cNo,24668
 greenmining/services/reports.py,sha256=Vrw_pBNmVw2mTAf1dpcAqjBe6gXv-O4w_XweoVTt7L8,23392
-greenmining-1.0.6.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
-greenmining-1.0.6.dist-info/METADATA,sha256=cd82RQon4bIBdJT85mcCeZuXL3evl4N9YkcqywDw41k,26920
-greenmining-1.0.6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-greenmining-1.0.6.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
-greenmining-1.0.6.dist-info/RECORD,,
+greenmining-1.0.8.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
+greenmining-1.0.8.dist-info/METADATA,sha256=46ygTrzFHVKFOPpF9gb9D_HbTCWs1ZN0VH4v1I7U7Zg,30913
+greenmining-1.0.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+greenmining-1.0.8.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
+greenmining-1.0.8.dist-info/RECORD,,