greenmining 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +9 -1
- greenmining/controllers/repository_controller.py +5 -136
- greenmining/presenters/console_presenter.py +1 -1
- greenmining/services/github_fetcher.py +2 -210
- greenmining/services/github_graphql_fetcher.py +5 -17
- {greenmining-1.0.6.dist-info → greenmining-1.0.8.dist-info}/METADATA +158 -14
- {greenmining-1.0.6.dist-info → greenmining-1.0.8.dist-info}/RECORD +10 -10
- {greenmining-1.0.6.dist-info → greenmining-1.0.8.dist-info}/WHEEL +0 -0
- {greenmining-1.0.6.dist-info → greenmining-1.0.8.dist-info}/licenses/LICENSE +0 -0
- {greenmining-1.0.6.dist-info → greenmining-1.0.8.dist-info}/top_level.txt +0 -0
greenmining/__init__.py
CHANGED
@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
     is_green_aware,
 )
 
-__version__ = "1.0.6"
+__version__ = "1.0.8"
 
 
 def fetch_repositories(
@@ -18,6 +18,10 @@ def fetch_repositories(
     min_stars: int = None,
     languages: list = None,
     keywords: str = None,
+    created_after: str = None,
+    created_before: str = None,
+    pushed_after: str = None,
+    pushed_before: str = None,
 ):
     # Fetch repositories from GitHub with custom search keywords.
     config = Config()
@@ -29,6 +33,10 @@
         min_stars=min_stars,
         languages=languages,
         keywords=keywords,
+        created_after=created_after,
+        created_before=created_before,
+        pushed_after=pushed_after,
+        pushed_before=pushed_before,
     )
 
 
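The four new parameters are forwarded unchanged into the GitHub search query, so callers can now scope a fetch by creation and push dates. A minimal usage sketch, assuming a GitHub token is already configured via `Config` and that the helper returns the repository list the way the controller method later in this diff does; the date values are illustrative:

```python
from greenmining import fetch_repositories

# Illustrative filter values: repositories created during 2022 that have
# received pushes since mid-2023 (GitHub search dates, YYYY-MM-DD).
repos = fetch_repositories(
    max_repos=50,
    min_stars=100,
    keywords="microservices",
    created_after="2022-01-01",
    created_before="2022-12-31",
    pushed_after="2023-06-01",
)
```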
greenmining/controllers/repository_controller.py
CHANGED
@@ -1,26 +1,5 @@
 # Repository Controller - Handles repository fetching operations.
 
-# ============================================================================
-# OLD REST API IMPLEMENTATION (DEADCODE - REPLACED WITH GRAPHQL)
-# ============================================================================
-# from github import Github, GithubException
-# from tqdm import tqdm
-#
-# from greenmining.config import Config
-# from greenmining.models.repository import Repository
-# from greenmining.utils import colored_print, load_json_file, save_json_file
-#
-#
-# class RepositoryController:
-# # Controller for GitHub repository operations.
-#
-# def __init__(self, config: Config):
-# # Initialize controller with configuration.
-# self.config = config
-# self.github = Github(config.GITHUB_TOKEN)
-# ============================================================================
-
-# NEW GRAPHQL IMPLEMENTATION (5-10x faster)
 from tqdm import tqdm
 
 from greenmining.config import Config
@@ -37,80 +16,6 @@ class RepositoryController:
         self.config = config
         self.graphql_fetcher = GitHubGraphQLFetcher(config.GITHUB_TOKEN)
 
-# ============================================================================
-# OLD REST API METHOD (DEADCODE - 10x slower, high rate limit cost)
-# ============================================================================
-# def fetch_repositories(
-# self,
-# max_repos: int = None,
-# min_stars: int = None,
-# languages: list[str] = None,
-# keywords: str = None,
-# created_after: str = None,
-# created_before: str = None,
-# pushed_after: str = None,
-# pushed_before: str = None,
-# ) -> list[Repository]:
-# # Fetch repositories from GitHub using REST API (slow).
-# max_repos = max_repos or self.config.MAX_REPOS
-# min_stars = min_stars or self.config.MIN_STARS
-# languages = languages or self.config.SUPPORTED_LANGUAGES
-# keywords = keywords or "microservices"
-#
-# colored_print(f" Fetching up to {max_repos} repositories...", "cyan")
-# colored_print(f" Keywords: {keywords}", "cyan")
-# colored_print(f" Filters: min_stars={min_stars}", "cyan")
-#
-# if created_after or created_before:
-# colored_print(
-# f" Created: {created_after or 'any'} to {created_before or 'any'}", "cyan"
-# )
-# if pushed_after or pushed_before:
-# colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
-#
-# # Build search query with temporal filters
-# query = self._build_temporal_query(
-# keywords, min_stars, created_after, created_before, pushed_after, pushed_before
-# )
-#
-# try:
-# # Execute search (REST API - many requests)
-# search_results = self.github.search_repositories(
-# query=query, sort="stars", order="desc"
-# )
-#
-# total_found = search_results.totalCount
-# colored_print(f" Found {total_found} repositories", "green")
-#
-# # Fetch repositories (1 request per repo = slow)
-# repositories = []
-# with tqdm(total=min(max_repos, total_found), desc="Fetching", unit="repo") as pbar:
-# for idx, repo in enumerate(search_results):
-# if idx >= max_repos:
-# break
-#
-# try:
-# repo_model = Repository.from_github_repo(repo, idx + 1)
-# repositories.append(repo_model)
-# pbar.update(1)
-# except GithubException as e:
-# colored_print(f" Error: {repo.full_name}: {e}", "yellow")
-# continue
-#
-# # Save to file
-# repo_dicts = [r.to_dict() for r in repositories]
-# save_json_file(repo_dicts, self.config.REPOS_FILE)
-#
-# colored_print(f" Fetched {len(repositories)} repositories", "green")
-# colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
-#
-# return repositories
-#
-# except Exception as e:
-# colored_print(f" Error fetching repositories: {e}", "red")
-# raise
-# ============================================================================
-
     def fetch_repositories(
         self,
         max_repos: int = None,
@@ -122,13 +27,13 @@ class RepositoryController:
         pushed_after: str = None,
         pushed_before: str = None,
     ) -> list[Repository]:
-        # Fetch repositories from GitHub using GraphQL API
+        # Fetch repositories from GitHub using GraphQL API.
         max_repos = max_repos or self.config.MAX_REPOS
         min_stars = min_stars or self.config.MIN_STARS
         languages = languages or self.config.SUPPORTED_LANGUAGES
         keywords = keywords or "microservices"
 
-        colored_print(f"
+        colored_print(f"Fetching up to {max_repos} repositories...", "cyan")
         colored_print(f" Keywords: {keywords}", "cyan")
         colored_print(f" Filters: min_stars={min_stars}", "cyan")
 
@@ -140,7 +45,7 @@ class RepositoryController:
             colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
 
         try:
-            #
+            # Execute GraphQL search
             repositories = self.graphql_fetcher.search_repositories(
                 keywords=keywords,
                 max_repos=max_repos,
@@ -156,51 +61,15 @@ class RepositoryController:
             repo_dicts = [r.to_dict() for r in repositories]
             save_json_file(repo_dicts, self.config.REPOS_FILE)
 
-            colored_print(f"
+            colored_print(f"Fetched {len(repositories)} repositories", "green")
             colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
-            colored_print(f" API: GraphQL (5-10x faster than REST)", "green")
 
             return repositories
 
         except Exception as e:
-            colored_print(f"
+            colored_print(f"Error fetching repositories: {e}", "red")
             raise
 
-# ============================================================================
-# OLD REST API HELPER (DEADCODE - handled by GraphQL fetcher now)
-# ============================================================================
-# def _build_temporal_query(
-# self,
-# keywords: str,
-# min_stars: int,
-# created_after: str = None,
-# created_before: str = None,
-# pushed_after: str = None,
-# pushed_before: str = None,
-# ) -> str:
-# # Build GitHub search query with temporal constraints.
-# query_parts = [keywords, f"stars:>={min_stars}"]
-#
-# # Temporal filters
-# if created_after and created_before:
-# query_parts.append(f"created:{created_after}..{created_before}")
-# elif created_after:
-# query_parts.append(f"created:>={created_after}")
-# elif created_before:
-# query_parts.append(f"created:<={created_before}")
-#
-# if pushed_after and pushed_before:
-# query_parts.append(f"pushed:{pushed_after}..{pushed_before}")
-# elif pushed_after:
-# query_parts.append(f"pushed:>={pushed_after}")
-# elif pushed_before:
-# query_parts.append(f"pushed:<={pushed_before}")
-#
-# query = " ".join(query_parts)
-# colored_print(f" Query: {query}", "cyan")
-# return query
-# ============================================================================
-
     def load_repositories(self) -> list[Repository]:
         # Load repositories from file.
         if not self.config.REPOS_FILE.exists():
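After this cleanup the controller exposes a single fetch path, the GraphQL one shown in the hunks above. A minimal sketch of driving it directly, assuming `Config` resolves `GITHUB_TOKEN` and the data file paths the same way it does elsewhere in the package:

```python
from greenmining.config import Config
from greenmining.controllers.repository_controller import RepositoryController

controller = RepositoryController(Config())

# Fetch via GraphQL with the same keyword/star/date filters as the
# module-level helper; results are saved to Config.REPOS_FILE.
repos = controller.fetch_repositories(
    max_repos=25,
    min_stars=500,
    keywords="microservices",
    pushed_after="2024-01-01",
)

# Later runs can re-read the saved JSON instead of hitting the API again.
repos = controller.load_repositories()
```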
greenmining/presenters/console_presenter.py
CHANGED
@@ -113,7 +113,7 @@ class ConsolePresenter:
 
         table_data = []
         for phase, info in status.items():
-            status_icon = "" if info.get("completed") else "
+            status_icon = "done" if info.get("completed") else "pending"
             table_data.append(
                 [status_icon, phase, info.get("file", "N/A"), info.get("size", "N/A")]
             )
greenmining/services/github_fetcher.py
CHANGED
@@ -1,210 +1,2 @@
-#
-#
-# ================================================================================
-#
-# This file contains the OLD GitHub REST API implementation.
-# It has been REPLACED by GitHubGraphQLFetcher for better performance.
-#
-# Performance comparison:
-# REST API: 10+ requests for 100 repos, ~2 minutes
-# GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
-#
-# USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
-#
-# This file is kept for reference only. Do not use in production.
-#
-# ================================================================================
-
-# GitHub repository fetcher for green microservices mining.
-
-# from __future__ import annotations
-#
-# from datetime import datetime
-# from pathlib import Path
-# from typing import Any, Dict, List, Optional
-#
-# from github import Github, GithubException, RateLimitExceededException
-# from tqdm import tqdm
-#
-# from greenmining.config import get_config
-# from greenmining.utils import (
-# colored_print,
-# format_timestamp,
-# print_banner,
-# save_json_file,
-# )
-#
-#
-# class GitHubFetcher:
-# # Fetches microservice repositories from GitHub using REST API (SLOW).
-#
-# def __init__(
-# self,
-# token: str,
-# max_repos: int = 100,
-# min_stars: int = 100,
-# languages: Optional[list[str]] = None,
-# created_after: Optional[str] = None,
-# created_before: Optional[str] = None,
-# pushed_after: Optional[str] = None,
-# pushed_before: Optional[str] = None,
-# ):
-# # Initialize GitHub fetcher.
-# self.github = Github(token)
-# self.max_repos = max_repos
-# self.min_stars = min_stars
-# self.languages = languages or [
-# "Java",
-# "Python",
-# "Go",
-# "JavaScript",
-# "TypeScript",
-# "C#",
-# "Rust",
-# ]
-# self.created_after = created_after
-# self.created_before = created_before
-# self.pushed_after = pushed_after
-# self.pushed_before = pushed_before
-#
-# def search_repositories(self) -> list[dict[str, Any]]:
-# # Search for microservice repositories (REST API - many requests).
-# repositories = []
-# keywords = ["microservices", "microservice-architecture", "cloud-native"]
-#
-# colored_print(f"Searching for repositories with keywords: {', '.join(keywords)}", "cyan")
-# colored_print(
-# f"Filters: min_stars={self.min_stars}, languages={', '.join(self.languages)}", "cyan"
-# )
-#
-# # Build search query with temporal filters
-# query = self._build_temporal_query(keywords)
-#
-# try:
-# # Execute search (1 request)
-# search_results = self.github.search_repositories(
-# query=query, sort="stars", order="desc"
-# )
-#
-# total_found = search_results.totalCount
-# colored_print(f"Found {total_found} repositories matching criteria", "green")
-#
-# # Fetch repository details with progress bar (1 request per repo = SLOW)
-# with tqdm(
-# total=min(self.max_repos, total_found), desc="Fetching repositories", unit="repo"
-# ) as pbar:
-# for idx, repo in enumerate(search_results):
-# if idx >= self.max_repos:
-# break
-#
-# try:
-# repo_data = self._extract_repo_metadata(repo, idx + 1)
-# repositories.append(repo_data)
-# pbar.update(1)
-# except GithubException as e:
-# colored_print(f"Error fetching {repo.full_name}: {e}", "yellow")
-# continue
-# except RateLimitExceededException:
-# colored_print("Rate limit exceeded. Waiting...", "red")
-# self._handle_rate_limit()
-# continue
-#
-# return repositories
-#
-# except GithubException as e:
-# colored_print(f"GitHub API error: {e}", "red")
-# raise
-# except Exception as e:
-# colored_print(f"Unexpected error: {e}", "red")
-# raise
-#
-# def _extract_repo_metadata(self, repo, repo_id: int) -> dict[str, Any]:
-# # Extract metadata from repository object.
-# return {
-# "repo_id": repo_id,
-# "name": repo.name,
-# "owner": repo.owner.login,
-# "full_name": repo.full_name,
-# "url": repo.html_url,
-# "clone_url": repo.clone_url,
-# "language": repo.language,
-# "stars": repo.stargazers_count,
-# "forks": repo.forks_count,
-# "watchers": repo.watchers_count,
-# "open_issues": repo.open_issues_count,
-# "last_updated": repo.updated_at.isoformat() if repo.updated_at else None,
-# "created_at": repo.created_at.isoformat() if repo.created_at else None,
-# "description": repo.description or "",
-# "main_branch": repo.default_branch,
-# "topics": repo.get_topics() if hasattr(repo, "get_topics") else [],
-# "size": repo.size,
-# "has_issues": repo.has_issues,
-# "has_wiki": repo.has_wiki,
-# "archived": repo.archived,
-# "license": repo.license.name if repo.license else None,
-# }
-#
-# def _build_temporal_query(self, keywords: list[str]) -> str:
-# # Build GitHub search query with temporal constraints.
-# query_parts = []
-#
-# # Keywords
-# keyword_query = " OR ".join(keywords)
-# query_parts.append(f"({keyword_query})")
-#
-# # Languages
-# language_query = " OR ".join([f"language:{lang}" for lang in self.languages])
-# query_parts.append(f"({language_query})")
-#
-# # Stars
-# query_parts.append(f"stars:>={self.min_stars}")
-#
-# # Archived filter
-# query_parts.append("archived:false")
-#
-# # Temporal filters
-# if self.created_after and self.created_before:
-# query_parts.append(f"created:{self.created_after}..{self.created_before}")
-# elif self.created_after:
-# query_parts.append(f"created:>={self.created_after}")
-# elif self.created_before:
-# query_parts.append(f"created:<={self.created_before}")
-#
-# if self.pushed_after and self.pushed_before:
-# query_parts.append(f"pushed:{self.pushed_after}..{self.pushed_before}")
-# elif self.pushed_after:
-# query_parts.append(f"pushed:>={self.pushed_after}")
-# elif self.pushed_before:
-# query_parts.append(f"pushed:<={self.pushed_before}")
-#
-# query = " ".join(query_parts)
-# colored_print(f"Query: {query}", "cyan")
-# return query
-#
-# def _handle_rate_limit(self):
-# # Handle GitHub API rate limiting.
-# rate_limit = self.github.get_rate_limit()
-# reset_time = rate_limit.core.reset
-# wait_seconds = (reset_time - datetime.now()).total_seconds()
-#
-# if wait_seconds > 0:
-# colored_print(f"Rate limit will reset in {wait_seconds:.0f} seconds", "yellow")
-# import time
-#
-# time.sleep(min(wait_seconds + 10, 60)) # Wait with max 60 seconds
-#
-# def save_results(self, repositories: list[dict[str, Any]], output_file: Path):
-# # Save fetched repositories to JSON file.
-# data = {
-# "metadata": {
-# "fetched_at": format_timestamp(),
-# "total_repos": len(repositories),
-# "min_stars": self.min_stars,
-# "languages": self.languages,
-# "search_keywords": ["microservices", "microservice-architecture", "cloud-native"],
-# },
-# "repositories": repositories,
-# }
-#
-# save_json_file(data, output_file)
-# colored_print(f"Saved {len(repositories)} repositories to {output_file}", "green")
+# Legacy GitHub REST API fetcher (deprecated).
+# Use github_graphql_fetcher.GitHubGraphQLFetcher instead.
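For code that still imports the old `GitHubFetcher`, the pointer above is the whole migration story: construct the GraphQL fetcher with just a token and pass the filters per call. A hedged sketch based on the controller code elsewhere in this diff; only `keywords` and `max_repos` are visible at that call site, the remaining filter keyword is an assumption:

```python
from greenmining.services.github_graphql_fetcher import GitHubGraphQLFetcher

# Token-only construction, mirroring RepositoryController.__init__ in this diff.
fetcher = GitHubGraphQLFetcher("ghp_your_token_here")

# keywords and max_repos are shown in the controller hunk; min_stars is assumed
# to be accepted the same way since the query builder uses it.
repos = fetcher.search_repositories(
    keywords="microservices",
    max_repos=100,
    min_stars=100,
)
```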
greenmining/services/github_graphql_fetcher.py
CHANGED
@@ -1,7 +1,4 @@
-# GitHub GraphQL API fetcher for
-#
-# GraphQL allows fetching exactly the data you need in a single request,
-# reducing API calls and improving rate limit efficiency.
+# GitHub GraphQL API fetcher for repository search and data retrieval.
 
 import json
 import time
@@ -14,12 +11,6 @@ from greenmining.models.repository import Repository
 
 class GitHubGraphQLFetcher:
     # Fetch GitHub repositories using GraphQL API v4.
-    #
-    # Benefits over REST API:
-    # - Fetch repos + commits in 1 request instead of 100+ REST calls
-    # - Get exactly the fields you need (no over-fetching)
-    # - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
-    # - More powerful search capabilities
 
     GRAPHQL_ENDPOINT = "https://api.github.com/graphql"
 
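`GRAPHQL_ENDPOINT` is GitHub's single GraphQL v4 endpoint; every search in this class is a POST of a query document against it. A standalone sketch of the kind of request the fetcher wraps, using `requests` purely for illustration (the package's own HTTP layer and query text are not shown in this diff):

```python
import os
import requests

QUERY = """
query($q: String!, $first: Int!) {
  search(query: $q, type: REPOSITORY, first: $first) {
    repositoryCount
    nodes {
      ... on Repository { nameWithOwner stargazerCount }
    }
  }
}
"""

# One POST returns a page of matching repositories with exactly the fields asked for.
resp = requests.post(
    "https://api.github.com/graphql",
    json={"query": QUERY, "variables": {"q": "microservices stars:>=100", "first": 10}},
    headers={"Authorization": f"bearer {os.environ['GITHUB_TOKEN']}"},
    timeout=30,
)
for node in resp.json()["data"]["search"]["nodes"]:
    print(node["nameWithOwner"], node["stargazerCount"])
```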
@@ -193,10 +184,10 @@ class GitHubGraphQLFetcher:
         # Star count
         query_parts.append(f"stars:>={min_stars}")
 
-        # Languages
-        if languages:
-            lang_query = "
-            query_parts.append(
+        # Languages - skip filter if more than 5 to avoid exceeding GitHub query limits
+        if languages and len(languages) <= 5:
+            lang_query = " ".join([f"language:{lang}" for lang in languages])
+            query_parts.append(lang_query)
 
         # Date filters
         if created_after:
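The reworked language handling keeps the generated search string inside GitHub's limit on query terms by dropping the language filter entirely when more than five languages are configured. A standalone sketch of how the parts shown in this hunk combine; the function name, the leading keyword term, and the date handling are assumed from the surrounding code rather than shown here:

```python
def build_search_query(keywords, min_stars, languages=None, created_after=None):
    # Keyword term and star floor come first, then at most five language: terms.
    query_parts = [keywords, f"stars:>={min_stars}"]
    if languages and len(languages) <= 5:
        query_parts.append(" ".join(f"language:{lang}" for lang in languages))
    if created_after:
        query_parts.append(f"created:>={created_after}")
    return " ".join(query_parts)

# -> "microservices stars:>=100 language:Python language:Go created:>=2022-01-01"
print(build_search_query("microservices", 100, ["Python", "Go"], "2022-01-01"))
```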
@@ -259,9 +250,6 @@ class GitHubGraphQLFetcher:
     ) -> List[Dict[str, Any]]:
         # Fetch commits for a specific repository using GraphQL.
         #
-        # This is much faster than REST API as it gets all commits in 1-2 requests
-        # instead of paginating through 100 individual REST calls.
-        #
         # Args:
         #     owner: Repository owner
         #     name: Repository name
{greenmining-1.0.6.dist-info → greenmining-1.0.8.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: greenmining
-Version: 1.0.6
+Version: 1.0.8
 Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
 Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
 License: MIT
@@ -330,7 +330,137 @@ print(f"Top patterns: {stats['top_patterns'][:5]}")
 aggregator.export_to_csv(results, "output.csv")
 ```
 
-#### 
+#### URL-Based Repository Analysis
+
+```python
+from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
+
+analyzer = LocalRepoAnalyzer(
+    max_commits=200,
+    cleanup_after=True,
+)
+
+result = analyzer.analyze_repository("https://github.com/pallets/flask")
+
+print(f"Repository: {result.name}")
+print(f"Commits analyzed: {result.total_commits}")
+print(f"Green-aware: {result.green_commits} ({result.green_commit_rate:.1%})")
+
+for commit in result.commits[:5]:
+    if commit.green_aware:
+        print(f" {commit.message[:60]}...")
+```
+
+#### Batch Analysis with Parallelism
+
+```python
+from greenmining import analyze_repositories
+
+results = analyze_repositories(
+    urls=[
+        "https://github.com/kubernetes/kubernetes",
+        "https://github.com/istio/istio",
+        "https://github.com/envoyproxy/envoy",
+    ],
+    max_commits=100,
+    parallel_workers=3,
+    energy_tracking=True,
+    energy_backend="auto",
+)
+
+for result in results:
+    print(f"{result.name}: {result.green_commit_rate:.1%} green")
+```
+
+#### Private Repository Analysis
+
+```python
+from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
+
+# HTTPS with token
+analyzer = LocalRepoAnalyzer(github_token="ghp_xxxx")
+result = analyzer.analyze_repository("https://github.com/company/private-repo")
+
+# SSH with key
+analyzer = LocalRepoAnalyzer(ssh_key_path="~/.ssh/id_rsa")
+result = analyzer.analyze_repository("git@github.com:company/private-repo.git")
+```
+
+#### Power Regression Detection
+
+```python
+from greenmining.analyzers import PowerRegressionDetector
+
+detector = PowerRegressionDetector(
+    test_command="pytest tests/ -x",
+    energy_backend="rapl",
+    threshold_percent=5.0,
+    iterations=5,
+)
+
+regressions = detector.detect(
+    repo_path="/path/to/repo",
+    baseline_commit="v1.0.0",
+    target_commit="HEAD",
+)
+
+for regression in regressions:
+    print(f"Commit {regression.sha[:8]}: +{regression.power_increase:.1f}%")
+```
+
+#### Version Power Comparison
+
+```python
+from greenmining.analyzers import VersionPowerAnalyzer
+
+analyzer = VersionPowerAnalyzer(
+    test_command="pytest tests/",
+    energy_backend="rapl",
+    iterations=10,
+    warmup_iterations=2,
+)
+
+report = analyzer.analyze_versions(
+    repo_path="/path/to/repo",
+    versions=["v1.0", "v1.1", "v1.2", "v2.0"],
+)
+
+print(report.summary())
+print(f"Trend: {report.trend}")
+print(f"Most efficient: {report.most_efficient}")
+```
+
+#### Metrics-to-Power Correlation
+
+```python
+from greenmining.analyzers import MetricsPowerCorrelator
+
+correlator = MetricsPowerCorrelator()
+correlator.fit(
+    metrics=["complexity", "nloc", "code_churn"],
+    metrics_values={
+        "complexity": [10, 20, 30, 40],
+        "nloc": [100, 200, 300, 400],
+        "code_churn": [50, 100, 150, 200],
+    },
+    power_measurements=[5.0, 8.0, 12.0, 15.0],
+)
+
+print(f"Pearson: {correlator.pearson}")
+print(f"Spearman: {correlator.spearman}")
+print(f"Feature importance: {correlator.feature_importance}")
+```
+
+#### Web Dashboard
+
+```python
+from greenmining.dashboard import run_dashboard
+
+# Launch interactive dashboard (requires pip install greenmining[dashboard])
+run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
+```
+
+#### Pipeline Batch Analysis
 
 ```python
 from greenmining.controllers.repository_controller import RepositoryController
@@ -551,17 +681,24 @@ config = Config(
 
 ### Core Capabilities
 
-- **Pattern Detection**:
-- **Keyword Analysis**:
-- **
-- **
-- **Batch Processing**:
-- **
-- **
+- **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
+- **Keyword Analysis**: 321 green software detection keywords
+- **Repository Fetching**: GraphQL API with date, star, and language filters
+- **URL-Based Analysis**: Direct PyDriller analysis from GitHub URLs (HTTPS and SSH)
+- **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
+- **Private Repository Support**: Authentication via SSH keys or GitHub tokens
+- **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
+- **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles and cloud region support (AWS, GCP, Azure)
+- **Power Regression Detection**: Identify commits that increased energy consumption
+- **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
+- **Version Power Comparison**: Compare power consumption across software versions with trend detection
+- **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
+- **Source Code Access**: Before/after source code for refactoring detection
+- **Full Process Metrics**: All 8 PyDriller process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
+- **Statistical Analysis**: Correlations, effect sizes, and temporal trends
+- **Multi-format Output**: Markdown reports, CSV exports, JSON data
+- **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
 - **Docker Support**: Pre-built images for containerized analysis
-- **Programmatic API**: Full Python API for custom workflows and integrations
-- **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
-- **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
 
 ### Energy Measurement
 
@@ -712,8 +849,15 @@ ruff check greenmining/ tests/
 - Python 3.9+
 - PyGithub >= 2.1.1
 - PyDriller >= 2.5
-- pandas >= 2.2.0
-
+- pandas >= 2.2.0
+
+**Optional dependencies:**
+
+```bash
+pip install greenmining[energy] # psutil, codecarbon (energy measurement)
+pip install greenmining[dashboard] # flask (web dashboard)
+pip install greenmining[dev] # pytest, black, ruff, mypy (development)
+```
 
 ## License
 
{greenmining-1.0.6.dist-info → greenmining-1.0.8.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-greenmining/__init__.py,sha256=
+greenmining/__init__.py,sha256=cilA0cKdT3R8t-yFdkkcCvsSGzQ0PLoPMY2OQ4Irytg,2909
 greenmining/__main__.py,sha256=NYOVS7D4w2XDLn6SyXHXPKE5GrNGOeoWSTb_KazgK5c,590
 greenmining/__version__.py,sha256=xZc02a8bS3vUJlzh8k9RoxemB1irQmq_SpVVj6Cg5M0,62
 greenmining/config.py,sha256=M4a7AwM1ErCmOY0n5Vmyoo9HPblSkTZ-HD3k2YHzs4A,8340
@@ -13,7 +13,7 @@ greenmining/analyzers/statistical_analyzer.py,sha256=DzWAcCyw42Ig3FIxTwPPBikgt2u
 greenmining/analyzers/temporal_analyzer.py,sha256=JfTcAoI20oCFMehGrSRnDqhJTXI-RUbdCTMwDOTW9-g,14259
 greenmining/analyzers/version_power_analyzer.py,sha256=2P6zOqBg-ButtIhF-4cutiwD2Q1geMY49VFUghHXXoI,8119
 greenmining/controllers/__init__.py,sha256=UiAT6zBvC1z_9cJWfzq1cLA0I4r9b2vURHipj8oDczI,180
-greenmining/controllers/repository_controller.py,sha256=
+greenmining/controllers/repository_controller.py,sha256=DM9BabUAwZJARGngCk_4wEYPw2adn8iESCiFQ7Um4LQ,3880
 greenmining/dashboard/__init__.py,sha256=Ig_291-hLrH9k3rV0whhQ1EkhiaRR8ciHiJ5s5OCBf4,141
 greenmining/dashboard/app.py,sha256=Hk6_i2qmcg6SGW7UzxglEIvUBJiloRA-hMYI-YSORcA,8604
 greenmining/energy/__init__.py,sha256=GoCYh7hitWBoPMtan1HF1yezCHi7o4sa_YUJgGkeJc8,558
@@ -28,17 +28,17 @@ greenmining/models/analysis_result.py,sha256=YICTCEcrJxZ1R8Xaio3AZOjCGwMzC_62BMA
 greenmining/models/commit.py,sha256=mnRDWSiIyGtJeGXI8sav9hukWUyVFpoNe6GixRlZjY4,2439
 greenmining/models/repository.py,sha256=SKjS01onOptpMioumtAPZxKpKheHAeVXnXyvatl7CfM,2856
 greenmining/presenters/__init__.py,sha256=d1CMtqtUAHYHYNzigPyjtGOUtnH1drtUwf7-bFQq2B8,138
-greenmining/presenters/console_presenter.py,sha256=
+greenmining/presenters/console_presenter.py,sha256=qagn2c2aOym0WNKV8n175MQ-BTheLjrXzW8c1OafzAQ,4904
 greenmining/services/__init__.py,sha256=ZEMOVut0KRdume_vz58beSNps3YgeoGBXmUjEqNgIhc,690
 greenmining/services/commit_extractor.py,sha256=Fz2WTWjIZ_vQhSfkJKnWpJnBpI2nm0KacA4qYAvCpSE,8451
 greenmining/services/data_aggregator.py,sha256=TsFT0oGOnnHk0QGZ1tT6ZhKGc5X1H1D1u7-7OpiPo7Y,19566
 greenmining/services/data_analyzer.py,sha256=f0nlJkPAclHHCzzTyQW5bjhYrgE0XXiR1x7_o3fJaDs,9732
-greenmining/services/github_fetcher.py,sha256=
-greenmining/services/github_graphql_fetcher.py,sha256=
+greenmining/services/github_fetcher.py,sha256=sdkS-LhHmX7mgMdlClCwEUVnZrItc0Pt6FVtlWk5iLU,106
+greenmining/services/github_graphql_fetcher.py,sha256=HvADlXGqrqfzqnsI9xJQifhy8rQ5fQzosdlCBdNxjsU,11467
 greenmining/services/local_repo_analyzer.py,sha256=5DMN9RIyGXNdsOlIDV4Mp0fPavbB69oBA9us17P5cNo,24668
 greenmining/services/reports.py,sha256=Vrw_pBNmVw2mTAf1dpcAqjBe6gXv-O4w_XweoVTt7L8,23392
-greenmining-1.0.
-greenmining-1.0.
-greenmining-1.0.
-greenmining-1.0.
-greenmining-1.0.
+greenmining-1.0.8.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
+greenmining-1.0.8.dist-info/METADATA,sha256=46ygTrzFHVKFOPpF9gb9D_HbTCWs1ZN0VH4v1I7U7Zg,30913
+greenmining-1.0.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+greenmining-1.0.8.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
+greenmining-1.0.8.dist-info/RECORD,,
{greenmining-1.0.6.dist-info → greenmining-1.0.8.dist-info}/WHEEL
File without changes
{greenmining-1.0.6.dist-info → greenmining-1.0.8.dist-info}/licenses/LICENSE
File without changes
{greenmining-1.0.6.dist-info → greenmining-1.0.8.dist-info}/top_level.txt
File without changes