greenmining 1.0.7__tar.gz → 1.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {greenmining-1.0.7/greenmining.egg-info → greenmining-1.0.9}/PKG-INFO +1 -1
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/__init__.py +1 -1
- greenmining-1.0.9/greenmining/controllers/repository_controller.py +100 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/presenters/console_presenter.py +1 -1
- greenmining-1.0.9/greenmining/services/github_fetcher.py +2 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/services/github_graphql_fetcher.py +32 -39
- {greenmining-1.0.7 → greenmining-1.0.9/greenmining.egg-info}/PKG-INFO +1 -1
- {greenmining-1.0.7 → greenmining-1.0.9}/pyproject.toml +1 -1
- greenmining-1.0.7/greenmining/controllers/repository_controller.py +0 -231
- greenmining-1.0.7/greenmining/services/github_fetcher.py +0 -210
- {greenmining-1.0.7 → greenmining-1.0.9}/CHANGELOG.md +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/LICENSE +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/MANIFEST.in +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/README.md +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/__main__.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/__version__.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/analyzers/__init__.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/analyzers/code_diff_analyzer.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/analyzers/metrics_power_correlator.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/analyzers/power_regression.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/analyzers/qualitative_analyzer.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/analyzers/statistical_analyzer.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/analyzers/temporal_analyzer.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/analyzers/version_power_analyzer.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/config.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/controllers/__init__.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/dashboard/__init__.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/dashboard/app.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/energy/__init__.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/energy/base.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/energy/carbon_reporter.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/energy/codecarbon_meter.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/energy/cpu_meter.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/energy/rapl.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/gsf_patterns.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/models/__init__.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/models/aggregated_stats.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/models/analysis_result.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/models/commit.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/models/repository.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/presenters/__init__.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/services/__init__.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/services/commit_extractor.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/services/data_aggregator.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/services/data_analyzer.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/services/local_repo_analyzer.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/services/reports.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining/utils.py +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining.egg-info/SOURCES.txt +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining.egg-info/dependency_links.txt +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining.egg-info/requires.txt +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/greenmining.egg-info/top_level.txt +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/setup.cfg +0 -0
- {greenmining-1.0.7 → greenmining-1.0.9}/setup.py +0 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Repository Controller - Handles repository fetching operations.
|
|
2
|
+
|
|
3
|
+
from tqdm import tqdm
|
|
4
|
+
|
|
5
|
+
from greenmining.config import Config
|
|
6
|
+
from greenmining.models.repository import Repository
|
|
7
|
+
from greenmining.services.github_graphql_fetcher import GitHubGraphQLFetcher
|
|
8
|
+
from greenmining.utils import colored_print, load_json_file, save_json_file
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class RepositoryController:
|
|
12
|
+
# Controller for GitHub repository operations using GraphQL API.
|
|
13
|
+
|
|
14
|
+
def __init__(self, config: Config):
|
|
15
|
+
# Initialize controller with configuration.
|
|
16
|
+
self.config = config
|
|
17
|
+
self.graphql_fetcher = GitHubGraphQLFetcher(config.GITHUB_TOKEN)
|
|
18
|
+
|
|
19
|
+
def fetch_repositories(
|
|
20
|
+
self,
|
|
21
|
+
max_repos: int = None,
|
|
22
|
+
min_stars: int = None,
|
|
23
|
+
languages: list[str] = None,
|
|
24
|
+
keywords: str = None,
|
|
25
|
+
created_after: str = None,
|
|
26
|
+
created_before: str = None,
|
|
27
|
+
pushed_after: str = None,
|
|
28
|
+
pushed_before: str = None,
|
|
29
|
+
) -> list[Repository]:
|
|
30
|
+
# Fetch repositories from GitHub using GraphQL API.
|
|
31
|
+
max_repos = max_repos or self.config.MAX_REPOS
|
|
32
|
+
min_stars = min_stars or self.config.MIN_STARS
|
|
33
|
+
languages = languages or self.config.SUPPORTED_LANGUAGES
|
|
34
|
+
keywords = keywords or "microservices"
|
|
35
|
+
|
|
36
|
+
colored_print(f"Fetching up to {max_repos} repositories...", "cyan")
|
|
37
|
+
colored_print(f" Keywords: {keywords}", "cyan")
|
|
38
|
+
colored_print(f" Filters: min_stars={min_stars}", "cyan")
|
|
39
|
+
|
|
40
|
+
if created_after or created_before:
|
|
41
|
+
colored_print(
|
|
42
|
+
f" Created: {created_after or 'any'} to {created_before or 'any'}", "cyan"
|
|
43
|
+
)
|
|
44
|
+
if pushed_after or pushed_before:
|
|
45
|
+
colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
|
|
46
|
+
|
|
47
|
+
try:
|
|
48
|
+
# Execute GraphQL search
|
|
49
|
+
repositories = self.graphql_fetcher.search_repositories(
|
|
50
|
+
keywords=keywords,
|
|
51
|
+
max_repos=max_repos,
|
|
52
|
+
min_stars=min_stars,
|
|
53
|
+
languages=languages,
|
|
54
|
+
created_after=created_after,
|
|
55
|
+
created_before=created_before,
|
|
56
|
+
pushed_after=pushed_after,
|
|
57
|
+
pushed_before=pushed_before,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# Save to file
|
|
61
|
+
repo_dicts = [r.to_dict() for r in repositories]
|
|
62
|
+
save_json_file(repo_dicts, self.config.REPOS_FILE)
|
|
63
|
+
|
|
64
|
+
colored_print(f"Fetched {len(repositories)} repositories", "green")
|
|
65
|
+
colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
|
|
66
|
+
|
|
67
|
+
return repositories
|
|
68
|
+
|
|
69
|
+
except Exception as e:
|
|
70
|
+
colored_print(f"Error fetching repositories: {e}", "red")
|
|
71
|
+
raise
|
|
72
|
+
|
|
73
|
+
def load_repositories(self) -> list[Repository]:
|
|
74
|
+
# Load repositories from file.
|
|
75
|
+
if not self.config.REPOS_FILE.exists():
|
|
76
|
+
raise FileNotFoundError(f"No repositories file found at {self.config.REPOS_FILE}")
|
|
77
|
+
|
|
78
|
+
repo_dicts = load_json_file(self.config.REPOS_FILE)
|
|
79
|
+
return [Repository.from_dict(r) for r in repo_dicts]
|
|
80
|
+
|
|
81
|
+
def get_repository_stats(self, repositories: list[Repository]) -> dict:
|
|
82
|
+
# Get statistics about fetched repositories.
|
|
83
|
+
if not repositories:
|
|
84
|
+
return {}
|
|
85
|
+
|
|
86
|
+
return {
|
|
87
|
+
"total": len(repositories),
|
|
88
|
+
"by_language": self._count_by_language(repositories),
|
|
89
|
+
"total_stars": sum(r.stars for r in repositories),
|
|
90
|
+
"avg_stars": sum(r.stars for r in repositories) / len(repositories),
|
|
91
|
+
"top_repo": max(repositories, key=lambda r: r.stars).full_name,
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
def _count_by_language(self, repositories: list[Repository]) -> dict:
|
|
95
|
+
# Count repositories by language.
|
|
96
|
+
counts = {}
|
|
97
|
+
for repo in repositories:
|
|
98
|
+
lang = repo.language or "Unknown"
|
|
99
|
+
counts[lang] = counts.get(lang, 0) + 1
|
|
100
|
+
return counts
|
|
@@ -113,7 +113,7 @@ class ConsolePresenter:
|
|
|
113
113
|
|
|
114
114
|
table_data = []
|
|
115
115
|
for phase, info in status.items():
|
|
116
|
-
status_icon = "" if info.get("completed") else "
|
|
116
|
+
status_icon = "done" if info.get("completed") else "pending"
|
|
117
117
|
table_data.append(
|
|
118
118
|
[status_icon, phase, info.get("file", "N/A"), info.get("size", "N/A")]
|
|
119
119
|
)
|
|
@@ -1,7 +1,4 @@
|
|
|
1
|
-
# GitHub GraphQL API fetcher for
|
|
2
|
-
#
|
|
3
|
-
# GraphQL allows fetching exactly the data you need in a single request,
|
|
4
|
-
# reducing API calls and improving rate limit efficiency.
|
|
1
|
+
# GitHub GraphQL API fetcher for repository search and data retrieval.
|
|
5
2
|
|
|
6
3
|
import json
|
|
7
4
|
import time
|
|
@@ -14,12 +11,6 @@ from greenmining.models.repository import Repository
|
|
|
14
11
|
|
|
15
12
|
class GitHubGraphQLFetcher:
|
|
16
13
|
# Fetch GitHub repositories using GraphQL API v4.
|
|
17
|
-
#
|
|
18
|
-
# Benefits over REST API:
|
|
19
|
-
# - Fetch repos + commits in 1 request instead of 100+ REST calls
|
|
20
|
-
# - Get exactly the fields you need (no over-fetching)
|
|
21
|
-
# - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
|
|
22
|
-
# - More powerful search capabilities
|
|
23
14
|
|
|
24
15
|
GRAPHQL_ENDPOINT = "https://api.github.com/graphql"
|
|
25
16
|
|
|
@@ -153,7 +144,7 @@ class GitHubGraphQLFetcher:
|
|
|
153
144
|
nodes = search.get("nodes", [])
|
|
154
145
|
for node in nodes:
|
|
155
146
|
if node and len(repositories) < max_repos:
|
|
156
|
-
repo = self._parse_repository(node)
|
|
147
|
+
repo = self._parse_repository(node, len(repositories) + 1)
|
|
157
148
|
repositories.append(repo)
|
|
158
149
|
|
|
159
150
|
# Check pagination
|
|
@@ -193,10 +184,10 @@ class GitHubGraphQLFetcher:
|
|
|
193
184
|
# Star count
|
|
194
185
|
query_parts.append(f"stars:>={min_stars}")
|
|
195
186
|
|
|
196
|
-
# Languages
|
|
197
|
-
if languages:
|
|
198
|
-
lang_query = "
|
|
199
|
-
query_parts.append(
|
|
187
|
+
# Languages - skip filter if more than 5 to avoid exceeding GitHub query limits
|
|
188
|
+
if languages and len(languages) <= 5:
|
|
189
|
+
lang_query = " ".join([f"language:{lang}" for lang in languages])
|
|
190
|
+
query_parts.append(lang_query)
|
|
200
191
|
|
|
201
192
|
# Date filters
|
|
202
193
|
if created_after:
|
|
@@ -221,37 +212,42 @@ class GitHubGraphQLFetcher:
|
|
|
221
212
|
response.raise_for_status()
|
|
222
213
|
return response.json()
|
|
223
214
|
|
|
224
|
-
def _parse_repository(self, node: Dict[str, Any]) -> Repository:
|
|
215
|
+
def _parse_repository(self, node: Dict[str, Any], repo_id: int = 0) -> Repository:
|
|
225
216
|
# Parse GraphQL repository node to Repository object.
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
217
|
+
full_name = node.get("nameWithOwner", "")
|
|
218
|
+
owner = full_name.split("/")[0] if "/" in full_name else ""
|
|
219
|
+
url = node.get("url", "")
|
|
220
|
+
|
|
221
|
+
# Extract primary language
|
|
222
|
+
lang_node = node.get("primaryLanguage") or {}
|
|
223
|
+
language = lang_node.get("name")
|
|
232
224
|
|
|
233
225
|
# Extract license
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
226
|
+
license_info = node.get("licenseInfo") or {}
|
|
227
|
+
license_name = license_info.get("name")
|
|
228
|
+
|
|
229
|
+
# Extract default branch safely (can be null for empty repos)
|
|
230
|
+
branch_ref = node.get("defaultBranchRef") or {}
|
|
231
|
+
main_branch = branch_ref.get("name", "main")
|
|
237
232
|
|
|
238
233
|
return Repository(
|
|
234
|
+
repo_id=repo_id,
|
|
239
235
|
name=node.get("name", ""),
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
url=
|
|
236
|
+
owner=owner,
|
|
237
|
+
full_name=full_name,
|
|
238
|
+
url=url,
|
|
239
|
+
clone_url=f"{url}.git" if url else "",
|
|
240
|
+
language=language,
|
|
243
241
|
stars=node.get("stargazerCount", 0),
|
|
244
242
|
forks=node.get("forkCount", 0),
|
|
245
|
-
watchers=node.get("watchers"
|
|
246
|
-
|
|
247
|
-
|
|
243
|
+
watchers=(node.get("watchers") or {}).get("totalCount", 0),
|
|
244
|
+
open_issues=0,
|
|
245
|
+
last_updated=node.get("updatedAt", ""),
|
|
248
246
|
created_at=node.get("createdAt", ""),
|
|
249
|
-
|
|
250
|
-
|
|
247
|
+
description=node.get("description", ""),
|
|
248
|
+
main_branch=main_branch,
|
|
249
|
+
archived=node.get("isArchived", False),
|
|
251
250
|
license=license_name,
|
|
252
|
-
is_fork=node.get("isFork", False),
|
|
253
|
-
is_archived=node.get("isArchived", False),
|
|
254
|
-
default_branch=node.get("defaultBranchRef", {}).get("name", "main"),
|
|
255
251
|
)
|
|
256
252
|
|
|
257
253
|
def get_repository_commits(
|
|
@@ -259,9 +255,6 @@ class GitHubGraphQLFetcher:
|
|
|
259
255
|
) -> List[Dict[str, Any]]:
|
|
260
256
|
# Fetch commits for a specific repository using GraphQL.
|
|
261
257
|
#
|
|
262
|
-
# This is much faster than REST API as it gets all commits in 1-2 requests
|
|
263
|
-
# instead of paginating through 100 individual REST calls.
|
|
264
|
-
#
|
|
265
258
|
# Args:
|
|
266
259
|
# owner: Repository owner
|
|
267
260
|
# name: Repository name
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "greenmining"
|
|
7
|
-
version = "1.0.7"
|
|
7
|
+
version = "1.0.9"
|
|
8
8
|
description = "An empirical Python library for Mining Software Repositories (MSR) in Green IT research"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -1,231 +0,0 @@
|
|
|
1
|
-
# Repository Controller - Handles repository fetching operations.
|
|
2
|
-
|
|
3
|
-
# ============================================================================
|
|
4
|
-
# OLD REST API IMPLEMENTATION (DEADCODE - REPLACED WITH GRAPHQL)
|
|
5
|
-
# ============================================================================
|
|
6
|
-
# from github import Github, GithubException
|
|
7
|
-
# from tqdm import tqdm
|
|
8
|
-
#
|
|
9
|
-
# from greenmining.config import Config
|
|
10
|
-
# from greenmining.models.repository import Repository
|
|
11
|
-
# from greenmining.utils import colored_print, load_json_file, save_json_file
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
# class RepositoryController:
|
|
15
|
-
# # Controller for GitHub repository operations.
|
|
16
|
-
#
|
|
17
|
-
# def __init__(self, config: Config):
|
|
18
|
-
# # Initialize controller with configuration.
|
|
19
|
-
# self.config = config
|
|
20
|
-
# self.github = Github(config.GITHUB_TOKEN)
|
|
21
|
-
# ============================================================================
|
|
22
|
-
|
|
23
|
-
# NEW GRAPHQL IMPLEMENTATION (5-10x faster)
|
|
24
|
-
from tqdm import tqdm
|
|
25
|
-
|
|
26
|
-
from greenmining.config import Config
|
|
27
|
-
from greenmining.models.repository import Repository
|
|
28
|
-
from greenmining.services.github_graphql_fetcher import GitHubGraphQLFetcher
|
|
29
|
-
from greenmining.utils import colored_print, load_json_file, save_json_file
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class RepositoryController:
|
|
33
|
-
# Controller for GitHub repository operations using GraphQL API.
|
|
34
|
-
|
|
35
|
-
def __init__(self, config: Config):
|
|
36
|
-
# Initialize controller with configuration.
|
|
37
|
-
self.config = config
|
|
38
|
-
self.graphql_fetcher = GitHubGraphQLFetcher(config.GITHUB_TOKEN)
|
|
39
|
-
|
|
40
|
-
# ============================================================================
|
|
41
|
-
# OLD REST API METHOD (DEADCODE - 10x slower, high rate limit cost)
|
|
42
|
-
# ============================================================================
|
|
43
|
-
# def fetch_repositories(
|
|
44
|
-
# self,
|
|
45
|
-
# max_repos: int = None,
|
|
46
|
-
# min_stars: int = None,
|
|
47
|
-
# languages: list[str] = None,
|
|
48
|
-
# keywords: str = None,
|
|
49
|
-
# created_after: str = None,
|
|
50
|
-
# created_before: str = None,
|
|
51
|
-
# pushed_after: str = None,
|
|
52
|
-
# pushed_before: str = None,
|
|
53
|
-
# ) -> list[Repository]:
|
|
54
|
-
# # Fetch repositories from GitHub using REST API (slow).
|
|
55
|
-
# max_repos = max_repos or self.config.MAX_REPOS
|
|
56
|
-
# min_stars = min_stars or self.config.MIN_STARS
|
|
57
|
-
# languages = languages or self.config.SUPPORTED_LANGUAGES
|
|
58
|
-
# keywords = keywords or "microservices"
|
|
59
|
-
#
|
|
60
|
-
# colored_print(f" Fetching up to {max_repos} repositories...", "cyan")
|
|
61
|
-
# colored_print(f" Keywords: {keywords}", "cyan")
|
|
62
|
-
# colored_print(f" Filters: min_stars={min_stars}", "cyan")
|
|
63
|
-
#
|
|
64
|
-
# if created_after or created_before:
|
|
65
|
-
# colored_print(
|
|
66
|
-
# f" Created: {created_after or 'any'} to {created_before or 'any'}", "cyan"
|
|
67
|
-
# )
|
|
68
|
-
# if pushed_after or pushed_before:
|
|
69
|
-
# colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
|
|
70
|
-
#
|
|
71
|
-
# # Build search query with temporal filters
|
|
72
|
-
# query = self._build_temporal_query(
|
|
73
|
-
# keywords, min_stars, created_after, created_before, pushed_after, pushed_before
|
|
74
|
-
# )
|
|
75
|
-
#
|
|
76
|
-
# try:
|
|
77
|
-
# # Execute search (REST API - many requests)
|
|
78
|
-
# search_results = self.github.search_repositories(
|
|
79
|
-
# query=query, sort="stars", order="desc"
|
|
80
|
-
# )
|
|
81
|
-
#
|
|
82
|
-
# total_found = search_results.totalCount
|
|
83
|
-
# colored_print(f" Found {total_found} repositories", "green")
|
|
84
|
-
#
|
|
85
|
-
# # Fetch repositories (1 request per repo = slow)
|
|
86
|
-
# repositories = []
|
|
87
|
-
# with tqdm(total=min(max_repos, total_found), desc="Fetching", unit="repo") as pbar:
|
|
88
|
-
# for idx, repo in enumerate(search_results):
|
|
89
|
-
# if idx >= max_repos:
|
|
90
|
-
# break
|
|
91
|
-
#
|
|
92
|
-
# try:
|
|
93
|
-
# repo_model = Repository.from_github_repo(repo, idx + 1)
|
|
94
|
-
# repositories.append(repo_model)
|
|
95
|
-
# pbar.update(1)
|
|
96
|
-
# except GithubException as e:
|
|
97
|
-
# colored_print(f" Error: {repo.full_name}: {e}", "yellow")
|
|
98
|
-
# continue
|
|
99
|
-
#
|
|
100
|
-
# # Save to file
|
|
101
|
-
# repo_dicts = [r.to_dict() for r in repositories]
|
|
102
|
-
# save_json_file(repo_dicts, self.config.REPOS_FILE)
|
|
103
|
-
#
|
|
104
|
-
# colored_print(f" Fetched {len(repositories)} repositories", "green")
|
|
105
|
-
# colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
|
|
106
|
-
#
|
|
107
|
-
# return repositories
|
|
108
|
-
#
|
|
109
|
-
# except Exception as e:
|
|
110
|
-
# colored_print(f" Error fetching repositories: {e}", "red")
|
|
111
|
-
# raise
|
|
112
|
-
# ============================================================================
|
|
113
|
-
|
|
114
|
-
def fetch_repositories(
|
|
115
|
-
self,
|
|
116
|
-
max_repos: int = None,
|
|
117
|
-
min_stars: int = None,
|
|
118
|
-
languages: list[str] = None,
|
|
119
|
-
keywords: str = None,
|
|
120
|
-
created_after: str = None,
|
|
121
|
-
created_before: str = None,
|
|
122
|
-
pushed_after: str = None,
|
|
123
|
-
pushed_before: str = None,
|
|
124
|
-
) -> list[Repository]:
|
|
125
|
-
# Fetch repositories from GitHub using GraphQL API (5-10x faster).
|
|
126
|
-
max_repos = max_repos or self.config.MAX_REPOS
|
|
127
|
-
min_stars = min_stars or self.config.MIN_STARS
|
|
128
|
-
languages = languages or self.config.SUPPORTED_LANGUAGES
|
|
129
|
-
keywords = keywords or "microservices"
|
|
130
|
-
|
|
131
|
-
colored_print(f"🚀 Fetching up to {max_repos} repositories (GraphQL API)...", "cyan")
|
|
132
|
-
colored_print(f" Keywords: {keywords}", "cyan")
|
|
133
|
-
colored_print(f" Filters: min_stars={min_stars}", "cyan")
|
|
134
|
-
|
|
135
|
-
if created_after or created_before:
|
|
136
|
-
colored_print(
|
|
137
|
-
f" Created: {created_after or 'any'} to {created_before or 'any'}", "cyan"
|
|
138
|
-
)
|
|
139
|
-
if pushed_after or pushed_before:
|
|
140
|
-
colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
|
|
141
|
-
|
|
142
|
-
try:
|
|
143
|
-
# Use GraphQL API (much faster!)
|
|
144
|
-
repositories = self.graphql_fetcher.search_repositories(
|
|
145
|
-
keywords=keywords,
|
|
146
|
-
max_repos=max_repos,
|
|
147
|
-
min_stars=min_stars,
|
|
148
|
-
languages=languages,
|
|
149
|
-
created_after=created_after,
|
|
150
|
-
created_before=created_before,
|
|
151
|
-
pushed_after=pushed_after,
|
|
152
|
-
pushed_before=pushed_before,
|
|
153
|
-
)
|
|
154
|
-
|
|
155
|
-
# Save to file
|
|
156
|
-
repo_dicts = [r.to_dict() for r in repositories]
|
|
157
|
-
save_json_file(repo_dicts, self.config.REPOS_FILE)
|
|
158
|
-
|
|
159
|
-
colored_print(f"✓ Fetched {len(repositories)} repositories", "green")
|
|
160
|
-
colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
|
|
161
|
-
colored_print(f" API: GraphQL (5-10x faster than REST)", "green")
|
|
162
|
-
|
|
163
|
-
return repositories
|
|
164
|
-
|
|
165
|
-
except Exception as e:
|
|
166
|
-
colored_print(f"✗ Error fetching repositories: {e}", "red")
|
|
167
|
-
raise
|
|
168
|
-
|
|
169
|
-
# ============================================================================
|
|
170
|
-
# OLD REST API HELPER (DEADCODE - handled by GraphQL fetcher now)
|
|
171
|
-
# ============================================================================
|
|
172
|
-
# def _build_temporal_query(
|
|
173
|
-
# self,
|
|
174
|
-
# keywords: str,
|
|
175
|
-
# min_stars: int,
|
|
176
|
-
# created_after: str = None,
|
|
177
|
-
# created_before: str = None,
|
|
178
|
-
# pushed_after: str = None,
|
|
179
|
-
# pushed_before: str = None,
|
|
180
|
-
# ) -> str:
|
|
181
|
-
# # Build GitHub search query with temporal constraints.
|
|
182
|
-
# query_parts = [keywords, f"stars:>={min_stars}"]
|
|
183
|
-
#
|
|
184
|
-
# # Temporal filters
|
|
185
|
-
# if created_after and created_before:
|
|
186
|
-
# query_parts.append(f"created:{created_after}..{created_before}")
|
|
187
|
-
# elif created_after:
|
|
188
|
-
# query_parts.append(f"created:>={created_after}")
|
|
189
|
-
# elif created_before:
|
|
190
|
-
# query_parts.append(f"created:<={created_before}")
|
|
191
|
-
#
|
|
192
|
-
# if pushed_after and pushed_before:
|
|
193
|
-
# query_parts.append(f"pushed:{pushed_after}..{pushed_before}")
|
|
194
|
-
# elif pushed_after:
|
|
195
|
-
# query_parts.append(f"pushed:>={pushed_after}")
|
|
196
|
-
# elif pushed_before:
|
|
197
|
-
# query_parts.append(f"pushed:<={pushed_before}")
|
|
198
|
-
#
|
|
199
|
-
# query = " ".join(query_parts)
|
|
200
|
-
# colored_print(f" Query: {query}", "cyan")
|
|
201
|
-
# return query
|
|
202
|
-
# ============================================================================
|
|
203
|
-
|
|
204
|
-
def load_repositories(self) -> list[Repository]:
|
|
205
|
-
# Load repositories from file.
|
|
206
|
-
if not self.config.REPOS_FILE.exists():
|
|
207
|
-
raise FileNotFoundError(f"No repositories file found at {self.config.REPOS_FILE}")
|
|
208
|
-
|
|
209
|
-
repo_dicts = load_json_file(self.config.REPOS_FILE)
|
|
210
|
-
return [Repository.from_dict(r) for r in repo_dicts]
|
|
211
|
-
|
|
212
|
-
def get_repository_stats(self, repositories: list[Repository]) -> dict:
|
|
213
|
-
# Get statistics about fetched repositories.
|
|
214
|
-
if not repositories:
|
|
215
|
-
return {}
|
|
216
|
-
|
|
217
|
-
return {
|
|
218
|
-
"total": len(repositories),
|
|
219
|
-
"by_language": self._count_by_language(repositories),
|
|
220
|
-
"total_stars": sum(r.stars for r in repositories),
|
|
221
|
-
"avg_stars": sum(r.stars for r in repositories) / len(repositories),
|
|
222
|
-
"top_repo": max(repositories, key=lambda r: r.stars).full_name,
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
def _count_by_language(self, repositories: list[Repository]) -> dict:
|
|
226
|
-
# Count repositories by language.
|
|
227
|
-
counts = {}
|
|
228
|
-
for repo in repositories:
|
|
229
|
-
lang = repo.language or "Unknown"
|
|
230
|
-
counts[lang] = counts.get(lang, 0) + 1
|
|
231
|
-
return counts
|
|
@@ -1,210 +0,0 @@
|
|
|
1
|
-
# ================================================================================
|
|
2
|
-
# DEADCODE - OLD REST API IMPLEMENTATION
|
|
3
|
-
# ================================================================================
|
|
4
|
-
#
|
|
5
|
-
# This file contains the OLD GitHub REST API implementation.
|
|
6
|
-
# It has been REPLACED by GitHubGraphQLFetcher for better performance.
|
|
7
|
-
#
|
|
8
|
-
# Performance comparison:
|
|
9
|
-
# REST API: 10+ requests for 100 repos, ~2 minutes
|
|
10
|
-
# GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
|
|
11
|
-
#
|
|
12
|
-
# USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
|
|
13
|
-
#
|
|
14
|
-
# This file is kept for reference only. Do not use in production.
|
|
15
|
-
#
|
|
16
|
-
# ================================================================================
|
|
17
|
-
|
|
18
|
-
# GitHub repository fetcher for green microservices mining.
|
|
19
|
-
|
|
20
|
-
# from __future__ import annotations
|
|
21
|
-
#
|
|
22
|
-
# from datetime import datetime
|
|
23
|
-
# from pathlib import Path
|
|
24
|
-
# from typing import Any, Dict, List, Optional
|
|
25
|
-
#
|
|
26
|
-
# from github import Github, GithubException, RateLimitExceededException
|
|
27
|
-
# from tqdm import tqdm
|
|
28
|
-
#
|
|
29
|
-
# from greenmining.config import get_config
|
|
30
|
-
# from greenmining.utils import (
|
|
31
|
-
# colored_print,
|
|
32
|
-
# format_timestamp,
|
|
33
|
-
# print_banner,
|
|
34
|
-
# save_json_file,
|
|
35
|
-
# )
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
# class GitHubFetcher:
|
|
39
|
-
# # Fetches microservice repositories from GitHub using REST API (SLOW).
|
|
40
|
-
#
|
|
41
|
-
# def __init__(
|
|
42
|
-
# self,
|
|
43
|
-
# token: str,
|
|
44
|
-
# max_repos: int = 100,
|
|
45
|
-
# min_stars: int = 100,
|
|
46
|
-
# languages: Optional[list[str]] = None,
|
|
47
|
-
# created_after: Optional[str] = None,
|
|
48
|
-
# created_before: Optional[str] = None,
|
|
49
|
-
# pushed_after: Optional[str] = None,
|
|
50
|
-
# pushed_before: Optional[str] = None,
|
|
51
|
-
# ):
|
|
52
|
-
# # Initialize GitHub fetcher.
|
|
53
|
-
# self.github = Github(token)
|
|
54
|
-
# self.max_repos = max_repos
|
|
55
|
-
# self.min_stars = min_stars
|
|
56
|
-
# self.languages = languages or [
|
|
57
|
-
# "Java",
|
|
58
|
-
# "Python",
|
|
59
|
-
# "Go",
|
|
60
|
-
# "JavaScript",
|
|
61
|
-
# "TypeScript",
|
|
62
|
-
# "C#",
|
|
63
|
-
# "Rust",
|
|
64
|
-
# ]
|
|
65
|
-
# self.created_after = created_after
|
|
66
|
-
# self.created_before = created_before
|
|
67
|
-
# self.pushed_after = pushed_after
|
|
68
|
-
# self.pushed_before = pushed_before
|
|
69
|
-
#
|
|
70
|
-
# def search_repositories(self) -> list[dict[str, Any]]:
|
|
71
|
-
# # Search for microservice repositories (REST API - many requests).
|
|
72
|
-
# repositories = []
|
|
73
|
-
# keywords = ["microservices", "microservice-architecture", "cloud-native"]
|
|
74
|
-
#
|
|
75
|
-
# colored_print(f"Searching for repositories with keywords: {', '.join(keywords)}", "cyan")
|
|
76
|
-
# colored_print(
|
|
77
|
-
# f"Filters: min_stars={self.min_stars}, languages={', '.join(self.languages)}", "cyan"
|
|
78
|
-
# )
|
|
79
|
-
#
|
|
80
|
-
# # Build search query with temporal filters
|
|
81
|
-
# query = self._build_temporal_query(keywords)
|
|
82
|
-
#
|
|
83
|
-
# try:
|
|
84
|
-
# # Execute search (1 request)
|
|
85
|
-
# search_results = self.github.search_repositories(
|
|
86
|
-
# query=query, sort="stars", order="desc"
|
|
87
|
-
# )
|
|
88
|
-
#
|
|
89
|
-
# total_found = search_results.totalCount
|
|
90
|
-
# colored_print(f"Found {total_found} repositories matching criteria", "green")
|
|
91
|
-
#
|
|
92
|
-
# # Fetch repository details with progress bar (1 request per repo = SLOW)
|
|
93
|
-
# with tqdm(
|
|
94
|
-
# total=min(self.max_repos, total_found), desc="Fetching repositories", unit="repo"
|
|
95
|
-
# ) as pbar:
|
|
96
|
-
# for idx, repo in enumerate(search_results):
|
|
97
|
-
# if idx >= self.max_repos:
|
|
98
|
-
# break
|
|
99
|
-
#
|
|
100
|
-
# try:
|
|
101
|
-
# repo_data = self._extract_repo_metadata(repo, idx + 1)
|
|
102
|
-
# repositories.append(repo_data)
|
|
103
|
-
# pbar.update(1)
|
|
104
|
-
# except GithubException as e:
|
|
105
|
-
# colored_print(f"Error fetching {repo.full_name}: {e}", "yellow")
|
|
106
|
-
# continue
|
|
107
|
-
# except RateLimitExceededException:
|
|
108
|
-
# colored_print("Rate limit exceeded. Waiting...", "red")
|
|
109
|
-
# self._handle_rate_limit()
|
|
110
|
-
# continue
|
|
111
|
-
#
|
|
112
|
-
# return repositories
|
|
113
|
-
#
|
|
114
|
-
# except GithubException as e:
|
|
115
|
-
# colored_print(f"GitHub API error: {e}", "red")
|
|
116
|
-
# raise
|
|
117
|
-
# except Exception as e:
|
|
118
|
-
# colored_print(f"Unexpected error: {e}", "red")
|
|
119
|
-
# raise
|
|
120
|
-
#
|
|
121
|
-
# def _extract_repo_metadata(self, repo, repo_id: int) -> dict[str, Any]:
|
|
122
|
-
# # Extract metadata from repository object.
|
|
123
|
-
# return {
|
|
124
|
-
# "repo_id": repo_id,
|
|
125
|
-
# "name": repo.name,
|
|
126
|
-
# "owner": repo.owner.login,
|
|
127
|
-
# "full_name": repo.full_name,
|
|
128
|
-
# "url": repo.html_url,
|
|
129
|
-
# "clone_url": repo.clone_url,
|
|
130
|
-
# "language": repo.language,
|
|
131
|
-
# "stars": repo.stargazers_count,
|
|
132
|
-
# "forks": repo.forks_count,
|
|
133
|
-
# "watchers": repo.watchers_count,
|
|
134
|
-
# "open_issues": repo.open_issues_count,
|
|
135
|
-
# "last_updated": repo.updated_at.isoformat() if repo.updated_at else None,
|
|
136
|
-
# "created_at": repo.created_at.isoformat() if repo.created_at else None,
|
|
137
|
-
# "description": repo.description or "",
|
|
138
|
-
# "main_branch": repo.default_branch,
|
|
139
|
-
# "topics": repo.get_topics() if hasattr(repo, "get_topics") else [],
|
|
140
|
-
# "size": repo.size,
|
|
141
|
-
# "has_issues": repo.has_issues,
|
|
142
|
-
# "has_wiki": repo.has_wiki,
|
|
143
|
-
# "archived": repo.archived,
|
|
144
|
-
# "license": repo.license.name if repo.license else None,
|
|
145
|
-
# }
|
|
146
|
-
#
|
|
147
|
-
# def _build_temporal_query(self, keywords: list[str]) -> str:
|
|
148
|
-
# # Build GitHub search query with temporal constraints.
|
|
149
|
-
# query_parts = []
|
|
150
|
-
#
|
|
151
|
-
# # Keywords
|
|
152
|
-
# keyword_query = " OR ".join(keywords)
|
|
153
|
-
# query_parts.append(f"({keyword_query})")
|
|
154
|
-
#
|
|
155
|
-
# # Languages
|
|
156
|
-
# language_query = " OR ".join([f"language:{lang}" for lang in self.languages])
|
|
157
|
-
# query_parts.append(f"({language_query})")
|
|
158
|
-
#
|
|
159
|
-
# # Stars
|
|
160
|
-
# query_parts.append(f"stars:>={self.min_stars}")
|
|
161
|
-
#
|
|
162
|
-
# # Archived filter
|
|
163
|
-
# query_parts.append("archived:false")
|
|
164
|
-
#
|
|
165
|
-
# # Temporal filters
|
|
166
|
-
# if self.created_after and self.created_before:
|
|
167
|
-
# query_parts.append(f"created:{self.created_after}..{self.created_before}")
|
|
168
|
-
# elif self.created_after:
|
|
169
|
-
# query_parts.append(f"created:>={self.created_after}")
|
|
170
|
-
# elif self.created_before:
|
|
171
|
-
# query_parts.append(f"created:<={self.created_before}")
|
|
172
|
-
#
|
|
173
|
-
# if self.pushed_after and self.pushed_before:
|
|
174
|
-
# query_parts.append(f"pushed:{self.pushed_after}..{self.pushed_before}")
|
|
175
|
-
# elif self.pushed_after:
|
|
176
|
-
# query_parts.append(f"pushed:>={self.pushed_after}")
|
|
177
|
-
# elif self.pushed_before:
|
|
178
|
-
# query_parts.append(f"pushed:<={self.pushed_before}")
|
|
179
|
-
#
|
|
180
|
-
# query = " ".join(query_parts)
|
|
181
|
-
# colored_print(f"Query: {query}", "cyan")
|
|
182
|
-
# return query
|
|
183
|
-
#
|
|
184
|
-
# def _handle_rate_limit(self):
|
|
185
|
-
# # Handle GitHub API rate limiting.
|
|
186
|
-
# rate_limit = self.github.get_rate_limit()
|
|
187
|
-
# reset_time = rate_limit.core.reset
|
|
188
|
-
# wait_seconds = (reset_time - datetime.now()).total_seconds()
|
|
189
|
-
#
|
|
190
|
-
# if wait_seconds > 0:
|
|
191
|
-
# colored_print(f"Rate limit will reset in {wait_seconds:.0f} seconds", "yellow")
|
|
192
|
-
# import time
|
|
193
|
-
#
|
|
194
|
-
# time.sleep(min(wait_seconds + 10, 60)) # Wait with max 60 seconds
|
|
195
|
-
#
|
|
196
|
-
# def save_results(self, repositories: list[dict[str, Any]], output_file: Path):
|
|
197
|
-
# # Save fetched repositories to JSON file.
|
|
198
|
-
# data = {
|
|
199
|
-
# "metadata": {
|
|
200
|
-
# "fetched_at": format_timestamp(),
|
|
201
|
-
# "total_repos": len(repositories),
|
|
202
|
-
# "min_stars": self.min_stars,
|
|
203
|
-
# "languages": self.languages,
|
|
204
|
-
# "search_keywords": ["microservices", "microservice-architecture", "cloud-native"],
|
|
205
|
-
# },
|
|
206
|
-
# "repositories": repositories,
|
|
207
|
-
# }
|
|
208
|
-
#
|
|
209
|
-
# save_json_file(data, output_file)
|
|
210
|
-
# colored_print(f"Saved {len(repositories)} repositories to {output_file}", "green")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|