greenmining 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +1 -1
- greenmining/controllers/repository_controller.py +5 -136
- greenmining/presenters/console_presenter.py +1 -1
- greenmining/services/github_fetcher.py +2 -210
- greenmining/services/github_graphql_fetcher.py +32 -39
- {greenmining-1.0.7.dist-info → greenmining-1.0.9.dist-info}/METADATA +1 -1
- {greenmining-1.0.7.dist-info → greenmining-1.0.9.dist-info}/RECORD +10 -10
- {greenmining-1.0.7.dist-info → greenmining-1.0.9.dist-info}/WHEEL +0 -0
- {greenmining-1.0.7.dist-info → greenmining-1.0.9.dist-info}/licenses/LICENSE +0 -0
- {greenmining-1.0.7.dist-info → greenmining-1.0.9.dist-info}/top_level.txt +0 -0
greenmining/__init__.py
CHANGED
|
@@ -1,26 +1,5 @@
|
|
|
1
1
|
# Repository Controller - Handles repository fetching operations.
|
|
2
2
|
|
|
3
|
-
# ============================================================================
|
|
4
|
-
# OLD REST API IMPLEMENTATION (DEADCODE - REPLACED WITH GRAPHQL)
|
|
5
|
-
# ============================================================================
|
|
6
|
-
# from github import Github, GithubException
|
|
7
|
-
# from tqdm import tqdm
|
|
8
|
-
#
|
|
9
|
-
# from greenmining.config import Config
|
|
10
|
-
# from greenmining.models.repository import Repository
|
|
11
|
-
# from greenmining.utils import colored_print, load_json_file, save_json_file
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
# class RepositoryController:
|
|
15
|
-
# # Controller for GitHub repository operations.
|
|
16
|
-
#
|
|
17
|
-
# def __init__(self, config: Config):
|
|
18
|
-
# # Initialize controller with configuration.
|
|
19
|
-
# self.config = config
|
|
20
|
-
# self.github = Github(config.GITHUB_TOKEN)
|
|
21
|
-
# ============================================================================
|
|
22
|
-
|
|
23
|
-
# NEW GRAPHQL IMPLEMENTATION (5-10x faster)
|
|
24
3
|
from tqdm import tqdm
|
|
25
4
|
|
|
26
5
|
from greenmining.config import Config
|
|
@@ -37,80 +16,6 @@ class RepositoryController:
|
|
|
37
16
|
self.config = config
|
|
38
17
|
self.graphql_fetcher = GitHubGraphQLFetcher(config.GITHUB_TOKEN)
|
|
39
18
|
|
|
40
|
-
# ============================================================================
|
|
41
|
-
# OLD REST API METHOD (DEADCODE - 10x slower, high rate limit cost)
|
|
42
|
-
# ============================================================================
|
|
43
|
-
# def fetch_repositories(
|
|
44
|
-
# self,
|
|
45
|
-
# max_repos: int = None,
|
|
46
|
-
# min_stars: int = None,
|
|
47
|
-
# languages: list[str] = None,
|
|
48
|
-
# keywords: str = None,
|
|
49
|
-
# created_after: str = None,
|
|
50
|
-
# created_before: str = None,
|
|
51
|
-
# pushed_after: str = None,
|
|
52
|
-
# pushed_before: str = None,
|
|
53
|
-
# ) -> list[Repository]:
|
|
54
|
-
# # Fetch repositories from GitHub using REST API (slow).
|
|
55
|
-
# max_repos = max_repos or self.config.MAX_REPOS
|
|
56
|
-
# min_stars = min_stars or self.config.MIN_STARS
|
|
57
|
-
# languages = languages or self.config.SUPPORTED_LANGUAGES
|
|
58
|
-
# keywords = keywords or "microservices"
|
|
59
|
-
#
|
|
60
|
-
# colored_print(f" Fetching up to {max_repos} repositories...", "cyan")
|
|
61
|
-
# colored_print(f" Keywords: {keywords}", "cyan")
|
|
62
|
-
# colored_print(f" Filters: min_stars={min_stars}", "cyan")
|
|
63
|
-
#
|
|
64
|
-
# if created_after or created_before:
|
|
65
|
-
# colored_print(
|
|
66
|
-
# f" Created: {created_after or 'any'} to {created_before or 'any'}", "cyan"
|
|
67
|
-
# )
|
|
68
|
-
# if pushed_after or pushed_before:
|
|
69
|
-
# colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
|
|
70
|
-
#
|
|
71
|
-
# # Build search query with temporal filters
|
|
72
|
-
# query = self._build_temporal_query(
|
|
73
|
-
# keywords, min_stars, created_after, created_before, pushed_after, pushed_before
|
|
74
|
-
# )
|
|
75
|
-
#
|
|
76
|
-
# try:
|
|
77
|
-
# # Execute search (REST API - many requests)
|
|
78
|
-
# search_results = self.github.search_repositories(
|
|
79
|
-
# query=query, sort="stars", order="desc"
|
|
80
|
-
# )
|
|
81
|
-
#
|
|
82
|
-
# total_found = search_results.totalCount
|
|
83
|
-
# colored_print(f" Found {total_found} repositories", "green")
|
|
84
|
-
#
|
|
85
|
-
# # Fetch repositories (1 request per repo = slow)
|
|
86
|
-
# repositories = []
|
|
87
|
-
# with tqdm(total=min(max_repos, total_found), desc="Fetching", unit="repo") as pbar:
|
|
88
|
-
# for idx, repo in enumerate(search_results):
|
|
89
|
-
# if idx >= max_repos:
|
|
90
|
-
# break
|
|
91
|
-
#
|
|
92
|
-
# try:
|
|
93
|
-
# repo_model = Repository.from_github_repo(repo, idx + 1)
|
|
94
|
-
# repositories.append(repo_model)
|
|
95
|
-
# pbar.update(1)
|
|
96
|
-
# except GithubException as e:
|
|
97
|
-
# colored_print(f" Error: {repo.full_name}: {e}", "yellow")
|
|
98
|
-
# continue
|
|
99
|
-
#
|
|
100
|
-
# # Save to file
|
|
101
|
-
# repo_dicts = [r.to_dict() for r in repositories]
|
|
102
|
-
# save_json_file(repo_dicts, self.config.REPOS_FILE)
|
|
103
|
-
#
|
|
104
|
-
# colored_print(f" Fetched {len(repositories)} repositories", "green")
|
|
105
|
-
# colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
|
|
106
|
-
#
|
|
107
|
-
# return repositories
|
|
108
|
-
#
|
|
109
|
-
# except Exception as e:
|
|
110
|
-
# colored_print(f" Error fetching repositories: {e}", "red")
|
|
111
|
-
# raise
|
|
112
|
-
# ============================================================================
|
|
113
|
-
|
|
114
19
|
def fetch_repositories(
|
|
115
20
|
self,
|
|
116
21
|
max_repos: int = None,
|
|
@@ -122,13 +27,13 @@ class RepositoryController:
|
|
|
122
27
|
pushed_after: str = None,
|
|
123
28
|
pushed_before: str = None,
|
|
124
29
|
) -> list[Repository]:
|
|
125
|
-
# Fetch repositories from GitHub using GraphQL API
|
|
30
|
+
# Fetch repositories from GitHub using GraphQL API.
|
|
126
31
|
max_repos = max_repos or self.config.MAX_REPOS
|
|
127
32
|
min_stars = min_stars or self.config.MIN_STARS
|
|
128
33
|
languages = languages or self.config.SUPPORTED_LANGUAGES
|
|
129
34
|
keywords = keywords or "microservices"
|
|
130
35
|
|
|
131
|
-
colored_print(f"
|
|
36
|
+
colored_print(f"Fetching up to {max_repos} repositories...", "cyan")
|
|
132
37
|
colored_print(f" Keywords: {keywords}", "cyan")
|
|
133
38
|
colored_print(f" Filters: min_stars={min_stars}", "cyan")
|
|
134
39
|
|
|
@@ -140,7 +45,7 @@ class RepositoryController:
|
|
|
140
45
|
colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
|
|
141
46
|
|
|
142
47
|
try:
|
|
143
|
-
#
|
|
48
|
+
# Execute GraphQL search
|
|
144
49
|
repositories = self.graphql_fetcher.search_repositories(
|
|
145
50
|
keywords=keywords,
|
|
146
51
|
max_repos=max_repos,
|
|
@@ -156,51 +61,15 @@ class RepositoryController:
|
|
|
156
61
|
repo_dicts = [r.to_dict() for r in repositories]
|
|
157
62
|
save_json_file(repo_dicts, self.config.REPOS_FILE)
|
|
158
63
|
|
|
159
|
-
colored_print(f"
|
|
64
|
+
colored_print(f"Fetched {len(repositories)} repositories", "green")
|
|
160
65
|
colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
|
|
161
|
-
colored_print(f" API: GraphQL (5-10x faster than REST)", "green")
|
|
162
66
|
|
|
163
67
|
return repositories
|
|
164
68
|
|
|
165
69
|
except Exception as e:
|
|
166
|
-
colored_print(f"
|
|
70
|
+
colored_print(f"Error fetching repositories: {e}", "red")
|
|
167
71
|
raise
|
|
168
72
|
|
|
169
|
-
# ============================================================================
|
|
170
|
-
# OLD REST API HELPER (DEADCODE - handled by GraphQL fetcher now)
|
|
171
|
-
# ============================================================================
|
|
172
|
-
# def _build_temporal_query(
|
|
173
|
-
# self,
|
|
174
|
-
# keywords: str,
|
|
175
|
-
# min_stars: int,
|
|
176
|
-
# created_after: str = None,
|
|
177
|
-
# created_before: str = None,
|
|
178
|
-
# pushed_after: str = None,
|
|
179
|
-
# pushed_before: str = None,
|
|
180
|
-
# ) -> str:
|
|
181
|
-
# # Build GitHub search query with temporal constraints.
|
|
182
|
-
# query_parts = [keywords, f"stars:>={min_stars}"]
|
|
183
|
-
#
|
|
184
|
-
# # Temporal filters
|
|
185
|
-
# if created_after and created_before:
|
|
186
|
-
# query_parts.append(f"created:{created_after}..{created_before}")
|
|
187
|
-
# elif created_after:
|
|
188
|
-
# query_parts.append(f"created:>={created_after}")
|
|
189
|
-
# elif created_before:
|
|
190
|
-
# query_parts.append(f"created:<={created_before}")
|
|
191
|
-
#
|
|
192
|
-
# if pushed_after and pushed_before:
|
|
193
|
-
# query_parts.append(f"pushed:{pushed_after}..{pushed_before}")
|
|
194
|
-
# elif pushed_after:
|
|
195
|
-
# query_parts.append(f"pushed:>={pushed_after}")
|
|
196
|
-
# elif pushed_before:
|
|
197
|
-
# query_parts.append(f"pushed:<={pushed_before}")
|
|
198
|
-
#
|
|
199
|
-
# query = " ".join(query_parts)
|
|
200
|
-
# colored_print(f" Query: {query}", "cyan")
|
|
201
|
-
# return query
|
|
202
|
-
# ============================================================================
|
|
203
|
-
|
|
204
73
|
def load_repositories(self) -> list[Repository]:
|
|
205
74
|
# Load repositories from file.
|
|
206
75
|
if not self.config.REPOS_FILE.exists():
|
|
@@ -113,7 +113,7 @@ class ConsolePresenter:
|
|
|
113
113
|
|
|
114
114
|
table_data = []
|
|
115
115
|
for phase, info in status.items():
|
|
116
|
-
status_icon = "" if info.get("completed") else "
|
|
116
|
+
status_icon = "done" if info.get("completed") else "pending"
|
|
117
117
|
table_data.append(
|
|
118
118
|
[status_icon, phase, info.get("file", "N/A"), info.get("size", "N/A")]
|
|
119
119
|
)
|
|
@@ -1,210 +1,2 @@
|
|
|
1
|
-
#
|
|
2
|
-
#
|
|
3
|
-
# ================================================================================
|
|
4
|
-
#
|
|
5
|
-
# This file contains the OLD GitHub REST API implementation.
|
|
6
|
-
# It has been REPLACED by GitHubGraphQLFetcher for better performance.
|
|
7
|
-
#
|
|
8
|
-
# Performance comparison:
|
|
9
|
-
# REST API: 10+ requests for 100 repos, ~2 minutes
|
|
10
|
-
# GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
|
|
11
|
-
#
|
|
12
|
-
# USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
|
|
13
|
-
#
|
|
14
|
-
# This file is kept for reference only. Do not use in production.
|
|
15
|
-
#
|
|
16
|
-
# ================================================================================
|
|
17
|
-
|
|
18
|
-
# GitHub repository fetcher for green microservices mining.
|
|
19
|
-
|
|
20
|
-
# from __future__ import annotations
|
|
21
|
-
#
|
|
22
|
-
# from datetime import datetime
|
|
23
|
-
# from pathlib import Path
|
|
24
|
-
# from typing import Any, Dict, List, Optional
|
|
25
|
-
#
|
|
26
|
-
# from github import Github, GithubException, RateLimitExceededException
|
|
27
|
-
# from tqdm import tqdm
|
|
28
|
-
#
|
|
29
|
-
# from greenmining.config import get_config
|
|
30
|
-
# from greenmining.utils import (
|
|
31
|
-
# colored_print,
|
|
32
|
-
# format_timestamp,
|
|
33
|
-
# print_banner,
|
|
34
|
-
# save_json_file,
|
|
35
|
-
# )
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
# class GitHubFetcher:
|
|
39
|
-
# # Fetches microservice repositories from GitHub using REST API (SLOW).
|
|
40
|
-
#
|
|
41
|
-
# def __init__(
|
|
42
|
-
# self,
|
|
43
|
-
# token: str,
|
|
44
|
-
# max_repos: int = 100,
|
|
45
|
-
# min_stars: int = 100,
|
|
46
|
-
# languages: Optional[list[str]] = None,
|
|
47
|
-
# created_after: Optional[str] = None,
|
|
48
|
-
# created_before: Optional[str] = None,
|
|
49
|
-
# pushed_after: Optional[str] = None,
|
|
50
|
-
# pushed_before: Optional[str] = None,
|
|
51
|
-
# ):
|
|
52
|
-
# # Initialize GitHub fetcher.
|
|
53
|
-
# self.github = Github(token)
|
|
54
|
-
# self.max_repos = max_repos
|
|
55
|
-
# self.min_stars = min_stars
|
|
56
|
-
# self.languages = languages or [
|
|
57
|
-
# "Java",
|
|
58
|
-
# "Python",
|
|
59
|
-
# "Go",
|
|
60
|
-
# "JavaScript",
|
|
61
|
-
# "TypeScript",
|
|
62
|
-
# "C#",
|
|
63
|
-
# "Rust",
|
|
64
|
-
# ]
|
|
65
|
-
# self.created_after = created_after
|
|
66
|
-
# self.created_before = created_before
|
|
67
|
-
# self.pushed_after = pushed_after
|
|
68
|
-
# self.pushed_before = pushed_before
|
|
69
|
-
#
|
|
70
|
-
# def search_repositories(self) -> list[dict[str, Any]]:
|
|
71
|
-
# # Search for microservice repositories (REST API - many requests).
|
|
72
|
-
# repositories = []
|
|
73
|
-
# keywords = ["microservices", "microservice-architecture", "cloud-native"]
|
|
74
|
-
#
|
|
75
|
-
# colored_print(f"Searching for repositories with keywords: {', '.join(keywords)}", "cyan")
|
|
76
|
-
# colored_print(
|
|
77
|
-
# f"Filters: min_stars={self.min_stars}, languages={', '.join(self.languages)}", "cyan"
|
|
78
|
-
# )
|
|
79
|
-
#
|
|
80
|
-
# # Build search query with temporal filters
|
|
81
|
-
# query = self._build_temporal_query(keywords)
|
|
82
|
-
#
|
|
83
|
-
# try:
|
|
84
|
-
# # Execute search (1 request)
|
|
85
|
-
# search_results = self.github.search_repositories(
|
|
86
|
-
# query=query, sort="stars", order="desc"
|
|
87
|
-
# )
|
|
88
|
-
#
|
|
89
|
-
# total_found = search_results.totalCount
|
|
90
|
-
# colored_print(f"Found {total_found} repositories matching criteria", "green")
|
|
91
|
-
#
|
|
92
|
-
# # Fetch repository details with progress bar (1 request per repo = SLOW)
|
|
93
|
-
# with tqdm(
|
|
94
|
-
# total=min(self.max_repos, total_found), desc="Fetching repositories", unit="repo"
|
|
95
|
-
# ) as pbar:
|
|
96
|
-
# for idx, repo in enumerate(search_results):
|
|
97
|
-
# if idx >= self.max_repos:
|
|
98
|
-
# break
|
|
99
|
-
#
|
|
100
|
-
# try:
|
|
101
|
-
# repo_data = self._extract_repo_metadata(repo, idx + 1)
|
|
102
|
-
# repositories.append(repo_data)
|
|
103
|
-
# pbar.update(1)
|
|
104
|
-
# except GithubException as e:
|
|
105
|
-
# colored_print(f"Error fetching {repo.full_name}: {e}", "yellow")
|
|
106
|
-
# continue
|
|
107
|
-
# except RateLimitExceededException:
|
|
108
|
-
# colored_print("Rate limit exceeded. Waiting...", "red")
|
|
109
|
-
# self._handle_rate_limit()
|
|
110
|
-
# continue
|
|
111
|
-
#
|
|
112
|
-
# return repositories
|
|
113
|
-
#
|
|
114
|
-
# except GithubException as e:
|
|
115
|
-
# colored_print(f"GitHub API error: {e}", "red")
|
|
116
|
-
# raise
|
|
117
|
-
# except Exception as e:
|
|
118
|
-
# colored_print(f"Unexpected error: {e}", "red")
|
|
119
|
-
# raise
|
|
120
|
-
#
|
|
121
|
-
# def _extract_repo_metadata(self, repo, repo_id: int) -> dict[str, Any]:
|
|
122
|
-
# # Extract metadata from repository object.
|
|
123
|
-
# return {
|
|
124
|
-
# "repo_id": repo_id,
|
|
125
|
-
# "name": repo.name,
|
|
126
|
-
# "owner": repo.owner.login,
|
|
127
|
-
# "full_name": repo.full_name,
|
|
128
|
-
# "url": repo.html_url,
|
|
129
|
-
# "clone_url": repo.clone_url,
|
|
130
|
-
# "language": repo.language,
|
|
131
|
-
# "stars": repo.stargazers_count,
|
|
132
|
-
# "forks": repo.forks_count,
|
|
133
|
-
# "watchers": repo.watchers_count,
|
|
134
|
-
# "open_issues": repo.open_issues_count,
|
|
135
|
-
# "last_updated": repo.updated_at.isoformat() if repo.updated_at else None,
|
|
136
|
-
# "created_at": repo.created_at.isoformat() if repo.created_at else None,
|
|
137
|
-
# "description": repo.description or "",
|
|
138
|
-
# "main_branch": repo.default_branch,
|
|
139
|
-
# "topics": repo.get_topics() if hasattr(repo, "get_topics") else [],
|
|
140
|
-
# "size": repo.size,
|
|
141
|
-
# "has_issues": repo.has_issues,
|
|
142
|
-
# "has_wiki": repo.has_wiki,
|
|
143
|
-
# "archived": repo.archived,
|
|
144
|
-
# "license": repo.license.name if repo.license else None,
|
|
145
|
-
# }
|
|
146
|
-
#
|
|
147
|
-
# def _build_temporal_query(self, keywords: list[str]) -> str:
|
|
148
|
-
# # Build GitHub search query with temporal constraints.
|
|
149
|
-
# query_parts = []
|
|
150
|
-
#
|
|
151
|
-
# # Keywords
|
|
152
|
-
# keyword_query = " OR ".join(keywords)
|
|
153
|
-
# query_parts.append(f"({keyword_query})")
|
|
154
|
-
#
|
|
155
|
-
# # Languages
|
|
156
|
-
# language_query = " OR ".join([f"language:{lang}" for lang in self.languages])
|
|
157
|
-
# query_parts.append(f"({language_query})")
|
|
158
|
-
#
|
|
159
|
-
# # Stars
|
|
160
|
-
# query_parts.append(f"stars:>={self.min_stars}")
|
|
161
|
-
#
|
|
162
|
-
# # Archived filter
|
|
163
|
-
# query_parts.append("archived:false")
|
|
164
|
-
#
|
|
165
|
-
# # Temporal filters
|
|
166
|
-
# if self.created_after and self.created_before:
|
|
167
|
-
# query_parts.append(f"created:{self.created_after}..{self.created_before}")
|
|
168
|
-
# elif self.created_after:
|
|
169
|
-
# query_parts.append(f"created:>={self.created_after}")
|
|
170
|
-
# elif self.created_before:
|
|
171
|
-
# query_parts.append(f"created:<={self.created_before}")
|
|
172
|
-
#
|
|
173
|
-
# if self.pushed_after and self.pushed_before:
|
|
174
|
-
# query_parts.append(f"pushed:{self.pushed_after}..{self.pushed_before}")
|
|
175
|
-
# elif self.pushed_after:
|
|
176
|
-
# query_parts.append(f"pushed:>={self.pushed_after}")
|
|
177
|
-
# elif self.pushed_before:
|
|
178
|
-
# query_parts.append(f"pushed:<={self.pushed_before}")
|
|
179
|
-
#
|
|
180
|
-
# query = " ".join(query_parts)
|
|
181
|
-
# colored_print(f"Query: {query}", "cyan")
|
|
182
|
-
# return query
|
|
183
|
-
#
|
|
184
|
-
# def _handle_rate_limit(self):
|
|
185
|
-
# # Handle GitHub API rate limiting.
|
|
186
|
-
# rate_limit = self.github.get_rate_limit()
|
|
187
|
-
# reset_time = rate_limit.core.reset
|
|
188
|
-
# wait_seconds = (reset_time - datetime.now()).total_seconds()
|
|
189
|
-
#
|
|
190
|
-
# if wait_seconds > 0:
|
|
191
|
-
# colored_print(f"Rate limit will reset in {wait_seconds:.0f} seconds", "yellow")
|
|
192
|
-
# import time
|
|
193
|
-
#
|
|
194
|
-
# time.sleep(min(wait_seconds + 10, 60)) # Wait with max 60 seconds
|
|
195
|
-
#
|
|
196
|
-
# def save_results(self, repositories: list[dict[str, Any]], output_file: Path):
|
|
197
|
-
# # Save fetched repositories to JSON file.
|
|
198
|
-
# data = {
|
|
199
|
-
# "metadata": {
|
|
200
|
-
# "fetched_at": format_timestamp(),
|
|
201
|
-
# "total_repos": len(repositories),
|
|
202
|
-
# "min_stars": self.min_stars,
|
|
203
|
-
# "languages": self.languages,
|
|
204
|
-
# "search_keywords": ["microservices", "microservice-architecture", "cloud-native"],
|
|
205
|
-
# },
|
|
206
|
-
# "repositories": repositories,
|
|
207
|
-
# }
|
|
208
|
-
#
|
|
209
|
-
# save_json_file(data, output_file)
|
|
210
|
-
# colored_print(f"Saved {len(repositories)} repositories to {output_file}", "green")
|
|
1
|
+
# Legacy GitHub REST API fetcher (deprecated).
|
|
2
|
+
# Use github_graphql_fetcher.GitHubGraphQLFetcher instead.
|
|
@@ -1,7 +1,4 @@
|
|
|
1
|
-
# GitHub GraphQL API fetcher for
|
|
2
|
-
#
|
|
3
|
-
# GraphQL allows fetching exactly the data you need in a single request,
|
|
4
|
-
# reducing API calls and improving rate limit efficiency.
|
|
1
|
+
# GitHub GraphQL API fetcher for repository search and data retrieval.
|
|
5
2
|
|
|
6
3
|
import json
|
|
7
4
|
import time
|
|
@@ -14,12 +11,6 @@ from greenmining.models.repository import Repository
|
|
|
14
11
|
|
|
15
12
|
class GitHubGraphQLFetcher:
|
|
16
13
|
# Fetch GitHub repositories using GraphQL API v4.
|
|
17
|
-
#
|
|
18
|
-
# Benefits over REST API:
|
|
19
|
-
# - Fetch repos + commits in 1 request instead of 100+ REST calls
|
|
20
|
-
# - Get exactly the fields you need (no over-fetching)
|
|
21
|
-
# - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
|
|
22
|
-
# - More powerful search capabilities
|
|
23
14
|
|
|
24
15
|
GRAPHQL_ENDPOINT = "https://api.github.com/graphql"
|
|
25
16
|
|
|
@@ -153,7 +144,7 @@ class GitHubGraphQLFetcher:
|
|
|
153
144
|
nodes = search.get("nodes", [])
|
|
154
145
|
for node in nodes:
|
|
155
146
|
if node and len(repositories) < max_repos:
|
|
156
|
-
repo = self._parse_repository(node)
|
|
147
|
+
repo = self._parse_repository(node, len(repositories) + 1)
|
|
157
148
|
repositories.append(repo)
|
|
158
149
|
|
|
159
150
|
# Check pagination
|
|
@@ -193,10 +184,10 @@ class GitHubGraphQLFetcher:
|
|
|
193
184
|
# Star count
|
|
194
185
|
query_parts.append(f"stars:>={min_stars}")
|
|
195
186
|
|
|
196
|
-
# Languages
|
|
197
|
-
if languages:
|
|
198
|
-
lang_query = "
|
|
199
|
-
query_parts.append(
|
|
187
|
+
# Languages - skip filter if more than 5 to avoid exceeding GitHub query limits
|
|
188
|
+
if languages and len(languages) <= 5:
|
|
189
|
+
lang_query = " ".join([f"language:{lang}" for lang in languages])
|
|
190
|
+
query_parts.append(lang_query)
|
|
200
191
|
|
|
201
192
|
# Date filters
|
|
202
193
|
if created_after:
|
|
@@ -221,37 +212,42 @@ class GitHubGraphQLFetcher:
|
|
|
221
212
|
response.raise_for_status()
|
|
222
213
|
return response.json()
|
|
223
214
|
|
|
224
|
-
def _parse_repository(self, node: Dict[str, Any]) -> Repository:
|
|
215
|
+
def _parse_repository(self, node: Dict[str, Any], repo_id: int = 0) -> Repository:
|
|
225
216
|
# Parse GraphQL repository node to Repository object.
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
217
|
+
full_name = node.get("nameWithOwner", "")
|
|
218
|
+
owner = full_name.split("/")[0] if "/" in full_name else ""
|
|
219
|
+
url = node.get("url", "")
|
|
220
|
+
|
|
221
|
+
# Extract primary language
|
|
222
|
+
lang_node = node.get("primaryLanguage") or {}
|
|
223
|
+
language = lang_node.get("name")
|
|
232
224
|
|
|
233
225
|
# Extract license
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
226
|
+
license_info = node.get("licenseInfo") or {}
|
|
227
|
+
license_name = license_info.get("name")
|
|
228
|
+
|
|
229
|
+
# Extract default branch safely (can be null for empty repos)
|
|
230
|
+
branch_ref = node.get("defaultBranchRef") or {}
|
|
231
|
+
main_branch = branch_ref.get("name", "main")
|
|
237
232
|
|
|
238
233
|
return Repository(
|
|
234
|
+
repo_id=repo_id,
|
|
239
235
|
name=node.get("name", ""),
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
url=
|
|
236
|
+
owner=owner,
|
|
237
|
+
full_name=full_name,
|
|
238
|
+
url=url,
|
|
239
|
+
clone_url=f"{url}.git" if url else "",
|
|
240
|
+
language=language,
|
|
243
241
|
stars=node.get("stargazerCount", 0),
|
|
244
242
|
forks=node.get("forkCount", 0),
|
|
245
|
-
watchers=node.get("watchers"
|
|
246
|
-
|
|
247
|
-
|
|
243
|
+
watchers=(node.get("watchers") or {}).get("totalCount", 0),
|
|
244
|
+
open_issues=0,
|
|
245
|
+
last_updated=node.get("updatedAt", ""),
|
|
248
246
|
created_at=node.get("createdAt", ""),
|
|
249
|
-
|
|
250
|
-
|
|
247
|
+
description=node.get("description", ""),
|
|
248
|
+
main_branch=main_branch,
|
|
249
|
+
archived=node.get("isArchived", False),
|
|
251
250
|
license=license_name,
|
|
252
|
-
is_fork=node.get("isFork", False),
|
|
253
|
-
is_archived=node.get("isArchived", False),
|
|
254
|
-
default_branch=node.get("defaultBranchRef", {}).get("name", "main"),
|
|
255
251
|
)
|
|
256
252
|
|
|
257
253
|
def get_repository_commits(
|
|
@@ -259,9 +255,6 @@ class GitHubGraphQLFetcher:
|
|
|
259
255
|
) -> List[Dict[str, Any]]:
|
|
260
256
|
# Fetch commits for a specific repository using GraphQL.
|
|
261
257
|
#
|
|
262
|
-
# This is much faster than REST API as it gets all commits in 1-2 requests
|
|
263
|
-
# instead of paginating through 100 individual REST calls.
|
|
264
|
-
#
|
|
265
258
|
# Args:
|
|
266
259
|
# owner: Repository owner
|
|
267
260
|
# name: Repository name
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
greenmining/__init__.py,sha256=
|
|
1
|
+
greenmining/__init__.py,sha256=0aYxDkV6H_sBdfQUGZ6n_MNUuv8RrJmSsa9LXB78OA4,2909
|
|
2
2
|
greenmining/__main__.py,sha256=NYOVS7D4w2XDLn6SyXHXPKE5GrNGOeoWSTb_KazgK5c,590
|
|
3
3
|
greenmining/__version__.py,sha256=xZc02a8bS3vUJlzh8k9RoxemB1irQmq_SpVVj6Cg5M0,62
|
|
4
4
|
greenmining/config.py,sha256=M4a7AwM1ErCmOY0n5Vmyoo9HPblSkTZ-HD3k2YHzs4A,8340
|
|
@@ -13,7 +13,7 @@ greenmining/analyzers/statistical_analyzer.py,sha256=DzWAcCyw42Ig3FIxTwPPBikgt2u
|
|
|
13
13
|
greenmining/analyzers/temporal_analyzer.py,sha256=JfTcAoI20oCFMehGrSRnDqhJTXI-RUbdCTMwDOTW9-g,14259
|
|
14
14
|
greenmining/analyzers/version_power_analyzer.py,sha256=2P6zOqBg-ButtIhF-4cutiwD2Q1geMY49VFUghHXXoI,8119
|
|
15
15
|
greenmining/controllers/__init__.py,sha256=UiAT6zBvC1z_9cJWfzq1cLA0I4r9b2vURHipj8oDczI,180
|
|
16
|
-
greenmining/controllers/repository_controller.py,sha256=
|
|
16
|
+
greenmining/controllers/repository_controller.py,sha256=DM9BabUAwZJARGngCk_4wEYPw2adn8iESCiFQ7Um4LQ,3880
|
|
17
17
|
greenmining/dashboard/__init__.py,sha256=Ig_291-hLrH9k3rV0whhQ1EkhiaRR8ciHiJ5s5OCBf4,141
|
|
18
18
|
greenmining/dashboard/app.py,sha256=Hk6_i2qmcg6SGW7UzxglEIvUBJiloRA-hMYI-YSORcA,8604
|
|
19
19
|
greenmining/energy/__init__.py,sha256=GoCYh7hitWBoPMtan1HF1yezCHi7o4sa_YUJgGkeJc8,558
|
|
@@ -28,17 +28,17 @@ greenmining/models/analysis_result.py,sha256=YICTCEcrJxZ1R8Xaio3AZOjCGwMzC_62BMA
|
|
|
28
28
|
greenmining/models/commit.py,sha256=mnRDWSiIyGtJeGXI8sav9hukWUyVFpoNe6GixRlZjY4,2439
|
|
29
29
|
greenmining/models/repository.py,sha256=SKjS01onOptpMioumtAPZxKpKheHAeVXnXyvatl7CfM,2856
|
|
30
30
|
greenmining/presenters/__init__.py,sha256=d1CMtqtUAHYHYNzigPyjtGOUtnH1drtUwf7-bFQq2B8,138
|
|
31
|
-
greenmining/presenters/console_presenter.py,sha256=
|
|
31
|
+
greenmining/presenters/console_presenter.py,sha256=qagn2c2aOym0WNKV8n175MQ-BTheLjrXzW8c1OafzAQ,4904
|
|
32
32
|
greenmining/services/__init__.py,sha256=ZEMOVut0KRdume_vz58beSNps3YgeoGBXmUjEqNgIhc,690
|
|
33
33
|
greenmining/services/commit_extractor.py,sha256=Fz2WTWjIZ_vQhSfkJKnWpJnBpI2nm0KacA4qYAvCpSE,8451
|
|
34
34
|
greenmining/services/data_aggregator.py,sha256=TsFT0oGOnnHk0QGZ1tT6ZhKGc5X1H1D1u7-7OpiPo7Y,19566
|
|
35
35
|
greenmining/services/data_analyzer.py,sha256=f0nlJkPAclHHCzzTyQW5bjhYrgE0XXiR1x7_o3fJaDs,9732
|
|
36
|
-
greenmining/services/github_fetcher.py,sha256=
|
|
37
|
-
greenmining/services/github_graphql_fetcher.py,sha256=
|
|
36
|
+
greenmining/services/github_fetcher.py,sha256=sdkS-LhHmX7mgMdlClCwEUVnZrItc0Pt6FVtlWk5iLU,106
|
|
37
|
+
greenmining/services/github_graphql_fetcher.py,sha256=ZklXdEAc60KeFL83zRYMwW_-2OwMKpfPY7Wrifl0D50,11539
|
|
38
38
|
greenmining/services/local_repo_analyzer.py,sha256=5DMN9RIyGXNdsOlIDV4Mp0fPavbB69oBA9us17P5cNo,24668
|
|
39
39
|
greenmining/services/reports.py,sha256=Vrw_pBNmVw2mTAf1dpcAqjBe6gXv-O4w_XweoVTt7L8,23392
|
|
40
|
-
greenmining-1.0.
|
|
41
|
-
greenmining-1.0.
|
|
42
|
-
greenmining-1.0.
|
|
43
|
-
greenmining-1.0.
|
|
44
|
-
greenmining-1.0.
|
|
40
|
+
greenmining-1.0.9.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
|
|
41
|
+
greenmining-1.0.9.dist-info/METADATA,sha256=Bo4NXk-LroVrvNYiNSgQixoyBF_jmhUWtqd_hWm5c_Q,30913
|
|
42
|
+
greenmining-1.0.9.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
43
|
+
greenmining-1.0.9.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
|
|
44
|
+
greenmining-1.0.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|