greenmining 1.0.7__tar.gz → 1.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {greenmining-1.0.7/greenmining.egg-info → greenmining-1.0.8}/PKG-INFO +1 -1
  2. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/__init__.py +1 -1
  3. greenmining-1.0.8/greenmining/controllers/repository_controller.py +100 -0
  4. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/presenters/console_presenter.py +1 -1
  5. greenmining-1.0.8/greenmining/services/github_fetcher.py +2 -0
  6. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/services/github_graphql_fetcher.py +5 -17
  7. {greenmining-1.0.7 → greenmining-1.0.8/greenmining.egg-info}/PKG-INFO +1 -1
  8. {greenmining-1.0.7 → greenmining-1.0.8}/pyproject.toml +1 -1
  9. greenmining-1.0.7/greenmining/controllers/repository_controller.py +0 -231
  10. greenmining-1.0.7/greenmining/services/github_fetcher.py +0 -210
  11. {greenmining-1.0.7 → greenmining-1.0.8}/CHANGELOG.md +0 -0
  12. {greenmining-1.0.7 → greenmining-1.0.8}/LICENSE +0 -0
  13. {greenmining-1.0.7 → greenmining-1.0.8}/MANIFEST.in +0 -0
  14. {greenmining-1.0.7 → greenmining-1.0.8}/README.md +0 -0
  15. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/__main__.py +0 -0
  16. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/__version__.py +0 -0
  17. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/analyzers/__init__.py +0 -0
  18. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/analyzers/code_diff_analyzer.py +0 -0
  19. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/analyzers/metrics_power_correlator.py +0 -0
  20. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/analyzers/power_regression.py +0 -0
  21. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/analyzers/qualitative_analyzer.py +0 -0
  22. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/analyzers/statistical_analyzer.py +0 -0
  23. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/analyzers/temporal_analyzer.py +0 -0
  24. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/analyzers/version_power_analyzer.py +0 -0
  25. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/config.py +0 -0
  26. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/controllers/__init__.py +0 -0
  27. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/dashboard/__init__.py +0 -0
  28. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/dashboard/app.py +0 -0
  29. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/energy/__init__.py +0 -0
  30. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/energy/base.py +0 -0
  31. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/energy/carbon_reporter.py +0 -0
  32. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/energy/codecarbon_meter.py +0 -0
  33. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/energy/cpu_meter.py +0 -0
  34. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/energy/rapl.py +0 -0
  35. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/gsf_patterns.py +0 -0
  36. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/models/__init__.py +0 -0
  37. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/models/aggregated_stats.py +0 -0
  38. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/models/analysis_result.py +0 -0
  39. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/models/commit.py +0 -0
  40. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/models/repository.py +0 -0
  41. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/presenters/__init__.py +0 -0
  42. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/services/__init__.py +0 -0
  43. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/services/commit_extractor.py +0 -0
  44. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/services/data_aggregator.py +0 -0
  45. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/services/data_analyzer.py +0 -0
  46. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/services/local_repo_analyzer.py +0 -0
  47. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/services/reports.py +0 -0
  48. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining/utils.py +0 -0
  49. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining.egg-info/SOURCES.txt +0 -0
  50. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining.egg-info/dependency_links.txt +0 -0
  51. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining.egg-info/requires.txt +0 -0
  52. {greenmining-1.0.7 → greenmining-1.0.8}/greenmining.egg-info/top_level.txt +0 -0
  53. {greenmining-1.0.7 → greenmining-1.0.8}/setup.cfg +0 -0
  54. {greenmining-1.0.7 → greenmining-1.0.8}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: greenmining
3
- Version: 1.0.7
3
+ Version: 1.0.8
4
4
  Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
5
5
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
6
6
  License: MIT
@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
9
9
  is_green_aware,
10
10
  )
11
11
 
12
- __version__ = "1.0.7"
12
+ __version__ = "1.0.8"
13
13
 
14
14
 
15
15
  def fetch_repositories(
@@ -0,0 +1,100 @@
1
+ # Repository Controller - Handles repository fetching operations.
2
+
3
+ from tqdm import tqdm
4
+
5
+ from greenmining.config import Config
6
+ from greenmining.models.repository import Repository
7
+ from greenmining.services.github_graphql_fetcher import GitHubGraphQLFetcher
8
+ from greenmining.utils import colored_print, load_json_file, save_json_file
9
+
10
+
11
class RepositoryController:
    """Controller for GitHub repository operations using the GraphQL API."""

    def __init__(self, config: Config):
        """Initialize the controller.

        Args:
            config: Application configuration providing the GitHub token,
                default search limits, and output file locations.
        """
        self.config = config
        self.graphql_fetcher = GitHubGraphQLFetcher(config.GITHUB_TOKEN)

    def fetch_repositories(
        self,
        max_repos: int = None,
        min_stars: int = None,
        languages: list[str] = None,
        keywords: str = None,
        created_after: str = None,
        created_before: str = None,
        pushed_after: str = None,
        pushed_before: str = None,
    ) -> list[Repository]:
        """Fetch repositories from GitHub using the GraphQL API.

        Arguments left as ``None`` fall back to the configured defaults.
        The fetched repositories are saved as JSON to ``config.REPOS_FILE``
        before being returned.

        Args:
            max_repos: Maximum number of repositories to fetch.
            min_stars: Minimum star count filter.
            languages: Programming languages to filter by.
            keywords: Search keywords (falls back to "microservices").
            created_after: Lower bound on repository creation date.
            created_before: Upper bound on repository creation date.
            pushed_after: Lower bound on last-push date.
            pushed_before: Upper bound on last-push date.

        Returns:
            List of fetched Repository models.

        Raises:
            Exception: Re-raises any error from the GraphQL fetcher after
                printing a diagnostic message.
        """
        # Compare against None (not truthiness) so explicit falsy values such
        # as min_stars=0 or languages=[] are honored instead of being
        # silently replaced by the configured defaults.
        max_repos = self.config.MAX_REPOS if max_repos is None else max_repos
        min_stars = self.config.MIN_STARS if min_stars is None else min_stars
        languages = self.config.SUPPORTED_LANGUAGES if languages is None else languages
        # An empty keyword string is not a useful search query, so truthiness
        # is intentional here.
        keywords = keywords or "microservices"

        colored_print(f"Fetching up to {max_repos} repositories...", "cyan")
        colored_print(f" Keywords: {keywords}", "cyan")
        colored_print(f" Filters: min_stars={min_stars}", "cyan")

        if created_after or created_before:
            colored_print(
                f" Created: {created_after or 'any'} to {created_before or 'any'}", "cyan"
            )
        if pushed_after or pushed_before:
            colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")

        try:
            # Execute GraphQL search
            repositories = self.graphql_fetcher.search_repositories(
                keywords=keywords,
                max_repos=max_repos,
                min_stars=min_stars,
                languages=languages,
                created_after=created_after,
                created_before=created_before,
                pushed_after=pushed_after,
                pushed_before=pushed_before,
            )

            # Persist results so later pipeline phases can load them from disk.
            repo_dicts = [r.to_dict() for r in repositories]
            save_json_file(repo_dicts, self.config.REPOS_FILE)

            colored_print(f"Fetched {len(repositories)} repositories", "green")
            colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")

            return repositories

        except Exception as e:
            colored_print(f"Error fetching repositories: {e}", "red")
            raise

    def load_repositories(self) -> list[Repository]:
        """Load previously fetched repositories from ``config.REPOS_FILE``.

        Returns:
            List of Repository models reconstructed from the JSON file.

        Raises:
            FileNotFoundError: If no repositories file exists yet.
        """
        if not self.config.REPOS_FILE.exists():
            raise FileNotFoundError(f"No repositories file found at {self.config.REPOS_FILE}")

        repo_dicts = load_json_file(self.config.REPOS_FILE)
        return [Repository.from_dict(r) for r in repo_dicts]

    def get_repository_stats(self, repositories: list[Repository]) -> dict:
        """Compute summary statistics for a list of repositories.

        Args:
            repositories: Repositories to summarize.

        Returns:
            Dict with total count, per-language counts, total and average
            stars, and the most-starred repository's full name. Empty dict
            when ``repositories`` is empty (avoids division by zero).
        """
        if not repositories:
            return {}

        # Sum stars once instead of twice (total and average share the value).
        total_stars = sum(r.stars for r in repositories)
        return {
            "total": len(repositories),
            "by_language": self._count_by_language(repositories),
            "total_stars": total_stars,
            "avg_stars": total_stars / len(repositories),
            "top_repo": max(repositories, key=lambda r: r.stars).full_name,
        }

    def _count_by_language(self, repositories: list[Repository]) -> dict:
        """Count repositories per primary language.

        Repositories with no detected language are grouped under "Unknown".
        """
        counts = {}
        for repo in repositories:
            lang = repo.language or "Unknown"
            counts[lang] = counts.get(lang, 0) + 1
        return counts
@@ -113,7 +113,7 @@ class ConsolePresenter:
113
113
 
114
114
  table_data = []
115
115
  for phase, info in status.items():
116
- status_icon = "" if info.get("completed") else ""
116
+ status_icon = "done" if info.get("completed") else "pending"
117
117
  table_data.append(
118
118
  [status_icon, phase, info.get("file", "N/A"), info.get("size", "N/A")]
119
119
  )
@@ -0,0 +1,2 @@
1
+ # Legacy GitHub REST API fetcher (deprecated).
2
+ # Use github_graphql_fetcher.GitHubGraphQLFetcher instead.
@@ -1,7 +1,4 @@
1
- # GitHub GraphQL API fetcher for faster and more efficient repository fetching.
2
- #
3
- # GraphQL allows fetching exactly the data you need in a single request,
4
- # reducing API calls and improving rate limit efficiency.
1
+ # GitHub GraphQL API fetcher for repository search and data retrieval.
5
2
 
6
3
  import json
7
4
  import time
@@ -14,12 +11,6 @@ from greenmining.models.repository import Repository
14
11
 
15
12
  class GitHubGraphQLFetcher:
16
13
  # Fetch GitHub repositories using GraphQL API v4.
17
- #
18
- # Benefits over REST API:
19
- # - Fetch repos + commits in 1 request instead of 100+ REST calls
20
- # - Get exactly the fields you need (no over-fetching)
21
- # - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
22
- # - More powerful search capabilities
23
14
 
24
15
  GRAPHQL_ENDPOINT = "https://api.github.com/graphql"
25
16
 
@@ -193,10 +184,10 @@ class GitHubGraphQLFetcher:
193
184
  # Star count
194
185
  query_parts.append(f"stars:>={min_stars}")
195
186
 
196
- # Languages
197
- if languages:
198
- lang_query = " OR ".join([f"language:{lang}" for lang in languages])
199
- query_parts.append(f"({lang_query})")
187
+ # Languages - skip filter if more than 5 to avoid exceeding GitHub query limits
188
+ if languages and len(languages) <= 5:
189
+ lang_query = " ".join([f"language:{lang}" for lang in languages])
190
+ query_parts.append(lang_query)
200
191
 
201
192
  # Date filters
202
193
  if created_after:
@@ -259,9 +250,6 @@ class GitHubGraphQLFetcher:
259
250
  ) -> List[Dict[str, Any]]:
260
251
  # Fetch commits for a specific repository using GraphQL.
261
252
  #
262
- # This is much faster than REST API as it gets all commits in 1-2 requests
263
- # instead of paginating through 100 individual REST calls.
264
- #
265
253
  # Args:
266
254
  # owner: Repository owner
267
255
  # name: Repository name
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: greenmining
3
- Version: 1.0.7
3
+ Version: 1.0.8
4
4
  Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
5
5
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "greenmining"
7
- version = "1.0.7"
7
+ version = "1.0.8"
8
8
  description = "An empirical Python library for Mining Software Repositories (MSR) in Green IT research"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,231 +0,0 @@
1
- # Repository Controller - Handles repository fetching operations.
2
-
3
- # ============================================================================
4
- # OLD REST API IMPLEMENTATION (DEADCODE - REPLACED WITH GRAPHQL)
5
- # ============================================================================
6
- # from github import Github, GithubException
7
- # from tqdm import tqdm
8
- #
9
- # from greenmining.config import Config
10
- # from greenmining.models.repository import Repository
11
- # from greenmining.utils import colored_print, load_json_file, save_json_file
12
- #
13
- #
14
- # class RepositoryController:
15
- # # Controller for GitHub repository operations.
16
- #
17
- # def __init__(self, config: Config):
18
- # # Initialize controller with configuration.
19
- # self.config = config
20
- # self.github = Github(config.GITHUB_TOKEN)
21
- # ============================================================================
22
-
23
- # NEW GRAPHQL IMPLEMENTATION (5-10x faster)
24
- from tqdm import tqdm
25
-
26
- from greenmining.config import Config
27
- from greenmining.models.repository import Repository
28
- from greenmining.services.github_graphql_fetcher import GitHubGraphQLFetcher
29
- from greenmining.utils import colored_print, load_json_file, save_json_file
30
-
31
-
32
- class RepositoryController:
33
- # Controller for GitHub repository operations using GraphQL API.
34
-
35
- def __init__(self, config: Config):
36
- # Initialize controller with configuration.
37
- self.config = config
38
- self.graphql_fetcher = GitHubGraphQLFetcher(config.GITHUB_TOKEN)
39
-
40
- # ============================================================================
41
- # OLD REST API METHOD (DEADCODE - 10x slower, high rate limit cost)
42
- # ============================================================================
43
- # def fetch_repositories(
44
- # self,
45
- # max_repos: int = None,
46
- # min_stars: int = None,
47
- # languages: list[str] = None,
48
- # keywords: str = None,
49
- # created_after: str = None,
50
- # created_before: str = None,
51
- # pushed_after: str = None,
52
- # pushed_before: str = None,
53
- # ) -> list[Repository]:
54
- # # Fetch repositories from GitHub using REST API (slow).
55
- # max_repos = max_repos or self.config.MAX_REPOS
56
- # min_stars = min_stars or self.config.MIN_STARS
57
- # languages = languages or self.config.SUPPORTED_LANGUAGES
58
- # keywords = keywords or "microservices"
59
- #
60
- # colored_print(f" Fetching up to {max_repos} repositories...", "cyan")
61
- # colored_print(f" Keywords: {keywords}", "cyan")
62
- # colored_print(f" Filters: min_stars={min_stars}", "cyan")
63
- #
64
- # if created_after or created_before:
65
- # colored_print(
66
- # f" Created: {created_after or 'any'} to {created_before or 'any'}", "cyan"
67
- # )
68
- # if pushed_after or pushed_before:
69
- # colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
70
- #
71
- # # Build search query with temporal filters
72
- # query = self._build_temporal_query(
73
- # keywords, min_stars, created_after, created_before, pushed_after, pushed_before
74
- # )
75
- #
76
- # try:
77
- # # Execute search (REST API - many requests)
78
- # search_results = self.github.search_repositories(
79
- # query=query, sort="stars", order="desc"
80
- # )
81
- #
82
- # total_found = search_results.totalCount
83
- # colored_print(f" Found {total_found} repositories", "green")
84
- #
85
- # # Fetch repositories (1 request per repo = slow)
86
- # repositories = []
87
- # with tqdm(total=min(max_repos, total_found), desc="Fetching", unit="repo") as pbar:
88
- # for idx, repo in enumerate(search_results):
89
- # if idx >= max_repos:
90
- # break
91
- #
92
- # try:
93
- # repo_model = Repository.from_github_repo(repo, idx + 1)
94
- # repositories.append(repo_model)
95
- # pbar.update(1)
96
- # except GithubException as e:
97
- # colored_print(f" Error: {repo.full_name}: {e}", "yellow")
98
- # continue
99
- #
100
- # # Save to file
101
- # repo_dicts = [r.to_dict() for r in repositories]
102
- # save_json_file(repo_dicts, self.config.REPOS_FILE)
103
- #
104
- # colored_print(f" Fetched {len(repositories)} repositories", "green")
105
- # colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
106
- #
107
- # return repositories
108
- #
109
- # except Exception as e:
110
- # colored_print(f" Error fetching repositories: {e}", "red")
111
- # raise
112
- # ============================================================================
113
-
114
- def fetch_repositories(
115
- self,
116
- max_repos: int = None,
117
- min_stars: int = None,
118
- languages: list[str] = None,
119
- keywords: str = None,
120
- created_after: str = None,
121
- created_before: str = None,
122
- pushed_after: str = None,
123
- pushed_before: str = None,
124
- ) -> list[Repository]:
125
- # Fetch repositories from GitHub using GraphQL API (5-10x faster).
126
- max_repos = max_repos or self.config.MAX_REPOS
127
- min_stars = min_stars or self.config.MIN_STARS
128
- languages = languages or self.config.SUPPORTED_LANGUAGES
129
- keywords = keywords or "microservices"
130
-
131
- colored_print(f"🚀 Fetching up to {max_repos} repositories (GraphQL API)...", "cyan")
132
- colored_print(f" Keywords: {keywords}", "cyan")
133
- colored_print(f" Filters: min_stars={min_stars}", "cyan")
134
-
135
- if created_after or created_before:
136
- colored_print(
137
- f" Created: {created_after or 'any'} to {created_before or 'any'}", "cyan"
138
- )
139
- if pushed_after or pushed_before:
140
- colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
141
-
142
- try:
143
- # Use GraphQL API (much faster!)
144
- repositories = self.graphql_fetcher.search_repositories(
145
- keywords=keywords,
146
- max_repos=max_repos,
147
- min_stars=min_stars,
148
- languages=languages,
149
- created_after=created_after,
150
- created_before=created_before,
151
- pushed_after=pushed_after,
152
- pushed_before=pushed_before,
153
- )
154
-
155
- # Save to file
156
- repo_dicts = [r.to_dict() for r in repositories]
157
- save_json_file(repo_dicts, self.config.REPOS_FILE)
158
-
159
- colored_print(f"✓ Fetched {len(repositories)} repositories", "green")
160
- colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
161
- colored_print(f" API: GraphQL (5-10x faster than REST)", "green")
162
-
163
- return repositories
164
-
165
- except Exception as e:
166
- colored_print(f"✗ Error fetching repositories: {e}", "red")
167
- raise
168
-
169
- # ============================================================================
170
- # OLD REST API HELPER (DEADCODE - handled by GraphQL fetcher now)
171
- # ============================================================================
172
- # def _build_temporal_query(
173
- # self,
174
- # keywords: str,
175
- # min_stars: int,
176
- # created_after: str = None,
177
- # created_before: str = None,
178
- # pushed_after: str = None,
179
- # pushed_before: str = None,
180
- # ) -> str:
181
- # # Build GitHub search query with temporal constraints.
182
- # query_parts = [keywords, f"stars:>={min_stars}"]
183
- #
184
- # # Temporal filters
185
- # if created_after and created_before:
186
- # query_parts.append(f"created:{created_after}..{created_before}")
187
- # elif created_after:
188
- # query_parts.append(f"created:>={created_after}")
189
- # elif created_before:
190
- # query_parts.append(f"created:<={created_before}")
191
- #
192
- # if pushed_after and pushed_before:
193
- # query_parts.append(f"pushed:{pushed_after}..{pushed_before}")
194
- # elif pushed_after:
195
- # query_parts.append(f"pushed:>={pushed_after}")
196
- # elif pushed_before:
197
- # query_parts.append(f"pushed:<={pushed_before}")
198
- #
199
- # query = " ".join(query_parts)
200
- # colored_print(f" Query: {query}", "cyan")
201
- # return query
202
- # ============================================================================
203
-
204
- def load_repositories(self) -> list[Repository]:
205
- # Load repositories from file.
206
- if not self.config.REPOS_FILE.exists():
207
- raise FileNotFoundError(f"No repositories file found at {self.config.REPOS_FILE}")
208
-
209
- repo_dicts = load_json_file(self.config.REPOS_FILE)
210
- return [Repository.from_dict(r) for r in repo_dicts]
211
-
212
- def get_repository_stats(self, repositories: list[Repository]) -> dict:
213
- # Get statistics about fetched repositories.
214
- if not repositories:
215
- return {}
216
-
217
- return {
218
- "total": len(repositories),
219
- "by_language": self._count_by_language(repositories),
220
- "total_stars": sum(r.stars for r in repositories),
221
- "avg_stars": sum(r.stars for r in repositories) / len(repositories),
222
- "top_repo": max(repositories, key=lambda r: r.stars).full_name,
223
- }
224
-
225
- def _count_by_language(self, repositories: list[Repository]) -> dict:
226
- # Count repositories by language.
227
- counts = {}
228
- for repo in repositories:
229
- lang = repo.language or "Unknown"
230
- counts[lang] = counts.get(lang, 0) + 1
231
- return counts
@@ -1,210 +0,0 @@
1
- # ================================================================================
2
- # DEADCODE - OLD REST API IMPLEMENTATION
3
- # ================================================================================
4
- #
5
- # This file contains the OLD GitHub REST API implementation.
6
- # It has been REPLACED by GitHubGraphQLFetcher for better performance.
7
- #
8
- # Performance comparison:
9
- # REST API: 10+ requests for 100 repos, ~2 minutes
10
- # GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
11
- #
12
- # USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
13
- #
14
- # This file is kept for reference only. Do not use in production.
15
- #
16
- # ================================================================================
17
-
18
- # GitHub repository fetcher for green microservices mining.
19
-
20
- # from __future__ import annotations
21
- #
22
- # from datetime import datetime
23
- # from pathlib import Path
24
- # from typing import Any, Dict, List, Optional
25
- #
26
- # from github import Github, GithubException, RateLimitExceededException
27
- # from tqdm import tqdm
28
- #
29
- # from greenmining.config import get_config
30
- # from greenmining.utils import (
31
- # colored_print,
32
- # format_timestamp,
33
- # print_banner,
34
- # save_json_file,
35
- # )
36
- #
37
- #
38
- # class GitHubFetcher:
39
- # # Fetches microservice repositories from GitHub using REST API (SLOW).
40
- #
41
- # def __init__(
42
- # self,
43
- # token: str,
44
- # max_repos: int = 100,
45
- # min_stars: int = 100,
46
- # languages: Optional[list[str]] = None,
47
- # created_after: Optional[str] = None,
48
- # created_before: Optional[str] = None,
49
- # pushed_after: Optional[str] = None,
50
- # pushed_before: Optional[str] = None,
51
- # ):
52
- # # Initialize GitHub fetcher.
53
- # self.github = Github(token)
54
- # self.max_repos = max_repos
55
- # self.min_stars = min_stars
56
- # self.languages = languages or [
57
- # "Java",
58
- # "Python",
59
- # "Go",
60
- # "JavaScript",
61
- # "TypeScript",
62
- # "C#",
63
- # "Rust",
64
- # ]
65
- # self.created_after = created_after
66
- # self.created_before = created_before
67
- # self.pushed_after = pushed_after
68
- # self.pushed_before = pushed_before
69
- #
70
- # def search_repositories(self) -> list[dict[str, Any]]:
71
- # # Search for microservice repositories (REST API - many requests).
72
- # repositories = []
73
- # keywords = ["microservices", "microservice-architecture", "cloud-native"]
74
- #
75
- # colored_print(f"Searching for repositories with keywords: {', '.join(keywords)}", "cyan")
76
- # colored_print(
77
- # f"Filters: min_stars={self.min_stars}, languages={', '.join(self.languages)}", "cyan"
78
- # )
79
- #
80
- # # Build search query with temporal filters
81
- # query = self._build_temporal_query(keywords)
82
- #
83
- # try:
84
- # # Execute search (1 request)
85
- # search_results = self.github.search_repositories(
86
- # query=query, sort="stars", order="desc"
87
- # )
88
- #
89
- # total_found = search_results.totalCount
90
- # colored_print(f"Found {total_found} repositories matching criteria", "green")
91
- #
92
- # # Fetch repository details with progress bar (1 request per repo = SLOW)
93
- # with tqdm(
94
- # total=min(self.max_repos, total_found), desc="Fetching repositories", unit="repo"
95
- # ) as pbar:
96
- # for idx, repo in enumerate(search_results):
97
- # if idx >= self.max_repos:
98
- # break
99
- #
100
- # try:
101
- # repo_data = self._extract_repo_metadata(repo, idx + 1)
102
- # repositories.append(repo_data)
103
- # pbar.update(1)
104
- # except GithubException as e:
105
- # colored_print(f"Error fetching {repo.full_name}: {e}", "yellow")
106
- # continue
107
- # except RateLimitExceededException:
108
- # colored_print("Rate limit exceeded. Waiting...", "red")
109
- # self._handle_rate_limit()
110
- # continue
111
- #
112
- # return repositories
113
- #
114
- # except GithubException as e:
115
- # colored_print(f"GitHub API error: {e}", "red")
116
- # raise
117
- # except Exception as e:
118
- # colored_print(f"Unexpected error: {e}", "red")
119
- # raise
120
- #
121
- # def _extract_repo_metadata(self, repo, repo_id: int) -> dict[str, Any]:
122
- # # Extract metadata from repository object.
123
- # return {
124
- # "repo_id": repo_id,
125
- # "name": repo.name,
126
- # "owner": repo.owner.login,
127
- # "full_name": repo.full_name,
128
- # "url": repo.html_url,
129
- # "clone_url": repo.clone_url,
130
- # "language": repo.language,
131
- # "stars": repo.stargazers_count,
132
- # "forks": repo.forks_count,
133
- # "watchers": repo.watchers_count,
134
- # "open_issues": repo.open_issues_count,
135
- # "last_updated": repo.updated_at.isoformat() if repo.updated_at else None,
136
- # "created_at": repo.created_at.isoformat() if repo.created_at else None,
137
- # "description": repo.description or "",
138
- # "main_branch": repo.default_branch,
139
- # "topics": repo.get_topics() if hasattr(repo, "get_topics") else [],
140
- # "size": repo.size,
141
- # "has_issues": repo.has_issues,
142
- # "has_wiki": repo.has_wiki,
143
- # "archived": repo.archived,
144
- # "license": repo.license.name if repo.license else None,
145
- # }
146
- #
147
- # def _build_temporal_query(self, keywords: list[str]) -> str:
148
- # # Build GitHub search query with temporal constraints.
149
- # query_parts = []
150
- #
151
- # # Keywords
152
- # keyword_query = " OR ".join(keywords)
153
- # query_parts.append(f"({keyword_query})")
154
- #
155
- # # Languages
156
- # language_query = " OR ".join([f"language:{lang}" for lang in self.languages])
157
- # query_parts.append(f"({language_query})")
158
- #
159
- # # Stars
160
- # query_parts.append(f"stars:>={self.min_stars}")
161
- #
162
- # # Archived filter
163
- # query_parts.append("archived:false")
164
- #
165
- # # Temporal filters
166
- # if self.created_after and self.created_before:
167
- # query_parts.append(f"created:{self.created_after}..{self.created_before}")
168
- # elif self.created_after:
169
- # query_parts.append(f"created:>={self.created_after}")
170
- # elif self.created_before:
171
- # query_parts.append(f"created:<={self.created_before}")
172
- #
173
- # if self.pushed_after and self.pushed_before:
174
- # query_parts.append(f"pushed:{self.pushed_after}..{self.pushed_before}")
175
- # elif self.pushed_after:
176
- # query_parts.append(f"pushed:>={self.pushed_after}")
177
- # elif self.pushed_before:
178
- # query_parts.append(f"pushed:<={self.pushed_before}")
179
- #
180
- # query = " ".join(query_parts)
181
- # colored_print(f"Query: {query}", "cyan")
182
- # return query
183
- #
184
- # def _handle_rate_limit(self):
185
- # # Handle GitHub API rate limiting.
186
- # rate_limit = self.github.get_rate_limit()
187
- # reset_time = rate_limit.core.reset
188
- # wait_seconds = (reset_time - datetime.now()).total_seconds()
189
- #
190
- # if wait_seconds > 0:
191
- # colored_print(f"Rate limit will reset in {wait_seconds:.0f} seconds", "yellow")
192
- # import time
193
- #
194
- # time.sleep(min(wait_seconds + 10, 60)) # Wait with max 60 seconds
195
- #
196
- # def save_results(self, repositories: list[dict[str, Any]], output_file: Path):
197
- # # Save fetched repositories to JSON file.
198
- # data = {
199
- # "metadata": {
200
- # "fetched_at": format_timestamp(),
201
- # "total_repos": len(repositories),
202
- # "min_stars": self.min_stars,
203
- # "languages": self.languages,
204
- # "search_keywords": ["microservices", "microservice-architecture", "cloud-native"],
205
- # },
206
- # "repositories": repositories,
207
- # }
208
- #
209
- # save_json_file(data, output_file)
210
- # colored_print(f"Saved {len(repositories)} repositories to {output_file}", "green")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes