greenmining-1.0.4-py3-none-any.whl → greenmining-1.0.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
greenmining/__init__.py CHANGED
@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
      is_green_aware,
  )

- __version__ = "1.0.4"
+ __version__ = "1.0.5"


  def fetch_repositories(
greenmining/__version__.py CHANGED
@@ -1,3 +1,3 @@
  # Version information for greenmining.

- __version__ = "1.0.4"
+ __version__ = "1.0.5"
greenmining/services/github_fetcher.py CHANGED
@@ -1,21 +1,19 @@
- """
- ================================================================================
- DEADCODE - OLD REST API IMPLEMENTATION
- ================================================================================
-
- This file contains the OLD GitHub REST API implementation.
- It has been REPLACED by GitHubGraphQLFetcher for better performance.
-
- Performance comparison:
- REST API: 10+ requests for 100 repos, ~2 minutes
- GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
-
- USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
-
- This file is kept for reference only. Do not use in production.
-
- ================================================================================
- """
+ # ================================================================================
+ # DEADCODE - OLD REST API IMPLEMENTATION
+ # ================================================================================
+ #
+ # This file contains the OLD GitHub REST API implementation.
+ # It has been REPLACED by GitHubGraphQLFetcher for better performance.
+ #
+ # Performance comparison:
+ # REST API: 10+ requests for 100 repos, ~2 minutes
+ # GraphQL API: 1-2 requests for 100 repos, ~15 seconds (10x faster!)
+ #
+ # USE INSTEAD: greenmining.services.github_graphql_fetcher.GitHubGraphQLFetcher
+ #
+ # This file is kept for reference only. Do not use in production.
+ #
+ # ================================================================================

  # GitHub repository fetcher for green microservices mining.

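The deprecation notice above redirects callers to the GraphQL fetcher. A minimal migration sketch: only the import path named in the notice and the `token` constructor argument (shown later in this diff) are confirmed; the environment-variable name is illustrative.

```python
import os

# Swap the deprecated REST fetcher for the GraphQL one, as the notice instructs.
# GITHUB_TOKEN is an illustrative variable name, not one the package mandates.
from greenmining.services.github_graphql_fetcher import GitHubGraphQLFetcher

fetcher = GitHubGraphQLFetcher(token=os.environ["GITHUB_TOKEN"])
```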
greenmining/services/github_graphql_fetcher.py CHANGED
@@ -1,9 +1,7 @@
- """
- GitHub GraphQL API fetcher for faster and more efficient repository fetching.
-
- GraphQL allows fetching exactly the data you need in a single request,
- reducing API calls and improving rate limit efficiency.
- """
+ # GitHub GraphQL API fetcher for faster and more efficient repository fetching.
+ #
+ # GraphQL allows fetching exactly the data you need in a single request,
+ # reducing API calls and improving rate limit efficiency.

  import json
  import time
@@ -15,25 +13,21 @@ from greenmining.models.repository import Repository


  class GitHubGraphQLFetcher:
- """
- Fetch GitHub repositories using GraphQL API v4.
-
- Benefits over REST API:
- - Fetch repos + commits in 1 request instead of 100+ REST calls
- - Get exactly the fields you need (no over-fetching)
- - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
- - More powerful search capabilities
- """
+ # Fetch GitHub repositories using GraphQL API v4.
+ #
+ # Benefits over REST API:
+ # - Fetch repos + commits in 1 request instead of 100+ REST calls
+ # - Get exactly the fields you need (no over-fetching)
+ # - Better rate limit efficiency (5000 points/hour vs 5000 requests/hour)
+ # - More powerful search capabilities

  GRAPHQL_ENDPOINT = "https://api.github.com/graphql"

  def __init__(self, token: str):
- """
- Initialize GraphQL fetcher.
-
- Args:
- token: GitHub personal access token
- """
+ # Initialize GraphQL fetcher.
+ #
+ # Args:
+ # token: GitHub personal access token
  self.token = token
  self.headers = {
  "Authorization": f"Bearer {token}",
@@ -51,22 +45,20 @@ class GitHubGraphQLFetcher:
  pushed_after: Optional[str] = None,
  pushed_before: Optional[str] = None,
  ) -> List[Repository]:
- """
- Search GitHub repositories using GraphQL.
-
- Args:
- keywords: Search keywords
- max_repos: Maximum number of repositories to fetch
- min_stars: Minimum star count
- languages: Programming languages to filter
- created_after: Created after date (YYYY-MM-DD)
- created_before: Created before date (YYYY-MM-DD)
- pushed_after: Pushed after date (YYYY-MM-DD)
- pushed_before: Pushed before date (YYYY-MM-DD)
-
- Returns:
- List of Repository objects
- """
+ # Search GitHub repositories using GraphQL.
+ #
+ # Args:
+ # keywords: Search keywords
+ # max_repos: Maximum number of repositories to fetch
+ # min_stars: Minimum star count
+ # languages: Programming languages to filter
+ # created_after: Created after date (YYYY-MM-DD)
+ # created_before: Created before date (YYYY-MM-DD)
+ # pushed_after: Pushed after date (YYYY-MM-DD)
+ # pushed_before: Pushed before date (YYYY-MM-DD)
+ #
+ # Returns:
+ # List of Repository objects
  # Build search query
  search_query = self._build_search_query(
  keywords,
@@ -195,7 +187,7 @@ class GitHubGraphQLFetcher:
  pushed_after: Optional[str],
  pushed_before: Optional[str],
  ) -> str:
- """Build GitHub search query string."""
+ # Build GitHub search query string.
  query_parts = [keywords]

  # Star count
@@ -219,7 +211,7 @@ class GitHubGraphQLFetcher:
  return " ".join(query_parts)

  def _execute_query(self, query: str, variables: Dict[str, Any]) -> Dict[str, Any]:
- """Execute GraphQL query."""
+ # Execute GraphQL query.
  payload = {"query": query, "variables": variables}

  response = requests.post(
@@ -230,7 +222,7 @@ class GitHubGraphQLFetcher:
  return response.json()

  def _parse_repository(self, node: Dict[str, Any]) -> Repository:
- """Parse GraphQL repository node to Repository object."""
+ # Parse GraphQL repository node to Repository object.
  # Extract languages
  languages = []
  if node.get("languages") and node["languages"].get("nodes"):
@@ -265,20 +257,18 @@ class GitHubGraphQLFetcher:
  def get_repository_commits(
  self, owner: str, name: str, max_commits: int = 100
  ) -> List[Dict[str, Any]]:
- """
- Fetch commits for a specific repository using GraphQL.
-
- This is much faster than REST API as it gets all commits in 1-2 requests
- instead of paginating through 100 individual REST calls.
-
- Args:
- owner: Repository owner
- name: Repository name
- max_commits: Maximum commits to fetch
-
- Returns:
- List of commit dictionaries
- """
+ # Fetch commits for a specific repository using GraphQL.
+ #
+ # This is much faster than REST API as it gets all commits in 1-2 requests
+ # instead of paginating through 100 individual REST calls.
+ #
+ # Args:
+ # owner: Repository owner
+ # name: Repository name
+ # max_commits: Maximum commits to fetch
+ #
+ # Returns:
+ # List of commit dictionaries
  query = """
  query($owner: String!, $name: String!, $first: Int!) {
  repository(owner: $owner, name: $name) {
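The hunk above only rewraps the documentation of `get_repository_commits`, leaving its behavior untouched. A usage sketch built strictly from the signatures visible in this diff, namely `GitHubGraphQLFetcher(token)` and `get_repository_commits(owner, name, max_commits)` returning a list of dicts; the owner/name values and token source are placeholders:

```python
import os

from greenmining.services.github_graphql_fetcher import GitHubGraphQLFetcher

# The token is a GitHub personal access token, per the __init__ comment above.
fetcher = GitHubGraphQLFetcher(token=os.environ["GITHUB_TOKEN"])

# Per the comments above, this takes 1-2 GraphQL requests rather than
# paginating through ~100 individual REST calls.
commits = fetcher.get_repository_commits(owner="octocat", name="hello-world", max_commits=100)
for commit in commits[:3]:
    print(commit)  # plain dicts, matching the List[Dict[str, Any]] return type
```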
@@ -359,7 +349,7 @@
  return commits

  def save_results(self, repositories: List[Repository], output_file: str):
- """Save repositories to JSON file."""
+ # Save repositories to JSON file.
  data = {
  "total_repositories": len(repositories),
  "repositories": [repo.to_dict() for repo in repositories],
greenmining-1.0.4.dist-info/METADATA → greenmining-1.0.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: greenmining
- Version: 1.0.4
+ Version: 1.0.5
  Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
  License: MIT
@@ -43,7 +43,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "dev"
  Requires-Dist: black>=23.12.0; extra == "dev"
  Requires-Dist: ruff>=0.1.9; extra == "dev"
  Requires-Dist: mypy>=1.8.0; extra == "dev"
- Requires-Dist: build>=1.0.4; extra == "dev"
+ Requires-Dist: build>=1.0.5; extra == "dev"
  Requires-Dist: twine>=4.0.2; extra == "dev"
  Provides-Extra: docs
  Requires-Dist: sphinx>=7.2.0; extra == "docs"
@@ -61,7 +61,7 @@ Green mining for microservices repositories.

  ## Overview

- `greenmining` is a Python library and CLI tool for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects sustainable software patterns across cloud, web, AI, database, networking, and general categories.
+ `greenmining` is a Python library for analyzing GitHub repositories to identify green software engineering practices and energy-efficient patterns. It detects sustainable software patterns across cloud, web, AI, database, networking, and general categories.

  ## Installation

@@ -105,7 +105,7 @@ if is_green_aware(commit_msg):
  # Output: ['Cache Static Data', 'Use Efficient Cache Strategies']
  ```

- #### Fetch Repositories with Custom Keywords (NEW)
+ #### Fetch Repositories with Custom Keywords

  ```python
  from greenmining import fetch_repositories
@@ -144,8 +144,6 @@ for repo in repos[:5]:
  ```python
  from greenmining.services.commit_extractor import CommitExtractor
  from greenmining.services.data_analyzer import DataAnalyzer
- from greenmining.analyzers.nlp_analyzer import NLPAnalyzer
- from greenmining.analyzers.ml_feature_extractor import MLFeatureExtractor
  from greenmining import fetch_repositories

  # Fetch repositories with custom keywords
@@ -195,18 +193,6 @@ for commit in commits:
  results.append(result)
  print(f"Green commit found: {commit.message[:50]}...")
  print(f" Patterns: {result['known_pattern']}")
-
- # Access NLP analysis results (NEW)
- if 'nlp_analysis' in result:
- nlp = result['nlp_analysis']
- print(f" NLP: {nlp['morphological_count']} morphological matches, "
- f"{nlp['semantic_count']} semantic matches")
-
- # Access ML features (NEW)
- if 'ml_features' in result:
- ml = result['ml_features']['text']
- print(f" ML Features: {ml['word_count']} words, "
- f"keyword density: {ml['keyword_density']:.2f}")
  ```

  #### Access Sustainability Patterns Data
@@ -242,7 +228,7 @@ print(f"Available categories: {sorted(categories)}")
  # 'monitoring', 'network', 'networking', 'resource', 'web']
  ```

- #### Advanced Analysis: Temporal Trends (NEW)
+ #### Advanced Analysis: Temporal Trends

  ```python
  from greenmining.services.data_aggregator import DataAggregator
@@ -374,7 +360,7 @@ repositories = fetch_repositories(
  min_stars=10,
  keywords="software engineering",
  )
- print(f"Fetched {len(repositories)} repositories")
+ print(f"Fetched {len(repositories)} repositories")

  # STAGE 2: Extract Commits
  print("\nExtracting commits...")
@@ -386,7 +372,7 @@ extractor = CommitExtractor(
  timeout=120,
  )
  all_commits = extractor.extract_from_repositories(repositories)
- print(f"Extracted {len(all_commits)} commits")
+ print(f"Extracted {len(all_commits)} commits")

  # Save commits
  extractor.save_results(
@@ -405,8 +391,8 @@ analyzed_commits = analyzer.analyze_commits(all_commits)
  # Count green-aware commits
  green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
  green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
- print(f"Analyzed {len(analyzed_commits)} commits")
- print(f"Green-aware: {green_count} ({green_percentage:.1f}%)")
+ print(f"Analyzed {len(analyzed_commits)} commits")
+ print(f"Green-aware: {green_count} ({green_percentage:.1f}%)")

  # Save analysis
  analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
@@ -434,7 +420,7 @@ print("\n" + "="*80)
  print("ANALYSIS COMPLETE")
  print("="*80)
  aggregator.print_summary(results)
- print(f"\n📁 Results saved in: {output_dir.absolute()}")
+ print(f"\nResults saved in: {output_dir.absolute()}")
  ```

  **What this example does:**
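The README fragments above outline a fetch, extract, analyze pipeline. A condensed sketch combining the two entry points visible in this diff, `fetch_repositories(...)` and `is_green_aware(...)`; the parameters mirror the README fragment, and the sample commit message is illustrative:

```python
from greenmining import fetch_repositories
from greenmining.gsf_patterns import is_green_aware  # also re-exported via greenmining.__init__

# Fetch candidate repositories; min_stars and keywords mirror the README example.
repositories = fetch_repositories(min_stars=10, keywords="software engineering")
print(f"Fetched {len(repositories)} repositories")

# Classify a commit message the same way the README quick-start does.
commit_msg = "Cache static data to avoid redundant network calls"
if is_green_aware(commit_msg):
    print("green-aware commit")
```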
greenmining-1.0.4.dist-info/RECORD → greenmining-1.0.5.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
- greenmining/__init__.py,sha256=m1foz4CUfEgBsgyWHEjehsT3qt1vogyer1fjhIVFqjg,992
+ greenmining/__init__.py,sha256=wlLxbv7hzKNy1yWAEZckcSpYkC1rYDIHU9bjQZHs9es,992
  greenmining/__main__.py,sha256=NYOVS7D4w2XDLn6SyXHXPKE5GrNGOeoWSTb_KazgK5c,590
- greenmining/__version__.py,sha256=QpL6VzwpRtOZiXqutOetZBUCoiJKYFeTosucueZ4gpQ,62
+ greenmining/__version__.py,sha256=xZc02a8bS3vUJlzh8k9RoxemB1irQmq_SpVVj6Cg5M0,62
  greenmining/config.py,sha256=00v1Ln8eZE0RxMrLxvonf8XOWqeRYaIme_iC1yDLR90,8228
  greenmining/gsf_patterns.py,sha256=hnd9GuWB8GEflrusEib5hjvl8CD5TSbGcBtb0gfxFp4,54193
  greenmining/utils.py,sha256=dSFwQzQwbS8rYZSgwLIxM_geLqbldwqVOIXMqEg08Qs,5609
@@ -26,12 +26,12 @@ greenmining/services/__init__.py,sha256=UhjS2X9x2v5iH991UDPazP3dTPuSgylMq4kQJaue
  greenmining/services/commit_extractor.py,sha256=3EfUVBwd8hGSbl7pS-_jAL8gX8RxIASXTX5EZBbKQPI,8387
  greenmining/services/data_aggregator.py,sha256=TsFT0oGOnnHk0QGZ1tT6ZhKGc5X1H1D1u7-7OpiPo7Y,19566
  greenmining/services/data_analyzer.py,sha256=f0nlJkPAclHHCzzTyQW5bjhYrgE0XXiR1x7_o3fJaDs,9732
- greenmining/services/github_fetcher.py,sha256=WFyowC0tROKAhP3bA4QXH7QIYf42yaZf2ePpUEvOZ6s,8266
- greenmining/services/github_graphql_fetcher.py,sha256=mQOUZZEltYWusA7TR6Q7BZrHZ-N1Ijt0Wg2kV5T7lFM,11958
+ greenmining/services/github_fetcher.py,sha256=mUcmQevhdDRYX72O-M7Vi-s3y4ZwNyKewleti838cqU,8285
+ greenmining/services/github_graphql_fetcher.py,sha256=p76vp5EgStzkmTcws__jb90za8m61toW0CBrwrm5Ew4,11972
  greenmining/services/local_repo_analyzer.py,sha256=IrfqY1L6peGO78zufEj4uAU1N7nskc0edAYVzE0Ew_w,14785
  greenmining/services/reports.py,sha256=7Smc7a4KtpmkAJ8UoMlzH5BZerC_iO_jMyQw3_42n1s,23387
- greenmining-1.0.4.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
- greenmining-1.0.4.dist-info/METADATA,sha256=rP_4u0fkgp_Bmq2txIpBTLa7ZMj3By2ylTjoej7RSEw,25610
- greenmining-1.0.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- greenmining-1.0.4.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
- greenmining-1.0.4.dist-info/RECORD,,
+ greenmining-1.0.5.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
+ greenmining-1.0.5.dist-info/METADATA,sha256=UIp-sji0KZ4GOtLaZOsPjaT9Qb0uDfuzFSIpw-UEcjQ,24899
+ greenmining-1.0.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ greenmining-1.0.5.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
+ greenmining-1.0.5.dist-info/RECORD,,
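The RECORD hashes above follow the standard wheel format: a SHA-256 digest, urlsafe-base64-encoded with the `=` padding stripped (PEP 376/427). A small sketch for recomputing one entry from an unpacked wheel; the file path is a placeholder:

```python
import base64
import hashlib

def record_hash(path: str) -> str:
    # RECORD-style hash: sha256 digest, urlsafe base64, '=' padding removed.
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# Should reproduce the RECORD line value for the corresponding file above.
print(record_hash("greenmining/__version__.py"))
```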