greenmining 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
greenmining/__init__.py CHANGED
@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
     is_green_aware,
 )
 
-__version__ = "0.1.12"
+__version__ = "1.0.3"
 
 
 def fetch_repositories(
greenmining/__version__.py CHANGED
@@ -1,3 +1,3 @@
 """Version information for greenmining."""
 
-__version__ = "1.0.1"
+__version__ = "1.0.2"
greenmining/services/commit_extractor.py CHANGED
@@ -12,6 +12,7 @@ from github import Github
 from tqdm import tqdm
 
 from greenmining.config import get_config
+from greenmining.models.repository import Repository
 from greenmining.utils import (
     colored_print,
     format_timestamp,
@@ -49,11 +50,11 @@ class CommitExtractor:
         self.github = Github(github_token) if github_token else None
         self.timeout = timeout
 
-    def extract_from_repositories(self, repositories: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    def extract_from_repositories(self, repositories: list[dict[str, Any] | Repository]) -> list[dict[str, Any]]:
         """Extract commits from list of repositories.
 
         Args:
-            repositories: List of repository metadata
+            repositories: List of repository metadata (dicts or Repository objects)
 
         Returns:
             List of commit data dictionaries
@@ -89,15 +90,17 @@ class CommitExtractor:
                     pbar.update(1)
                 except TimeoutError:
                     signal.alarm(0)  # Cancel alarm
+                    repo_name = repo.full_name if isinstance(repo, Repository) else repo["full_name"]
                     colored_print(
-                        f"\nTimeout processing {repo['full_name']} (>{self.timeout}s)", "yellow"
+                        f"\nTimeout processing {repo_name} (>{self.timeout}s)", "yellow"
                     )
-                    failed_repos.append(repo["full_name"])
+                    failed_repos.append(repo_name)
                     pbar.update(1)
                 except Exception as e:
                     signal.alarm(0)  # Cancel alarm
-                    colored_print(f"\nError processing {repo['full_name']}: {e}", "yellow")
-                    failed_repos.append(repo["full_name"])
+                    repo_name = repo.full_name if isinstance(repo, Repository) else repo["full_name"]
+                    colored_print(f"\nError processing {repo_name}: {e}", "yellow")
+                    failed_repos.append(repo_name)
                     pbar.update(1)
 
         if failed_repos:
@@ -114,13 +117,14 @@ class CommitExtractor:
         """Extract commits from a single repository using GitHub API.
 
         Args:
-            repo: Repository metadata dictionary
+            repo: Repository metadata (dict or Repository object)
 
         Returns:
             List of commit dictionaries
         """
         commits = []
-        repo_name = repo["full_name"]
+        # Handle both Repository objects and dicts
+        repo_name = repo.full_name if isinstance(repo, Repository) else repo["full_name"]
 
         try:
             # Get repository from GitHub API
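
The commit_extractor.py hunks above repeat the same dict-or-Repository dispatch three times. The pattern reduces to one normalization step; a minimal sketch, where `repo_name_of` is a hypothetical helper (not part of the package) and `Repository` is assumed to expose `full_name` as the hunks show:

```python
from typing import Any

from greenmining.models.repository import Repository


def repo_name_of(repo: dict[str, Any] | Repository) -> str:
    # Hypothetical helper: normalize either input shape to the repository's
    # full name, mirroring the inline isinstance checks added in this release.
    return repo.full_name if isinstance(repo, Repository) else repo["full_name"]
```

Centralizing the check would remove the three duplicated ternaries; the released code keeps them inline.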
greenmining/services/data_aggregator.py CHANGED
@@ -16,6 +16,7 @@ from greenmining.analyzers import (
     QualitativeAnalyzer,
 )
 from greenmining.config import get_config
+from greenmining.models.repository import Repository
 from greenmining.utils import (
     colored_print,
     format_number,
@@ -270,8 +271,13 @@ class DataAggregator:
         self, results: list[dict[str, Any]], repos: list[dict[str, Any]]
     ) -> list[dict[str, Any]]:
         """Generate per-language statistics."""
-        # Create repo name to language mapping
-        repo_language_map = {repo["full_name"]: repo.get("language", "Unknown") for repo in repos}
+        # Create repo name to language mapping (handle both Repository objects and dicts)
+        repo_language_map = {}
+        for repo in repos:
+            if isinstance(repo, Repository):
+                repo_language_map[repo.full_name] = repo.language or "Unknown"
+            else:
+                repo_language_map[repo["full_name"]] = repo.get("language", "Unknown")
 
         # Group commits by language
         language_commits = defaultdict(list)
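
The data_aggregator.py hunk expands a dict comprehension into an explicit loop so it can branch on input type. Under the same assumptions (hypothetical `language_of` and `full_name_of` helpers; `Repository` exposing `full_name` and `language`), the mapping could also stay a comprehension:

```python
from typing import Any

from greenmining.models.repository import Repository


def language_of(repo: dict[str, Any] | Repository) -> str | None:
    # Repository objects: treat a None language as "Unknown".
    if isinstance(repo, Repository):
        return repo.language or "Unknown"
    # Dicts: default only when the key is absent, matching the released code.
    return repo.get("language", "Unknown")


def full_name_of(repo: dict[str, Any] | Repository) -> str:
    # Same normalization pattern used in commit_extractor.py above.
    return repo.full_name if isinstance(repo, Repository) else repo["full_name"]


# Equivalent to the loop added in the hunk above (sketch, not package code):
# repo_language_map = {full_name_of(r): language_of(r) for r in repos}
```

The explicit loop in the release is arguably clearer at the call site; the helper version avoids repeating the branch elsewhere.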
greenmining-1.0.1.dist-info/METADATA → greenmining-1.0.3.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: greenmining
-Version: 1.0.1
+Version: 1.0.3
 Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
 Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
 License: MIT
@@ -397,6 +397,119 @@ controller.generate_report()
 print("Analysis complete! Check data/ directory for results.")
 ```
 
+#### Complete Working Example: Full Pipeline
+
+This is a complete, production-ready example that demonstrates the entire analysis pipeline; in our testing it successfully analyzed 100 repositories with 30,543 commits.
+
+```python
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Import from greenmining package
+from greenmining import fetch_repositories
+from greenmining.services.commit_extractor import CommitExtractor
+from greenmining.services.data_analyzer import DataAnalyzer
+from greenmining.services.data_aggregator import DataAggregator
+
+# Configuration
+token = os.getenv("GITHUB_TOKEN")
+output_dir = Path("results")
+output_dir.mkdir(exist_ok=True)
+
+# STAGE 1: Fetch Repositories
+print("Fetching repositories...")
+repositories = fetch_repositories(
+    github_token=token,
+    max_repos=100,
+    min_stars=10,
+    keywords="software engineering",
+)
+print(f"✓ Fetched {len(repositories)} repositories")
+
+# STAGE 2: Extract Commits
+print("\nExtracting commits...")
+extractor = CommitExtractor(
+    github_token=token,
+    max_commits=1000,
+    skip_merges=True,
+    days_back=730,
+    timeout=120,
+)
+all_commits = extractor.extract_from_repositories(repositories)
+print(f"✓ Extracted {len(all_commits)} commits")
+
+# Save commits
+extractor.save_results(
+    all_commits,
+    output_dir / "commits.json",
+    len(repositories)
+)
+
+# STAGE 3: Analyze Commits
+print("\nAnalyzing commits...")
+analyzer = DataAnalyzer(
+    enable_nlp=True,
+    enable_ml_features=True,
+    enable_diff_analysis=False,  # Set to True for detailed code analysis (slower)
+)
+analyzed_commits = analyzer.analyze_commits(all_commits)
+
+# Count green-aware commits
+green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
+green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
+print(f"✓ Analyzed {len(analyzed_commits)} commits")
+print(f"✓ Green-aware: {green_count} ({green_percentage:.1f}%)")
+
+# Save analysis
+analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
+
+# STAGE 4: Aggregate Results
+print("\nAggregating results...")
+aggregator = DataAggregator(
+    enable_enhanced_stats=True,
+    enable_temporal=True,
+    temporal_granularity="quarter",
+)
+results = aggregator.aggregate(analyzed_commits, repositories)
+
+# STAGE 5: Save Results
+print("\nSaving results...")
+aggregator.save_results(
+    results,
+    output_dir / "aggregated.json",
+    output_dir / "aggregated.csv",
+    analyzed_commits
+)
+
+# Print summary
+print("\n" + "="*80)
+print("ANALYSIS COMPLETE")
+print("="*80)
+aggregator.print_summary(results)
+print(f"\n📁 Results saved in: {output_dir.absolute()}")
+```
+
+**What this example does:**
+
+1. **Fetches repositories** from GitHub based on keywords and filters
+2. **Extracts commits** from each repository (up to 1000 per repo)
+3. **Analyzes commits** for green software patterns using NLP and ML
+4. **Aggregates results** with temporal analysis and enhanced statistics
+5. **Saves results** to JSON and CSV files for further analysis
+
+**Expected output files:**
+- `commits.json` - All extracted commits with metadata
+- `analyzed.json` - Commits analyzed for green patterns
+- `aggregated.json` - Summary statistics and pattern distributions
+- `aggregated.csv` - Tabular format for spreadsheet analysis
+- `metadata.json` - Experiment configuration and timing
+
+**Performance:** This pipeline successfully processed 100 repositories (30,543 commits) in approximately 6.4 hours, identifying 7,600 green-aware commits (24.9%).
+
 ### Docker Usage
 
 ```bash
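
As a plain-arithmetic sanity check on the performance figures quoted in the new README section (numbers taken directly from the text above, no package code involved):

```python
commits = 30_543
repos = 100
hours = 6.4
green = 7_600

print(f"{commits / (hours * 3600):.2f} commits/s")  # ~1.33 commits per second
print(f"{hours * 60 / repos:.1f} min/repo")         # ~3.8 minutes per repository
print(f"{green / commits:.1%} green-aware")         # ~24.9%, matching the README
```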
greenmining-1.0.1.dist-info/RECORD → greenmining-1.0.3.dist-info/RECORD RENAMED
@@ -1,6 +1,6 @@
-greenmining/__init__.py,sha256=c_Vaq_WW6-SkI_es4cQKXpdEtXdfVEnGjIDxACF6bzk,1764
+greenmining/__init__.py,sha256=p_pk0TmyP34o97wTYVTHkXe7qpGtH43GUVD_iCadrYY,1763
 greenmining/__main__.py,sha256=1RwcSXcwdza6xJX5fRT8-HhZjlnKbkmGY_uxTm-NYZ4,138
-greenmining/__version__.py,sha256=1jisBTMaMLSYH7jCobgcRNneQXzskU1YHLC5Za-5YsQ,66
+greenmining/__version__.py,sha256=3OgUZ5K2OXa9_-2kjlgye1N6G_QeQDeex2uw33Ja6Cs,66
 greenmining/cli.py,sha256=40eKDEZHNeDVb91xKBG70VfPk45mwb4YjuVCC2efVPA,17458
 greenmining/config.py,sha256=1_puT52zNS589hTxEZ3UCqRC_Qw5Jw2UupUPNbNz_hs,5195
 greenmining/gsf_patterns.py,sha256=Prsk_stnQrfOsk0x0zn-zdevbueAnPfGDM4XNA9PbdA,54664
@@ -23,14 +23,14 @@ greenmining/models/repository.py,sha256=k1X9UYZYLl0RznohOHx_Y5wur-ZBvLcNyc9vPVAr
 greenmining/presenters/__init__.py,sha256=-ukAvhNuTvy1Xpknps0faDZ78HKdPHPySzFpQHABzKM,203
 greenmining/presenters/console_presenter.py,sha256=ykJ9Hgors2dRTqQNaqCTxH4fd49F0AslQTgUOr_csI0,5347
 greenmining/services/__init__.py,sha256=7CJDjHMTrY0bBoqzx22AUzIwEvby0FbAUUKYbjSlNPQ,460
-greenmining/services/commit_extractor.py,sha256=ldwfb6pNMPqaAXEYMIGYyo5yFx-tYcLlAiMpJdKc8Ek,12738
-greenmining/services/data_aggregator.py,sha256=WRYmVoscX0kMyI0CRnYFPhYwOBVI73o573LhibZTcPA,23770
+greenmining/services/commit_extractor.py,sha256=FSgoHpMvoqjZ6b1UQYtwfUaLVX_GDfiR0BVd51y-gYk,13126
+greenmining/services/data_aggregator.py,sha256=OqJvQZp9xaZaSmbwWoiHAHECAghd8agbhVmStDvebOU,24054
 greenmining/services/data_analyzer.py,sha256=HZDQLFZDCwCUGIzRjypyXC09Fl_-zaxhly74n3siwQc,16325
 greenmining/services/github_fetcher.py,sha256=J47-plM_NKXwHDSWNBuSUZMnZnGP6wXiJyrVfeWT9ug,11360
 greenmining/services/reports.py,sha256=NCNI9SCTnSLeAO8WmkNIdkB0hr-XyVpuzV0sovOoUOM,27107
-greenmining-1.0.1.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
-greenmining-1.0.1.dist-info/METADATA,sha256=BxsHBbcm6_287X-8Cq2Phi-twkwXN7E8OrDDdYemhUw,25694
-greenmining-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-greenmining-1.0.1.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
-greenmining-1.0.1.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
-greenmining-1.0.1.dist-info/RECORD,,
+greenmining-1.0.3.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
+greenmining-1.0.3.dist-info/METADATA,sha256=0Hj5qXVUkuJhIUBZBRhSysc3zx6L3py0HpZg9vKcl7Y,29260
+greenmining-1.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+greenmining-1.0.3.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
+greenmining-1.0.3.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
+greenmining-1.0.3.dist-info/RECORD,,