greenmining 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
greenmining/__init__.py CHANGED
@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
9
9
  is_green_aware,
10
10
  )
11
11
 
12
- __version__ = "0.1.12"
12
+ __version__ = "1.0.3"
13
13
 
14
14
 
15
15
  def fetch_repositories(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: greenmining
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
5
5
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
6
6
  License: MIT
@@ -397,6 +397,119 @@ controller.generate_report()
397
397
  print("Analysis complete! Check data/ directory for results.")
398
398
  ```
399
399
 
400
+ #### Complete Working Example: Full Pipeline
401
+
402
+ This complete, production-ready example demonstrates the entire analysis pipeline; in our testing it successfully analyzed 100 repositories with 30,543 commits.
403
+
404
+ ```python
405
+ import os
406
+ from pathlib import Path
407
+ from dotenv import load_dotenv
408
+
409
+ # Load environment variables
410
+ load_dotenv()
411
+
412
+ # Import from greenmining package
413
+ from greenmining import fetch_repositories
414
+ from greenmining.services.commit_extractor import CommitExtractor
415
+ from greenmining.services.data_analyzer import DataAnalyzer
416
+ from greenmining.services.data_aggregator import DataAggregator
417
+
418
+ # Configuration
419
+ token = os.getenv("GITHUB_TOKEN")
420
+ output_dir = Path("results")
421
+ output_dir.mkdir(exist_ok=True)
422
+
423
+ # STAGE 1: Fetch Repositories
424
+ print("Fetching repositories...")
425
+ repositories = fetch_repositories(
426
+ github_token=token,
427
+ max_repos=100,
428
+ min_stars=10,
429
+ keywords="software engineering",
430
+ )
431
+ print(f"✓ Fetched {len(repositories)} repositories")
432
+
433
+ # STAGE 2: Extract Commits
434
+ print("\nExtracting commits...")
435
+ extractor = CommitExtractor(
436
+ github_token=token,
437
+ max_commits=1000,
438
+ skip_merges=True,
439
+ days_back=730,
440
+ timeout=120,
441
+ )
442
+ all_commits = extractor.extract_from_repositories(repositories)
443
+ print(f"✓ Extracted {len(all_commits)} commits")
444
+
445
+ # Save commits
446
+ extractor.save_results(
447
+ all_commits,
448
+ output_dir / "commits.json",
449
+ len(repositories)
450
+ )
451
+
452
+ # STAGE 3: Analyze Commits
453
+ print("\nAnalyzing commits...")
454
+ analyzer = DataAnalyzer(
455
+ enable_nlp=True,
456
+ enable_ml_features=True,
457
+ enable_diff_analysis=False, # Set to True for detailed code analysis (slower)
458
+ )
459
+ analyzed_commits = analyzer.analyze_commits(all_commits)
460
+
461
+ # Count green-aware commits
462
+ green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
463
+ green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
464
+ print(f"✓ Analyzed {len(analyzed_commits)} commits")
465
+ print(f"✓ Green-aware: {green_count} ({green_percentage:.1f}%)")
466
+
467
+ # Save analysis
468
+ analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
469
+
470
+ # STAGE 4: Aggregate Results
471
+ print("\nAggregating results...")
472
+ aggregator = DataAggregator(
473
+ enable_enhanced_stats=True,
474
+ enable_temporal=True,
475
+ temporal_granularity="quarter",
476
+ )
477
+ results = aggregator.aggregate(analyzed_commits, repositories)
478
+
479
+ # STAGE 5: Save Results
480
+ print("\nSaving results...")
481
+ aggregator.save_results(
482
+ results,
483
+ output_dir / "aggregated.json",
484
+ output_dir / "aggregated.csv",
485
+ analyzed_commits
486
+ )
487
+
488
+ # Print summary
489
+ print("\n" + "="*80)
490
+ print("ANALYSIS COMPLETE")
491
+ print("="*80)
492
+ aggregator.print_summary(results)
493
+ print(f"\n📁 Results saved in: {output_dir.absolute()}")
494
+ ```
495
+
496
+ **What this example does:**
497
+
498
+ 1. **Fetches repositories** from GitHub based on keywords and filters
499
+ 2. **Extracts commits** from each repository (up to 1000 per repo)
500
+ 3. **Analyzes commits** for green software patterns using NLP and ML
501
+ 4. **Aggregates results** with temporal analysis and enhanced statistics
502
+ 5. **Saves results** to JSON and CSV files for further analysis
503
+
504
+ **Expected output files:**
505
+ - `commits.json` - All extracted commits with metadata
506
+ - `analyzed.json` - Commits analyzed for green patterns
507
+ - `aggregated.json` - Summary statistics and pattern distributions
508
+ - `aggregated.csv` - Tabular format for spreadsheet analysis
509
+ - `metadata.json` - Experiment configuration and timing
510
+
511
+ **Performance:** This pipeline successfully processed 100 repositories (30,543 commits) in approximately 6.4 hours, identifying 7,600 green-aware commits (24.9%).
512
+
400
513
  ### Docker Usage
401
514
 
402
515
  ```bash
@@ -1,4 +1,4 @@
1
- greenmining/__init__.py,sha256=c_Vaq_WW6-SkI_es4cQKXpdEtXdfVEnGjIDxACF6bzk,1764
1
+ greenmining/__init__.py,sha256=p_pk0TmyP34o97wTYVTHkXe7qpGtH43GUVD_iCadrYY,1763
2
2
  greenmining/__main__.py,sha256=1RwcSXcwdza6xJX5fRT8-HhZjlnKbkmGY_uxTm-NYZ4,138
3
3
  greenmining/__version__.py,sha256=3OgUZ5K2OXa9_-2kjlgye1N6G_QeQDeex2uw33Ja6Cs,66
4
4
  greenmining/cli.py,sha256=40eKDEZHNeDVb91xKBG70VfPk45mwb4YjuVCC2efVPA,17458
@@ -28,9 +28,9 @@ greenmining/services/data_aggregator.py,sha256=OqJvQZp9xaZaSmbwWoiHAHECAghd8agbh
28
28
  greenmining/services/data_analyzer.py,sha256=HZDQLFZDCwCUGIzRjypyXC09Fl_-zaxhly74n3siwQc,16325
29
29
  greenmining/services/github_fetcher.py,sha256=J47-plM_NKXwHDSWNBuSUZMnZnGP6wXiJyrVfeWT9ug,11360
30
30
  greenmining/services/reports.py,sha256=NCNI9SCTnSLeAO8WmkNIdkB0hr-XyVpuzV0sovOoUOM,27107
31
- greenmining-1.0.2.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
32
- greenmining-1.0.2.dist-info/METADATA,sha256=vTygz3S30HRdkpHdMIiD-jau6Lx60VcPsf3OSSBeL0w,25694
33
- greenmining-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
34
- greenmining-1.0.2.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
35
- greenmining-1.0.2.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
36
- greenmining-1.0.2.dist-info/RECORD,,
31
+ greenmining-1.0.3.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
32
+ greenmining-1.0.3.dist-info/METADATA,sha256=0Hj5qXVUkuJhIUBZBRhSysc3zx6L3py0HpZg9vKcl7Y,29260
33
+ greenmining-1.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
34
+ greenmining-1.0.3.dist-info/entry_points.txt,sha256=oHvTWMzNFGf2W3CFEKVVPsG4exeMv0MaQu9YsUoQ9lw,53
35
+ greenmining-1.0.3.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
36
+ greenmining-1.0.3.dist-info/RECORD,,