greenmining 1.0.2.tar.gz → 1.0.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
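The release bumps the package version from 1.0.2 to 1.0.3; as the file list below shows, the changes are limited to documentation (README.md, PKG-INFO) and version metadata. A minimal upgrade command, assuming the package is installed from PyPI, is:

```bash
# Pull in the new release from PyPI
pip install --upgrade greenmining
```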
Files changed (45)
  1. {greenmining-1.0.2/greenmining.egg-info → greenmining-1.0.3}/PKG-INFO +114 -1
  2. {greenmining-1.0.2 → greenmining-1.0.3}/README.md +113 -0
  3. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/__init__.py +1 -1
  4. {greenmining-1.0.2 → greenmining-1.0.3/greenmining.egg-info}/PKG-INFO +114 -1
  5. {greenmining-1.0.2 → greenmining-1.0.3}/pyproject.toml +1 -1
  6. {greenmining-1.0.2 → greenmining-1.0.3}/CHANGELOG.md +0 -0
  7. {greenmining-1.0.2 → greenmining-1.0.3}/LICENSE +0 -0
  8. {greenmining-1.0.2 → greenmining-1.0.3}/MANIFEST.in +0 -0
  9. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/__main__.py +0 -0
  10. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/__version__.py +0 -0
  11. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/analyzers/__init__.py +0 -0
  12. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/analyzers/code_diff_analyzer.py +0 -0
  13. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/analyzers/ml_feature_extractor.py +0 -0
  14. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/analyzers/nlp_analyzer.py +0 -0
  15. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/analyzers/qualitative_analyzer.py +0 -0
  16. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/analyzers/statistical_analyzer.py +0 -0
  17. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/analyzers/temporal_analyzer.py +0 -0
  18. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/cli.py +0 -0
  19. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/config.py +0 -0
  20. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/controllers/__init__.py +0 -0
  21. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/controllers/repository_controller.py +0 -0
  22. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/gsf_patterns.py +0 -0
  23. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/main.py +0 -0
  24. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/models/__init__.py +0 -0
  25. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/models/aggregated_stats.py +0 -0
  26. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/models/analysis_result.py +0 -0
  27. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/models/commit.py +0 -0
  28. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/models/repository.py +0 -0
  29. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/presenters/__init__.py +0 -0
  30. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/presenters/console_presenter.py +0 -0
  31. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/services/__init__.py +0 -0
  32. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/services/commit_extractor.py +0 -0
  33. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/services/data_aggregator.py +0 -0
  34. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/services/data_analyzer.py +0 -0
  35. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/services/github_fetcher.py +0 -0
  36. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/services/reports.py +0 -0
  37. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining/utils.py +0 -0
  38. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining.egg-info/SOURCES.txt +0 -0
  39. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining.egg-info/dependency_links.txt +0 -0
  40. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining.egg-info/entry_points.txt +0 -0
  41. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining.egg-info/requires.txt +0 -0
  42. {greenmining-1.0.2 → greenmining-1.0.3}/greenmining.egg-info/top_level.txt +0 -0
  43. {greenmining-1.0.2 → greenmining-1.0.3}/pytest.ini +0 -0
  44. {greenmining-1.0.2 → greenmining-1.0.3}/setup.cfg +0 -0
  45. {greenmining-1.0.2 → greenmining-1.0.3}/setup.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: greenmining
- Version: 1.0.2
+ Version: 1.0.3
  Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
  License: MIT
@@ -397,6 +397,119 @@ controller.generate_report()
  print("Analysis complete! Check data/ directory for results.")
  ```

+ #### Complete Working Example: Full Pipeline
+
+ This is a complete, production-ready example that demonstrates the entire analysis pipeline. This example successfully analyzed 100 repositories with 30,543 commits in our testing.
+
+ ```python
+ import os
+ from pathlib import Path
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Import from greenmining package
+ from greenmining import fetch_repositories
+ from greenmining.services.commit_extractor import CommitExtractor
+ from greenmining.services.data_analyzer import DataAnalyzer
+ from greenmining.services.data_aggregator import DataAggregator
+
+ # Configuration
+ token = os.getenv("GITHUB_TOKEN")
+ output_dir = Path("results")
+ output_dir.mkdir(exist_ok=True)
+
+ # STAGE 1: Fetch Repositories
+ print("Fetching repositories...")
+ repositories = fetch_repositories(
+     github_token=token,
+     max_repos=100,
+     min_stars=10,
+     keywords="software engineering",
+ )
+ print(f"✓ Fetched {len(repositories)} repositories")
+
+ # STAGE 2: Extract Commits
+ print("\nExtracting commits...")
+ extractor = CommitExtractor(
+     github_token=token,
+     max_commits=1000,
+     skip_merges=True,
+     days_back=730,
+     timeout=120,
+ )
+ all_commits = extractor.extract_from_repositories(repositories)
+ print(f"✓ Extracted {len(all_commits)} commits")
+
+ # Save commits
+ extractor.save_results(
+     all_commits,
+     output_dir / "commits.json",
+     len(repositories)
+ )
+
+ # STAGE 3: Analyze Commits
+ print("\nAnalyzing commits...")
+ analyzer = DataAnalyzer(
+     enable_nlp=True,
+     enable_ml_features=True,
+     enable_diff_analysis=False,  # Set to True for detailed code analysis (slower)
+ )
+ analyzed_commits = analyzer.analyze_commits(all_commits)
+
+ # Count green-aware commits
+ green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
+ green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
+ print(f"✓ Analyzed {len(analyzed_commits)} commits")
+ print(f"✓ Green-aware: {green_count} ({green_percentage:.1f}%)")
+
+ # Save analysis
+ analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
+
+ # STAGE 4: Aggregate Results
+ print("\nAggregating results...")
+ aggregator = DataAggregator(
+     enable_enhanced_stats=True,
+     enable_temporal=True,
+     temporal_granularity="quarter",
+ )
+ results = aggregator.aggregate(analyzed_commits, repositories)
+
+ # STAGE 5: Save Results
+ print("\nSaving results...")
+ aggregator.save_results(
+     results,
+     output_dir / "aggregated.json",
+     output_dir / "aggregated.csv",
+     analyzed_commits
+ )
+
+ # Print summary
+ print("\n" + "="*80)
+ print("ANALYSIS COMPLETE")
+ print("="*80)
+ aggregator.print_summary(results)
+ print(f"\n📁 Results saved in: {output_dir.absolute()}")
+ ```
+
+ **What this example does:**
+
+ 1. **Fetches repositories** from GitHub based on keywords and filters
+ 2. **Extracts commits** from each repository (up to 1000 per repo)
+ 3. **Analyzes commits** for green software patterns using NLP and ML
+ 4. **Aggregates results** with temporal analysis and enhanced statistics
+ 5. **Saves results** to JSON and CSV files for further analysis
+
+ **Expected output files:**
+ - `commits.json` - All extracted commits with metadata
+ - `analyzed.json` - Commits analyzed for green patterns
+ - `aggregated.json` - Summary statistics and pattern distributions
+ - `aggregated.csv` - Tabular format for spreadsheet analysis
+ - `metadata.json` - Experiment configuration and timing
+
+ **Performance:** This pipeline successfully processed 100 repositories (30,543 commits) in approximately 6.4 hours, identifying 7,600 green-aware commits (24.9%).
+
  ### Docker Usage

  ```bash
@@ -343,6 +343,119 @@ controller.generate_report()
  print("Analysis complete! Check data/ directory for results.")
  ```

+ #### Complete Working Example: Full Pipeline
+
+ This is a complete, production-ready example that demonstrates the entire analysis pipeline. This example successfully analyzed 100 repositories with 30,543 commits in our testing.
+
+ ```python
+ import os
+ from pathlib import Path
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Import from greenmining package
+ from greenmining import fetch_repositories
+ from greenmining.services.commit_extractor import CommitExtractor
+ from greenmining.services.data_analyzer import DataAnalyzer
+ from greenmining.services.data_aggregator import DataAggregator
+
+ # Configuration
+ token = os.getenv("GITHUB_TOKEN")
+ output_dir = Path("results")
+ output_dir.mkdir(exist_ok=True)
+
+ # STAGE 1: Fetch Repositories
+ print("Fetching repositories...")
+ repositories = fetch_repositories(
+     github_token=token,
+     max_repos=100,
+     min_stars=10,
+     keywords="software engineering",
+ )
+ print(f"✓ Fetched {len(repositories)} repositories")
+
+ # STAGE 2: Extract Commits
+ print("\nExtracting commits...")
+ extractor = CommitExtractor(
+     github_token=token,
+     max_commits=1000,
+     skip_merges=True,
+     days_back=730,
+     timeout=120,
+ )
+ all_commits = extractor.extract_from_repositories(repositories)
+ print(f"✓ Extracted {len(all_commits)} commits")
+
+ # Save commits
+ extractor.save_results(
+     all_commits,
+     output_dir / "commits.json",
+     len(repositories)
+ )
+
+ # STAGE 3: Analyze Commits
+ print("\nAnalyzing commits...")
+ analyzer = DataAnalyzer(
+     enable_nlp=True,
+     enable_ml_features=True,
+     enable_diff_analysis=False,  # Set to True for detailed code analysis (slower)
+ )
+ analyzed_commits = analyzer.analyze_commits(all_commits)
+
+ # Count green-aware commits
+ green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
+ green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
+ print(f"✓ Analyzed {len(analyzed_commits)} commits")
+ print(f"✓ Green-aware: {green_count} ({green_percentage:.1f}%)")
+
+ # Save analysis
+ analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
+
+ # STAGE 4: Aggregate Results
+ print("\nAggregating results...")
+ aggregator = DataAggregator(
+     enable_enhanced_stats=True,
+     enable_temporal=True,
+     temporal_granularity="quarter",
+ )
+ results = aggregator.aggregate(analyzed_commits, repositories)
+
+ # STAGE 5: Save Results
+ print("\nSaving results...")
+ aggregator.save_results(
+     results,
+     output_dir / "aggregated.json",
+     output_dir / "aggregated.csv",
+     analyzed_commits
+ )
+
+ # Print summary
+ print("\n" + "="*80)
+ print("ANALYSIS COMPLETE")
+ print("="*80)
+ aggregator.print_summary(results)
+ print(f"\n📁 Results saved in: {output_dir.absolute()}")
+ ```
+
+ **What this example does:**
+
+ 1. **Fetches repositories** from GitHub based on keywords and filters
+ 2. **Extracts commits** from each repository (up to 1000 per repo)
+ 3. **Analyzes commits** for green software patterns using NLP and ML
+ 4. **Aggregates results** with temporal analysis and enhanced statistics
+ 5. **Saves results** to JSON and CSV files for further analysis
+
+ **Expected output files:**
+ - `commits.json` - All extracted commits with metadata
+ - `analyzed.json` - Commits analyzed for green patterns
+ - `aggregated.json` - Summary statistics and pattern distributions
+ - `aggregated.csv` - Tabular format for spreadsheet analysis
+ - `metadata.json` - Experiment configuration and timing
+
+ **Performance:** This pipeline successfully processed 100 repositories (30,543 commits) in approximately 6.4 hours, identifying 7,600 green-aware commits (24.9%).
+
  ### Docker Usage

  ```bash
@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
      is_green_aware,
  )

- __version__ = "0.1.12"
+ __version__ = "1.0.3"


  def fetch_repositories(
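This hunk also brings the module-level `__version__` string, previously left at 0.1.12, in line with the distribution version. A quick sanity check, assuming greenmining 1.0.3 is installed:

```bash
# Print the version reported by the installed module (expected: 1.0.3)
python -c "import greenmining; print(greenmining.__version__)"
```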
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: greenmining
- Version: 1.0.2
+ Version: 1.0.3
  Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
  License: MIT
@@ -397,6 +397,119 @@ controller.generate_report()
  print("Analysis complete! Check data/ directory for results.")
  ```

+ #### Complete Working Example: Full Pipeline
+
+ This is a complete, production-ready example that demonstrates the entire analysis pipeline. This example successfully analyzed 100 repositories with 30,543 commits in our testing.
+
+ ```python
+ import os
+ from pathlib import Path
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Import from greenmining package
+ from greenmining import fetch_repositories
+ from greenmining.services.commit_extractor import CommitExtractor
+ from greenmining.services.data_analyzer import DataAnalyzer
+ from greenmining.services.data_aggregator import DataAggregator
+
+ # Configuration
+ token = os.getenv("GITHUB_TOKEN")
+ output_dir = Path("results")
+ output_dir.mkdir(exist_ok=True)
+
+ # STAGE 1: Fetch Repositories
+ print("Fetching repositories...")
+ repositories = fetch_repositories(
+     github_token=token,
+     max_repos=100,
+     min_stars=10,
+     keywords="software engineering",
+ )
+ print(f"✓ Fetched {len(repositories)} repositories")
+
+ # STAGE 2: Extract Commits
+ print("\nExtracting commits...")
+ extractor = CommitExtractor(
+     github_token=token,
+     max_commits=1000,
+     skip_merges=True,
+     days_back=730,
+     timeout=120,
+ )
+ all_commits = extractor.extract_from_repositories(repositories)
+ print(f"✓ Extracted {len(all_commits)} commits")
+
+ # Save commits
+ extractor.save_results(
+     all_commits,
+     output_dir / "commits.json",
+     len(repositories)
+ )
+
+ # STAGE 3: Analyze Commits
+ print("\nAnalyzing commits...")
+ analyzer = DataAnalyzer(
+     enable_nlp=True,
+     enable_ml_features=True,
+     enable_diff_analysis=False,  # Set to True for detailed code analysis (slower)
+ )
+ analyzed_commits = analyzer.analyze_commits(all_commits)
+
+ # Count green-aware commits
+ green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
+ green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
+ print(f"✓ Analyzed {len(analyzed_commits)} commits")
+ print(f"✓ Green-aware: {green_count} ({green_percentage:.1f}%)")
+
+ # Save analysis
+ analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
+
+ # STAGE 4: Aggregate Results
+ print("\nAggregating results...")
+ aggregator = DataAggregator(
+     enable_enhanced_stats=True,
+     enable_temporal=True,
+     temporal_granularity="quarter",
+ )
+ results = aggregator.aggregate(analyzed_commits, repositories)
+
+ # STAGE 5: Save Results
+ print("\nSaving results...")
+ aggregator.save_results(
+     results,
+     output_dir / "aggregated.json",
+     output_dir / "aggregated.csv",
+     analyzed_commits
+ )
+
+ # Print summary
+ print("\n" + "="*80)
+ print("ANALYSIS COMPLETE")
+ print("="*80)
+ aggregator.print_summary(results)
+ print(f"\n📁 Results saved in: {output_dir.absolute()}")
+ ```
+
+ **What this example does:**
+
+ 1. **Fetches repositories** from GitHub based on keywords and filters
+ 2. **Extracts commits** from each repository (up to 1000 per repo)
+ 3. **Analyzes commits** for green software patterns using NLP and ML
+ 4. **Aggregates results** with temporal analysis and enhanced statistics
+ 5. **Saves results** to JSON and CSV files for further analysis
+
+ **Expected output files:**
+ - `commits.json` - All extracted commits with metadata
+ - `analyzed.json` - Commits analyzed for green patterns
+ - `aggregated.json` - Summary statistics and pattern distributions
+ - `aggregated.csv` - Tabular format for spreadsheet analysis
+ - `metadata.json` - Experiment configuration and timing
+
+ **Performance:** This pipeline successfully processed 100 repositories (30,543 commits) in approximately 6.4 hours, identifying 7,600 green-aware commits (24.9%).
+
  ### Docker Usage

  ```bash
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "greenmining"
- version = "1.0.2"
+ version = "1.0.3"
  description = "Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices"
  readme = "README.md"
  requires-python = ">=3.9"