greenmining 1.0.6__tar.gz → 1.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {greenmining-1.0.6/greenmining.egg-info → greenmining-1.0.7}/PKG-INFO +158 -14
  2. {greenmining-1.0.6 → greenmining-1.0.7}/README.md +157 -13
  3. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/__init__.py +9 -1
  4. {greenmining-1.0.6 → greenmining-1.0.7/greenmining.egg-info}/PKG-INFO +158 -14
  5. {greenmining-1.0.6 → greenmining-1.0.7}/pyproject.toml +1 -1
  6. {greenmining-1.0.6 → greenmining-1.0.7}/CHANGELOG.md +0 -0
  7. {greenmining-1.0.6 → greenmining-1.0.7}/LICENSE +0 -0
  8. {greenmining-1.0.6 → greenmining-1.0.7}/MANIFEST.in +0 -0
  9. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/__main__.py +0 -0
  10. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/__version__.py +0 -0
  11. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/__init__.py +0 -0
  12. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/code_diff_analyzer.py +0 -0
  13. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/metrics_power_correlator.py +0 -0
  14. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/power_regression.py +0 -0
  15. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/qualitative_analyzer.py +0 -0
  16. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/statistical_analyzer.py +0 -0
  17. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/temporal_analyzer.py +0 -0
  18. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/analyzers/version_power_analyzer.py +0 -0
  19. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/config.py +0 -0
  20. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/controllers/__init__.py +0 -0
  21. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/controllers/repository_controller.py +0 -0
  22. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/dashboard/__init__.py +0 -0
  23. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/dashboard/app.py +0 -0
  24. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/energy/__init__.py +0 -0
  25. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/energy/base.py +0 -0
  26. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/energy/carbon_reporter.py +0 -0
  27. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/energy/codecarbon_meter.py +0 -0
  28. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/energy/cpu_meter.py +0 -0
  29. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/energy/rapl.py +0 -0
  30. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/gsf_patterns.py +0 -0
  31. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/models/__init__.py +0 -0
  32. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/models/aggregated_stats.py +0 -0
  33. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/models/analysis_result.py +0 -0
  34. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/models/commit.py +0 -0
  35. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/models/repository.py +0 -0
  36. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/presenters/__init__.py +0 -0
  37. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/presenters/console_presenter.py +0 -0
  38. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/__init__.py +0 -0
  39. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/commit_extractor.py +0 -0
  40. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/data_aggregator.py +0 -0
  41. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/data_analyzer.py +0 -0
  42. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/github_fetcher.py +0 -0
  43. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/github_graphql_fetcher.py +0 -0
  44. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/local_repo_analyzer.py +0 -0
  45. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/services/reports.py +0 -0
  46. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining/utils.py +0 -0
  47. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining.egg-info/SOURCES.txt +0 -0
  48. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining.egg-info/dependency_links.txt +0 -0
  49. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining.egg-info/requires.txt +0 -0
  50. {greenmining-1.0.6 → greenmining-1.0.7}/greenmining.egg-info/top_level.txt +0 -0
  51. {greenmining-1.0.6 → greenmining-1.0.7}/setup.cfg +0 -0
  52. {greenmining-1.0.6 → greenmining-1.0.7}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: greenmining
3
- Version: 1.0.6
3
+ Version: 1.0.7
4
4
  Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
5
5
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
6
6
  License: MIT
@@ -330,7 +330,137 @@ print(f"Top patterns: {stats['top_patterns'][:5]}")
330
330
  aggregator.export_to_csv(results, "output.csv")
331
331
  ```
332
332
 
333
- #### Batch Analysis
333
+ #### URL-Based Repository Analysis
334
+
335
+ ```python
336
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
337
+
338
+ analyzer = LocalRepoAnalyzer(
339
+ max_commits=200,
340
+ cleanup_after=True,
341
+ )
342
+
343
+ result = analyzer.analyze_repository("https://github.com/pallets/flask")
344
+
345
+ print(f"Repository: {result.name}")
346
+ print(f"Commits analyzed: {result.total_commits}")
347
+ print(f"Green-aware: {result.green_commits} ({result.green_commit_rate:.1%})")
348
+
349
+ for commit in result.commits[:5]:
350
+     if commit.green_aware:
351
+         print(f" {commit.message[:60]}...")
352
+ ```
353
+
354
+ #### Batch Analysis with Parallelism
355
+
356
+ ```python
357
+ from greenmining import analyze_repositories
358
+
359
+ results = analyze_repositories(
360
+ urls=[
361
+ "https://github.com/kubernetes/kubernetes",
362
+ "https://github.com/istio/istio",
363
+ "https://github.com/envoyproxy/envoy",
364
+ ],
365
+ max_commits=100,
366
+ parallel_workers=3,
367
+ energy_tracking=True,
368
+ energy_backend="auto",
369
+ )
370
+
371
+ for result in results:
372
+     print(f"{result.name}: {result.green_commit_rate:.1%} green")
373
+ ```
374
+
375
+ #### Private Repository Analysis
376
+
377
+ ```python
378
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
379
+
380
+ # HTTPS with token
381
+ analyzer = LocalRepoAnalyzer(github_token="ghp_xxxx")
382
+ result = analyzer.analyze_repository("https://github.com/company/private-repo")
383
+
384
+ # SSH with key
385
+ analyzer = LocalRepoAnalyzer(ssh_key_path="~/.ssh/id_rsa")
386
+ result = analyzer.analyze_repository("git@github.com:company/private-repo.git")
387
+ ```
388
+
389
+ #### Power Regression Detection
390
+
391
+ ```python
392
+ from greenmining.analyzers import PowerRegressionDetector
393
+
394
+ detector = PowerRegressionDetector(
395
+ test_command="pytest tests/ -x",
396
+ energy_backend="rapl",
397
+ threshold_percent=5.0,
398
+ iterations=5,
399
+ )
400
+
401
+ regressions = detector.detect(
402
+ repo_path="/path/to/repo",
403
+ baseline_commit="v1.0.0",
404
+ target_commit="HEAD",
405
+ )
406
+
407
+ for regression in regressions:
408
+     print(f"Commit {regression.sha[:8]}: +{regression.power_increase:.1f}%")
409
+ ```
410
+
411
+ #### Version Power Comparison
412
+
413
+ ```python
414
+ from greenmining.analyzers import VersionPowerAnalyzer
415
+
416
+ analyzer = VersionPowerAnalyzer(
417
+ test_command="pytest tests/",
418
+ energy_backend="rapl",
419
+ iterations=10,
420
+ warmup_iterations=2,
421
+ )
422
+
423
+ report = analyzer.analyze_versions(
424
+ repo_path="/path/to/repo",
425
+ versions=["v1.0", "v1.1", "v1.2", "v2.0"],
426
+ )
427
+
428
+ print(report.summary())
429
+ print(f"Trend: {report.trend}")
430
+ print(f"Most efficient: {report.most_efficient}")
431
+ ```
432
+
433
+ #### Metrics-to-Power Correlation
434
+
435
+ ```python
436
+ from greenmining.analyzers import MetricsPowerCorrelator
437
+
438
+ correlator = MetricsPowerCorrelator()
439
+ correlator.fit(
440
+ metrics=["complexity", "nloc", "code_churn"],
441
+ metrics_values={
442
+ "complexity": [10, 20, 30, 40],
443
+ "nloc": [100, 200, 300, 400],
444
+ "code_churn": [50, 100, 150, 200],
445
+ },
446
+ power_measurements=[5.0, 8.0, 12.0, 15.0],
447
+ )
448
+
449
+ print(f"Pearson: {correlator.pearson}")
450
+ print(f"Spearman: {correlator.spearman}")
451
+ print(f"Feature importance: {correlator.feature_importance}")
452
+ ```
453
+
454
+ #### Web Dashboard
455
+
456
+ ```python
457
+ from greenmining.dashboard import run_dashboard
458
+
459
+ # Launch interactive dashboard (requires pip install greenmining[dashboard])
460
+ run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
461
+ ```
462
+
463
+ #### Pipeline Batch Analysis
334
464
 
335
465
  ```python
336
466
  from greenmining.controllers.repository_controller import RepositoryController
@@ -551,17 +681,24 @@ config = Config(
551
681
 
552
682
  ### Core Capabilities
553
683
 
554
- - **Pattern Detection**: Automatically identifies 122 sustainability patterns across 15 categories
555
- - **Keyword Analysis**: Scans commit messages using 321 green software keywords
556
- - **Custom Repository Fetching**: Fetch repositories with custom search keywords (not limited to microservices)
557
- - **Repository Analysis**: Analyzes repositories from GitHub with flexible filtering
558
- - **Batch Processing**: Analyze hundreds of repositories and thousands of commits
559
- - **Multi-format Output**: Generates Markdown reports, CSV exports, and JSON data
560
- - **Statistical Analysis**: Calculates green-awareness metrics, pattern distribution, and trends
684
+ - **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
685
+ - **Keyword Analysis**: 321 green software detection keywords
686
+ - **Repository Fetching**: GraphQL API with date, star, and language filters
687
+ - **URL-Based Analysis**: Direct PyDriller analysis from GitHub URLs (HTTPS and SSH)
688
+ - **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
689
+ - **Private Repository Support**: Authentication via SSH keys or GitHub tokens
690
+ - **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
691
+ - **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles and cloud region support (AWS, GCP, Azure)
692
+ - **Power Regression Detection**: Identify commits that increased energy consumption
693
+ - **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
694
+ - **Version Power Comparison**: Compare power consumption across software versions with trend detection
695
+ - **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
696
+ - **Source Code Access**: Before/after source code for refactoring detection
697
+ - **Full Process Metrics**: All 8 PyDriller process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
698
+ - **Statistical Analysis**: Correlations, effect sizes, and temporal trends
699
+ - **Multi-format Output**: Markdown reports, CSV exports, JSON data
700
+ - **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
561
701
  - **Docker Support**: Pre-built images for containerized analysis
562
- - **Programmatic API**: Full Python API for custom workflows and integrations
563
- - **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
564
- - **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
565
702
 
566
703
  ### Energy Measurement
567
704
 
@@ -712,8 +849,15 @@ ruff check greenmining/ tests/
712
849
  - Python 3.9+
713
850
  - PyGithub >= 2.1.1
714
851
  - PyDriller >= 2.5
715
- - pandas >= 2.2.0
716
- - codecarbon >= 2.0.0 (optional, for cross-platform energy measurement)
852
+ - pandas >= 2.2.0
853
+
854
+ **Optional dependencies:**
855
+
856
+ ```bash
857
+ pip install greenmining[energy] # psutil, codecarbon (energy measurement)
858
+ pip install greenmining[dashboard] # flask (web dashboard)
859
+ pip install greenmining[dev] # pytest, black, ruff, mypy (development)
860
+ ```
717
861
 
718
862
  ## License
719
863
 
@@ -272,7 +272,137 @@ print(f"Top patterns: {stats['top_patterns'][:5]}")
272
272
  aggregator.export_to_csv(results, "output.csv")
273
273
  ```
274
274
 
275
- #### Batch Analysis
275
+ #### URL-Based Repository Analysis
276
+
277
+ ```python
278
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
279
+
280
+ analyzer = LocalRepoAnalyzer(
281
+ max_commits=200,
282
+ cleanup_after=True,
283
+ )
284
+
285
+ result = analyzer.analyze_repository("https://github.com/pallets/flask")
286
+
287
+ print(f"Repository: {result.name}")
288
+ print(f"Commits analyzed: {result.total_commits}")
289
+ print(f"Green-aware: {result.green_commits} ({result.green_commit_rate:.1%})")
290
+
291
+ for commit in result.commits[:5]:
292
+     if commit.green_aware:
293
+         print(f" {commit.message[:60]}...")
294
+ ```
295
+
296
+ #### Batch Analysis with Parallelism
297
+
298
+ ```python
299
+ from greenmining import analyze_repositories
300
+
301
+ results = analyze_repositories(
302
+ urls=[
303
+ "https://github.com/kubernetes/kubernetes",
304
+ "https://github.com/istio/istio",
305
+ "https://github.com/envoyproxy/envoy",
306
+ ],
307
+ max_commits=100,
308
+ parallel_workers=3,
309
+ energy_tracking=True,
310
+ energy_backend="auto",
311
+ )
312
+
313
+ for result in results:
314
+     print(f"{result.name}: {result.green_commit_rate:.1%} green")
315
+ ```
316
+
317
+ #### Private Repository Analysis
318
+
319
+ ```python
320
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
321
+
322
+ # HTTPS with token
323
+ analyzer = LocalRepoAnalyzer(github_token="ghp_xxxx")
324
+ result = analyzer.analyze_repository("https://github.com/company/private-repo")
325
+
326
+ # SSH with key
327
+ analyzer = LocalRepoAnalyzer(ssh_key_path="~/.ssh/id_rsa")
328
+ result = analyzer.analyze_repository("git@github.com:company/private-repo.git")
329
+ ```
330
+
331
+ #### Power Regression Detection
332
+
333
+ ```python
334
+ from greenmining.analyzers import PowerRegressionDetector
335
+
336
+ detector = PowerRegressionDetector(
337
+ test_command="pytest tests/ -x",
338
+ energy_backend="rapl",
339
+ threshold_percent=5.0,
340
+ iterations=5,
341
+ )
342
+
343
+ regressions = detector.detect(
344
+ repo_path="/path/to/repo",
345
+ baseline_commit="v1.0.0",
346
+ target_commit="HEAD",
347
+ )
348
+
349
+ for regression in regressions:
350
+     print(f"Commit {regression.sha[:8]}: +{regression.power_increase:.1f}%")
351
+ ```
352
+
353
+ #### Version Power Comparison
354
+
355
+ ```python
356
+ from greenmining.analyzers import VersionPowerAnalyzer
357
+
358
+ analyzer = VersionPowerAnalyzer(
359
+ test_command="pytest tests/",
360
+ energy_backend="rapl",
361
+ iterations=10,
362
+ warmup_iterations=2,
363
+ )
364
+
365
+ report = analyzer.analyze_versions(
366
+ repo_path="/path/to/repo",
367
+ versions=["v1.0", "v1.1", "v1.2", "v2.0"],
368
+ )
369
+
370
+ print(report.summary())
371
+ print(f"Trend: {report.trend}")
372
+ print(f"Most efficient: {report.most_efficient}")
373
+ ```
374
+
375
+ #### Metrics-to-Power Correlation
376
+
377
+ ```python
378
+ from greenmining.analyzers import MetricsPowerCorrelator
379
+
380
+ correlator = MetricsPowerCorrelator()
381
+ correlator.fit(
382
+ metrics=["complexity", "nloc", "code_churn"],
383
+ metrics_values={
384
+ "complexity": [10, 20, 30, 40],
385
+ "nloc": [100, 200, 300, 400],
386
+ "code_churn": [50, 100, 150, 200],
387
+ },
388
+ power_measurements=[5.0, 8.0, 12.0, 15.0],
389
+ )
390
+
391
+ print(f"Pearson: {correlator.pearson}")
392
+ print(f"Spearman: {correlator.spearman}")
393
+ print(f"Feature importance: {correlator.feature_importance}")
394
+ ```
395
+
396
+ #### Web Dashboard
397
+
398
+ ```python
399
+ from greenmining.dashboard import run_dashboard
400
+
401
+ # Launch interactive dashboard (requires pip install greenmining[dashboard])
402
+ run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
403
+ ```
404
+
405
+ #### Pipeline Batch Analysis
276
406
 
277
407
  ```python
278
408
  from greenmining.controllers.repository_controller import RepositoryController
@@ -493,17 +623,24 @@ config = Config(
493
623
 
494
624
  ### Core Capabilities
495
625
 
496
- - **Pattern Detection**: Automatically identifies 122 sustainability patterns across 15 categories
497
- - **Keyword Analysis**: Scans commit messages using 321 green software keywords
498
- - **Custom Repository Fetching**: Fetch repositories with custom search keywords (not limited to microservices)
499
- - **Repository Analysis**: Analyzes repositories from GitHub with flexible filtering
500
- - **Batch Processing**: Analyze hundreds of repositories and thousands of commits
501
- - **Multi-format Output**: Generates Markdown reports, CSV exports, and JSON data
502
- - **Statistical Analysis**: Calculates green-awareness metrics, pattern distribution, and trends
626
+ - **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
627
+ - **Keyword Analysis**: 321 green software detection keywords
628
+ - **Repository Fetching**: GraphQL API with date, star, and language filters
629
+ - **URL-Based Analysis**: Direct PyDriller analysis from GitHub URLs (HTTPS and SSH)
630
+ - **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
631
+ - **Private Repository Support**: Authentication via SSH keys or GitHub tokens
632
+ - **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
633
+ - **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles and cloud region support (AWS, GCP, Azure)
634
+ - **Power Regression Detection**: Identify commits that increased energy consumption
635
+ - **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
636
+ - **Version Power Comparison**: Compare power consumption across software versions with trend detection
637
+ - **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
638
+ - **Source Code Access**: Before/after source code for refactoring detection
639
+ - **Full Process Metrics**: All 8 PyDriller process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
640
+ - **Statistical Analysis**: Correlations, effect sizes, and temporal trends
641
+ - **Multi-format Output**: Markdown reports, CSV exports, JSON data
642
+ - **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
503
643
  - **Docker Support**: Pre-built images for containerized analysis
504
- - **Programmatic API**: Full Python API for custom workflows and integrations
505
- - **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
506
- - **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
507
644
 
508
645
  ### Energy Measurement
509
646
 
@@ -654,8 +791,15 @@ ruff check greenmining/ tests/
654
791
  - Python 3.9+
655
792
  - PyGithub >= 2.1.1
656
793
  - PyDriller >= 2.5
657
- - pandas >= 2.2.0
658
- - codecarbon >= 2.0.0 (optional, for cross-platform energy measurement)
794
+ - pandas >= 2.2.0
795
+
796
+ **Optional dependencies:**
797
+
798
+ ```bash
799
+ pip install greenmining[energy] # psutil, codecarbon (energy measurement)
800
+ pip install greenmining[dashboard] # flask (web dashboard)
801
+ pip install greenmining[dev] # pytest, black, ruff, mypy (development)
802
+ ```
659
803
 
660
804
  ## License
661
805
 
@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
9
9
  is_green_aware,
10
10
  )
11
11
 
12
- __version__ = "1.0.6"
12
+ __version__ = "1.0.7"
13
13
 
14
14
 
15
15
  def fetch_repositories(
@@ -18,6 +18,10 @@ def fetch_repositories(
18
18
  min_stars: int = None,
19
19
  languages: list = None,
20
20
  keywords: str = None,
21
+ created_after: str = None,
22
+ created_before: str = None,
23
+ pushed_after: str = None,
24
+ pushed_before: str = None,
21
25
  ):
22
26
  # Fetch repositories from GitHub with custom search keywords.
23
27
  config = Config()
@@ -29,6 +33,10 @@ def fetch_repositories(
29
33
  min_stars=min_stars,
30
34
  languages=languages,
31
35
  keywords=keywords,
36
+ created_after=created_after,
37
+ created_before=created_before,
38
+ pushed_after=pushed_after,
39
+ pushed_before=pushed_before,
32
40
  )
33
41
 
34
42
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: greenmining
3
- Version: 1.0.6
3
+ Version: 1.0.7
4
4
  Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
5
5
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
6
6
  License: MIT
@@ -330,7 +330,137 @@ print(f"Top patterns: {stats['top_patterns'][:5]}")
330
330
  aggregator.export_to_csv(results, "output.csv")
331
331
  ```
332
332
 
333
- #### Batch Analysis
333
+ #### URL-Based Repository Analysis
334
+
335
+ ```python
336
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
337
+
338
+ analyzer = LocalRepoAnalyzer(
339
+ max_commits=200,
340
+ cleanup_after=True,
341
+ )
342
+
343
+ result = analyzer.analyze_repository("https://github.com/pallets/flask")
344
+
345
+ print(f"Repository: {result.name}")
346
+ print(f"Commits analyzed: {result.total_commits}")
347
+ print(f"Green-aware: {result.green_commits} ({result.green_commit_rate:.1%})")
348
+
349
+ for commit in result.commits[:5]:
350
+     if commit.green_aware:
351
+         print(f" {commit.message[:60]}...")
352
+ ```
353
+
354
+ #### Batch Analysis with Parallelism
355
+
356
+ ```python
357
+ from greenmining import analyze_repositories
358
+
359
+ results = analyze_repositories(
360
+ urls=[
361
+ "https://github.com/kubernetes/kubernetes",
362
+ "https://github.com/istio/istio",
363
+ "https://github.com/envoyproxy/envoy",
364
+ ],
365
+ max_commits=100,
366
+ parallel_workers=3,
367
+ energy_tracking=True,
368
+ energy_backend="auto",
369
+ )
370
+
371
+ for result in results:
372
+     print(f"{result.name}: {result.green_commit_rate:.1%} green")
373
+ ```
374
+
375
+ #### Private Repository Analysis
376
+
377
+ ```python
378
+ from greenmining.services.local_repo_analyzer import LocalRepoAnalyzer
379
+
380
+ # HTTPS with token
381
+ analyzer = LocalRepoAnalyzer(github_token="ghp_xxxx")
382
+ result = analyzer.analyze_repository("https://github.com/company/private-repo")
383
+
384
+ # SSH with key
385
+ analyzer = LocalRepoAnalyzer(ssh_key_path="~/.ssh/id_rsa")
386
+ result = analyzer.analyze_repository("git@github.com:company/private-repo.git")
387
+ ```
388
+
389
+ #### Power Regression Detection
390
+
391
+ ```python
392
+ from greenmining.analyzers import PowerRegressionDetector
393
+
394
+ detector = PowerRegressionDetector(
395
+ test_command="pytest tests/ -x",
396
+ energy_backend="rapl",
397
+ threshold_percent=5.0,
398
+ iterations=5,
399
+ )
400
+
401
+ regressions = detector.detect(
402
+ repo_path="/path/to/repo",
403
+ baseline_commit="v1.0.0",
404
+ target_commit="HEAD",
405
+ )
406
+
407
+ for regression in regressions:
408
+     print(f"Commit {regression.sha[:8]}: +{regression.power_increase:.1f}%")
409
+ ```
410
+
411
+ #### Version Power Comparison
412
+
413
+ ```python
414
+ from greenmining.analyzers import VersionPowerAnalyzer
415
+
416
+ analyzer = VersionPowerAnalyzer(
417
+ test_command="pytest tests/",
418
+ energy_backend="rapl",
419
+ iterations=10,
420
+ warmup_iterations=2,
421
+ )
422
+
423
+ report = analyzer.analyze_versions(
424
+ repo_path="/path/to/repo",
425
+ versions=["v1.0", "v1.1", "v1.2", "v2.0"],
426
+ )
427
+
428
+ print(report.summary())
429
+ print(f"Trend: {report.trend}")
430
+ print(f"Most efficient: {report.most_efficient}")
431
+ ```
432
+
433
+ #### Metrics-to-Power Correlation
434
+
435
+ ```python
436
+ from greenmining.analyzers import MetricsPowerCorrelator
437
+
438
+ correlator = MetricsPowerCorrelator()
439
+ correlator.fit(
440
+ metrics=["complexity", "nloc", "code_churn"],
441
+ metrics_values={
442
+ "complexity": [10, 20, 30, 40],
443
+ "nloc": [100, 200, 300, 400],
444
+ "code_churn": [50, 100, 150, 200],
445
+ },
446
+ power_measurements=[5.0, 8.0, 12.0, 15.0],
447
+ )
448
+
449
+ print(f"Pearson: {correlator.pearson}")
450
+ print(f"Spearman: {correlator.spearman}")
451
+ print(f"Feature importance: {correlator.feature_importance}")
452
+ ```
453
+
454
+ #### Web Dashboard
455
+
456
+ ```python
457
+ from greenmining.dashboard import run_dashboard
458
+
459
+ # Launch interactive dashboard (requires pip install greenmining[dashboard])
460
+ run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
461
+ ```
462
+
463
+ #### Pipeline Batch Analysis
334
464
 
335
465
  ```python
336
466
  from greenmining.controllers.repository_controller import RepositoryController
@@ -551,17 +681,24 @@ config = Config(
551
681
 
552
682
  ### Core Capabilities
553
683
 
554
- - **Pattern Detection**: Automatically identifies 122 sustainability patterns across 15 categories
555
- - **Keyword Analysis**: Scans commit messages using 321 green software keywords
556
- - **Custom Repository Fetching**: Fetch repositories with custom search keywords (not limited to microservices)
557
- - **Repository Analysis**: Analyzes repositories from GitHub with flexible filtering
558
- - **Batch Processing**: Analyze hundreds of repositories and thousands of commits
559
- - **Multi-format Output**: Generates Markdown reports, CSV exports, and JSON data
560
- - **Statistical Analysis**: Calculates green-awareness metrics, pattern distribution, and trends
684
+ - **Pattern Detection**: 122 sustainability patterns across 15 categories from the GSF catalog
685
+ - **Keyword Analysis**: 321 green software detection keywords
686
+ - **Repository Fetching**: GraphQL API with date, star, and language filters
687
+ - **URL-Based Analysis**: Direct PyDriller analysis from GitHub URLs (HTTPS and SSH)
688
+ - **Batch Processing**: Parallel analysis of multiple repositories with configurable workers
689
+ - **Private Repository Support**: Authentication via SSH keys or GitHub tokens
690
+ - **Energy Measurement**: RAPL, CodeCarbon, and CPU Energy Meter backends
691
+ - **Carbon Footprint Reporting**: CO2 emissions with 20+ country profiles and cloud region support (AWS, GCP, Azure)
692
+ - **Power Regression Detection**: Identify commits that increased energy consumption
693
+ - **Metrics-to-Power Correlation**: Pearson and Spearman analysis between code metrics and power
694
+ - **Version Power Comparison**: Compare power consumption across software versions with trend detection
695
+ - **Method-Level Analysis**: Per-method complexity metrics via Lizard integration
696
+ - **Source Code Access**: Before/after source code for refactoring detection
697
+ - **Full Process Metrics**: All 8 PyDriller process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
698
+ - **Statistical Analysis**: Correlations, effect sizes, and temporal trends
699
+ - **Multi-format Output**: Markdown reports, CSV exports, JSON data
700
+ - **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
561
701
  - **Docker Support**: Pre-built images for containerized analysis
562
- - **Programmatic API**: Full Python API for custom workflows and integrations
563
- - **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
564
- - **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
565
702
 
566
703
  ### Energy Measurement
567
704
 
@@ -712,8 +849,15 @@ ruff check greenmining/ tests/
712
849
  - Python 3.9+
713
850
  - PyGithub >= 2.1.1
714
851
  - PyDriller >= 2.5
715
- - pandas >= 2.2.0
716
- - codecarbon >= 2.0.0 (optional, for cross-platform energy measurement)
852
+ - pandas >= 2.2.0
853
+
854
+ **Optional dependencies:**
855
+
856
+ ```bash
857
+ pip install greenmining[energy] # psutil, codecarbon (energy measurement)
858
+ pip install greenmining[dashboard] # flask (web dashboard)
859
+ pip install greenmining[dev] # pytest, black, ruff, mypy (development)
860
+ ```
717
861
 
718
862
  ## License
719
863
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "greenmining"
7
- version = "1.0.6"
7
+ version = "1.0.7"
8
8
  description = "An empirical Python library for Mining Software Repositories (MSR) in Green IT research"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
File without changes
File without changes
File without changes
File without changes
File without changes