greenmining 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +11 -29
- greenmining/__main__.py +9 -3
- greenmining/__version__.py +2 -2
- greenmining/analyzers/__init__.py +3 -7
- greenmining/analyzers/code_diff_analyzer.py +151 -61
- greenmining/analyzers/qualitative_analyzer.py +15 -81
- greenmining/analyzers/statistical_analyzer.py +8 -69
- greenmining/analyzers/temporal_analyzer.py +16 -72
- greenmining/config.py +105 -58
- greenmining/controllers/__init__.py +1 -5
- greenmining/controllers/repository_controller.py +153 -94
- greenmining/energy/__init__.py +13 -0
- greenmining/energy/base.py +165 -0
- greenmining/energy/codecarbon_meter.py +146 -0
- greenmining/energy/rapl.py +157 -0
- greenmining/gsf_patterns.py +4 -26
- greenmining/models/__init__.py +1 -5
- greenmining/models/aggregated_stats.py +4 -4
- greenmining/models/analysis_result.py +4 -4
- greenmining/models/commit.py +5 -5
- greenmining/models/repository.py +5 -5
- greenmining/presenters/__init__.py +1 -5
- greenmining/presenters/console_presenter.py +24 -24
- greenmining/services/__init__.py +10 -6
- greenmining/services/commit_extractor.py +8 -152
- greenmining/services/data_aggregator.py +45 -175
- greenmining/services/data_analyzer.py +9 -202
- greenmining/services/github_fetcher.py +212 -323
- greenmining/services/github_graphql_fetcher.py +371 -0
- greenmining/services/local_repo_analyzer.py +387 -0
- greenmining/services/reports.py +33 -137
- greenmining/utils.py +21 -149
- {greenmining-1.0.2.dist-info → greenmining-1.0.4.dist-info}/METADATA +169 -146
- greenmining-1.0.4.dist-info/RECORD +37 -0
- {greenmining-1.0.2.dist-info → greenmining-1.0.4.dist-info}/WHEEL +1 -1
- greenmining/analyzers/ml_feature_extractor.py +0 -512
- greenmining/analyzers/nlp_analyzer.py +0 -365
- greenmining/cli.py +0 -471
- greenmining/main.py +0 -37
- greenmining-1.0.2.dist-info/RECORD +0 -36
- greenmining-1.0.2.dist-info/entry_points.txt +0 -2
- {greenmining-1.0.2.dist-info → greenmining-1.0.4.dist-info}/licenses/LICENSE +0 -0
- {greenmining-1.0.2.dist-info → greenmining-1.0.4.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: greenmining
|
|
3
|
-
Version: 1.0.2
|
|
3
|
+
Version: 1.0.4
|
|
4
4
|
Summary: Analyze GitHub repositories to identify green software engineering patterns and energy-efficient practices
|
|
5
5
|
Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
|
|
6
6
|
License: MIT
|
|
@@ -23,20 +23,19 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
23
23
|
Classifier: Programming Language :: Python :: 3.12
|
|
24
24
|
Classifier: Programming Language :: Python :: 3.13
|
|
25
25
|
Classifier: Operating System :: OS Independent
|
|
26
|
-
Classifier: Environment :: Console
|
|
27
26
|
Requires-Python: >=3.9
|
|
28
27
|
Description-Content-Type: text/markdown
|
|
29
28
|
License-File: LICENSE
|
|
30
29
|
Requires-Dist: PyGithub>=2.1.1
|
|
31
30
|
Requires-Dist: PyDriller>=2.5
|
|
32
31
|
Requires-Dist: pandas>=2.2.0
|
|
33
|
-
Requires-Dist: click>=8.1.7
|
|
34
32
|
Requires-Dist: colorama>=0.4.6
|
|
35
33
|
Requires-Dist: tabulate>=0.9.0
|
|
36
34
|
Requires-Dist: tqdm>=4.66.0
|
|
37
35
|
Requires-Dist: matplotlib>=3.8.0
|
|
38
36
|
Requires-Dist: plotly>=5.18.0
|
|
39
37
|
Requires-Dist: python-dotenv>=1.0.0
|
|
38
|
+
Requires-Dist: requests>=2.31.0
|
|
40
39
|
Provides-Extra: dev
|
|
41
40
|
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
42
41
|
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
@@ -44,7 +43,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "dev"
|
|
|
44
43
|
Requires-Dist: black>=23.12.0; extra == "dev"
|
|
45
44
|
Requires-Dist: ruff>=0.1.9; extra == "dev"
|
|
46
45
|
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
47
|
-
Requires-Dist: build>=1.0.
|
|
46
|
+
Requires-Dist: build>=1.0.4; extra == "dev"
|
|
48
47
|
Requires-Dist: twine>=4.0.2; extra == "dev"
|
|
49
48
|
Provides-Extra: docs
|
|
50
49
|
Requires-Dist: sphinx>=7.2.0; extra == "docs"
|
|
@@ -88,37 +87,6 @@ docker pull adambouafia/greenmining:latest
|
|
|
88
87
|
|
|
89
88
|
## Quick Start
|
|
90
89
|
|
|
91
|
-
### CLI Usage
|
|
92
|
-
|
|
93
|
-
```bash
|
|
94
|
-
# Set your GitHub token
|
|
95
|
-
export GITHUB_TOKEN="your_github_token"
|
|
96
|
-
|
|
97
|
-
# Run full analysis pipeline
|
|
98
|
-
greenmining pipeline --max-repos 100
|
|
99
|
-
|
|
100
|
-
# Fetch repositories with custom keywords
|
|
101
|
-
greenmining fetch --max-repos 100 --min-stars 100 --keywords "kubernetes docker cloud-native"
|
|
102
|
-
|
|
103
|
-
# Fetch with default (microservices)
|
|
104
|
-
greenmining fetch --max-repos 100 --min-stars 100
|
|
105
|
-
|
|
106
|
-
# Extract commits
|
|
107
|
-
greenmining extract --max-commits 50
|
|
108
|
-
|
|
109
|
-
# Analyze for green patterns
|
|
110
|
-
greenmining analyze
|
|
111
|
-
|
|
112
|
-
# Analyze with advanced features
|
|
113
|
-
greenmining analyze --enable-nlp --enable-ml-features --enable-diff-analysis
|
|
114
|
-
|
|
115
|
-
# Aggregate results with temporal analysis
|
|
116
|
-
greenmining aggregate --enable-temporal --temporal-granularity quarter --enable-enhanced-stats
|
|
117
|
-
|
|
118
|
-
# Generate report
|
|
119
|
-
greenmining report
|
|
120
|
-
```
|
|
121
|
-
|
|
122
90
|
### Python API
|
|
123
91
|
|
|
124
92
|
#### Basic Pattern Detection
|
|
@@ -197,23 +165,10 @@ extractor = CommitExtractor(
|
|
|
197
165
|
# Initialize analyzer with advanced features
|
|
198
166
|
analyzer = DataAnalyzer(
|
|
199
167
|
enable_diff_analysis=False, # Enable code diff analysis (slower but more accurate)
|
|
200
|
-
enable_nlp=True, # Enable NLP-enhanced pattern detection
|
|
201
|
-
enable_ml_features=True, # Enable ML feature extraction
|
|
202
168
|
patterns=None, # Custom pattern dict (default: GSF_PATTERNS)
|
|
203
169
|
batch_size=10 # Batch processing size (default: 10)
|
|
204
170
|
)
|
|
205
171
|
|
|
206
|
-
# Optional: Configure NLP analyzer separately
|
|
207
|
-
nlp_analyzer = NLPAnalyzer(
|
|
208
|
-
enable_stemming=True, # Enable morphological analysis (optimize→optimizing)
|
|
209
|
-
enable_synonyms=True # Enable semantic synonym matching (cache→buffer)
|
|
210
|
-
)
|
|
211
|
-
|
|
212
|
-
# Optional: Configure ML feature extractor
|
|
213
|
-
ml_extractor = MLFeatureExtractor(
|
|
214
|
-
green_keywords=None # Custom keyword list (default: built-in 19 keywords)
|
|
215
|
-
)
|
|
216
|
-
|
|
217
172
|
# Extract commits from first repo
|
|
218
173
|
commits = extractor.extract_commits(
|
|
219
174
|
repository=repos[0], # PyGithub Repository object
|
|
@@ -229,18 +184,9 @@ commits = extractor.extract_commits(
|
|
|
229
184
|
|
|
230
185
|
**DataAnalyzer Parameters:**
|
|
231
186
|
- `enable_diff_analysis` (bool, default=False): Enable code diff analysis (slower)
|
|
232
|
-
- `enable_nlp` (bool, default=False): Enable NLP-enhanced pattern detection
|
|
233
|
-
- `enable_ml_features` (bool, default=False): Enable ML feature extraction
|
|
234
187
|
- `patterns` (dict, optional): Custom pattern dictionary (default: GSF_PATTERNS)
|
|
235
188
|
- `batch_size` (int, default=10): Number of commits to process in each batch
|
|
236
189
|
|
|
237
|
-
**NLPAnalyzer Parameters:**
|
|
238
|
-
- `enable_stemming` (bool, default=True): Enable morphological variant matching
|
|
239
|
-
- `enable_synonyms` (bool, default=True): Enable semantic synonym expansion
|
|
240
|
-
|
|
241
|
-
**MLFeatureExtractor Parameters:**
|
|
242
|
-
- `green_keywords` (list[str], optional): Custom green keywords list
|
|
243
|
-
|
|
244
190
|
# Analyze commits for green patterns
|
|
245
191
|
results = []
|
|
246
192
|
for commit in commits:
|
|
@@ -306,7 +252,7 @@ from greenmining.analyzers.qualitative_analyzer import QualitativeAnalyzer
|
|
|
306
252
|
# Initialize aggregator with all advanced features
|
|
307
253
|
aggregator = DataAggregator(
|
|
308
254
|
config=None, # Config object (optional)
|
|
309
|
-
|
|
255
|
+
enable_stats=True, # Enable statistical analysis (correlations, trends)
|
|
310
256
|
enable_temporal=True, # Enable temporal trend analysis
|
|
311
257
|
temporal_granularity="quarter" # Time granularity: day/week/month/quarter/year
|
|
312
258
|
)
|
|
@@ -330,7 +276,7 @@ aggregated = aggregator.aggregate(
|
|
|
330
276
|
|
|
331
277
|
**DataAggregator Parameters:**
|
|
332
278
|
- `config` (Config, optional): Configuration object
|
|
333
|
-
- `
|
|
279
|
+
- `enable_stats` (bool, default=False): Enable pattern correlations and effect size analysis
|
|
334
280
|
- `enable_temporal` (bool, default=False): Enable temporal trend analysis over time
|
|
335
281
|
- `temporal_granularity` (str, default="quarter"): Time granularity (day/week/month/quarter/year)
|
|
336
282
|
|
|
@@ -397,20 +343,127 @@ controller.generate_report()
|
|
|
397
343
|
print("Analysis complete! Check data/ directory for results.")
|
|
398
344
|
```
|
|
399
345
|
|
|
346
|
+
#### Complete Working Example: Full Pipeline
|
|
347
|
+
|
|
348
|
+
This is a complete, production-ready example that demonstrates the entire analysis pipeline. This example successfully analyzed 100 repositories with 30,543 commits in our testing.
|
|
349
|
+
|
|
350
|
+
```python
|
|
351
|
+
import os
|
|
352
|
+
from pathlib import Path
|
|
353
|
+
from dotenv import load_dotenv
|
|
354
|
+
|
|
355
|
+
# Load environment variables
|
|
356
|
+
load_dotenv()
|
|
357
|
+
|
|
358
|
+
# Import from greenmining package
|
|
359
|
+
from greenmining import fetch_repositories
|
|
360
|
+
from greenmining.services.commit_extractor import CommitExtractor
|
|
361
|
+
from greenmining.services.data_analyzer import DataAnalyzer
|
|
362
|
+
from greenmining.services.data_aggregator import DataAggregator
|
|
363
|
+
|
|
364
|
+
# Configuration
|
|
365
|
+
token = os.getenv("GITHUB_TOKEN")
|
|
366
|
+
output_dir = Path("results")
|
|
367
|
+
output_dir.mkdir(exist_ok=True)
|
|
368
|
+
|
|
369
|
+
# STAGE 1: Fetch Repositories
|
|
370
|
+
print("Fetching repositories...")
|
|
371
|
+
repositories = fetch_repositories(
|
|
372
|
+
github_token=token,
|
|
373
|
+
max_repos=100,
|
|
374
|
+
min_stars=10,
|
|
375
|
+
keywords="software engineering",
|
|
376
|
+
)
|
|
377
|
+
print(f"✓ Fetched {len(repositories)} repositories")
|
|
378
|
+
|
|
379
|
+
# STAGE 2: Extract Commits
|
|
380
|
+
print("\nExtracting commits...")
|
|
381
|
+
extractor = CommitExtractor(
|
|
382
|
+
github_token=token,
|
|
383
|
+
max_commits=1000,
|
|
384
|
+
skip_merges=True,
|
|
385
|
+
days_back=730,
|
|
386
|
+
timeout=120,
|
|
387
|
+
)
|
|
388
|
+
all_commits = extractor.extract_from_repositories(repositories)
|
|
389
|
+
print(f"✓ Extracted {len(all_commits)} commits")
|
|
390
|
+
|
|
391
|
+
# Save commits
|
|
392
|
+
extractor.save_results(
|
|
393
|
+
all_commits,
|
|
394
|
+
output_dir / "commits.json",
|
|
395
|
+
len(repositories)
|
|
396
|
+
)
|
|
397
|
+
|
|
398
|
+
# STAGE 3: Analyze Commits
|
|
399
|
+
print("\nAnalyzing commits...")
|
|
400
|
+
analyzer = DataAnalyzer(
|
|
401
|
+
enable_diff_analysis=False, # Set to True for detailed code analysis (slower)
|
|
402
|
+
)
|
|
403
|
+
analyzed_commits = analyzer.analyze_commits(all_commits)
|
|
404
|
+
|
|
405
|
+
# Count green-aware commits
|
|
406
|
+
green_count = sum(1 for c in analyzed_commits if c.get("green_aware", False))
|
|
407
|
+
green_percentage = (green_count / len(analyzed_commits) * 100) if analyzed_commits else 0
|
|
408
|
+
print(f"✓ Analyzed {len(analyzed_commits)} commits")
|
|
409
|
+
print(f"✓ Green-aware: {green_count} ({green_percentage:.1f}%)")
|
|
410
|
+
|
|
411
|
+
# Save analysis
|
|
412
|
+
analyzer.save_results(analyzed_commits, output_dir / "analyzed.json")
|
|
413
|
+
|
|
414
|
+
# STAGE 4: Aggregate Results
|
|
415
|
+
print("\nAggregating results...")
|
|
416
|
+
aggregator = DataAggregator(
|
|
417
|
+
enable_stats=True,
|
|
418
|
+
enable_temporal=True,
|
|
419
|
+
temporal_granularity="quarter",
|
|
420
|
+
)
|
|
421
|
+
results = aggregator.aggregate(analyzed_commits, repositories)
|
|
422
|
+
|
|
423
|
+
# STAGE 5: Save Results
|
|
424
|
+
print("\nSaving results...")
|
|
425
|
+
aggregator.save_results(
|
|
426
|
+
results,
|
|
427
|
+
output_dir / "aggregated.json",
|
|
428
|
+
output_dir / "aggregated.csv",
|
|
429
|
+
analyzed_commits
|
|
430
|
+
)
|
|
431
|
+
|
|
432
|
+
# Print summary
|
|
433
|
+
print("\n" + "="*80)
|
|
434
|
+
print("ANALYSIS COMPLETE")
|
|
435
|
+
print("="*80)
|
|
436
|
+
aggregator.print_summary(results)
|
|
437
|
+
print(f"\n📁 Results saved in: {output_dir.absolute()}")
|
|
438
|
+
```
|
|
439
|
+
|
|
440
|
+
**What this example does:**
|
|
441
|
+
|
|
442
|
+
1. **Fetches repositories** from GitHub based on keywords and filters
|
|
443
|
+
2. **Extracts commits** from each repository (up to 1000 per repo)
|
|
444
|
+
3. **Analyzes commits** for green software patterns
|
|
445
|
+
4. **Aggregates results** with temporal analysis and statistics
|
|
446
|
+
5. **Saves results** to JSON and CSV files for further analysis
|
|
447
|
+
|
|
448
|
+
**Expected output files:**
|
|
449
|
+
- `commits.json` - All extracted commits with metadata
|
|
450
|
+
- `analyzed.json` - Commits analyzed for green patterns
|
|
451
|
+
- `aggregated.json` - Summary statistics and pattern distributions
|
|
452
|
+
- `aggregated.csv` - Tabular format for spreadsheet analysis
|
|
453
|
+
- `metadata.json` - Experiment configuration and timing
|
|
454
|
+
|
|
455
|
+
**Performance:** This pipeline successfully processed 100 repositories (30,543 commits) in approximately 6.4 hours, identifying 7,600 green-aware commits (24.9%).
|
|
456
|
+
|
|
400
457
|
### Docker Usage
|
|
401
458
|
|
|
402
459
|
```bash
|
|
403
|
-
#
|
|
404
|
-
docker run -v $(pwd)/data:/app/data \
|
|
405
|
-
adambouafia/greenmining:latest
|
|
460
|
+
# Interactive shell with Python
|
|
461
|
+
docker run -it -v $(pwd)/data:/app/data \
|
|
462
|
+
adambouafia/greenmining:latest python
|
|
406
463
|
|
|
407
|
-
#
|
|
408
|
-
docker run -v $(pwd)
|
|
409
|
-
|
|
410
|
-
adambouafia/greenmining:latest pipeline --max-repos 50
|
|
411
|
-
|
|
412
|
-
# Interactive shell
|
|
413
|
-
docker run -it adambouafia/greenmining:latest /bin/bash
|
|
464
|
+
# Run Python script
|
|
465
|
+
docker run -v $(pwd)/data:/app/data \
|
|
466
|
+
adambouafia/greenmining:latest python your_script.py
|
|
414
467
|
```
|
|
415
468
|
|
|
416
469
|
## Configuration
|
|
@@ -436,14 +489,12 @@ EXCLUDE_BOT_COMMITS=true
|
|
|
436
489
|
|
|
437
490
|
# Optional - Analysis Features
|
|
438
491
|
ENABLE_DIFF_ANALYSIS=false
|
|
439
|
-
ENABLE_NLP=true
|
|
440
|
-
ENABLE_ML_FEATURES=true
|
|
441
492
|
BATCH_SIZE=10
|
|
442
493
|
|
|
443
494
|
# Optional - Temporal Analysis
|
|
444
495
|
ENABLE_TEMPORAL=true
|
|
445
496
|
TEMPORAL_GRANULARITY=quarter
|
|
446
|
-
|
|
497
|
+
ENABLE_STATS=true
|
|
447
498
|
|
|
448
499
|
# Optional - Output
|
|
449
500
|
OUTPUT_DIR=./data
|
|
@@ -473,14 +524,12 @@ config = Config(
|
|
|
473
524
|
|
|
474
525
|
# Analysis Options
|
|
475
526
|
enable_diff_analysis=False, # Enable code diff analysis
|
|
476
|
-
enable_nlp=True, # Enable NLP features
|
|
477
|
-
enable_ml_features=True, # Enable ML feature extraction
|
|
478
527
|
batch_size=10, # Batch processing size
|
|
479
528
|
|
|
480
529
|
# Temporal Analysis
|
|
481
530
|
enable_temporal=True, # Enable temporal trend analysis
|
|
482
531
|
temporal_granularity="quarter", # day/week/month/quarter/year
|
|
483
|
-
|
|
532
|
+
enable_stats=True, # Enable statistical analysis
|
|
484
533
|
|
|
485
534
|
# Output Configuration
|
|
486
535
|
output_dir="./data", # Output directory path
|
|
@@ -506,6 +555,50 @@ config = Config(
|
|
|
506
555
|
- **Docker Support**: Pre-built images for containerized analysis
|
|
507
556
|
- **Programmatic API**: Full Python API for custom workflows and integrations
|
|
508
557
|
- **Clean Architecture**: Modular design with services layer (Fetcher, Extractor, Analyzer, Aggregator, Reports)
|
|
558
|
+
- **Energy Measurement**: Real-time energy consumption tracking via RAPL (Linux) or CodeCarbon (cross-platform)
|
|
559
|
+
|
|
560
|
+
### Energy Measurement
|
|
561
|
+
|
|
562
|
+
greenmining includes built-in energy measurement capabilities for tracking the carbon footprint of your analysis:
|
|
563
|
+
|
|
564
|
+
#### Backend Options
|
|
565
|
+
|
|
566
|
+
| Backend | Platform | Metrics | Requirements |
|
|
567
|
+
|---------|----------|---------|--------------|
|
|
568
|
+
| **RAPL** | Linux (Intel/AMD) | CPU/RAM energy (Joules) | `/sys/class/powercap/` access |
|
|
569
|
+
| **CodeCarbon** | Cross-platform | Energy + Carbon emissions (gCO2) | `pip install codecarbon` |
|
|
570
|
+
|
|
571
|
+
#### Python API
|
|
572
|
+
|
|
573
|
+
```python
|
|
574
|
+
from greenmining.energy import RAPLEnergyMeter, CodeCarbonMeter
|
|
575
|
+
|
|
576
|
+
# RAPL (Linux only)
|
|
577
|
+
rapl = RAPLEnergyMeter()
|
|
578
|
+
if rapl.is_available():
|
|
579
|
+
rapl.start()
|
|
580
|
+
# ... run analysis ...
|
|
581
|
+
result = rapl.stop()
|
|
582
|
+
print(f"Energy: {result.energy_joules:.2f} J")
|
|
583
|
+
|
|
584
|
+
# CodeCarbon (cross-platform)
|
|
585
|
+
cc = CodeCarbonMeter()
|
|
586
|
+
if cc.is_available():
|
|
587
|
+
cc.start()
|
|
588
|
+
# ... run analysis ...
|
|
589
|
+
result = cc.stop()
|
|
590
|
+
print(f"Energy: {result.energy_joules:.2f} J")
|
|
591
|
+
print(f"Carbon: {result.carbon_grams:.4f} gCO2")
|
|
592
|
+
```
|
|
593
|
+
|
|
594
|
+
#### Experiment Results
|
|
595
|
+
|
|
596
|
+
CodeCarbon was verified with a real experiment:
|
|
597
|
+
- **Repository**: flask (pallets/flask)
|
|
598
|
+
- **Commits analyzed**: 10
|
|
599
|
+
- **Energy measured**: 160.6 J
|
|
600
|
+
- **Carbon emissions**: 0.0119 gCO2
|
|
601
|
+
- **Duration**: 11.28 seconds
|
|
509
602
|
|
|
510
603
|
### Pattern Database
|
|
511
604
|
|
|
@@ -570,77 +663,6 @@ Alpine containers, Infrastructure as Code, renewable energy regions, container o
|
|
|
570
663
|
### 15. General (8 patterns)
|
|
571
664
|
Feature flags, incremental processing, precomputation, background jobs, workflow optimization
|
|
572
665
|
|
|
573
|
-
## CLI Commands
|
|
574
|
-
|
|
575
|
-
| Command | Description | Key Options |
|
|
576
|
-
|---------|-------------|-------------|
|
|
577
|
-
| `fetch` | Fetch repositories from GitHub with custom keywords | `--max-repos`, `--min-stars`, `--languages`, `--keywords` |
|
|
578
|
-
| `extract` | Extract commit history from repositories | `--max-commits` per repository |
|
|
579
|
-
| `analyze` | Analyze commits for green patterns | `--enable-nlp`, `--enable-ml-features`, `--enable-diff-analysis` |
|
|
580
|
-
| `aggregate` | Aggregate analysis results | `--enable-temporal`, `--temporal-granularity`, `--enable-enhanced-stats` |
|
|
581
|
-
| `report` | Generate comprehensive report | Creates Markdown and CSV outputs |
|
|
582
|
-
| `pipeline` | Run complete analysis pipeline | `--max-repos`, `--max-commits` (all-in-one) |
|
|
583
|
-
| `status` | Show current analysis status | Displays progress and file statistics |
|
|
584
|
-
|
|
585
|
-
### Command Details
|
|
586
|
-
|
|
587
|
-
#### Fetch Repositories
|
|
588
|
-
```bash
|
|
589
|
-
# Fetch with custom search keywords
|
|
590
|
-
greenmining fetch --max-repos 100 --min-stars 50 --languages Python --keywords "kubernetes docker"
|
|
591
|
-
|
|
592
|
-
# Fetch microservices (default)
|
|
593
|
-
greenmining fetch --max-repos 100 --min-stars 50 --languages Python
|
|
594
|
-
```
|
|
595
|
-
Options:
|
|
596
|
-
- `--max-repos`: Maximum repositories to fetch (default: 100)
|
|
597
|
-
- `--min-stars`: Minimum GitHub stars (default: 100)
|
|
598
|
-
- `--languages`: Filter by programming languages (default: "Python,Java,Go,JavaScript,TypeScript")
|
|
599
|
-
- `--keywords`: Custom search keywords (default: "microservices")
|
|
600
|
-
|
|
601
|
-
#### Extract Commits
|
|
602
|
-
```bash
|
|
603
|
-
greenmining extract --max-commits 50
|
|
604
|
-
```
|
|
605
|
-
Options:
|
|
606
|
-
- `--max-commits`: Maximum commits per repository (default: 50)
|
|
607
|
-
|
|
608
|
-
#### Analyze Commits (with Advanced Features)
|
|
609
|
-
```bash
|
|
610
|
-
# Basic analysis
|
|
611
|
-
greenmining analyze
|
|
612
|
-
|
|
613
|
-
# Advanced analysis with all features
|
|
614
|
-
greenmining analyze --enable-nlp --enable-ml-features --enable-diff-analysis --batch-size 20
|
|
615
|
-
```
|
|
616
|
-
Options:
|
|
617
|
-
- `--batch-size`: Batch size for processing (default: 10)
|
|
618
|
-
- `--enable-diff-analysis`: Enable code diff analysis (slower but more accurate)
|
|
619
|
-
- `--enable-nlp`: Enable NLP-enhanced pattern detection with morphological variants and synonyms
|
|
620
|
-
- `--enable-ml-features`: Enable ML feature extraction for model training
|
|
621
|
-
|
|
622
|
-
#### Aggregate Results (with Temporal Analysis)
|
|
623
|
-
```bash
|
|
624
|
-
# Basic aggregation
|
|
625
|
-
greenmining aggregate
|
|
626
|
-
|
|
627
|
-
# Advanced aggregation with temporal trends
|
|
628
|
-
greenmining aggregate --enable-temporal --temporal-granularity quarter --enable-enhanced-stats
|
|
629
|
-
```
|
|
630
|
-
Options:
|
|
631
|
-
- `--enable-enhanced-stats`: Enable enhanced statistical analysis (correlations, effect sizes)
|
|
632
|
-
- `--enable-temporal`: Enable temporal trend analysis
|
|
633
|
-
- `--temporal-granularity`: Time period granularity (choices: day, week, month, quarter, year)
|
|
634
|
-
|
|
635
|
-
#### Run Pipeline
|
|
636
|
-
```bash
|
|
637
|
-
greenmining pipeline --max-repos 50 --max-commits 100
|
|
638
|
-
```
|
|
639
|
-
Options:
|
|
640
|
-
- `--max-repos`: Repositories to analyze
|
|
641
|
-
- `--max-commits`: Commits per repository
|
|
642
|
-
- Executes: fetch → extract → analyze → aggregate → report
|
|
643
|
-
|
|
644
666
|
## Output Files
|
|
645
667
|
|
|
646
668
|
All outputs are saved to the `data/` directory:
|
|
@@ -680,6 +702,7 @@ ruff check greenmining/ tests/
|
|
|
680
702
|
- PyDriller >= 2.5
|
|
681
703
|
- pandas >= 2.2.0
|
|
682
704
|
- click >= 8.1.7
|
|
705
|
+
- codecarbon >= 2.0.0 (optional, for cross-platform energy measurement)
|
|
683
706
|
|
|
684
707
|
## License
|
|
685
708
|
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
greenmining/__init__.py,sha256=m1foz4CUfEgBsgyWHEjehsT3qt1vogyer1fjhIVFqjg,992
|
|
2
|
+
greenmining/__main__.py,sha256=NYOVS7D4w2XDLn6SyXHXPKE5GrNGOeoWSTb_KazgK5c,590
|
|
3
|
+
greenmining/__version__.py,sha256=QpL6VzwpRtOZiXqutOetZBUCoiJKYFeTosucueZ4gpQ,62
|
|
4
|
+
greenmining/config.py,sha256=00v1Ln8eZE0RxMrLxvonf8XOWqeRYaIme_iC1yDLR90,8228
|
|
5
|
+
greenmining/gsf_patterns.py,sha256=hnd9GuWB8GEflrusEib5hjvl8CD5TSbGcBtb0gfxFp4,54193
|
|
6
|
+
greenmining/utils.py,sha256=dSFwQzQwbS8rYZSgwLIxM_geLqbldwqVOIXMqEg08Qs,5609
|
|
7
|
+
greenmining/analyzers/__init__.py,sha256=VI22zb3TnhZrwEuBy0J3pIbqNVVZl1wx8NYRlhR6Wok,362
|
|
8
|
+
greenmining/analyzers/code_diff_analyzer.py,sha256=1dk68R3O0RZG8gx1cm9B_UlZ1Uwyb_Q3oScRbCVx4tM,10950
|
|
9
|
+
greenmining/analyzers/qualitative_analyzer.py,sha256=RcjOMLj_DPH869ey9J0uI7JK_krCefMhNkPLOJUDFF8,15391
|
|
10
|
+
greenmining/analyzers/statistical_analyzer.py,sha256=DzWAcCyw42Ig3FIxTwPPBikgt2uzMdktxklonOYfnOk,7166
|
|
11
|
+
greenmining/analyzers/temporal_analyzer.py,sha256=JfTcAoI20oCFMehGrSRnDqhJTXI-RUbdCTMwDOTW9-g,14259
|
|
12
|
+
greenmining/controllers/__init__.py,sha256=UiAT6zBvC1z_9cJWfzq1cLA0I4r9b2vURHipj8oDczI,180
|
|
13
|
+
greenmining/controllers/repository_controller.py,sha256=fyL6Y8xpoixDplP4_rKWiwak42M2DaIihzyKVaBlivA,9680
|
|
14
|
+
greenmining/energy/__init__.py,sha256=Y9RkNuZ3T6npEBxOZJhVc8wy6feXQePdXojLaZxkfGM,308
|
|
15
|
+
greenmining/energy/base.py,sha256=s0yyRDhnEkrkCE5cgp2yHOrIhKbCpU9V7n4Rf1ejWLM,5559
|
|
16
|
+
greenmining/energy/codecarbon_meter.py,sha256=z4T63qunEsU2R2qylZdGCtk3e-y_HYaBjBMD3nuFRU0,5102
|
|
17
|
+
greenmining/energy/rapl.py,sha256=nZoVmdZshSsFLEYWNQxYyNg8fhhzgNME02bpQuIQL_U,5584
|
|
18
|
+
greenmining/models/__init__.py,sha256=2hkB0quhMePvvA1AkYfj5uiF_HyGtXVxn0BU-5m_oSg,302
|
|
19
|
+
greenmining/models/aggregated_stats.py,sha256=N-ZGcQO7IJ33Joa8luMVjtHhKYzNe48VW8hFqs9a5Jc,1016
|
|
20
|
+
greenmining/models/analysis_result.py,sha256=YICTCEcrJxZ1R8Xaio3AZOjCGwMzC_62BMAL0J_XY1w,1509
|
|
21
|
+
greenmining/models/commit.py,sha256=mnRDWSiIyGtJeGXI8sav9hukWUyVFpoNe6GixRlZjY4,2439
|
|
22
|
+
greenmining/models/repository.py,sha256=SKjS01onOptpMioumtAPZxKpKheHAeVXnXyvatl7CfM,2856
|
|
23
|
+
greenmining/presenters/__init__.py,sha256=d1CMtqtUAHYHYNzigPyjtGOUtnH1drtUwf7-bFQq2B8,138
|
|
24
|
+
greenmining/presenters/console_presenter.py,sha256=XOahvlcr4qLbUdhk8cGq1ZWagvemEd3Wgriu8T5EI3s,4896
|
|
25
|
+
greenmining/services/__init__.py,sha256=UhjS2X9x2v5iH991UDPazP3dTPuSgylMq4kQJaueQYs,481
|
|
26
|
+
greenmining/services/commit_extractor.py,sha256=3EfUVBwd8hGSbl7pS-_jAL8gX8RxIASXTX5EZBbKQPI,8387
|
|
27
|
+
greenmining/services/data_aggregator.py,sha256=TsFT0oGOnnHk0QGZ1tT6ZhKGc5X1H1D1u7-7OpiPo7Y,19566
|
|
28
|
+
greenmining/services/data_analyzer.py,sha256=f0nlJkPAclHHCzzTyQW5bjhYrgE0XXiR1x7_o3fJaDs,9732
|
|
29
|
+
greenmining/services/github_fetcher.py,sha256=WFyowC0tROKAhP3bA4QXH7QIYf42yaZf2ePpUEvOZ6s,8266
|
|
30
|
+
greenmining/services/github_graphql_fetcher.py,sha256=mQOUZZEltYWusA7TR6Q7BZrHZ-N1Ijt0Wg2kV5T7lFM,11958
|
|
31
|
+
greenmining/services/local_repo_analyzer.py,sha256=IrfqY1L6peGO78zufEj4uAU1N7nskc0edAYVzE0Ew_w,14785
|
|
32
|
+
greenmining/services/reports.py,sha256=7Smc7a4KtpmkAJ8UoMlzH5BZerC_iO_jMyQw3_42n1s,23387
|
|
33
|
+
greenmining-1.0.4.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
|
|
34
|
+
greenmining-1.0.4.dist-info/METADATA,sha256=rP_4u0fkgp_Bmq2txIpBTLa7ZMj3By2ylTjoej7RSEw,25610
|
|
35
|
+
greenmining-1.0.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
36
|
+
greenmining-1.0.4.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
|
|
37
|
+
greenmining-1.0.4.dist-info/RECORD,,
|