greenmining 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- greenmining/__init__.py +11 -29
- greenmining/__main__.py +9 -3
- greenmining/__version__.py +2 -2
- greenmining/analyzers/__init__.py +3 -7
- greenmining/analyzers/code_diff_analyzer.py +151 -61
- greenmining/analyzers/qualitative_analyzer.py +15 -81
- greenmining/analyzers/statistical_analyzer.py +8 -69
- greenmining/analyzers/temporal_analyzer.py +16 -72
- greenmining/config.py +105 -58
- greenmining/controllers/__init__.py +1 -5
- greenmining/controllers/repository_controller.py +153 -94
- greenmining/energy/__init__.py +13 -0
- greenmining/energy/base.py +165 -0
- greenmining/energy/codecarbon_meter.py +146 -0
- greenmining/energy/rapl.py +157 -0
- greenmining/gsf_patterns.py +4 -26
- greenmining/models/__init__.py +1 -5
- greenmining/models/aggregated_stats.py +4 -4
- greenmining/models/analysis_result.py +4 -4
- greenmining/models/commit.py +5 -5
- greenmining/models/repository.py +5 -5
- greenmining/presenters/__init__.py +1 -5
- greenmining/presenters/console_presenter.py +24 -24
- greenmining/services/__init__.py +10 -6
- greenmining/services/commit_extractor.py +8 -152
- greenmining/services/data_aggregator.py +45 -175
- greenmining/services/data_analyzer.py +9 -202
- greenmining/services/github_fetcher.py +212 -323
- greenmining/services/github_graphql_fetcher.py +371 -0
- greenmining/services/local_repo_analyzer.py +387 -0
- greenmining/services/reports.py +33 -137
- greenmining/utils.py +21 -149
- {greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/METADATA +61 -151
- greenmining-1.0.4.dist-info/RECORD +37 -0
- {greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/WHEEL +1 -1
- greenmining/analyzers/ml_feature_extractor.py +0 -512
- greenmining/analyzers/nlp_analyzer.py +0 -365
- greenmining/cli.py +0 -471
- greenmining/main.py +0 -37
- greenmining-1.0.3.dist-info/RECORD +0 -36
- greenmining-1.0.3.dist-info/entry_points.txt +0 -2
- {greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/licenses/LICENSE +0 -0
- {greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/top_level.txt +0 -0
greenmining/analyzers/temporal_analyzer.py
CHANGED

```diff
@@ -1,18 +1,4 @@
-"""
-Temporal and Historical Analysis for Green Software Practices
-
-Implements time-series analysis from Soliman et al. (2017):
-- Adoption trend analysis (when practices emerged)
-- Velocity analysis (commit frequency over time)
-- Pattern evolution tracking (which practices dominated when)
-- Temporal correlations (do practices cluster in time?)
-
-Addresses research questions:
-1. When did green practices emerge in software development?
-2. Are green practices increasing or decreasing over time?
-3. Which practices were early vs. late adopters?
-4. Do green practices correlate with project maturity?
-"""
+# Temporal and Historical Analysis for Green Software Practices
 
 from __future__ import annotations
 
@@ -25,7 +11,7 @@ import statistics
 
 @dataclass
 class TemporalMetrics:
-    """Metrics for a specific time period."""
+    # Metrics for a specific time period
 
     period: str
     start_date: datetime
@@ -40,7 +26,7 @@ class TemporalMetrics:
 
 @dataclass
 class TrendAnalysis:
-    """Trend analysis results."""
+    # Trend analysis results
 
     trend_direction: str  # 'increasing', 'decreasing', 'stable'
     slope: float
@@ -51,35 +37,16 @@ class TrendAnalysis:
 
 
 class TemporalAnalyzer:
-    """
-    Analyze temporal patterns in green software adoption.
-
-    Based on Soliman et al.: "Time-range filtering is standard practice"
-    Extends with: trend detection, velocity analysis, evolution tracking
-    """
+    # Analyze temporal patterns in green software adoption.
 
     def __init__(self, granularity: str = "quarter"):
-        """
-        Initialize temporal analyzer.
-
-        Args:
-            granularity: Time period granularity ('day', 'week', 'month', 'quarter', 'year')
-        """
+        # Initialize temporal analyzer.
        self.granularity = granularity
 
     def group_commits_by_period(
         self, commits: List[Dict], date_field: str = "date"
     ) -> Dict[str, List[Dict]]:
-        """
-        Group commits into time periods.
-
-        Args:
-            commits: List of commit dictionaries
-            date_field: Field containing commit date
-
-        Returns:
-            Dictionary mapping period strings to commit lists
-        """
+        # Group commits into time periods.
         periods = defaultdict(list)
 
         for commit in commits:
@@ -103,7 +70,7 @@ class TemporalAnalyzer:
         return dict(periods)
 
     def _get_period_key(self, date: datetime) -> str:
-        """Get period key for a date based on granularity."""
+        # Get period key for a date based on granularity.
         if self.granularity == "day":
             return date.strftime("%Y-%m-%d")
         elif self.granularity == "week":
@@ -120,7 +87,7 @@
             return date.strftime("%Y-%m")
 
     def _parse_period_key(self, period_key: str) -> Tuple[datetime, datetime]:
-        """Parse period key back to start and end dates."""
+        # Parse period key back to start and end dates.
         if "W" in period_key:
             # Week format: 2024-W15
             year, week = period_key.split("-W")
```
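The hunk above shows the day (`%Y-%m-%d`), week (`2024-W15`), and month (`%Y-%m`) key formats; the quarter and year branches fall outside the visible context. A standalone sketch of the same keying idea, with the quarter and year formats assumed for illustration (this is not the package's code):

```python
# Period-key derivation mirroring the formats visible in the diff.
# The quarter key "2024-Q2" and the bare-year key are assumptions.
from datetime import datetime

def period_key(date: datetime, granularity: str = "quarter") -> str:
    if granularity == "day":
        return date.strftime("%Y-%m-%d")
    if granularity == "week":
        iso_year, iso_week, _ = date.isocalendar()
        return f"{iso_year}-W{iso_week:02d}"  # e.g. 2024-W15
    if granularity == "month":
        return date.strftime("%Y-%m")
    if granularity == "quarter":
        return f"{date.year}-Q{(date.month - 1) // 3 + 1}"
    return str(date.year)  # fall back to yearly buckets

print(period_key(datetime(2024, 4, 9), "week"))     # 2024-W15
print(period_key(datetime(2024, 4, 9), "quarter"))  # 2024-Q2
```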
```diff
@@ -173,17 +140,7 @@
     def calculate_period_metrics(
         self, period_key: str, commits: List[Dict], analysis_results: List[Dict]
     ) -> TemporalMetrics:
-        """
-        Calculate metrics for a time period.
-
-        Args:
-            period_key: Period identifier
-            commits: Commits in this period
-            analysis_results: Pattern analysis results for commits
-
-        Returns:
-            TemporalMetrics object
-        """
+        # Calculate metrics for a time period.
         start_date, end_date = self._parse_period_key(period_key)
 
         # Count green commits
@@ -229,20 +186,7 @@
         )
 
     def analyze_trends(self, commits: List[Dict], analysis_results: List[Dict]) -> Dict:
-        """
-        Comprehensive temporal trend analysis.
-
-        Args:
-            commits: All commits to analyze
-            analysis_results: Pattern analysis results
-
-        Returns:
-            Dictionary with:
-            - periods: List of TemporalMetrics
-            - trend: TrendAnalysis
-            - adoption_curve: List of (period, cumulative_rate)
-            - velocity_trend: Velocity change over time
-        """
+        # Comprehensive temporal trend analysis.
         # Group by periods
         grouped = self.group_commits_by_period(commits)
 
@@ -284,7 +228,7 @@
         }
 
     def _calculate_trend(self, periods: List[TemporalMetrics]) -> Optional[TrendAnalysis]:
-        """Calculate linear trend using least squares regression."""
+        # Calculate linear trend using least squares regression.
         if len(periods) < 2:
             return None
 
```
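`_calculate_trend` now carries its summary as a comment: a linear trend via least squares over at least two periods. A self-contained sketch of that computation on per-period green-commit rates; the ±0.01 stability threshold is an assumed value for illustration, not the package's:

```python
# Ordinary least-squares slope over per-period rates, the technique named
# in _calculate_trend's comment. The 0.01 classification threshold is an
# assumption for illustration.
from typing import List, Tuple

def linear_trend(rates: List[float]) -> Tuple[str, float]:
    n = len(rates)
    if n < 2:
        raise ValueError("need at least two periods")
    mean_x = (n - 1) / 2
    mean_y = sum(rates) / n
    num = sum((x - mean_x) * (y - mean_y) for x, y in enumerate(rates))
    den = sum((x - mean_x) ** 2 for x in range(n))
    slope = num / den
    if slope > 0.01:
        return "increasing", slope
    if slope < -0.01:
        return "decreasing", slope
    return "stable", slope

print(linear_trend([0.10, 0.12, 0.15, 0.19]))  # ('increasing', ~0.03)
```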
```diff
@@ -332,7 +276,7 @@
         )
 
     def _calculate_adoption_curve(self, periods: List[TemporalMetrics]) -> List[Tuple[str, float]]:
-        """Calculate cumulative adoption over time."""
+        # Calculate cumulative adoption over time.
         cumulative_green = 0
         cumulative_total = 0
         curve = []
```
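The accumulator names in this hunk (`cumulative_green`, `cumulative_total`, `curve`) indicate a running adoption rate per period. A sketch of that accumulation with an assumed `(period, green, total)` input shape:

```python
# Cumulative adoption rate per period, following the accumulators visible
# in the diff. The (period, green, total) tuple shape is an assumption.
from typing import List, Tuple

def adoption_curve(periods: List[Tuple[str, int, int]]) -> List[Tuple[str, float]]:
    cumulative_green = 0
    cumulative_total = 0
    curve = []
    for period, green, total in periods:
        cumulative_green += green
        cumulative_total += total
        rate = cumulative_green / cumulative_total if cumulative_total else 0.0
        curve.append((period, rate))
    return curve

print(adoption_curve([("2023-Q4", 2, 40), ("2024-Q1", 6, 60)]))
# [('2023-Q4', 0.05), ('2024-Q1', 0.08)]
```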
```diff
@@ -348,7 +292,7 @@
         return curve
 
     def _calculate_velocity_trend(self, periods: List[TemporalMetrics]) -> Dict:
-        """Analyze velocity changes over time."""
+        # Analyze velocity changes over time.
         if not periods:
             return {}
 
@@ -365,7 +309,7 @@
     def _analyze_pattern_evolution(
         self, periods: List[TemporalMetrics], analysis_results: List[Dict]
     ) -> Dict:
-        """Track when different patterns emerged and dominated."""
+        # Track when different patterns emerged and dominated.
         pattern_timeline = defaultdict(lambda: {"first_seen": None, "occurrences_by_period": {}})
 
         for period in periods:
@@ -406,7 +350,7 @@
         }
 
     def _metrics_to_dict(self, metrics: TemporalMetrics) -> Dict:
-        """Convert TemporalMetrics to dictionary."""
+        # Convert TemporalMetrics to dictionary.
         return {
             "period": metrics.period,
             "start_date": metrics.start_date.isoformat(),
@@ -420,7 +364,7 @@
         }
 
     def _trend_to_dict(self, trend: Optional[TrendAnalysis]) -> Dict:
-        """Convert TrendAnalysis to dictionary."""
+        # Convert TrendAnalysis to dictionary.
         if not trend:
             return {}
 
```
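Taken together, the analyzer's public surface is unchanged by this refactor. A usage sketch; the commit and analysis-result dictionary shapes are assumptions (only the `date` field name is confirmed by `group_commits_by_period`), and the result keys come from the removed `analyze_trends` docstring:

```python
# Usage sketch for TemporalAnalyzer after the docstring-to-comment refactor.
# Dictionary field names other than "date" are assumptions for illustration.
from datetime import datetime

from greenmining.analyzers.temporal_analyzer import TemporalAnalyzer

commits = [
    {"hash": "a1", "date": datetime(2023, 11, 2)},
    {"hash": "b2", "date": datetime(2024, 4, 9)},
]
analysis_results = [
    {"hash": "a1", "is_green": False, "patterns": []},
    {"hash": "b2", "is_green": True, "patterns": ["caching"]},
]

analyzer = TemporalAnalyzer(granularity="quarter")
report = analyzer.analyze_trends(commits, analysis_results)
print(sorted(report))  # docstring named: periods, trend, adoption_curve, velocity_trend
```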
greenmining/config.py
CHANGED
```diff
@@ -1,72 +1,93 @@
-"""Configuration management for green microservices mining CLI."""
-
 import os
 from pathlib import Path
+from typing import Any, Dict, List, Optional
 
 from dotenv import load_dotenv
 
 
-class Config:
-
+def _load_yaml_config(yaml_path: Path) -> Dict[str, Any]:
+    # Load configuration from YAML file if it exists.
+    if not yaml_path.exists():
+        return {}
+    try:
+        import yaml
+        with open(yaml_path, 'r') as f:
+            return yaml.safe_load(f) or {}
+    except ImportError:
+        return {}
+    except Exception:
+        return {}
 
-    def __init__(self, env_file: str = ".env"):
-        """Initialize configuration from environment file.
 
-        Args:
-            env_file: Path to .env file
-        """
+class Config:
+    # Configuration class for loading from env vars and YAML.
+
+    def __init__(self, env_file: str = ".env", yaml_file: str = "greenmining.yaml"):
+        # Initialize configuration from environment and YAML file.
         # Load environment variables
         env_path = Path(env_file)
         if env_path.exists():
             load_dotenv(env_path)
         else:
             load_dotenv()  # Load from system environment
+
+        # Load YAML config (takes precedence for certain options)
+        yaml_path = Path(yaml_file)
+        self._yaml_config = _load_yaml_config(yaml_path)
 
         # GitHub API Configuration
         self.GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
         if not self.GITHUB_TOKEN or self.GITHUB_TOKEN == "your_github_pat_here":
             raise ValueError("GITHUB_TOKEN not set. Please set it in .env file or environment.")
 
-        # Analysis Type
+        # Analysis Type
         self.ANALYSIS_TYPE = "keyword_heuristic"
 
-        # Search and Processing Configuration
-
-
-
-
-
-
-            "JavaScript",
-
-            "C#",
-            "Rust",
-        ]
-
-        # Repository and Commit Limits
-        self.MIN_STARS = int(os.getenv("MIN_STARS", "100"))
-        self.MAX_REPOS = int(os.getenv("MAX_REPOS", "100"))
-        self.COMMITS_PER_REPO = int(os.getenv("COMMITS_PER_REPO", "50"))
-        self.DAYS_BACK = int(os.getenv("DAYS_BACK", "730"))  # 2 years
+        # Search and Processing Configuration (YAML: sources.search.keywords)
+        yaml_search = self._yaml_config.get("sources", {}).get("search", {})
+        self.GITHUB_SEARCH_KEYWORDS = yaml_search.get("keywords",
+            ["microservices", "microservice-architecture", "cloud-native"])
+
+        # Supported Languages (YAML: sources.search.languages)
+        self.SUPPORTED_LANGUAGES = yaml_search.get("languages", [
+            "Java", "Python", "Go", "JavaScript", "TypeScript", "C#", "Rust",
+        ])
 
-        #
+        # Repository and Commit Limits (YAML: extraction.*)
+        yaml_extraction = self._yaml_config.get("extraction", {})
+        self.MIN_STARS = yaml_search.get("min_stars", int(os.getenv("MIN_STARS", "100")))
+        self.MAX_REPOS = int(os.getenv("MAX_REPOS", "100"))
+        self.COMMITS_PER_REPO = yaml_extraction.get("max_commits",
+            int(os.getenv("COMMITS_PER_REPO", "50")))
+        self.DAYS_BACK = yaml_extraction.get("days_back",
+            int(os.getenv("DAYS_BACK", "730")))
+        self.SKIP_MERGES = yaml_extraction.get("skip_merges", True)
+
+        # Analysis Configuration (YAML: analysis.*)
+        yaml_analysis = self._yaml_config.get("analysis", {})
         self.ENABLE_NLP_ANALYSIS = os.getenv("ENABLE_NLP_ANALYSIS", "false").lower() == "true"
         self.ENABLE_TEMPORAL_ANALYSIS = (
             os.getenv("ENABLE_TEMPORAL_ANALYSIS", "false").lower() == "true"
         )
-        self.TEMPORAL_GRANULARITY = os.getenv(
-            "TEMPORAL_GRANULARITY", "quarter"
-        )  # day, week, month, quarter, year
+        self.TEMPORAL_GRANULARITY = os.getenv("TEMPORAL_GRANULARITY", "quarter")
         self.ENABLE_ML_FEATURES = os.getenv("ENABLE_ML_FEATURES", "false").lower() == "true"
         self.VALIDATION_SAMPLE_SIZE = int(os.getenv("VALIDATION_SAMPLE_SIZE", "30"))
-
-        #
-        self.
-
-        self.
-
-        self.
-
+
+        # PyDriller options (YAML: analysis.process_metrics, etc.)
+        self.PROCESS_METRICS_ENABLED = yaml_analysis.get("process_metrics",
+            os.getenv("PROCESS_METRICS_ENABLED", "true").lower() == "true")
+        self.STRUCTURAL_METRICS_ENABLED = yaml_analysis.get("structural_metrics",
+            os.getenv("STRUCTURAL_METRICS_ENABLED", "true").lower() == "true")
+        self.DMM_ENABLED = yaml_analysis.get("delta_maintainability",
+            os.getenv("DMM_ENABLED", "true").lower() == "true")
+
+        # Temporal Filtering
+        self.CREATED_AFTER = os.getenv("CREATED_AFTER")
+        self.CREATED_BEFORE = os.getenv("CREATED_BEFORE")
+        self.PUSHED_AFTER = os.getenv("PUSHED_AFTER")
+        self.PUSHED_BEFORE = os.getenv("PUSHED_BEFORE")
+        self.COMMIT_DATE_FROM = os.getenv("COMMIT_DATE_FROM")
+        self.COMMIT_DATE_TO = os.getenv("COMMIT_DATE_TO")
         self.MIN_COMMITS = int(os.getenv("MIN_COMMITS", "0"))
         self.ACTIVITY_WINDOW_DAYS = int(os.getenv("ACTIVITY_WINDOW_DAYS", "730"))
 
```
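The constructor now layers a `greenmining.yaml` over environment variables, with the YAML keys named in the inline comments above. A sketch of such a file and the precedence it implies; values are illustrative, and it assumes PyYAML is installed (otherwise `_load_yaml_config` silently returns `{}`) and a `GITHUB_TOKEN` in the environment:

```python
# Exercise the YAML-over-env precedence visible in the constructor above.
# All values are illustrative; key names are taken from the diff.
from pathlib import Path

from greenmining.config import Config

Path("greenmining.yaml").write_text(
    "sources:\n"
    "  search:\n"
    "    keywords: [microservices, cloud-native]\n"
    "    languages: [Python, Go]\n"
    "    min_stars: 250\n"
    "extraction:\n"
    "  max_commits: 100\n"
    "  days_back: 365\n"
    "  skip_merges: true\n"
    "output:\n"
    "  directory: ./data\n"
)

cfg = Config(env_file=".env", yaml_file="greenmining.yaml")
print(cfg.MIN_STARS)         # 250 -- sources.search.min_stars wins over env
print(cfg.COMMITS_PER_REPO)  # 100 -- from extraction.max_commits
print(cfg.DAYS_BACK)         # 365 -- from extraction.days_back
```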
```diff
@@ -76,11 +97,13 @@ class Config:
         # Processing Configuration
         self.TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", "30"))
         self.MAX_RETRIES = int(os.getenv("MAX_RETRIES", "3"))
-        self.RETRY_DELAY = 2
+        self.RETRY_DELAY = 2
         self.EXPONENTIAL_BACKOFF = True
 
-        # Output Configuration
-        self.OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", "./data"))
+        # Output Configuration (YAML: output.directory)
+        yaml_output = self._yaml_config.get("output", {})
+        self.OUTPUT_DIR = Path(yaml_output.get("directory",
+            os.getenv("OUTPUT_DIR", "./data")))
         self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 
         # File Paths
@@ -92,17 +115,48 @@ class Config:
         self.REPORT_FILE = self.OUTPUT_DIR / "green_microservices_analysis.md"
         self.CHECKPOINT_FILE = self.OUTPUT_DIR / "checkpoint.json"
 
+        # Direct Repository URL Support (YAML: sources.urls)
+        yaml_urls = self._yaml_config.get("sources", {}).get("urls", [])
+        env_urls = self._parse_repository_urls(os.getenv("REPOSITORY_URLS", ""))
+        self.REPOSITORY_URLS: List[str] = yaml_urls if yaml_urls else env_urls
+
+        # Clone path (YAML: extraction.clone_path)
+        self.CLONE_PATH = Path(yaml_extraction.get("clone_path",
+            os.getenv("CLONE_PATH", "/tmp/greenmining_repos")))
+        self.CLEANUP_AFTER_ANALYSIS = (
+            os.getenv("CLEANUP_AFTER_ANALYSIS", "true").lower() == "true"
+        )
+
+        # Energy Measurement (YAML: energy.*)
+        yaml_energy = self._yaml_config.get("energy", {})
+        self.ENERGY_ENABLED = yaml_energy.get("enabled",
+            os.getenv("ENERGY_ENABLED", "false").lower() == "true")
+        self.ENERGY_BACKEND = yaml_energy.get("backend",
+            os.getenv("ENERGY_BACKEND", "rapl"))
+        self.CARBON_TRACKING = yaml_energy.get("carbon_tracking",
+            os.getenv("CARBON_TRACKING", "false").lower() == "true")
+        self.COUNTRY_ISO = yaml_energy.get("country_iso",
+            os.getenv("COUNTRY_ISO", "USA"))
+
+        # Power profiling (YAML: energy.power_profiling.*)
+        yaml_power = yaml_energy.get("power_profiling", {})
+        self.POWER_PROFILING_ENABLED = yaml_power.get("enabled", False)
+        self.POWER_TEST_COMMAND = yaml_power.get("test_command", None)
+        self.POWER_REGRESSION_THRESHOLD = yaml_power.get("regression_threshold", 5.0)
+
         # Logging
         self.VERBOSE = os.getenv("VERBOSE", "false").lower() == "true"
         self.LOG_FILE = self.OUTPUT_DIR / "mining.log"
 
-    def validate(self) -> bool:
-        """Validate that all required configuration is present.
+    def _parse_repository_urls(self, urls_str: str) -> List[str]:
+        # Parse comma-separated repository URLs from environment variable.
+        if not urls_str:
+            return []
+        return [url.strip() for url in urls_str.split(",") if url.strip()]
 
-
-
-        """
-        required_attrs = ["GITHUB_TOKEN", "CLAUDE_API_KEY", "MAX_REPOS", "COMMITS_PER_REPO"]
+    def validate(self) -> bool:
+        # Validate that all required configuration is present.
+        required_attrs = ["GITHUB_TOKEN", "MAX_REPOS", "COMMITS_PER_REPO"]
 
         for attr in required_attrs:
             if not getattr(self, attr, None):
```
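The energy block reads nested keys with env fallbacks (`energy.enabled`, `energy.backend`, `energy.power_profiling.*`). A sketch of the fragment shape those lookups expect, parsed the same way the loader would; values are illustrative and PyYAML is assumed:

```python
# Shape of the energy section consumed by the constructor above.
# Values are illustrative; the defaults shown match the diff ("rapl", 5.0).
import yaml  # PyYAML, as in _load_yaml_config

config = yaml.safe_load(
    "energy:\n"
    "  enabled: true\n"
    "  backend: rapl\n"
    "  carbon_tracking: true\n"
    "  country_iso: DEU\n"
    "  power_profiling:\n"
    "    enabled: true\n"
    "    test_command: pytest -q\n"
    "    regression_threshold: 5.0\n"
)
yaml_energy = config.get("energy", {})
yaml_power = yaml_energy.get("power_profiling", {})
print(yaml_energy.get("backend", "rapl"))           # rapl
print(yaml_power.get("regression_threshold", 5.0))  # 5.0
```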
```diff
@@ -111,7 +165,7 @@ class Config:
         return True
 
     def __repr__(self) -> str:
-        """String representation of configuration (hiding sensitive data)."""
+        # String representation of configuration (hiding sensitive data).
         return (
             f"Config("
             f"MAX_REPOS={self.MAX_REPOS}, "
```
```diff
@@ -127,14 +181,7 @@ _config_instance = None
 
 
 def get_config(env_file: str = ".env") -> Config:
-    """Get or create global configuration instance.
-
-    Args:
-        env_file: Path to .env file
-
-    Returns:
-        Config instance
-    """
+    # Get or create global configuration instance.
     global _config_instance
     if _config_instance is None:
         _config_instance = Config(env_file)
```
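`get_config` keeps its module-level singleton; repeated calls return the same `Config`. A short sketch (assumes a valid `GITHUB_TOKEN`, since the constructor raises without one):

```python
# The global _config_instance above makes get_config a process-wide singleton.
from greenmining.config import get_config

cfg_a = get_config()
cfg_b = get_config()
assert cfg_a is cfg_b  # same instance on repeated calls
```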
greenmining/controllers/__init__.py
CHANGED

```diff
@@ -1,8 +1,4 @@
-"""
-Controllers Package - Business logic and orchestration for mining operations.
-
-Controllers coordinate between models, services, and presenters following MCP architecture.
-"""
+# Controllers Package - Business logic and orchestration for mining operations.
 
 from .repository_controller import RepositoryController
 
```