greenmining-1.0.3-py3-none-any.whl → greenmining-1.0.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. greenmining/__init__.py +11 -29
  2. greenmining/__main__.py +9 -3
  3. greenmining/__version__.py +2 -2
  4. greenmining/analyzers/__init__.py +3 -7
  5. greenmining/analyzers/code_diff_analyzer.py +151 -61
  6. greenmining/analyzers/qualitative_analyzer.py +15 -81
  7. greenmining/analyzers/statistical_analyzer.py +8 -69
  8. greenmining/analyzers/temporal_analyzer.py +16 -72
  9. greenmining/config.py +105 -58
  10. greenmining/controllers/__init__.py +1 -5
  11. greenmining/controllers/repository_controller.py +153 -94
  12. greenmining/energy/__init__.py +13 -0
  13. greenmining/energy/base.py +165 -0
  14. greenmining/energy/codecarbon_meter.py +146 -0
  15. greenmining/energy/rapl.py +157 -0
  16. greenmining/gsf_patterns.py +4 -26
  17. greenmining/models/__init__.py +1 -5
  18. greenmining/models/aggregated_stats.py +4 -4
  19. greenmining/models/analysis_result.py +4 -4
  20. greenmining/models/commit.py +5 -5
  21. greenmining/models/repository.py +5 -5
  22. greenmining/presenters/__init__.py +1 -5
  23. greenmining/presenters/console_presenter.py +24 -24
  24. greenmining/services/__init__.py +10 -6
  25. greenmining/services/commit_extractor.py +8 -152
  26. greenmining/services/data_aggregator.py +45 -175
  27. greenmining/services/data_analyzer.py +9 -202
  28. greenmining/services/github_fetcher.py +210 -323
  29. greenmining/services/github_graphql_fetcher.py +361 -0
  30. greenmining/services/local_repo_analyzer.py +387 -0
  31. greenmining/services/reports.py +33 -137
  32. greenmining/utils.py +21 -149
  33. {greenmining-1.0.3.dist-info → greenmining-1.0.5.dist-info}/METADATA +69 -173
  34. greenmining-1.0.5.dist-info/RECORD +37 -0
  35. {greenmining-1.0.3.dist-info → greenmining-1.0.5.dist-info}/WHEEL +1 -1
  36. greenmining/analyzers/ml_feature_extractor.py +0 -512
  37. greenmining/analyzers/nlp_analyzer.py +0 -365
  38. greenmining/cli.py +0 -471
  39. greenmining/main.py +0 -37
  40. greenmining-1.0.3.dist-info/RECORD +0 -36
  41. greenmining-1.0.3.dist-info/entry_points.txt +0 -2
  42. {greenmining-1.0.3.dist-info → greenmining-1.0.5.dist-info}/licenses/LICENSE +0 -0
  43. {greenmining-1.0.3.dist-info → greenmining-1.0.5.dist-info}/top_level.txt +0 -0
greenmining/__init__.py CHANGED
@@ -1,4 +1,4 @@
-"""Green Microservices Mining - GSF Pattern Analysis Tool."""
+# Green Microservices Mining - GSF Pattern Analysis Tool.
 
 from greenmining.config import Config
 from greenmining.controllers.repository_controller import RepositoryController
@@ -9,44 +9,26 @@ from greenmining.gsf_patterns import (
     is_green_aware,
 )
 
-__version__ = "1.0.3"
+__version__ = "1.0.5"
 
 
 def fetch_repositories(
     github_token: str,
-    max_repos: int = 100,
-    min_stars: int = 100,
+    max_repos: int = None,
+    min_stars: int = None,
     languages: list = None,
-    keywords: str = "microservices",
+    keywords: str = None,
 ):
-    """Fetch repositories from GitHub with custom search keywords.
-
-    Args:
-        github_token: GitHub personal access token
-        max_repos: Maximum number of repositories to fetch (default: 100)
-        min_stars: Minimum GitHub stars required (default: 100)
-        languages: List of programming languages to filter (default: ["Python", "Java", "Go", "JavaScript", "TypeScript"])
-        keywords: Search keywords (default: "microservices")
-
-    Returns:
-        List of Repository model instances
-
-    Example:
-        >>> from greenmining import fetch_repositories
-        >>> repos = fetch_repositories(
-        ...     github_token="your_token",
-        ...     max_repos=50,
-        ...     keywords="kubernetes cloud-native",
-        ...     min_stars=500
-        ... )
-        >>> print(f"Found {len(repos)} repositories")
-    """
+    # Fetch repositories from GitHub with custom search keywords.
     config = Config()
     config.GITHUB_TOKEN = github_token
    controller = RepositoryController(config)
 
     return controller.fetch_repositories(
-        max_repos=max_repos, min_stars=min_stars, languages=languages, keywords=keywords
+        max_repos=max_repos,
+        min_stars=min_stars,
+        languages=languages,
+        keywords=keywords,
    )
 
 
@@ -58,4 +40,4 @@ __all__ = [
     "get_pattern_by_keywords",
     "fetch_repositories",
     "__version__",
-]
+]
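
The docstring removed above carried the package's own usage example, and it still matches the 1.0.5 call signature; the visible difference is that max_repos, min_stars, and keywords now default to None (presumably deferring to Config defaults, an assumption this diff does not show). A sketch that passes them explicitly, adapted from that removed example:

    from greenmining import fetch_repositories

    # Values taken from the removed 1.0.3 docstring example; the token is a placeholder.
    repos = fetch_repositories(
        github_token="your_token",
        max_repos=50,
        min_stars=500,
        keywords="kubernetes cloud-native",
    )
    print(f"Found {len(repos)} repositories")
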
greenmining/__main__.py CHANGED
@@ -1,6 +1,12 @@
-"""Allow running greenmining as a module: python -m greenmining"""
+# Allow running greenmining as a module: python -m greenmining
+# This is a library - use Python API for programmatic access.
 
-from greenmining.cli import cli
+from greenmining import __version__
 
 if __name__ == "__main__":
-    cli()
+    print(f"greenmining v{__version__}")
+    print("This is a Python library for analyzing green software patterns.")
+    print("\nUsage:")
+    print(" from greenmining import GSF_PATTERNS, is_green_aware, get_pattern_by_keywords")
+    print(" from greenmining.services import GitHubFetcher, CommitExtractor, DataAnalyzer")
+    print("\nDocumentation: https://github.com/adam-bouafia/greenmining")
greenmining/__version__.py CHANGED
@@ -1,3 +1,3 @@
-"""Version information for greenmining."""
+# Version information for greenmining.
 
-__version__ = "1.0.2"
+__version__ = "1.0.5"
greenmining/analyzers/__init__.py CHANGED
@@ -1,17 +1,13 @@
-"""Analyzers for GreenMining framework."""
+# Analyzers for GreenMining framework.
 
 from .code_diff_analyzer import CodeDiffAnalyzer
-from .statistical_analyzer import EnhancedStatisticalAnalyzer
-from .nlp_analyzer import NLPAnalyzer
+from .statistical_analyzer import StatisticalAnalyzer
 from .temporal_analyzer import TemporalAnalyzer
 from .qualitative_analyzer import QualitativeAnalyzer
-from .ml_feature_extractor import MLFeatureExtractor
 
 __all__ = [
     "CodeDiffAnalyzer",
-    "EnhancedStatisticalAnalyzer",
-    "NLPAnalyzer",
+    "StatisticalAnalyzer",
     "TemporalAnalyzer",
     "QualitativeAnalyzer",
-    "MLFeatureExtractor",
 ]
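
For callers written against 1.0.3, the analyzer imports change as sketched below; this is an illustrative migration snippet based only on the renames and removals visible in this hunk:

    # 1.0.3 (no longer importable in 1.0.5):
    #   from greenmining.analyzers import EnhancedStatisticalAnalyzer, NLPAnalyzer, MLFeatureExtractor
    # 1.0.5: NLPAnalyzer and MLFeatureExtractor are removed, and the statistical analyzer is renamed.
    from greenmining.analyzers import (
        CodeDiffAnalyzer,
        QualitativeAnalyzer,
        StatisticalAnalyzer,  # formerly EnhancedStatisticalAnalyzer
        TemporalAnalyzer,
    )
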
greenmining/analyzers/code_diff_analyzer.py CHANGED
@@ -1,4 +1,4 @@
-"""Code diff analyzer for detecting green software patterns in code changes."""
+# Code diff analyzer for detecting green software patterns in code changes.
 
 import re
 from typing import Any, Dict, List
@@ -7,10 +7,7 @@ from pydriller import Commit, ModifiedFile
 
 
 class CodeDiffAnalyzer:
-    """
-    Analyze code diffs to detect green software patterns
-    beyond commit message keywords.
-    """
+    # Analyze code diffs to detect green software patterns
 
     # Pattern indicators in code changes
     PATTERN_SIGNATURES = {
@@ -64,22 +61,154 @@ class CodeDiffAnalyzer:
             "keywords": [r"lazy", r"defer", r"\.only\(", r"select_related"],
             "patterns": [r"@lazy", r"LazyLoader", r"dynamic.*import"],
         },
+        # NEW: Serverless computing patterns
+        "serverless_computing": {
+            "providers": [
+                r"aws.*lambda",
+                r"@app\.route",
+                r"functions\.https",
+                r"azure.*function",
+            ],
+            "frameworks": [r"serverless", r"chalice", r"zappa", r"claudia"],
+            "keywords": [r"lambda_handler", r"cloud.*function", r"function.*app"],
+        },
+        # NEW: CDN and edge computing
+        "cdn_edge": {
+            "providers": [
+                r"cloudflare",
+                r"cloudfront",
+                r"fastly",
+                r"akamai",
+                r"cdn\.js",
+            ],
+            "keywords": [
+                r"edge.*cache",
+                r"cdn",
+                r"\.distribute\(",
+                r"edge.*function",
+            ],
+        },
+        # NEW: Compression patterns
+        "compression": {
+            "algorithms": [r"gzip", r"brotli", r"deflate", r"zstd", r"lz4"],
+            "keywords": [
+                r"compress",
+                r"decompress",
+                r"\.gz\b",
+                r"Content-Encoding",
+            ],
+            "libraries": [r"import gzip", r"import zlib", r"import brotli"],
+        },
+        # NEW: ML model optimization
+        "model_optimization": {
+            "techniques": [
+                r"quantize",
+                r"quantization",
+                r"prune",
+                r"pruning",
+                r"distill",
+            ],
+            "formats": [r"onnx", r"tensorrt", r"tflite", r"coreml"],
+            "keywords": [
+                r"int8",
+                r"fp16",
+                r"mixed.*precision",
+                r"model\.optimize",
+            ],
+        },
+        # NEW: Efficient protocols (HTTP/2, gRPC)
+        "efficient_protocols": {
+            "http2": [r"http2", r"http/2", r"h2", r"alpn"],
+            "grpc": [r"grpc", r"protobuf", r"\.proto\b"],
+            "keywords": [
+                r"stream",
+                r"multiplexing",
+                r"server.*push",
+                r"binary.*protocol",
+            ],
+        },
+        # NEW: Container optimization
+        "container_optimization": {
+            "base_images": [
+                r"FROM.*alpine",
+                r"FROM.*scratch",
+                r"FROM.*distroless",
+            ],
+            "techniques": [
+                r"multi-stage",
+                r"--no-install-recommends",
+                r"&&.*rm.*-rf",
+                r"\.dockerignore",
+            ],
+            "keywords": [r"layer.*cache", r"build.*cache", r"image.*size"],
+        },
+        # NEW: Green cloud regions
+        "green_regions": {
+            "regions": [
+                r"eu-west",
+                r"eu-north",
+                r"sweden",
+                r"norway",
+                r"canada",
+            ],
+            "keywords": [
+                r"renewable",
+                r"green.*region",
+                r"sustainable.*region",
+                r"carbon.*neutral",
+            ],
+        },
+        # NEW: Auto-scaling patterns
+        "auto_scaling": {
+            "kubernetes": [
+                r"HorizontalPodAutoscaler",
+                r"autoscaling/v",
+                r"hpa",
+                r"minReplicas",
+                r"maxReplicas",
+            ],
+            "cloud": [
+                r"auto.*scal",
+                r"scale.*to.*zero",
+                r"ScalingPolicy",
+                r"TargetTracking",
+            ],
+            "keywords": [
+                r"scale.*up",
+                r"scale.*down",
+                r"metrics.*server",
+                r"cpu.*utilization",
+            ],
+        },
+        # NEW: Code splitting and lazy loading (web)
+        "code_splitting": {
+            "webpack": [
+                r"dynamic.*import",
+                r"lazy.*load",
+                r"code.*split",
+                r"chunk",
+            ],
+            "react": [r"React\.lazy", r"Suspense", r"loadable"],
+            "keywords": [r"bundle", r"split.*chunk", r"async.*component"],
+        },
+        # NEW: Green ML training
+        "green_ml_training": {
+            "keywords": [
+                r"early.*stopping",
+                r"learning.*rate.*scheduler",
+                r"gradient.*checkpointing",
+                r"mixed.*precision",
+            ],
+            "frameworks": [
+                r"apex",
+                r"torch\.cuda\.amp",
+                r"tf\.keras\.mixed_precision",
+            ],
+        },
     }
 
     def analyze_commit_diff(self, commit: Commit) -> Dict[str, Any]:
-        """
-        Analyze code changes in a commit to detect green patterns.
-
-        Args:
-            commit: PyDriller Commit object
-
-        Returns:
-            Dictionary containing:
-            - patterns_detected: List of detected pattern names
-            - confidence: Confidence level (high/medium/low/none)
-            - evidence: Dictionary mapping patterns to evidence lines
-            - metrics: Code change metrics
-        """
+        # Analyze code changes in a commit to detect green patterns.
         patterns_detected = []
         evidence = {}
         metrics = self._calculate_metrics(commit)
@@ -116,15 +245,7 @@ class CodeDiffAnalyzer:
         }
 
     def _detect_patterns_in_line(self, code_line: str) -> List[str]:
-        """
-        Detect patterns in a single line of code.
-
-        Args:
-            code_line: Line of code to analyze
-
-        Returns:
-            List of detected pattern names
-        """
+        # Detect patterns in a single line of code.
         detected = []
 
         for pattern_name, signatures in self.PATTERN_SIGNATURES.items():
@@ -137,15 +258,7 @@ class CodeDiffAnalyzer:
         return detected
 
     def _calculate_metrics(self, commit: Commit) -> Dict[str, int]:
-        """
-        Calculate code change metrics.
-
-        Args:
-            commit: PyDriller Commit object
-
-        Returns:
-            Dictionary of metrics
-        """
+        # Calculate code change metrics.
         lines_added = sum(f.added_lines for f in commit.modified_files)
         lines_removed = sum(f.deleted_lines for f in commit.modified_files)
         files_changed = len(commit.modified_files)
@@ -165,22 +278,7 @@ class CodeDiffAnalyzer:
     def _calculate_diff_confidence(
         self, patterns: List[str], evidence: Dict[str, List[str]], metrics: Dict[str, int]
    ) -> str:
-        """
-        Calculate confidence level for diff-based detection.
-
-        Factors:
-        - Number of patterns detected
-        - Amount of evidence per pattern
-        - Code change magnitude
-
-        Args:
-            patterns: List of detected patterns
-            evidence: Dictionary mapping patterns to evidence
-            metrics: Code change metrics
-
-        Returns:
-            Confidence level: high/medium/low/none
-        """
+        # Calculate confidence level for diff-based detection.
         if not patterns:
             return "none"
 
@@ -194,15 +292,7 @@ class CodeDiffAnalyzer:
         return "low"
 
     def _is_code_file(self, modified_file: ModifiedFile) -> bool:
-        """
-        Check if file is a code file (not config, docs, etc.).
-
-        Args:
-            modified_file: PyDriller ModifiedFile object
-
-        Returns:
-            True if file is a code file
-        """
+        # Check if file is a code file (not config, docs, etc.).
         code_extensions = [
             ".py",
             ".java",
greenmining/analyzers/qualitative_analyzer.py CHANGED
@@ -1,15 +1,4 @@
-"""
-Qualitative Analysis Framework for Pattern Validation
-
-Implements qualitative validation from Soliman et al. (2017):
-- Stratified random sampling for manual validation
-- Precision/recall calculation framework
-- Inter-rater reliability support
-- False positive/negative tracking
-
-Based on Soliman et al.: 42/151 studies used qualitative analysis
-Critical for: validating IR-based approaches, calculating accuracy metrics
-"""
+# Qualitative Analysis Framework for Pattern Validation
 
 from __future__ import annotations
 
@@ -22,7 +11,7 @@ import json
 
 @dataclass
 class ValidationSample:
-    """Represents a single validation sample"""
+    # Represents a single validation sample
 
     commit_sha: str
     commit_message: str
@@ -38,7 +27,7 @@ class ValidationSample:
 
 @dataclass
 class ValidationMetrics:
-    """Precision/recall metrics for validation"""
+    # Precision/recall metrics for validation
 
     true_positives: int
     false_positives: int
@@ -51,26 +40,10 @@ class ValidationMetrics:
 
 
 class QualitativeAnalyzer:
-    """
-    Framework for manual validation and qualitative analysis.
-
-    Implements:
-    1. Stratified sampling (ensure representation across categories)
-    2. Validation workflow (export → review → import → calculate metrics)
-    3. Precision/recall calculation
-    4. Inter-rater reliability (if multiple reviewers)
-
-    Based on Soliman et al.: "42 studies used qualitative analysis for validation"
-    """
+    # Framework for manual validation and qualitative analysis.
 
     def __init__(self, sample_size: int = 30, stratify_by: str = "pattern"):
-        """
-        Initialize qualitative analyzer.
-
-        Args:
-            sample_size: Number of commits to sample for validation
-            stratify_by: Stratification method ('pattern', 'repository', 'time', 'random')
-        """
+        # Initialize qualitative analyzer.
         self.sample_size = sample_size
         self.stratify_by = stratify_by
         self.samples: List[ValidationSample] = []
@@ -78,17 +51,7 @@ class QualitativeAnalyzer:
     def generate_validation_samples(
         self, commits: List[Dict], analysis_results: List[Dict], include_negatives: bool = True
    ) -> List[ValidationSample]:
-        """
-        Generate stratified validation samples.
-
-        Args:
-            commits: All commits
-            analysis_results: Pattern detection results
-            include_negatives: Include non-green commits for false negative detection
-
-        Returns:
-            List of ValidationSample objects
-        """
+        # Generate stratified validation samples.
         # Build commit lookup
         commit_lookup = {c.get("hash", c.get("sha")): c for c in commits}
 
@@ -141,7 +104,7 @@ class QualitativeAnalyzer:
         return samples
 
     def _stratified_sample_by_pattern(self, results: List[Dict], sample_size: int) -> List[Dict]:
-        """Stratified sampling ensuring each pattern category is represented."""
+        # Stratified sampling ensuring each pattern category is represented.
         # Group by dominant pattern
         pattern_groups = defaultdict(list)
         for result in results:
@@ -172,7 +135,7 @@ class QualitativeAnalyzer:
     def _stratified_sample_by_repo(
         self, results: List[Dict], commit_lookup: Dict, sample_size: int
    ) -> List[Dict]:
-        """Stratified sampling ensuring each repository is represented."""
+        # Stratified sampling ensuring each repository is represented.
         # Group by repository
         repo_groups = defaultdict(list)
         for result in results:
@@ -194,12 +157,7 @@ class QualitativeAnalyzer:
         return samples[:sample_size]
 
     def export_samples_for_review(self, output_path: str) -> None:
-        """
-        Export validation samples to JSON for manual review.
-
-        Args:
-            output_path: Path to output JSON file
-        """
+        # Export validation samples to JSON for manual review.
         samples_data = []
         for i, sample in enumerate(self.samples, 1):
             samples_data.append(
@@ -223,12 +181,7 @@ class QualitativeAnalyzer:
             json.dump(samples_data, f, indent=2)
 
     def import_validated_samples(self, input_path: str) -> None:
-        """
-        Import manually validated samples from JSON.
-
-        Args:
-            input_path: Path to JSON file with validated samples
-        """
+        # Import manually validated samples from JSON.
         with open(input_path, "r") as f:
             samples_data = json.load(f)
 
@@ -248,12 +201,7 @@ class QualitativeAnalyzer:
                 break
 
     def calculate_metrics(self) -> ValidationMetrics:
-        """
-        Calculate precision, recall, F1, and accuracy.
-
-        Returns:
-            ValidationMetrics object
-        """
+        # Calculate precision, recall, F1, and accuracy.
         # Count outcomes
         tp = 0  # True positive: detected as green, truly green
         fp = 0  # False positive: detected as green, not green
@@ -295,12 +243,7 @@ class QualitativeAnalyzer:
         )
 
     def get_validation_report(self) -> Dict:
-        """
-        Generate comprehensive validation report.
-
-        Returns:
-            Dictionary with validation statistics and metrics
-        """
+        # Generate comprehensive validation report.
         validated_count = sum(1 for s in self.samples if s.validation_status == "validated")
         pending_count = sum(1 for s in self.samples if s.validation_status == "pending")
 
@@ -360,7 +303,7 @@ class QualitativeAnalyzer:
         }
 
     def _analyze_pattern_accuracy(self) -> Dict:
-        """Analyze accuracy per pattern category."""
+        # Analyze accuracy per pattern category.
         pattern_stats = defaultdict(lambda: {"tp": 0, "fp": 0})
 
         for sample in self.samples:
@@ -391,16 +334,7 @@ class QualitativeAnalyzer:
         samples_from_reviewer_a: List[ValidationSample],
         samples_from_reviewer_b: List[ValidationSample],
    ) -> Dict:
-        """
-        Calculate inter-rater reliability (Cohen's Kappa).
-
-        Args:
-            samples_from_reviewer_a: Samples validated by reviewer A
-            samples_from_reviewer_b: Samples validated by reviewer B (same commits)
-
-        Returns:
-            Dictionary with Cohen's Kappa and agreement statistics
-        """
+        # Calculate inter-rater reliability (Cohen's Kappa).
         # Match samples by commit_sha
         matched_samples = []
         for sample_a in samples_from_reviewer_a:
@@ -445,7 +379,7 @@ class QualitativeAnalyzer:
         }
 
     def _interpret_kappa(self, kappa: float) -> str:
-        """Interpret Cohen's Kappa value."""
+        # Interpret Cohen's Kappa value.
         if kappa < 0:
             return "Poor (less than chance)"
         elif kappa < 0.20:
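
calculate_metrics still reduces the reviewed samples to the standard precision, recall, and F1 figures named in the stripped docstrings. As a reminder of how those metrics relate, an illustrative computation with made-up counts (not output from the package):

    # Hypothetical counts from one manual review round.
    tp, fp, fn = 24, 3, 5

    precision = tp / (tp + fp)  # share of detected-green commits that were truly green
    recall = tp / (tp + fn)     # share of truly green commits the detector found
    f1 = 2 * precision * recall / (precision + recall)
    print(f"precision={precision:.3f} recall={recall:.3f} f1={f1:.3f}")
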
greenmining/analyzers/statistical_analyzer.py CHANGED
@@ -1,4 +1,4 @@
-"""Enhanced statistical analyzer for green software patterns."""
+# Statistical analyzer for green software patterns.
 
 from __future__ import annotations
 
@@ -9,24 +9,11 @@ import pandas as pd
 from scipy import stats
 
 
-class EnhancedStatisticalAnalyzer:
-    """
-    Advanced statistical analyses for green software patterns.
-    Based on Soliman et al. quantitative validation techniques.
-    """
+class StatisticalAnalyzer:
+    # Advanced statistical analyses for green software patterns.
 
     def analyze_pattern_correlations(self, commit_data: pd.DataFrame) -> Dict[str, Any]:
-        """
-        Analyze correlations between patterns.
-
-        Question: Do repositories that adopt caching also adopt resource limits?
-
-        Args:
-            commit_data: DataFrame with pattern columns
-
-        Returns:
-            Dictionary containing correlation matrix and significant pairs
-        """
+        # Analyze correlations between patterns.
         # Create pattern co-occurrence matrix
         pattern_columns = [col for col in commit_data.columns if col.startswith("pattern_")]
 
@@ -61,20 +48,7 @@ class EnhancedStatisticalAnalyzer:
         }
 
     def temporal_trend_analysis(self, commits_df: pd.DataFrame) -> Dict[str, Any]:
-        """
-        Analyze temporal trends in green awareness.
-
-        Techniques:
-        - Mann-Kendall trend test (monotonic trend detection)
-        - Seasonal decomposition (identify cyclical patterns)
-        - Change point detection (identify sudden shifts)
-
-        Args:
-            commits_df: DataFrame with date and green_aware columns
-
-        Returns:
-            Dictionary containing trend analysis results
-        """
+        # Analyze temporal trends in green awareness.
         # Prepare time series data
         commits_df["date"] = pd.to_datetime(commits_df["date"])
         commits_df = commits_df.sort_values("date")
@@ -127,21 +101,7 @@ class EnhancedStatisticalAnalyzer:
         }
 
     def effect_size_analysis(self, group1: List[float], group2: List[float]) -> Dict[str, Any]:
-        """
-        Calculate effect size between two groups.
-
-        Use case: Compare green awareness between:
-        - Different programming languages
-        - Different time periods
-        - Different repository sizes
-
-        Args:
-            group1: First group values
-            group2: Second group values
-
-        Returns:
-            Dictionary containing effect size metrics
-        """
+        # Calculate effect size between two groups.
         # Cohen's d (effect size)
         mean1, mean2 = np.mean(group1), np.mean(group2)
         std1, std2 = np.std(group1, ddof=1), np.std(group2, ddof=1)
@@ -175,20 +135,7 @@ class EnhancedStatisticalAnalyzer:
         }
 
     def pattern_adoption_rate_analysis(self, commits_df: pd.DataFrame) -> Dict[str, Any]:
-        """
-        Analyze pattern adoption rates over repository lifetime.
-
-        Metrics:
-        - Time to first adoption (TTFA)
-        - Adoption acceleration
-        - Pattern stickiness (continued use after adoption)
-
-        Args:
-            commits_df: DataFrame with pattern and date columns
-
-        Returns:
-            Dictionary mapping patterns to adoption metrics
-        """
+        # Analyze pattern adoption rates over repository lifetime.
         results = {}
 
         for pattern in commits_df["pattern"].unique():
@@ -220,15 +167,7 @@ class EnhancedStatisticalAnalyzer:
         return results
 
     def _interpret_correlations(self, significant_pairs: List[Dict[str, Any]]) -> str:
-        """
-        Generate interpretation of correlation results.
-
-        Args:
-            significant_pairs: List of significant correlation pairs
-
-        Returns:
-            Interpretation string
-        """
+        # Generate interpretation of correlation results.
         if not significant_pairs:
             return "No significant correlations found between patterns."
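
effect_size_analysis keeps the Cohen's d computation: group means and sample standard deviations (ddof=1) as shown in the context lines above. An illustrative end-to-end calculation with a pooled standard deviation and made-up group values (the pooling step is the conventional formula, assumed rather than shown in this diff):

    import numpy as np

    # Hypothetical green-awareness rates for two groups of repositories.
    group1 = [0.12, 0.18, 0.15, 0.22, 0.19]
    group2 = [0.08, 0.10, 0.09, 0.13, 0.11]

    mean1, mean2 = np.mean(group1), np.mean(group2)
    std1, std2 = np.std(group1, ddof=1), np.std(group2, ddof=1)
    n1, n2 = len(group1), len(group2)

    # Pooled standard deviation, then Cohen's d.
    pooled_std = np.sqrt(((n1 - 1) * std1**2 + (n2 - 1) * std2**2) / (n1 + n2 - 2))
    cohens_d = (mean1 - mean2) / pooled_std
    print(f"Cohen's d = {cohens_d:.2f}")
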