greenmining 0.1.11__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -1,13 +1,20 @@
 """Data aggregator for green microservices analysis results."""
 
+from __future__ import annotations
+
 import json
 from collections import defaultdict
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Dict, List, Optional
 
 import click
 import pandas as pd
 
+from greenmining.analyzers import (
+    EnhancedStatisticalAnalyzer,
+    TemporalAnalyzer,
+    QualitativeAnalyzer,
+)
 from greenmining.config import get_config
 from greenmining.utils import (
     colored_print,
@@ -23,9 +30,35 @@ from greenmining.utils import (
 class DataAggregator:
     """Aggregates analysis results and generates statistics."""
 
-    def __init__(self):
-        """Initialize aggregator."""
-        pass
+    def __init__(
+        self,
+        enable_enhanced_stats: bool = False,
+        enable_temporal: bool = False,
+        temporal_granularity: str = "quarter",
+    ):
+        """Initialize aggregator.
+
+        Args:
+            enable_enhanced_stats: Enable enhanced statistical analysis
+            enable_temporal: Enable temporal trend analysis
+            temporal_granularity: Granularity for temporal analysis (day/week/month/quarter/year)
+        """
+        self.enable_enhanced_stats = enable_enhanced_stats
+        self.enable_temporal = enable_temporal
+
+        if self.enable_enhanced_stats:
+            self.statistical_analyzer = EnhancedStatisticalAnalyzer()
+            colored_print("Enhanced statistical analysis enabled", "cyan")
+        else:
+            self.statistical_analyzer = None
+
+        if self.enable_temporal:
+            self.temporal_analyzer = TemporalAnalyzer(granularity=temporal_granularity)
+            colored_print(
+                f"Temporal analysis enabled (granularity: {temporal_granularity})", "cyan"
+            )
+        else:
+            self.temporal_analyzer = None
 
     def aggregate(
         self, analysis_results: list[dict[str, Any]], repositories: list[dict[str, Any]]
@@ -56,7 +89,38 @@ class DataAggregator:
         # Per-language statistics
         per_language_stats = self._generate_language_stats(analysis_results, repositories)
 
-        return {
+        # Enhanced statistical analysis (if enabled)
+        enhanced_stats = None
+        if self.enable_enhanced_stats and len(analysis_results) > 0:
+            try:
+                enhanced_stats = self._generate_enhanced_statistics(analysis_results)
+                colored_print("✅ Enhanced statistical analysis complete", "green")
+            except Exception as e:
+                colored_print(f"⚠️ Enhanced statistics failed: {e}", "yellow")
+                enhanced_stats = {"error": str(e)}
+
+        # Temporal trend analysis (if enabled)
+        temporal_analysis = None
+        if self.enable_temporal and len(analysis_results) > 0:
+            try:
+                # Convert analysis results to commits format for temporal analyzer
+                commits = [
+                    {
+                        "hash": r.get("commit_hash", "unknown"),
+                        "date": r.get("date"),
+                        "message": r.get("message", ""),
+                        "repository": r.get("repository", "unknown"),
+                    }
+                    for r in analysis_results
+                ]
+
+                temporal_analysis = self.temporal_analyzer.analyze_trends(commits, analysis_results)
+                colored_print("✅ Temporal trend analysis complete", "green")
+            except Exception as e:
+                colored_print(f"⚠️ Temporal analysis failed: {e}", "yellow")
+                temporal_analysis = {"error": str(e)}
+
+        result = {
             "summary": summary,
             "known_patterns": known_patterns,
             "emergent_patterns": emergent_patterns,
@@ -64,6 +128,14 @@ class DataAggregator:
             "per_language_stats": per_language_stats,
         }
 
+        if enhanced_stats:
+            result["enhanced_statistics"] = enhanced_stats
+
+        if temporal_analysis:
+            result["temporal_analysis"] = temporal_analysis
+
+        return result
+
     def _generate_summary(
         self, results: list[dict[str, Any]], repos: list[dict[str, Any]]
     ) -> dict[str, Any]:
@@ -228,6 +300,92 @@ class DataAggregator:
 
         return language_stats
 
+    def _generate_enhanced_statistics(self, results: list[dict[str, Any]]) -> dict[str, Any]:
+        """Generate enhanced statistical analysis.
+
+        Args:
+            results: List of commit analysis results
+
+        Returns:
+            Dictionary with enhanced statistical analysis
+        """
+        # Prepare DataFrame
+        df = pd.DataFrame(results)
+
+        # Ensure required columns exist
+        if "date" not in df.columns or "green_aware" not in df.columns:
+            return {"error": "Missing required columns for enhanced statistics"}
+
+        enhanced_stats = {}
+
+        # 1. Temporal Trend Analysis
+        if len(df) >= 8:  # Need at least 8 data points
+            try:
+                df_copy = df.copy()
+                df_copy["commit_hash"] = df_copy.get("commit_hash", df_copy.index)
+                trends = self.statistical_analyzer.temporal_trend_analysis(df_copy)
+                enhanced_stats["temporal_trends"] = {
+                    "trend_direction": trends["trend"]["direction"],
+                    "correlation": float(trends["trend"]["correlation"]),
+                    "p_value": float(trends["trend"]["p_value"]),
+                    "significant": trends["trend"]["significant"],
+                    "monthly_data_points": len(trends.get("monthly_data", {})),
+                }
+            except Exception as e:
+                enhanced_stats["temporal_trends"] = {"error": str(e)}
+
+        # 2. Pattern Correlation Analysis (if pattern columns exist)
+        pattern_cols = [col for col in df.columns if col.startswith("pattern_")]
+        if pattern_cols and len(pattern_cols) >= 2:
+            try:
+                correlations = self.statistical_analyzer.analyze_pattern_correlations(df)
+                enhanced_stats["pattern_correlations"] = {
+                    "significant_pairs_count": len(correlations["significant_pairs"]),
+                    "significant_pairs": correlations["significant_pairs"][:5],  # Top 5
+                    "interpretation": correlations["interpretation"],
+                }
+            except Exception as e:
+                enhanced_stats["pattern_correlations"] = {"error": str(e)}
+
+        # 3. Effect Size Analysis by Repository
+        if "repository" in df.columns:
+            try:
+                # Group by repository
+                green_rates_by_repo = df.groupby("repository")["green_aware"].mean()
+                if len(green_rates_by_repo) >= 2:
+                    # Compare top vs bottom half
+                    sorted_rates = sorted(green_rates_by_repo.values)
+                    mid_point = len(sorted_rates) // 2
+                    group1 = sorted_rates[:mid_point]
+                    group2 = sorted_rates[mid_point:]
+
+                    if len(group1) > 0 and len(group2) > 0:
+                        effect = self.statistical_analyzer.effect_size_analysis(
+                            list(group1), list(group2)
+                        )
+                        enhanced_stats["effect_size"] = {
+                            "cohens_d": float(effect["cohens_d"]),
+                            "magnitude": effect["magnitude"],
+                            "mean_difference": float(effect["mean_difference"]),
+                            "significant": effect["significant"],
+                            "comparison": "high_green_vs_low_green_repos",
+                        }
+            except Exception as e:
+                enhanced_stats["effect_size"] = {"error": str(e)}
+
+        # 4. Basic descriptive statistics
+        enhanced_stats["descriptive"] = {
+            "total_commits": len(df),
+            "green_commits": int(df["green_aware"].sum()),
+            "green_rate_mean": float(df["green_aware"].mean()),
+            "green_rate_std": float(df["green_aware"].std()) if len(df) > 1 else 0.0,
+            "unique_repositories": (
+                int(df["repository"].nunique()) if "repository" in df.columns else 0
+            ),
+        }
+
+        return enhanced_stats
+
     def save_results(
         self,
         aggregated_data: dict[str, Any],
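
Reviewer note on the aggregator hunks above: both new constructor flags default to False, so existing 0.1.11 call sites are unaffected. A minimal usage sketch of the new 1.0.1 surface follows; the import path and sample records are illustrative (keys mirror what aggregate() and _generate_enhanced_statistics() actually read), not part of the diff:

    # Sketch only — exercises the new DataAggregator flags shown above.
    from greenmining.aggregator import DataAggregator  # module path assumed

    results = [  # hypothetical analysis results
        {"commit_hash": "abc123", "date": "2024-03-01", "green_aware": True,
         "message": "reduce polling interval", "repository": "org/svc-a"},
        {"commit_hash": "def456", "date": "2024-03-02", "green_aware": False,
         "message": "fix typo", "repository": "org/svc-a"},
    ]
    repositories = [{"name": "org/svc-a"}]  # shape assumed

    agg = DataAggregator(
        enable_enhanced_stats=True,
        enable_temporal=True,
        temporal_granularity="month",
    )
    data = agg.aggregate(results, repositories)
    # "enhanced_statistics" / "temporal_analysis" are added to the output
    # only when the matching flag is on and there is at least one result.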
@@ -1,14 +1,21 @@
 """Data analyzer for green microservices commits using GSF patterns."""
 
+from __future__ import annotations
+
 import json
 import re
 from collections import Counter
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Dict, List, Optional, Tuple
 
 import click
 from tqdm import tqdm
 
+from greenmining.analyzers import (
+    CodeDiffAnalyzer,
+    NLPAnalyzer,
+    MLFeatureExtractor,
+)
 from greenmining.config import get_config
 from greenmining.gsf_patterns import (
     GREEN_KEYWORDS,
@@ -30,16 +37,49 @@ from greenmining.utils import (
 class DataAnalyzer:
     """Analyzes commits for green software patterns using GSF (Green Software Foundation) patterns."""
 
-    def __init__(self, batch_size: int = 10):
+    def __init__(
+        self,
+        batch_size: int = 10,
+        enable_diff_analysis: bool = False,
+        enable_nlp: bool = False,
+        enable_ml_features: bool = False,
+    ):
         """Initialize analyzer with GSF patterns.
 
         Args:
             batch_size: Number of commits to process in each batch
+            enable_diff_analysis: Enable code diff analysis (slower but more accurate)
+            enable_nlp: Enable NLP-enhanced pattern detection
+            enable_ml_features: Enable ML feature extraction
         """
         # Use GSF patterns from gsf_patterns.py
         self.gsf_patterns = GSF_PATTERNS
         self.green_keywords = GREEN_KEYWORDS
         self.batch_size = batch_size
+        self.enable_diff_analysis = enable_diff_analysis
+        self.enable_nlp = enable_nlp
+        self.enable_ml_features = enable_ml_features
+
+        # Initialize code diff analyzer if enabled
+        if self.enable_diff_analysis:
+            self.diff_analyzer = CodeDiffAnalyzer()
+            colored_print("Code diff analysis enabled (may increase processing time)", "cyan")
+        else:
+            self.diff_analyzer = None
+
+        # Initialize NLP analyzer if enabled
+        if self.enable_nlp:
+            self.nlp_analyzer = NLPAnalyzer(enable_stemming=True, enable_synonyms=True)
+            colored_print("NLP analysis enabled (morphological variants + synonyms)", "cyan")
+        else:
+            self.nlp_analyzer = None
+
+        # Initialize ML feature extractor if enabled
+        if self.enable_ml_features:
+            self.ml_extractor = MLFeatureExtractor(green_keywords=list(GREEN_KEYWORDS))
+            colored_print("ML feature extraction enabled", "cyan")
+        else:
+            self.ml_extractor = None
 
     def analyze_commits(
         self, commits: list[dict[str, Any]], resume_from: int = 0
@@ -91,6 +131,42 @@ class DataAnalyzer:
         # Q2: KNOWN GSF PATTERNS - Match against Green Software Foundation patterns
         matched_patterns = get_pattern_by_keywords(message)
 
+        # Enhanced NLP analysis (if enabled)
+        nlp_results = None
+        if self.nlp_analyzer:
+            nlp_results = self.nlp_analyzer.analyze_text(message, list(self.green_keywords))
+
+            # Check if NLP found additional matches not caught by keyword matching
+            has_nlp_matches, additional_terms = self.nlp_analyzer.enhance_pattern_detection(
+                message, matched_patterns
+            )
+
+            if has_nlp_matches:
+                # NLP enhancement found additional evidence
+                green_aware = True
+
+        # Q3: CODE DIFF ANALYSIS (if enabled and diff data available)
+        diff_analysis = None
+        if self.diff_analyzer and commit.get("diff_data"):
+            try:
+                # Note: This requires commit object from PyDriller
+                # For now, we'll store a placeholder for future integration
+                diff_analysis = {
+                    "enabled": True,
+                    "status": "requires_pydriller_commit_object",
+                    "patterns_detected": [],
+                    "confidence": "none",
+                    "evidence": {},
+                    "metrics": {},
+                }
+            except Exception as e:
+                diff_analysis = {
+                    "enabled": True,
+                    "status": f"error: {str(e)}",
+                    "patterns_detected": [],
+                    "confidence": "none",
+                }
+
         # Get detailed pattern info
         pattern_details = []
         for _pattern_id, pattern in self.gsf_patterns.items():
@@ -105,13 +181,14 @@ class DataAnalyzer:
                 )
 
         # Calculate confidence based on number of patterns matched
-        confidence = (
-            "high"
-            if len(matched_patterns) >= 2
-            else "medium" if len(matched_patterns) == 1 else "low"
-        )
+        # Boost confidence if diff analysis also detected patterns
+        pattern_count = len(matched_patterns)
+        if diff_analysis and diff_analysis.get("patterns_detected"):
+            pattern_count += len(diff_analysis["patterns_detected"])
+
+        confidence = "high" if pattern_count >= 2 else "medium" if pattern_count == 1 else "low"
 
-        return {
+        result = {
             "commit_hash": commit.get("hash", commit.get("commit_id", "unknown")),
             "repository": commit.get("repository", commit.get("repo_name", "unknown")),
             "author": commit.get("author", commit.get("author_name", "unknown")),
@@ -130,6 +207,32 @@ class DataAnalyzer:
             "deletions": commit.get("lines_deleted", commit.get("deletions", 0)),
         }
 
+        # Add diff analysis results if available
+        if diff_analysis:
+            result["diff_analysis"] = diff_analysis
+
+        # Add NLP analysis results if available
+        if nlp_results:
+            result["nlp_analysis"] = {
+                "total_matches": nlp_results["total_nlp_matches"],
+                "match_density": nlp_results["match_density"],
+                "morphological_count": len(nlp_results["morphological_matches"]),
+                "semantic_count": len(nlp_results["semantic_matches"]),
+                "phrase_count": len(nlp_results["phrase_matches"]),
+            }
+
+        # Add ML features if enabled
+        if self.enable_ml_features and self.ml_extractor:
+            # Note: Full feature extraction requires repository context
+            # For now, extract basic text features
+            text_features = self.ml_extractor.extract_text_features(message)
+            result["ml_features"] = {
+                "text": text_features,
+                "note": "Full ML features require repository and historical context",
+            }
+
+        return result
+
     def _check_green_awareness(self, message: str, files: list[str]) -> tuple[bool, Optional[str]]:
         """Check if commit explicitly mentions green/energy concerns.
 
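
Reviewer note on the analyzer hunks above: the three new flags are likewise opt-in; with enable_nlp on, enhance_pattern_detection() can flip green_aware to True on matches the plain keyword pass missed, and per-commit results gain optional nlp_analysis / ml_features blocks. A hedged sketch (import path assumed; commit keys mirror the fallbacks read above):

    # Sketch only — drives the new DataAnalyzer options shown above.
    from greenmining.analyzer import DataAnalyzer  # module path assumed

    commits = [{  # hypothetical commit record
        "hash": "abc123",
        "repository": "org/svc-a",
        "author": "dev",
        "message": "cache responses to cut redundant network calls",
        "lines_added": 40,
        "lines_deleted": 5,
        # "diff_data" omitted: per this diff, Q3 diff analysis only stores
        # a placeholder until PyDriller commit objects are integrated.
    }]

    analyzer = DataAnalyzer(batch_size=10, enable_nlp=True, enable_ml_features=True)
    analyzed = analyzer.analyze_commits(commits)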
@@ -1,8 +1,10 @@
 """GitHub repository fetcher for green microservices mining."""
 
+from __future__ import annotations
+
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Dict, List, Optional
 
 import click
 from github import Github, GithubException, RateLimitExceededException
@@ -26,6 +28,10 @@ class GitHubFetcher:
         max_repos: int = 100,
         min_stars: int = 100,
         languages: Optional[list[str]] = None,
+        created_after: Optional[str] = None,
+        created_before: Optional[str] = None,
+        pushed_after: Optional[str] = None,
+        pushed_before: Optional[str] = None,
     ):
         """Initialize GitHub fetcher.
 
@@ -34,6 +40,10 @@ class GitHubFetcher:
             max_repos: Maximum number of repositories to fetch
             min_stars: Minimum number of stars required
             languages: List of programming languages to filter
+            created_after: Repository created after date (YYYY-MM-DD)
+            created_before: Repository created before date (YYYY-MM-DD)
+            pushed_after: Repository pushed after date (YYYY-MM-DD)
+            pushed_before: Repository pushed before date (YYYY-MM-DD)
         """
         self.github = Github(token)
         self.max_repos = max_repos
@@ -47,6 +57,10 @@ class GitHubFetcher:
             "C#",
             "Rust",
         ]
+        self.created_after = created_after
+        self.created_before = created_before
+        self.pushed_after = pushed_after
+        self.pushed_before = pushed_before
 
     def search_repositories(self) -> list[dict[str, Any]]:
         """Search for microservice repositories.
@@ -62,10 +76,8 @@ class GitHubFetcher:
             f"Filters: min_stars={self.min_stars}, languages={', '.join(self.languages)}", "cyan"
         )
 
-        # Build search query
-        keyword_query = " OR ".join(keywords)
-        language_query = " OR ".join([f"language:{lang}" for lang in self.languages])
-        query = f"({keyword_query}) ({language_query}) stars:>={self.min_stars}"
+        # Build search query with temporal filters
+        query = self._build_temporal_query(keywords)
 
         try:
             # Execute search
@@ -139,6 +151,51 @@ class GitHubFetcher:
             "license": repo.license.name if repo.license else None,
         }
 
+    def _build_temporal_query(self, keywords: list[str]) -> str:
+        """
+        Build GitHub search query with temporal constraints.
+
+        Args:
+            keywords: List of search keywords
+
+        Returns:
+            Complete search query string
+        """
+        query_parts = []
+
+        # Keywords
+        keyword_query = " OR ".join(keywords)
+        query_parts.append(f"({keyword_query})")
+
+        # Languages
+        language_query = " OR ".join([f"language:{lang}" for lang in self.languages])
+        query_parts.append(f"({language_query})")
+
+        # Stars
+        query_parts.append(f"stars:>={self.min_stars}")
+
+        # Archived filter
+        query_parts.append("archived:false")
+
+        # Temporal filters
+        if self.created_after and self.created_before:
+            query_parts.append(f"created:{self.created_after}..{self.created_before}")
+        elif self.created_after:
+            query_parts.append(f"created:>={self.created_after}")
+        elif self.created_before:
+            query_parts.append(f"created:<={self.created_before}")
+
+        if self.pushed_after and self.pushed_before:
+            query_parts.append(f"pushed:{self.pushed_after}..{self.pushed_before}")
+        elif self.pushed_after:
+            query_parts.append(f"pushed:>={self.pushed_after}")
+        elif self.pushed_before:
+            query_parts.append(f"pushed:<={self.pushed_before}")
+
+        query = " ".join(query_parts)
+        colored_print(f"Query: {query}", "cyan")
+        return query
+
     def _handle_rate_limit(self):
         """Handle GitHub API rate limiting."""
         rate_limit = self.github.get_rate_limit()
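
Reviewer note on the fetcher hunks above: _build_temporal_query() now always appends archived:false, so 1.0.1 returns a strictly narrower result set than 0.1.11 even when no date filters are set. A sketch of the qualifier string it assembles (token and keyword list are placeholders; open-ended ranges use >= / <=, bounded ranges use start..end):

    # Sketch only — shows the query _build_temporal_query assembles.
    fetcher = GitHubFetcher(
        token="<github-token>",        # hypothetical token
        min_stars=200,
        languages=["Go", "Java"],
        created_after="2020-01-01",
        pushed_after="2024-01-01",
    )
    query = fetcher._build_temporal_query(["microservice"])  # keyword assumed
    # -> "(microservice) (language:Go OR language:Java) stars:>=200
    #     archived:false created:>=2020-01-01 pushed:>=2024-01-01"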
@@ -1,9 +1,11 @@
-"""Report generator for green microservices analysis."""
+"""Report generation for green mining analysis."""
+
+from __future__ import annotations
 
 import json
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Dict, Optional
 
 import click
 
@@ -180,6 +182,11 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
         # 2.4 Per-Repository Analysis
         sections.append(self._generate_repo_analysis_section(data))
 
+        # 2.5 Enhanced Statistics (if available)
+        enhanced_section = self._generate_enhanced_statistics_section(data)
+        if enhanced_section:
+            sections.append(enhanced_section)
+
         return "### 2. Results\n\n" + "\n\n".join(sections)
 
     def _generate_green_awareness_section(self, data: dict[str, Any]) -> str:
@@ -300,6 +307,120 @@ No novel microservice-specific green practices were automatically detected. Manu
 
 **Repositories with No Green Mentions:** {no_green_count} out of {len(per_repo)} repositories had zero green-aware commits."""
 
+    def _generate_enhanced_statistics_section(self, data: dict[str, Any]) -> str:
+        """Generate enhanced statistical analysis subsection.
+
+        Args:
+            data: Aggregated data containing enhanced_statistics field
+
+        Returns:
+            Markdown section with enhanced statistics
+        """
+        enhanced_stats = data.get("enhanced_statistics")
+
+        if not enhanced_stats:
+            return ""
+
+        # Handle error case
+        if "error" in enhanced_stats:
+            return f"""#### 2.5 Enhanced Statistical Analysis
+
+**Note:** Enhanced statistical analysis encountered an error: {enhanced_stats['error']}
+"""
+
+        sections = []
+        sections.append("#### 2.5 Enhanced Statistical Analysis")
+        sections.append("")
+        sections.append(
+            "This section presents advanced statistical analyses of green software engineering patterns."
+        )
+        sections.append("")
+
+        # Temporal trends
+        temporal = enhanced_stats.get("temporal_trends", {})
+        if temporal and "error" not in temporal:
+            sections.append("##### Temporal Trends")
+            sections.append("")
+
+            if "overall_trend" in temporal:
+                trend_dir = temporal["overall_trend"].get("direction", "unknown")
+                trend_sig = temporal["overall_trend"].get("significant", False)
+                sections.append(f"**Overall Trend:** {trend_dir.capitalize()}")
+                if trend_sig:
+                    sections.append(" (statistically significant)")
+                sections.append("")
+
+            if "monthly_stats" in temporal and temporal["monthly_stats"]:
+                sections.append("**Monthly Pattern Statistics:**")
+                sections.append("")
+                monthly = temporal["monthly_stats"]
+                sections.append(f"- Mean commits/month: {format_number(monthly.get('mean', 0))}")
+                sections.append(
+                    f"- Median commits/month: {format_number(monthly.get('median', 0))}"
+                )
+                sections.append(f"- Std deviation: {format_number(monthly.get('std', 0))}")
+                sections.append("")
+
+        # Pattern correlations
+        correlations = enhanced_stats.get("pattern_correlations", {})
+        if correlations and "error" not in correlations:
+            sections.append("##### Pattern Correlations")
+            sections.append("")
+
+            top_corr = correlations.get("top_positive_correlations", [])
+            if top_corr:
+                sections.append("**Top Positive Correlations (|r| > 0.5):**")
+                sections.append("")
+                sections.append("| Pattern 1 | Pattern 2 | Correlation (r) |")
+                sections.append("|-----------|-----------|-----------------|")
+                for corr in top_corr[:5]:
+                    sections.append(
+                        f"| {corr['pattern1']} | {corr['pattern2']} | {corr['correlation']:.3f} |"
+                    )
+                sections.append("")
+            else:
+                sections.append("No strong pattern correlations detected (|r| > 0.5).")
+                sections.append("")
+
+        # Effect sizes
+        effect_sizes = enhanced_stats.get("effect_size", {})
+        if effect_sizes and "error" not in effect_sizes:
+            sections.append("##### Effect Size Analysis")
+            sections.append("")
+
+            green_vs_nongreen = effect_sizes.get("green_vs_nongreen_patterns")
+            if green_vs_nongreen:
+                cohens_d = green_vs_nongreen.get("cohens_d", 0)
+                magnitude = green_vs_nongreen.get("magnitude", "negligible")
+                sections.append(f"**Green vs Non-Green Pattern Usage:**")
+                sections.append(f"- Cohen's d: {cohens_d:.3f}")
+                sections.append(f"- Effect magnitude: {magnitude.capitalize()}")
+                sections.append("")
+
+        # Descriptive statistics
+        descriptive = enhanced_stats.get("descriptive", {})
+        if descriptive and "error" not in descriptive:
+            sections.append("##### Descriptive Statistics")
+            sections.append("")
+
+            patterns = descriptive.get("patterns_per_commit", {})
+            if patterns:
+                sections.append("**Patterns per Commit:**")
+                sections.append(f"- Mean: {patterns.get('mean', 0):.2f}")
+                sections.append(f"- Median: {patterns.get('median', 0):.2f}")
+                sections.append(f"- Standard deviation: {patterns.get('std', 0):.2f}")
+                sections.append("")
+
+            repos = descriptive.get("green_commits_per_repo", {})
+            if repos:
+                sections.append("**Green Commits per Repository:**")
+                sections.append(f"- Mean: {repos.get('mean', 0):.2f}")
+                sections.append(f"- Median: {repos.get('median', 0):.2f}")
+                sections.append(f"- Standard deviation: {repos.get('std', 0):.2f}")
+                sections.append("")
+
+        return "\n".join(sections)
+
     def _generate_discussion(self, data: dict[str, Any]) -> str:
         """Generate discussion section."""
         summary = data["summary"]
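
Reviewer note on the report hunks above: _generate_enhanced_statistics_section() degrades gracefully (empty string when the field is absent, a short note on error), but the keys it reads do not all match what the 1.0.1 aggregator emits: it looks for overall_trend, monthly_stats, top_positive_correlations, green_vs_nongreen_patterns, patterns_per_commit, and green_commits_per_repo, while the aggregator in this same diff writes trend_direction, monthly_data_points, significant_pairs, and a flat effect_size block. A sketch of the payload shape this renderer would fully populate from (all values hypothetical):

    # Sketch only — the enhanced_statistics shape this renderer reads.
    data = {
        "enhanced_statistics": {
            "temporal_trends": {
                "overall_trend": {"direction": "increasing", "significant": True},
                "monthly_stats": {"mean": 4.2, "median": 3.0, "std": 2.1},
            },
            "pattern_correlations": {
                "top_positive_correlations": [
                    {"pattern1": "caching", "pattern2": "batching", "correlation": 0.61},
                ],
            },
            "effect_size": {
                "green_vs_nongreen_patterns": {"cohens_d": 0.42, "magnitude": "small"},
            },
            "descriptive": {
                "patterns_per_commit": {"mean": 1.3, "median": 1.0, "std": 0.8},
                "green_commits_per_repo": {"mean": 6.5, "median": 4.0, "std": 5.2},
            },
        },
    }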