greenmining 1.1.9-py3-none-any.whl → 1.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +29 -10
- greenmining/analyzers/__init__.py +0 -8
- greenmining/controllers/repository_controller.py +83 -88
- greenmining/services/local_repo_analyzer.py +15 -8
- greenmining-1.2.1.dist-info/METADATA +311 -0
- {greenmining-1.1.9.dist-info → greenmining-1.2.1.dist-info}/RECORD +9 -15
- greenmining/analyzers/power_regression.py +0 -211
- greenmining/analyzers/qualitative_analyzer.py +0 -394
- greenmining/analyzers/version_power_analyzer.py +0 -246
- greenmining/config.py +0 -91
- greenmining/presenters/__init__.py +0 -7
- greenmining/presenters/console_presenter.py +0 -143
- greenmining-1.1.9.dist-info/METADATA +0 -865
- {greenmining-1.1.9.dist-info → greenmining-1.2.1.dist-info}/WHEEL +0 -0
- {greenmining-1.1.9.dist-info → greenmining-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {greenmining-1.1.9.dist-info → greenmining-1.2.1.dist-info}/top_level.txt +0 -0
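Judging from the deletion list above, 1.2.1 drops `greenmining/config.py`, the `greenmining/presenters` package, and three analyzer modules. Below is a minimal pre-upgrade check, assuming `greenmining` is importable in the current environment; the module paths are taken from the deletion list above, and nothing else is implied about the 1.2.1 API:

# Pre-upgrade check: these modules ship in 1.1.9 but are removed in 1.2.1
# (paths taken from the deletion list above). Assumes greenmining is installed.
from importlib.metadata import version
from importlib.util import find_spec

REMOVED_IN_1_2_1 = [
    "greenmining.config",
    "greenmining.presenters",
    "greenmining.analyzers.power_regression",
    "greenmining.analyzers.qualitative_analyzer",
    "greenmining.analyzers.version_power_analyzer",
]

print("installed greenmining:", version("greenmining"))
for mod in REMOVED_IN_1_2_1:
    try:
        # find_spec returns None when the submodule is absent
        present = find_spec(mod) is not None
    except ModuleNotFoundError:
        present = False
    print(mod, "->", "present" if present else "removed")

Code that imports any module reported as "removed" will break on upgrade to 1.2.1.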
{greenmining-1.1.9.dist-info → greenmining-1.2.1.dist-info}/RECORD
@@ -1,18 +1,14 @@
-greenmining/__init__.py,sha256=
+greenmining/__init__.py,sha256=W-CMUfo3U7pWBGAKtwT5XwjcMxbKjF5c42ViFFsOBS4,4096
 greenmining/__main__.py,sha256=NYOVS7D4w2XDLn6SyXHXPKE5GrNGOeoWSTb_KazgK5c,590
-greenmining/config.py,sha256=MQ5aPaa_Y9MZke774dmibz2-XSqRVsQiiNaLDr8f7S0,2771
 greenmining/gsf_patterns.py,sha256=UvNJPY3HlAx1SicwUqci40TlLg8lCL0tszSOH4haxQs,55921
 greenmining/utils.py,sha256=-dnLUw9taCzvQ2dk6uc66GAohOFiXJFKs9TLSEPk5kM,2893
-greenmining/analyzers/__init__.py,sha256=
+greenmining/analyzers/__init__.py,sha256=wnBrn8EyAHG_qnesOPAYkZyc-XigXWy2pI3bMeIoLH4,416
 greenmining/analyzers/code_diff_analyzer.py,sha256=1dk68R3O0RZG8gx1cm9B_UlZ1Uwyb_Q3oScRbCVx4tM,10950
 greenmining/analyzers/metrics_power_correlator.py,sha256=MgKXAIYjNihzzyilCd88_AMjZP9sdC6NkCAVbrvvOus,5957
-greenmining/analyzers/power_regression.py,sha256=j_SL8BHQi89zkjjKPPcjsPrvfDAeGpLeZujQiNw_RKI,7375
-greenmining/analyzers/qualitative_analyzer.py,sha256=5LiqP2It3q6_RLiLGkyGRZaRxg00dcyTPvlN5l-wq_k,15379
 greenmining/analyzers/statistical_analyzer.py,sha256=PA0w0sytRmMO6N1a2iH7VdA6Icg4DcyBLFXOGq7PepY,5942
 greenmining/analyzers/temporal_analyzer.py,sha256=JfTcAoI20oCFMehGrSRnDqhJTXI-RUbdCTMwDOTW9-g,14259
-greenmining/analyzers/version_power_analyzer.py,sha256=2P6zOqBg-ButtIhF-4cutiwD2Q1geMY49VFUghHXXoI,8119
 greenmining/controllers/__init__.py,sha256=UiAT6zBvC1z_9cJWfzq1cLA0I4r9b2vURHipj8oDczI,180
-greenmining/controllers/repository_controller.py,sha256=
+greenmining/controllers/repository_controller.py,sha256=sjfbDhyRY59MsKLw0dkxzpe1QZKtm9ScO4E8VFYZy9A,6041
 greenmining/energy/__init__.py,sha256=GoCYh7hitWBoPMtan1HF1yezCHi7o4sa_YUJgGkeJc8,558
 greenmining/energy/base.py,sha256=3hIPgc4B0Nz9V7DTh2Xd6trDRtmozUBBpa5UWRuWzcw,5918
 greenmining/energy/carbon_reporter.py,sha256=bKIFlLhHfYzI4DBu_ff4GW1Psz4oSCAF4NmzQb-EShA,8298
@@ -24,17 +20,15 @@ greenmining/models/aggregated_stats.py,sha256=CZxjwXswvtmYPwpcbodLUsZpsbsNKBDIqv
 greenmining/models/analysis_result.py,sha256=YICTCEcrJxZ1R8Xaio3AZOjCGwMzC_62BMAL0J_XY1w,1509
 greenmining/models/commit.py,sha256=LCwDcRu4-BeCJQdk590oQNZZZM9t8W9FlaHlo9DCVmc,2415
 greenmining/models/repository.py,sha256=MUeCOtVMOsU4Oa_BBoB163Ij5BKytTKwbzoGORJx4rU,2850
-greenmining/presenters/__init__.py,sha256=d1CMtqtUAHYHYNzigPyjtGOUtnH1drtUwf7-bFQq2B8,138
-greenmining/presenters/console_presenter.py,sha256=qagn2c2aOym0WNKV8n175MQ-BTheLjrXzW8c1OafzAQ,4904
 greenmining/services/__init__.py,sha256=ZEMOVut0KRdume_vz58beSNps3YgeoGBXmUjEqNgIhc,690
 greenmining/services/commit_extractor.py,sha256=qBM9QpGzPZRmGMFufJ6gP8eWIuufTowLX8mQxqZwyEU,6996
 greenmining/services/data_aggregator.py,sha256=BU_HUb-8c0n0sa_7VZRB8jIVnaVhRLf-E6KA4ASh-08,19427
 greenmining/services/data_analyzer.py,sha256=0XqW-slrnt7RotrHDweOqKtoN8XIA7y6p7s2Jau6cMg,7431
 greenmining/services/github_graphql_fetcher.py,sha256=ZklXdEAc60KeFL83zRYMwW_-2OwMKpfPY7Wrifl0D50,11539
-greenmining/services/local_repo_analyzer.py,sha256=
+greenmining/services/local_repo_analyzer.py,sha256=kdqN9O7GlmOs1_H1iNni1HEcCzF9bBCPeP3wMStGe5Q,25306
 greenmining/services/reports.py,sha256=nhJuYiA5tPD_9AjtgSLEnrpW3x15sZXrwIxpxQEBbh0,23219
-greenmining-1.1.
-greenmining-1.1.
-greenmining-1.1.
-greenmining-1.1.
-greenmining-1.1.
+greenmining-1.2.1.dist-info/licenses/LICENSE,sha256=M7ma3JHGeiIZIs3ea0HTcFl_wLFPX2NZElUliYs4bCA,1083
+greenmining-1.2.1.dist-info/METADATA,sha256=ZmLnx5P5jN8XpRGgfx_gVzykWvUZdsHrTEpg3Plh9-A,10522
+greenmining-1.2.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+greenmining-1.2.1.dist-info/top_level.txt,sha256=nreXgXxZIWI-42yQknQ0HXtUrFnzZ8N1ra4Mdy2KcsI,12
+greenmining-1.2.1.dist-info/RECORD,,
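The RECORD file diffed above is the wheel's CSV manifest: one `path,hash,size` row per installed file, with RECORD itself listed with empty hash and size (the `greenmining-1.2.1.dist-info/RECORD,,` row). A minimal sketch of comparing two such manifests follows; the `.dist-info` paths are illustrative:

# Compare two wheel RECORD manifests (CSV: path, "sha256=<digest>", size).
# The file paths below are illustrative; point them at real extracted wheels.
import csv

def load_record(path):
    with open(path, newline="") as f:
        # Skip blank rows; RECORD's own row has empty hash/size fields.
        return {row[0]: tuple(row[1:]) for row in csv.reader(f) if row}

old = load_record("greenmining-1.1.9.dist-info/RECORD")
new = load_record("greenmining-1.2.1.dist-info/RECORD")

print("removed:", sorted(set(old) - set(new)))
print("added:  ", sorted(set(new) - set(old)))
print("changed:", sorted(p for p in set(old) & set(new) if old[p] != new[p]))

Run against the two wheels above, "removed" would list the config, presenters, and analyzer modules deleted in 1.2.1, and "changed" would list files such as greenmining/__init__.py whose hash differs.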
greenmining/analyzers/power_regression.py
@@ -1,211 +0,0 @@
-# Power regression detection for identifying commits that increased power consumption.
-# Compares energy measurements between baseline and target commits.
-
-from __future__ import annotations
-
-import subprocess
-from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
-
-from pydriller import Repository
-
-from greenmining.utils import colored_print
-
-
-@dataclass
-class PowerRegression:
-    # A detected power regression from a commit.
-
-    sha: str
-    message: str
-    author: str
-    date: str
-    power_before: float  # watts
-    power_after: float  # watts
-    power_increase: float  # percentage
-    energy_before: float  # joules
-    energy_after: float  # joules
-    is_regression: bool = True
-
-    def to_dict(self) -> Dict[str, Any]:
-        return {
-            "sha": self.sha,
-            "message": self.message,
-            "author": self.author,
-            "date": self.date,
-            "power_before": round(self.power_before, 4),
-            "power_after": round(self.power_after, 4),
-            "power_increase": round(self.power_increase, 2),
-            "energy_before": round(self.energy_before, 4),
-            "energy_after": round(self.energy_after, 4),
-            "is_regression": self.is_regression,
-        }
-
-
-class PowerRegressionDetector:
-    # Detect commits that caused power consumption regressions.
-    # Runs a test command at each commit and measures energy usage.
-
-    def __init__(
-        self,
-        test_command: str = "pytest tests/ -x",
-        energy_backend: str = "rapl",
-        threshold_percent: float = 5.0,
-        iterations: int = 5,
-        warmup_iterations: int = 1,
-    ):
-        # Initialize power regression detector.
-        # Args:
-        #     test_command: Shell command to run for energy measurement
-        #     energy_backend: Energy measurement backend (rapl, codecarbon, cpu_meter)
-        #     threshold_percent: Minimum percentage increase to flag as regression
-        #     iterations: Number of measurement iterations per commit (for accuracy)
-        #     warmup_iterations: Number of warmup runs before measurement
-        self.test_command = test_command
-        self.energy_backend = energy_backend
-        self.threshold_percent = threshold_percent
-        self.iterations = iterations
-        self.warmup_iterations = warmup_iterations
-        self._meter = None
-
-    def _get_energy_meter(self):
-        # Get energy meter instance.
-        if self._meter is None:
-            from greenmining.energy.base import get_energy_meter
-
-            self._meter = get_energy_meter(self.energy_backend)
-        return self._meter
-
-    def _run_test_command(self, cwd: str) -> float:
-        # Run test command and return energy consumed in joules.
-        meter = self._get_energy_meter()
-
-        # Warmup
-        for _ in range(self.warmup_iterations):
-            subprocess.run(
-                self.test_command,
-                shell=True,
-                cwd=cwd,
-                capture_output=True,
-                text=True,
-                timeout=300,
-            )
-
-        # Measure
-        total_joules = 0.0
-        for _ in range(self.iterations):
-            meter.start()
-            subprocess.run(
-                self.test_command,
-                shell=True,
-                cwd=cwd,
-                capture_output=True,
-                text=True,
-                timeout=300,
-            )
-            metrics = meter.stop()
-            total_joules += metrics.joules
-
-        return total_joules / self.iterations
-
-    def detect(
-        self,
-        repo_path: str,
-        baseline_commit: str = "HEAD~10",
-        target_commit: str = "HEAD",
-        max_commits: int = 50,
-    ) -> List[PowerRegression]:
-        # Detect power regressions between baseline and target commits.
-        # Args:
-        #     repo_path: Path to local git repository
-        #     baseline_commit: Baseline commit SHA or reference
-        #     target_commit: Target commit SHA or reference
-        #     max_commits: Maximum commits to analyze
-        regressions = []
-
-        colored_print(f"Detecting power regressions in {repo_path}", "cyan")
-        colored_print(f" Range: {baseline_commit}..{target_commit}", "cyan")
-        colored_print(f" Test: {self.test_command}", "cyan")
-        colored_print(f" Threshold: {self.threshold_percent}%", "cyan")
-
-        # Get commits in range
-        commits = list(
-            Repository(
-                path_to_repo=repo_path,
-                from_commit=baseline_commit,
-                to_commit=target_commit,
-            ).traverse_commits()
-        )
-
-        if not commits:
-            colored_print("No commits found in range", "yellow")
-            return regressions
-
-        # Measure baseline
-        colored_print(f" Measuring baseline ({commits[0].hash[:8]})...", "cyan")
-        self._checkout(repo_path, commits[0].hash)
-        baseline_energy = self._run_test_command(repo_path)
-        colored_print(f" Baseline: {baseline_energy:.4f} joules", "green")
-
-        previous_energy = baseline_energy
-        commit_count = 0
-
-        for commit in commits[1:]:
-            if commit_count >= max_commits:
-                break
-
-            try:
-                self._checkout(repo_path, commit.hash)
-                current_energy = self._run_test_command(repo_path)
-
-                # Calculate change
-                if previous_energy > 0:
-                    change_percent = ((current_energy - previous_energy) / previous_energy) * 100
-                else:
-                    change_percent = 0.0
-
-                # Check for regression
-                if change_percent > self.threshold_percent:
-                    regression = PowerRegression(
-                        sha=commit.hash,
-                        message=commit.msg[:200],
-                        author=commit.author.name,
-                        date=commit.author_date.isoformat() if commit.author_date else "",
-                        power_before=previous_energy / max(1, self.iterations),
-                        power_after=current_energy / max(1, self.iterations),
-                        power_increase=change_percent,
-                        energy_before=previous_energy,
-                        energy_after=current_energy,
-                    )
-                    regressions.append(regression)
-                    colored_print(f" REGRESSION: {commit.hash[:8]} +{change_percent:.1f}%", "red")
-                else:
-                    colored_print(f" OK: {commit.hash[:8]} {change_percent:+.1f}%", "green")
-
-                previous_energy = current_energy
-                commit_count += 1
-
-            except Exception as e:
-                colored_print(f" Warning: Failed on {commit.hash[:8]}: {e}", "yellow")
-                continue
-
-        # Restore to target
-        self._checkout(repo_path, target_commit)
-
-        colored_print(
-            f"\nFound {len(regressions)} power regressions "
-            f"(>{self.threshold_percent}% increase)",
-            "cyan" if not regressions else "red",
-        )
-
-        return regressions
-
-    @staticmethod
-    def _checkout(repo_path: str, ref: str):
-        # Checkout a specific commit.
-        subprocess.run(
-            ["git", "checkout", ref, "--quiet"],
-            cwd=repo_path,
-            capture_output=True,
-            text=True,
-        )
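The module deleted above implemented a walk-the-history workflow: check out each commit in the range, run the test command (after warmup runs), average the measured joules over `iterations`, and flag any commit whose energy rose more than `threshold_percent` over its predecessor. The following sketch shows how the removed class was driven in 1.1.9, using only the constructor and detect() signatures visible in the diff; the repository path is illustrative:

# Sketch: driving the removed PowerRegressionDetector (greenmining <= 1.1.9).
# Signatures are taken from the deleted file above; "./my-repo" is illustrative.
from greenmining.analyzers.power_regression import PowerRegressionDetector

detector = PowerRegressionDetector(
    test_command="pytest tests/ -x",  # workload measured at each commit
    energy_backend="rapl",            # or "codecarbon" / "cpu_meter"
    threshold_percent=5.0,            # flag >5% energy increase vs. previous commit
    iterations=5,                     # measurement runs averaged per commit
    warmup_iterations=1,
)

regressions = detector.detect(
    repo_path="./my-repo",
    baseline_commit="HEAD~10",
    target_commit="HEAD",
)
for r in regressions:
    print(r.sha[:8], f"+{r.power_increase:.1f}%")

Note the trade-off baked into the defaults: each commit costs (warmup_iterations + iterations) full test runs, so widening the commit range multiplies measurement time accordingly.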
greenmining/analyzers/qualitative_analyzer.py
@@ -1,394 +0,0 @@
-# Qualitative Analysis Framework for Pattern Validation
-
-from __future__ import annotations
-
-import random
-from typing import Dict, List, Optional
-from dataclasses import dataclass
-from collections import defaultdict
-import json
-
-
-@dataclass
-class ValidationSample:
-    # Represents a single validation sample
-
-    commit_sha: str
-    commit_message: str
-    code_diff: Optional[str]
-    repository: str
-    detected_patterns: List[str]
-    detection_method: str  # 'keyword', 'nlp', 'code_diff'
-    validation_status: Optional[str] = None  # 'pending', 'validated', 'rejected'
-    true_label: Optional[bool] = None  # Ground truth after manual review
-    reviewer: Optional[str] = None
-    review_notes: Optional[str] = None
-
-
-@dataclass
-class ValidationMetrics:
-    # Precision/recall metrics for validation
-
-    true_positives: int
-    false_positives: int
-    true_negatives: int
-    false_negatives: int
-    precision: float
-    recall: float
-    f1_score: float
-    accuracy: float
-
-
-class QualitativeAnalyzer:
-    # Framework for manual validation and qualitative analysis.
-
-    def __init__(self, sample_size: int = 30, stratify_by: str = "pattern"):
-        # Initialize qualitative analyzer.
-        self.sample_size = sample_size
-        self.stratify_by = stratify_by
-        self.samples: List[ValidationSample] = []
-
-    def generate_validation_samples(
-        self, commits: List[Dict], analysis_results: List[Dict], include_negatives: bool = True
-    ) -> List[ValidationSample]:
-        # Generate stratified validation samples.
-        # Build commit lookup
-        commit_lookup = {c.get("hash", c.get("sha")): c for c in commits}
-
-        # Separate positives (detected as green) and negatives
-        positives = [r for r in analysis_results if r.get("is_green_aware", False)]
-        negatives = [r for r in analysis_results if not r.get("is_green_aware", False)]
-
-        samples = []
-
-        # Calculate sample distribution
-        if include_negatives:
-            # 80% positives, 20% negatives (to check false negatives)
-            pos_sample_size = int(self.sample_size * 0.8)
-            neg_sample_size = self.sample_size - pos_sample_size
-        else:
-            pos_sample_size = self.sample_size
-            neg_sample_size = 0
-
-        # Sample positives (stratified by pattern or repository)
-        if self.stratify_by == "pattern":
-            pos_samples = self._stratified_sample_by_pattern(positives, pos_sample_size)
-        elif self.stratify_by == "repository":
-            pos_samples = self._stratified_sample_by_repo(positives, commit_lookup, pos_sample_size)
-        else:
-            pos_samples = random.sample(positives, min(pos_sample_size, len(positives)))
-
-        # Sample negatives (random)
-        if include_negatives and negatives:
-            neg_samples = random.sample(negatives, min(neg_sample_size, len(negatives)))
-        else:
-            neg_samples = []
-
-        # Create ValidationSample objects
-        for result in pos_samples + neg_samples:
-            commit_sha = result.get("commit_sha")
-            commit = commit_lookup.get(commit_sha, {})
-
-            sample = ValidationSample(
-                commit_sha=commit_sha,
-                commit_message=commit.get("message", result.get("commit_message", "")),
-                code_diff=result.get("code_diff"),
-                repository=commit.get("repository", result.get("repository", "")),
-                detected_patterns=result.get("patterns_detected", []),
-                detection_method=result.get("detection_method", "keyword"),
-                validation_status="pending",
-            )
-            samples.append(sample)
-
-        self.samples = samples
-        return samples
-
-    def _stratified_sample_by_pattern(self, results: List[Dict], sample_size: int) -> List[Dict]:
-        # Stratified sampling ensuring each pattern category is represented.
-        # Group by dominant pattern
-        pattern_groups = defaultdict(list)
-        for result in results:
-            patterns = result.get("patterns_detected", [])
-            if patterns:
-                # Use first pattern as primary
-                primary_pattern = patterns[0]
-                pattern_groups[primary_pattern].append(result)
-
-        # Calculate samples per pattern (proportional)
-        total = len(results)
-        samples = []
-
-        for pattern, group in pattern_groups.items():
-            proportion = len(group) / total
-            pattern_sample_size = max(1, int(sample_size * proportion))
-            pattern_samples = random.sample(group, min(pattern_sample_size, len(group)))
-            samples.extend(pattern_samples)
-
-        # If we have fewer than sample_size, add random extras
-        if len(samples) < sample_size and len(samples) < len(results):
-            remaining = [r for r in results if r not in samples]
-            extra_needed = min(sample_size - len(samples), len(remaining))
-            samples.extend(random.sample(remaining, extra_needed))
-
-        return samples[:sample_size]
-
-    def _stratified_sample_by_repo(
-        self, results: List[Dict], commit_lookup: Dict, sample_size: int
-    ) -> List[Dict]:
-        # Stratified sampling ensuring each repository is represented.
-        # Group by repository
-        repo_groups = defaultdict(list)
-        for result in results:
-            commit_sha = result.get("commit_sha")
-            commit = commit_lookup.get(commit_sha, {})
-            repo = commit.get("repository", result.get("repository", "unknown"))
-            repo_groups[repo].append(result)
-
-        # Sample proportionally from each repo
-        samples = []
-        total = len(results)
-
-        for repo, group in repo_groups.items():
-            proportion = len(group) / total
-            repo_sample_size = max(1, int(sample_size * proportion))
-            repo_samples = random.sample(group, min(repo_sample_size, len(group)))
-            samples.extend(repo_samples)
-
-        return samples[:sample_size]
-
-    def export_samples_for_review(self, output_path: str) -> None:
-        # Export validation samples to JSON for manual review.
-        samples_data = []
-        for i, sample in enumerate(self.samples, 1):
-            samples_data.append(
-                {
-                    "sample_id": i,
-                    "commit_sha": sample.commit_sha,
-                    "repository": sample.repository,
-                    "commit_message": sample.commit_message,
-                    "detected_patterns": sample.detected_patterns,
-                    "detection_method": sample.detection_method,
-                    "code_diff_preview": sample.code_diff[:500] if sample.code_diff else None,
-                    "validation_status": sample.validation_status,
-                    "true_label": sample.true_label,
-                    "reviewer": sample.reviewer,
-                    "review_notes": sample.review_notes,
-                    "___INSTRUCTIONS___": "Set true_label to true/false, add reviewer name, add review_notes",
-                }
-            )
-
-        with open(output_path, "w") as f:
-            json.dump(samples_data, f, indent=2)
-
-    def import_validated_samples(self, input_path: str) -> None:
-        # Import manually validated samples from JSON.
-        with open(input_path, "r") as f:
-            samples_data = json.load(f)
-
-        # Update samples with validation results
-        for data in samples_data:
-            commit_sha = data["commit_sha"]
-
-            # Find matching sample
-            for sample in self.samples:
-                if sample.commit_sha == commit_sha:
-                    sample.true_label = data.get("true_label")
-                    sample.reviewer = data.get("reviewer")
-                    sample.review_notes = data.get("review_notes")
-                    sample.validation_status = (
-                        "validated" if sample.true_label is not None else "pending"
-                    )
-                    break
-
-    def calculate_metrics(self) -> ValidationMetrics:
-        # Calculate precision, recall, F1, and accuracy.
-        # Count outcomes
-        tp = 0  # True positive: detected as green, truly green
-        fp = 0  # False positive: detected as green, not green
-        tn = 0  # True negative: not detected, truly not green
-        fn = 0  # False negative: not detected, but is green
-
-        for sample in self.samples:
-            if sample.true_label is None:
-                continue  # Skip unvalidated samples
-
-            detected_as_green = len(sample.detected_patterns) > 0
-            truly_green = sample.true_label
-
-            if detected_as_green and truly_green:
-                tp += 1
-            elif detected_as_green and not truly_green:
-                fp += 1
-            elif not detected_as_green and not truly_green:
-                tn += 1
-            elif not detected_as_green and truly_green:
-                fn += 1
-
-        # Calculate metrics
-        total = tp + fp + tn + fn
-        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
-        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
-        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
-        accuracy = (tp + tn) / total if total > 0 else 0
-
-        return ValidationMetrics(
-            true_positives=tp,
-            false_positives=fp,
-            true_negatives=tn,
-            false_negatives=fn,
-            precision=round(precision, 4),
-            recall=round(recall, 4),
-            f1_score=round(f1, 4),
-            accuracy=round(accuracy, 4),
-        )
-
-    def get_validation_report(self) -> Dict:
-        # Generate comprehensive validation report.
-        validated_count = sum(1 for s in self.samples if s.validation_status == "validated")
-        pending_count = sum(1 for s in self.samples if s.validation_status == "pending")
-
-        metrics = self.calculate_metrics() if validated_count > 0 else None
-
-        # Analyze false positives and false negatives
-        false_positives = [
-            {
-                "commit_sha": s.commit_sha,
-                "detected_patterns": s.detected_patterns,
-                "review_notes": s.review_notes,
-            }
-            for s in self.samples
-            if s.true_label is not None and len(s.detected_patterns) > 0 and not s.true_label
-        ]
-
-        false_negatives = [
-            {
-                "commit_sha": s.commit_sha,
-                "commit_message": s.commit_message[:100],
-                "review_notes": s.review_notes,
-            }
-            for s in self.samples
-            if s.true_label is not None and len(s.detected_patterns) == 0 and s.true_label
-        ]
-
-        # Pattern accuracy breakdown
-        pattern_accuracy = self._analyze_pattern_accuracy()
-
-        return {
-            "sampling": {
-                "total_samples": len(self.samples),
-                "validated_samples": validated_count,
-                "pending_samples": pending_count,
-                "validation_progress": (
-                    round(validated_count / len(self.samples) * 100, 1) if self.samples else 0
-                ),
-                "stratification_method": self.stratify_by,
-            },
-            "metrics": {
-                "precision": metrics.precision if metrics else None,
-                "recall": metrics.recall if metrics else None,
-                "f1_score": metrics.f1_score if metrics else None,
-                "accuracy": metrics.accuracy if metrics else None,
-                "true_positives": metrics.true_positives if metrics else None,
-                "false_positives": metrics.false_positives if metrics else None,
-                "true_negatives": metrics.true_negatives if metrics else None,
-                "false_negatives": metrics.false_negatives if metrics else None,
-            },
-            "error_analysis": {
-                "false_positive_count": len(false_positives),
-                "false_negative_count": len(false_negatives),
-                "false_positives": false_positives[:5],  # Top 5
-                "false_negatives": false_negatives[:5],  # Top 5
-            },
-            "pattern_accuracy": pattern_accuracy,
-        }
-
-    def _analyze_pattern_accuracy(self) -> Dict:
-        # Analyze accuracy per pattern category.
-        pattern_stats = defaultdict(lambda: {"tp": 0, "fp": 0})
-
-        for sample in self.samples:
-            if sample.true_label is None:
-                continue
-
-            for pattern in sample.detected_patterns:
-                if sample.true_label:
-                    pattern_stats[pattern]["tp"] += 1
-                else:
-                    pattern_stats[pattern]["fp"] += 1
-
-        # Calculate precision per pattern
-        pattern_accuracy = {}
-        for pattern, stats in pattern_stats.items():
-            total = stats["tp"] + stats["fp"]
-            precision = stats["tp"] / total if total > 0 else 0
-            pattern_accuracy[pattern] = {
-                "true_positives": stats["tp"],
-                "false_positives": stats["fp"],
-                "precision": round(precision, 4),
-            }
-
-        return pattern_accuracy
-
-    def get_inter_rater_reliability(
-        self,
-        samples_from_reviewer_a: List[ValidationSample],
-        samples_from_reviewer_b: List[ValidationSample],
-    ) -> Dict:
-        # Calculate inter-rater reliability (Cohen's Kappa).
-        # Match samples by commit_sha
-        matched_samples = []
-        for sample_a in samples_from_reviewer_a:
-            for sample_b in samples_from_reviewer_b:
-                if sample_a.commit_sha == sample_b.commit_sha:
-                    matched_samples.append((sample_a, sample_b))
-                    break
-
-        if not matched_samples:
-            return {"error": "No matching samples between reviewers"}
-
-        # Calculate agreement
-        agreements = 0
-        for sample_a, sample_b in matched_samples:
-            if sample_a.true_label == sample_b.true_label:
-                agreements += 1
-
-        observed_agreement = agreements / len(matched_samples)
-
-        # Calculate expected agreement (by chance)
-        a_positive = sum(1 for s, _ in matched_samples if s.true_label)
-        b_positive = sum(1 for _, s in matched_samples if s.true_label)
-        n = len(matched_samples)
-
-        p_a_yes = a_positive / n
-        p_b_yes = b_positive / n
-        expected_agreement = (p_a_yes * p_b_yes) + ((1 - p_a_yes) * (1 - p_b_yes))
-
-        # Cohen's Kappa
-        kappa = (
-            (observed_agreement - expected_agreement) / (1 - expected_agreement)
-            if expected_agreement < 1
-            else 1
-        )
-
-        return {
-            "cohens_kappa": round(kappa, 4),
-            "observed_agreement": round(observed_agreement, 4),
-            "expected_agreement": round(expected_agreement, 4),
-            "sample_count": n,
-            "interpretation": self._interpret_kappa(kappa),
-        }
-
-    def _interpret_kappa(self, kappa: float) -> str:
-        # Interpret Cohen's Kappa value.
-        if kappa < 0:
-            return "Poor (less than chance)"
-        elif kappa < 0.20:
-            return "Slight"
-        elif kappa < 0.40:
-            return "Fair"
-        elif kappa < 0.60:
-            return "Moderate"
-        elif kappa < 0.80:
-            return "Substantial"
-        else:
-            return "Almost perfect"