ecobertx 1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ecobertx-1.0/PKG-INFO +9 -0
- ecobertx-1.0/ecobertX/__init__.py +0 -0
- ecobertx-1.0/ecobertX/cli.py +18 -0
- ecobertx-1.0/ecobertX/explain/__init__.py +0 -0
- ecobertx-1.0/ecobertX/explain/causal_reasoning.py +42 -0
- ecobertx-1.0/ecobertX/explain/dual_interpreter.py +29 -0
- ecobertx-1.0/ecobertX/explain/predict_explain.py +211 -0
- ecobertx-1.0/ecobertX/observe/__init__.py +0 -0
- ecobertx-1.0/ecobertX/observe/file_logger.py +45 -0
- ecobertx-1.0/ecobertX/observe/telemetry_Setup.py +8 -0
- ecobertx-1.0/ecobertX/pipeline.py +82 -0
- ecobertx-1.0/ecobertX/run_prediction_xai.py +26 -0
- ecobertx-1.0/ecobertX/use_case.py +57 -0
- ecobertx-1.0/ecobertX.egg-info/PKG-INFO +9 -0
- ecobertx-1.0/ecobertX.egg-info/SOURCES.txt +64 -0
- ecobertx-1.0/ecobertX.egg-info/dependency_links.txt +1 -0
- ecobertx-1.0/ecobertX.egg-info/entry_points.txt +2 -0
- ecobertx-1.0/ecobertX.egg-info/requires.txt +5 -0
- ecobertx-1.0/ecobertX.egg-info/top_level.txt +2 -0
- ecobertx-1.0/pyproject.toml +3 -0
- ecobertx-1.0/setup.cfg +4 -0
- ecobertx-1.0/setup.py +19 -0
- ecobertx-1.0/src/__init__.py +0 -0
- ecobertx-1.0/src/data/__init__.py +0 -0
- ecobertx-1.0/src/data/make_dataset.py +74 -0
- ecobertx-1.0/src/data_collection/__init__.py +0 -0
- ecobertx-1.0/src/data_collection/main.py +75 -0
- ecobertx-1.0/src/data_collection/new.py +0 -0
- ecobertx-1.0/src/data_collection/run_experiment.py +92 -0
- ecobertx-1.0/src/data_collection/run_experiment_tpu.py +90 -0
- ecobertx-1.0/src/data_collection/run_single_heavy_Exp.py +108 -0
- ecobertx-1.0/src/modeling/__init__.py +0 -0
- ecobertx-1.0/src/modeling/evaluate.py +61 -0
- ecobertx-1.0/src/modeling/predict.py +105 -0
- ecobertx-1.0/src/modeling/predict_new_model.py +98 -0
- ecobertx-1.0/src/modeling/predict_trial.py +80 -0
- ecobertx-1.0/src/modeling/run_prediction_on_validation.py +154 -0
- ecobertx-1.0/src/modeling/tab_1.py +57 -0
- ecobertx-1.0/src/modeling/train_model.py +129 -0
- ecobertx-1.0/src/modeling/vi_test.py +100 -0
- ecobertx-1.0/src/modeling/vi_val.py +95 -0
- ecobertx-1.0/src/modeling/vis_table.py +67 -0
- ecobertx-1.0/src/modeling/visualise.py +189 -0
- ecobertx-1.0/src/modeling/visualise_all_models.py +81 -0
- ecobertx-1.0/src/modeling/visualise_test.py +96 -0
- ecobertx-1.0/src/modeling/visualise_validation.py +83 -0
- ecobertx-1.0/src/preprocessing/__init__.py +0 -0
- ecobertx-1.0/src/preprocessing/build_features.py +184 -0
ecobertx-1.0/PKG-INFO
ADDED
|
File without changes
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from ecobertx.pipeline import run_ecobertx
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def main():
    """Command-line entry point for the EcoBERT-X CO2 optimization tool.

    Parses ``--input`` (a path to a CSV file) and hands it to
    ``run_ecobertx``.
    """
    parser = argparse.ArgumentParser(
        description="EcoBERT-X CO2 Optimization Tool"
    )
    parser.add_argument(
        "--input",
        required=True,
        help="Path to input CSV file",
    )
    cli_args = parser.parse_args()
    run_ecobertx(cli_args.input)
|
|
File without changes
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
def generate_causal_reason(feature, value, impact):
    """Return a human-readable causal explanation for one feature's CO₂ impact.

    Parameters
    ----------
    feature : str
        Name of the hyperparameter / input feature (e.g. ``"batch_size"``).
    value
        The feature's value in the explained configuration.
    impact : float
        Signed contribution (e.g. a SHAP value); positive means the feature
        pushed predicted CO₂ up, negative means it pulled it down.

    Returns
    -------
    str
        A one-sentence explanation; a generic fallback is used for features
        without a dedicated template.
    """
    # Positive impact raises the prediction; anything else (incl. 0) is
    # reported as "reduced", matching the original behavior.
    direction = "increased" if impact > 0 else "reduced"

    explanations = {

        "batch_size":
            f"Batch size of {value} {direction} CO₂ because batch size directly controls GPU memory usage and parallel compute load. Smaller batch sizes require fewer GPU operations, reducing energy consumption.",

        "max_sequence_length":
            f"Sequence length of {value} {direction} CO₂ because longer sequences increase transformer attention computation complexity O(n²), increasing GPU energy usage.",

        "log_model_parameters":
            f"Model parameter scale {value} {direction} CO₂ because larger models require more matrix multiplications and GPU compute.",

        "compute_log":
            f"Compute workload level {value} {direction} CO₂ because compute intensity directly correlates with GPU power draw.",

        "dataset_name":
            f"Dataset '{value}' {direction} CO₂ due to differences in training complexity and convergence efficiency.",

        "total_tokens":
            f"Token count {value} {direction} CO₂ because more tokens require more forward and backward passes.",

        "model_name":
            f"Model architecture '{value}' {direction} CO₂ due to differences in computational efficiency.",

        "num_epochs":
            f"Training epochs {value} {direction} CO₂ because more epochs require repeated training cycles.",

        "learning_rate":
            f"Learning rate {value} {direction} CO₂ because inefficient learning rates increase training duration.",

        "fp16":
            f"FP16 precision {direction} CO₂ because lower precision reduces compute and memory load."

    }

    # dict.get with a default replaces the membership-test-then-index idiom.
    return explanations.get(
        feature,
        f"{feature} {direction} CO₂ due to its influence on compute workload."
    )
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from explain.causal_reasoning import generate_causal_reason
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def interpret_prediction_full(raw_row, explainer):
    """Print a full mechanistic interpretation for one input row.

    Runs the explainer's prediction, detailed SHAP-style explanation and
    confidence score, then prints a causal sentence for each of the top
    ten features.
    """
    predicted_co2 = explainer.predict(raw_row)
    detailed = explainer.explain_prediction_detailed(raw_row)
    conf = explainer.confidence_score(raw_row)

    banner = "==================================="
    print("\n" + banner)
    print("EcoBERT-X MECHANISTIC EXPLANATION")
    print(banner + "\n")

    print(f"Predicted CO₂: {predicted_co2:.6f} kg")
    print(f"Confidence Score: {conf:.3f}\n")

    print("Causal Explanation:\n")

    # Only the ten most important features are verbalized.
    for entry in detailed[:10]:
        sentence = generate_causal_reason(
            entry["feature"], entry["value"], entry["impact"]
        )
        print("•", sentence)
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import joblib
|
|
2
|
+
import shap
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import numpy as np
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class PredictionExplainer:
    """Predicts CO₂ emissions and explains predictions via SHAP values.

    Loads a trained model, its preprocessor and target-normalization stats
    from ``<project_root>/models``, and builds a ``shap.TreeExplainer``
    (with a background sample when one is available on disk).
    """

    def __init__(self, project_root):
        """Load model artifacts from ``<project_root>/models``.

        Raises whatever ``joblib.load`` / ``np.load`` raise if the required
        artifacts (model, preprocessor, target stats) are missing.
        """
        print("EcoBERT-X Prediction Explainer initialized.")

        self.models_path = os.path.join(project_root, "models")

        # Load trained model
        self.model = joblib.load(
            os.path.join(self.models_path, "best_model.joblib")
        )

        # Load preprocessor
        self.preprocessor = joblib.load(
            os.path.join(self.models_path, "preprocessor.joblib")
        )

        # Load training data sample as SHAP background; it is optional, so a
        # failed load falls back to None rather than aborting (was a bare
        # `except:` — narrowed so KeyboardInterrupt/SystemExit propagate).
        try:
            background = joblib.load(
                os.path.join(self.models_path, "X_background.joblib")
            )
        except Exception:
            background = None

        # Create SHAP explainer safely: with a transformed background when
        # available, otherwise let SHAP pick a perturbation strategy.
        if background is not None:
            background_transformed = self.preprocessor.transform(background)
            self.explainer = shap.TreeExplainer(
                self.model,
                data=background_transformed,
                feature_perturbation="interventional"
            )
        else:
            self.explainer = shap.TreeExplainer(
                self.model,
                feature_perturbation="auto"
            )

        # Load target normalization (predictions are de-normalized with
        # these in `predict`).
        self.y_mean = np.load(
            os.path.join(self.models_path, "target_mean.npy")
        )

        self.y_std = np.load(
            os.path.join(self.models_path, "target_std.npy")
        )

    # ---------------------------------------
    # CLEAN INPUT
    # ---------------------------------------

    def clean_row(self, raw_row):
        """Return a copy of *raw_row* with pandas 'Unnamed' index columns dropped."""
        row = raw_row.copy()

        row = row.drop(
            labels=[
                c for c in row.index
                if "Unnamed" in str(c)
            ],
            errors="ignore"
        )

        return row

    # ---------------------------------------
    # SAFE SHAP VALUES
    # ---------------------------------------

    def get_shap_values(self, X):
        """Return SHAP values for the first (and only) row of *X*."""
        # check_additivity=False tolerates small additivity drift that would
        # otherwise raise inside SHAP.
        return self.explainer.shap_values(
            X,
            check_additivity=False
        )[0]

    # ---------------------------------------
    # PREDICT CO2
    # ---------------------------------------

    def predict(self, raw_row):
        """Predict CO₂ (kg) for one raw input row, clamped to be non-negative."""
        row = self.clean_row(raw_row)

        X = self.preprocessor.transform(
            pd.DataFrame([row])
        )

        pred_norm = self.model.predict(X)[0]

        # Undo target normalization, then undo the log1p transform applied
        # at training time (presumably — mirrors np.expm1 below).
        pred_log = pred_norm * self.y_std + self.y_mean

        pred = np.expm1(pred_log)

        return float(max(pred, 0))

    # ---------------------------------------
    # DETAILED EXPLANATION
    # ---------------------------------------

    def explain_prediction_detailed(self, raw_row):
        """Return per-feature SHAP attributions sorted by absolute impact.

        Each element is a dict with keys ``feature``, ``value``, ``impact``
        (signed SHAP value), ``effect`` ("increase"/"decrease") and
        ``importance`` (absolute impact).
        """
        row = self.clean_row(raw_row)

        X = self.preprocessor.transform(
            pd.DataFrame([row])
        )

        shap_vals = self.get_shap_values(X)

        explanation = []

        for feature, shap_val in zip(row.index, shap_vals):

            val = row[feature]

            # Handle numeric and categorical safely: keep numerics as float,
            # stringify everything else (was a bare `except:` — narrowed).
            try:
                val = float(val)
            except (TypeError, ValueError):
                val = str(val)

            explanation.append({
                "feature": feature,
                "value": val,
                "impact": float(shap_val),
                "effect":
                    "increase"
                    if shap_val > 0
                    else "decrease",
                "importance": abs(float(shap_val))
            })

        explanation.sort(
            key=lambda x: x["importance"],
            reverse=True
        )

        return explanation

    # ---------------------------------------
    # CONFIDENCE SCORE
    # ---------------------------------------

    def confidence_score(self, raw_row):
        """Return a heuristic confidence in (0, 1) from mean |SHAP| magnitude."""
        row = self.clean_row(raw_row)

        X = self.preprocessor.transform(
            pd.DataFrame([row])
        )

        shap_vals = self.get_shap_values(X)

        score = np.mean(np.abs(shap_vals))

        # Squashes [0, inf) into [0, 1); larger attributions -> higher score.
        confidence = score / (score + 1)

        return float(confidence)

    # ---------------------------------------
    # MECHANISTIC TRACE
    # ---------------------------------------

    def mechanistic_trace(self, raw_row):
        """Return the detailed explanation augmented with a running impact sum."""
        explanation = self.explain_prediction_detailed(raw_row)

        trace = []

        cumulative = 0

        for e in explanation:

            cumulative += e["impact"]

            trace.append({
                "feature": e["feature"],
                "value": e["value"],
                "impact": e["impact"],
                "cumulative_effect": cumulative
            })
        return trace

    def get_model(self):
        """Return the underlying trained model object."""
        return self.model
|
|
File without changes
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Default directory for JSON prediction logs (relative to the working dir).
LOG_DIR = "logs"

os.makedirs(LOG_DIR, exist_ok=True)


def save_prediction_log(config, prediction, explanation, confidence, trace,
                        log_dir=LOG_DIR):
    """Write one prediction record to a timestamped JSON file.

    Parameters
    ----------
    config : dict
        The input configuration that was predicted on.
    prediction : float
        Predicted CO₂ value.
    explanation : list
        Per-feature attribution records.
    confidence : float
        Confidence score for the prediction.
    trace : list
        Mechanistic trace (cumulative attribution) records.
    log_dir : str, optional
        Destination directory; defaults to ``LOG_DIR`` ("logs"), preserving
        the original behavior.

    Returns
    -------
    str
        Path of the written log file.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    # Re-ensure the directory here so a non-default log_dir (or a dir removed
    # after import) still works.
    os.makedirs(log_dir, exist_ok=True)

    record = {
        "timestamp": timestamp,
        "input": config,
        "prediction": prediction,
        "confidence": confidence,
        "explanation": explanation,
        "mechanistic_trace": trace
    }

    path = os.path.join(
        log_dir,
        f"log_{timestamp}.json"
    )

    with open(path, "w") as f:
        json.dump(record, f, indent=4)

    print("Log saved:", path)

    return path
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from ecobertx.explain.predict_explain import PredictionExplainer
|
|
2
|
+
from ecobertx.optimisation.optuna_optimiser import OptunaOptimizer
|
|
3
|
+
from ecobertx.optimisation.advanced_optimizer import AdvancedEcoOptimizer
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class EcoBERTXPipeline:
    """End-to-end pipeline: predict, explain, then optimize a configuration."""

    def __init__(self, model_path=None):
        # NOTE(review): this message mirrors PredictionExplainer's own init
        # message verbatim; kept unchanged to preserve observable output.
        print("EcoBERT-X Prediction Explainer initialized.")

        self.explainer = PredictionExplainer(model_path)
        # Predictor and explainer are the same object by design.
        self.predictor = self.explainer

        # Hyperparameters the optimizers are allowed to vary.
        self.feature_cols = [
            "num_epochs",
            "batch_size",
            "num_train_samples",
            "model_size",
            "num_layers",
        ]

    def _safe_validation(self, explanation):
        """Extract the 'validation' dict from an explanation, tolerating lists."""
        if isinstance(explanation, list):
            explanation = explanation[0]
        if not isinstance(explanation, dict):
            return {}
        return explanation.get("validation", {})

    def run(self, row):
        """Run prediction + both optimizers on *row*; return the advanced result."""
        print("\n🚀 Running EcoBERT-X Pipeline...\n")

        baseline_co2 = float(self.predictor.predict(row))
        baseline_explanation = self.explainer.explain_prediction_detailed(row)

        print("📊 ORIGINAL")
        print("CO2:", baseline_co2)

        print("\n⚙️ Running Optuna Optimization...")
        coarse_optimizer = OptunaOptimizer(self.predictor, self.explainer)
        best_params, _ = coarse_optimizer.optimize(row, n_trials=20)

        print("\n⚙️ BEST CONFIG FOUND")
        print(best_params)

        print("\n🧠 Running Advanced Eco Optimization...")
        underlying_model = self.explainer.get_model()

        import pandas as pd
        background_frame = pd.DataFrame([row])

        advanced = AdvancedEcoOptimizer(
            model=underlying_model,
            feature_cols=self.feature_cols,
            background_data=background_frame,
        )
        outcome = advanced.run(row)

        print("\n🌱 OPTIMIZED")
        print("CO2:", outcome["optimized_co2"])
        print("\n📉 Improvement:", outcome["reduction"])

        print("\n🧠 CAUSAL EXPLANATION:")
        for reason in outcome["causal_explanations"]:
            print("•", reason)

        print("\n🧪 VALIDATION:")
        print(outcome["validation"])

        return outcome
|
|
75
|
+
def run_ecobertx(input_csv, project_root="D:/EcoPredictor+"):
    """Run the full EcoBERT-X pipeline on the first row of *input_csv*.

    Parameters
    ----------
    input_csv : str
        Path to a CSV file; only its first row is used.
    project_root : str, optional
        Root directory containing the ``models`` artifacts. Defaults to the
        original hard-coded path so existing callers are unaffected, but can
        now be overridden on other machines.

    Returns
    -------
    The advanced-optimizer result dict from ``EcoBERTXPipeline.run``.
    """
    import pandas as pd

    df = pd.read_csv(input_csv)
    row = df.iloc[0]

    pipeline = EcoBERTXPipeline(project_root)
    return pipeline.run(row)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Demo script: load a held-out test set, predict + explain the first row,
# and save a SHAP heatmap. Paths are hard-coded to the author's machine.
from explain.predict_explain import PredictionExplainer
from explain.dual_interpreter import interpret_prediction_full
from visualize.heatmap import plot_heatmap

import pandas as pd

# Project root containing the `models` directory of trained artifacts.
PROJECT_ROOT = r"D:\EcoPredictor+"

# Raw (unpreprocessed) test features; the explainer applies its own
# preprocessor internally.
X_test = pd.read_csv(
    r"D:\EcoPredictor+\data\processed\X_test_raw.csv"
)

xai = PredictionExplainer(PROJECT_ROOT)

# Explain only the first test row.
row = X_test.iloc[0]

prediction = xai.predict(row)

explanation = xai.explain_prediction_detailed(row)

# Prints prediction, confidence and top-10 causal sentences to stdout.
interpret_prediction_full(row, xai)

# Persist a heatmap of the per-feature attributions.
plot_heatmap(
    explanation,
    r"D:\EcoPredictor+\ecobertX\logs\heatmap.png"
)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Use-case demo: pick the lowest-carbon configuration from saved JSON logs.

Scans the log folder, selects the log with the smallest ``predicted_co2``,
and prints the SHAP-based reasons behind it.
"""
import json
import glob
import os

# Hard-coded to the author's machine; adjust when running elsewhere.
LOG_FOLDER = r"D:\EcoPredictor+\ecobertX\logs"

# ----------------------------------------------------
# 1. Load all prediction logs
# ----------------------------------------------------
files = glob.glob(os.path.join(LOG_FOLDER, "*.json"))

if not files:
    print("No logs found!")
    # raise SystemExit is safer than exit(): the `exit` builtin is only
    # guaranteed in interactive/site-enabled runs.
    raise SystemExit

# ----------------------------------------------------
# 2. Select lowest-carbon configuration
# ----------------------------------------------------
# NOTE(review): this reads log["predicted_co2"], but file_logger writes the
# key "prediction" — confirm which writer produced these logs.
best = None
best_file = None

for f in files:
    # `with` closes each file handle (the original json.load(open(f))
    # leaked handles until GC).
    with open(f) as fh:
        log = json.load(fh)

    if not best or log["predicted_co2"] < best["predicted_co2"]:
        best = log
        best_file = f


print("\n🌱 ECOBERT-X LOG-BASED DECISION")
print("================================")
print("Selected log :", best_file)
print("Predicted CO₂:", round(best["predicted_co2"], 6), "kg")


# ----------------------------------------------------
# 3. Explain WHY this was low carbon
# ----------------------------------------------------
print("\nKey Reasons from Log:")

for r in best["shap_explanation"][:5]:

    direction = "reduces" if r["impact"] < 0 else "increases"

    print(f"- {r['feature']} {direction} CO₂ by {abs(r['impact']):.4f} kg")


# ----------------------------------------------------
# 4. Practical Usage Demonstration
# ----------------------------------------------------
print("\nHow this log is used in practice:")
print("""
• Before real training begins, several candidate configs are evaluated.
• EcoBERT-X predicts CO₂ for each and stores JSON logs.
• This script selects the lowest-emission option.
• SHAP reasons explain which hyperparameters caused the change.
• The team trains only the sustainable configuration.
""")
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
setup.py
|
|
3
|
+
ecobertX/__init__.py
|
|
4
|
+
ecobertX/cli.py
|
|
5
|
+
ecobertX/pipeline.py
|
|
6
|
+
ecobertX/run_prediction_xai.py
|
|
7
|
+
ecobertX/use_case.py
|
|
8
|
+
ecobertX.egg-info/PKG-INFO
|
|
9
|
+
ecobertX.egg-info/SOURCES.txt
|
|
10
|
+
ecobertX.egg-info/dependency_links.txt
|
|
11
|
+
ecobertX.egg-info/entry_points.txt
|
|
12
|
+
ecobertX.egg-info/requires.txt
|
|
13
|
+
ecobertX.egg-info/top_level.txt
|
|
14
|
+
ecobertX/explain/__init__.py
|
|
15
|
+
ecobertX/explain/causal_reasoning.py
|
|
16
|
+
ecobertX/explain/dual_interpreter.py
|
|
17
|
+
ecobertX/explain/predict_explain.py
|
|
18
|
+
ecobertX/observe/__init__.py
|
|
19
|
+
ecobertX/observe/file_logger.py
|
|
20
|
+
ecobertX/observe/telemetry_Setup.py
|
|
21
|
+
ecobertx/__init__.py
|
|
22
|
+
ecobertx/cli.py
|
|
23
|
+
ecobertx/pipeline.py
|
|
24
|
+
ecobertx/run_prediction_xai.py
|
|
25
|
+
ecobertx/use_case.py
|
|
26
|
+
ecobertx.egg-info/PKG-INFO
|
|
27
|
+
ecobertx.egg-info/SOURCES.txt
|
|
28
|
+
ecobertx.egg-info/dependency_links.txt
|
|
29
|
+
ecobertx.egg-info/entry_points.txt
|
|
30
|
+
ecobertx.egg-info/requires.txt
|
|
31
|
+
ecobertx.egg-info/top_level.txt
|
|
32
|
+
ecobertx/explain/__init__.py
|
|
33
|
+
ecobertx/explain/causal_reasoning.py
|
|
34
|
+
ecobertx/explain/dual_interpreter.py
|
|
35
|
+
ecobertx/explain/predict_explain.py
|
|
36
|
+
ecobertx/observe/__init__.py
|
|
37
|
+
ecobertx/observe/file_logger.py
|
|
38
|
+
ecobertx/observe/telemetry_Setup.py
|
|
39
|
+
src/__init__.py
|
|
40
|
+
src/data/__init__.py
|
|
41
|
+
src/data/make_dataset.py
|
|
42
|
+
src/data_collection/__init__.py
|
|
43
|
+
src/data_collection/main.py
|
|
44
|
+
src/data_collection/new.py
|
|
45
|
+
src/data_collection/run_experiment.py
|
|
46
|
+
src/data_collection/run_experiment_tpu.py
|
|
47
|
+
src/data_collection/run_single_heavy_Exp.py
|
|
48
|
+
src/modeling/__init__.py
|
|
49
|
+
src/modeling/evaluate.py
|
|
50
|
+
src/modeling/predict.py
|
|
51
|
+
src/modeling/predict_new_model.py
|
|
52
|
+
src/modeling/predict_trial.py
|
|
53
|
+
src/modeling/run_prediction_on_validation.py
|
|
54
|
+
src/modeling/tab_1.py
|
|
55
|
+
src/modeling/train_model.py
|
|
56
|
+
src/modeling/vi_test.py
|
|
57
|
+
src/modeling/vi_val.py
|
|
58
|
+
src/modeling/vis_table.py
|
|
59
|
+
src/modeling/visualise.py
|
|
60
|
+
src/modeling/visualise_all_models.py
|
|
61
|
+
src/modeling/visualise_test.py
|
|
62
|
+
src/modeling/visualise_validation.py
|
|
63
|
+
src/preprocessing/__init__.py
|
|
64
|
+
src/preprocessing/build_features.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
ecobertx-1.0/setup.cfg
ADDED
ecobertx-1.0/setup.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Packaging script for the ecobertx distribution.
from setuptools import setup, find_packages

setup(
    name="ecobertx",
    version="1.0",
    # Discover all importable packages automatically (ecobertX, src, ...).
    packages=find_packages(),
    # Runtime dependencies installed alongside the package.
    install_requires=[
        "pandas",
        "numpy",
        "scikit-learn",
        "shap",
        "matplotlib"
    ],
    # Installs an `ecobertx` console command that calls ecobertx.cli:main.
    entry_points={
        "console_scripts": [
            "ecobertx=ecobertx.cli:main"
        ]
    }
)
|
|
File without changes
|
|
File without changes
|