geocif 0.1.38__tar.gz → 0.1.40__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geocif-0.1.38/geocif.egg-info → geocif-0.1.40}/PKG-INFO +1 -1
- {geocif-0.1.38 → geocif-0.1.40}/geocif/experiments.py +15 -11
- {geocif-0.1.38 → geocif-0.1.40}/geocif/geocif.py +3 -1
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/feature_selection.py +47 -18
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/xai.py +2 -1
- {geocif-0.1.38 → geocif-0.1.40/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.38 → geocif-0.1.40}/setup.py +1 -1
- {geocif-0.1.38 → geocif-0.1.40}/LICENSE +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/MANIFEST.in +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/README.md +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/__init__.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/analysis.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/constants.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/features.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/geo.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/models.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/cei/indices.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/indices_runner.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/indices_runner_v2.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/logger.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/correlations_backup.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/embedding.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/feature_engineering.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/output.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/stages.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/stats.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/trend.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/playground/automl.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/playground/misc.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/utils.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif/viz/plot.py +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif.egg-info/SOURCES.txt +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/requirements.txt +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/setup.cfg +0 -0
- {geocif-0.1.38 → geocif-0.1.40}/tests/test_geocif.py +0 -0
@@ -14,7 +14,11 @@ import warnings
|
|
14
14
|
warnings.simplefilter(action="ignore", category=FutureWarning)
|
15
15
|
|
16
16
|
|
17
|
-
def run(inputs, logger, parser, section, item, type, values):
|
17
|
+
def main(inputs, logger, parser, section, item, type, values):
|
18
|
+
# Set experiment_name
|
19
|
+
experiment_name = f"{section}_{item}"
|
20
|
+
parser.set("DEFAULT", "experiment_name", experiment_name)
|
21
|
+
|
18
22
|
if type == "str":
|
19
23
|
original_value = parser.get(section, item)
|
20
24
|
elif type == "bool":
|
@@ -41,7 +45,7 @@ def run(inputs, logger, parser, section, item, type, values):
|
|
41
45
|
return parser
|
42
46
|
|
43
47
|
|
44
|
-
def main(path_config_files=[Path("../config/geocif.txt")]):
|
48
|
+
def run(path_config_files=[Path("../config/geocif.txt")]):
|
45
49
|
logger, parser = log.setup_logger_parser(path_config_files)
|
46
50
|
inputs = gc.gather_inputs(parser)
|
47
51
|
|
@@ -51,13 +55,13 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
|
|
51
55
|
|
52
56
|
# Experiment 1: include_lat_lon
|
53
57
|
logger.info("Experiment 1: include_lat_lon")
|
54
|
-
parser = run(
|
58
|
+
parser = main(
|
55
59
|
inputs, logger, parser, "ML", "include_lat_lon", "bool", [True, False]
|
56
60
|
)
|
57
61
|
|
58
62
|
# Experiment 2: feature_selection
|
59
63
|
logger.info("Experiment 2: feature_selection")
|
60
|
-
parser = run(
|
64
|
+
parser = main(
|
61
65
|
inputs,
|
62
66
|
logger,
|
63
67
|
parser,
|
@@ -69,13 +73,13 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
|
|
69
73
|
|
70
74
|
# Experiment 3: lag_years
|
71
75
|
logger.info("Experiment 3: lag_years")
|
72
|
-
parser = run(
|
76
|
+
parser = main(
|
73
77
|
inputs, logger, parser, "ML", "lag_years", "int", [1, 2, 3, 4, 5]
|
74
78
|
)
|
75
79
|
|
76
80
|
# Experiment 4: lag_yield_as_feature
|
77
81
|
logger.info("Experiment 4: lag_yield_as_feature")
|
78
|
-
parser = run(
|
82
|
+
parser = main(
|
79
83
|
inputs,
|
80
84
|
logger,
|
81
85
|
parser,
|
@@ -87,13 +91,13 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
|
|
87
91
|
|
88
92
|
# Experiment 5: median_years
|
89
93
|
logger.info("Experiment 5: median_years")
|
90
|
-
parser = run(
|
94
|
+
parser = main(
|
91
95
|
inputs, logger, parser, "ML", "median_years", "int", [2, 3, 4, 5]
|
92
96
|
)
|
93
97
|
|
94
98
|
# Experiment 6: median_yield_as_feature
|
95
99
|
logger.info("Experiment 6: median_yield_as_feature")
|
96
|
-
parser = run(
|
100
|
+
parser = main(
|
97
101
|
inputs,
|
98
102
|
logger,
|
99
103
|
parser,
|
@@ -105,7 +109,7 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
|
|
105
109
|
|
106
110
|
# Experiment 7: analogous_year_yield_as_feature
|
107
111
|
logger.info("Experiment 7: analogous_year_yield_as_feature")
|
108
|
-
parser = run(
|
112
|
+
parser = main(
|
109
113
|
inputs,
|
110
114
|
logger,
|
111
115
|
parser,
|
@@ -117,10 +121,10 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
|
|
117
121
|
|
118
122
|
# Experiment 8: optimize
|
119
123
|
logger.info("Experiment 8: optimize")
|
120
|
-
parser = run(
|
124
|
+
parser = main(
|
121
125
|
inputs, logger, parser, "DEFAULT", "optimize", "bool", [True, False]
|
122
126
|
)
|
123
127
|
|
124
128
|
|
125
129
|
if __name__ == "__main__":
|
126
|
-
main()
|
130
|
+
run()
|
@@ -535,7 +535,7 @@ class Geocif:
|
|
535
535
|
cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
|
536
536
|
|
537
537
|
# Check if any element of dict_selected_features is in _t
|
538
|
-
for x in selected_features:
|
538
|
+
for x in selected_features["CEI"].values:
|
539
539
|
if x not in cei:
|
540
540
|
continue
|
541
541
|
|
@@ -644,6 +644,7 @@ class Geocif:
|
|
644
644
|
kwargs = {
|
645
645
|
"cluster_strategy": self.cluster_strategy,
|
646
646
|
"model": self.model,
|
647
|
+
"model_name": self.model_name,
|
647
648
|
"forecast_season": self.forecast_season,
|
648
649
|
"crop": self.crop,
|
649
650
|
"country": self.country,
|
@@ -659,6 +660,7 @@ class Geocif:
|
|
659
660
|
model = self.model.estimator_
|
660
661
|
else:
|
661
662
|
model = self.model
|
663
|
+
|
662
664
|
output.store(self.db_path, experiment_id, df, model, self.model_name)
|
663
665
|
|
664
666
|
def get_cei_column_names(self, df):
|
@@ -3,6 +3,31 @@ from tqdm import tqdm
|
|
3
3
|
from sklearn.ensemble import RandomForestRegressor
|
4
4
|
|
5
5
|
|
6
|
+
def are_all_features_non_eo(features):
|
7
|
+
"""
|
8
|
+
Check if all the features non eo features
|
9
|
+
|
10
|
+
Args:
|
11
|
+
feature:
|
12
|
+
|
13
|
+
Returns:
|
14
|
+
|
15
|
+
"""
|
16
|
+
non_eo_features = ['Median Yield (tn per ha)',
|
17
|
+
'Analogous Year',
|
18
|
+
'Analogous Year Yield',
|
19
|
+
'lon',
|
20
|
+
'lat',
|
21
|
+
't -1 Yield (tn per ha)',
|
22
|
+
't -2 Yield (tn per ha)',
|
23
|
+
't -3 Yield (tn per ha)',
|
24
|
+
't -4 Yield (tn per ha)',
|
25
|
+
't -5 Yield (tn per ha)']
|
26
|
+
|
27
|
+
# Check if all features are non-eo features, return True if they are
|
28
|
+
return all(feature in non_eo_features for feature in features)
|
29
|
+
|
30
|
+
|
6
31
|
def select_features(X, y, method="RFE", min_features_to_select=3):
|
7
32
|
"""
|
8
33
|
|
@@ -29,6 +54,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
|
|
29
54
|
# selector = VarianceThreshold(threshold=scaled_data.var().mean())
|
30
55
|
# X = selector.fit_transform(scaled_data)
|
31
56
|
selector = None
|
57
|
+
X_original = X.copy()
|
32
58
|
|
33
59
|
# Fill in columns with median of that column
|
34
60
|
X = X.fillna(X.median())
|
@@ -46,24 +72,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
|
|
46
72
|
np.float = np.float64
|
47
73
|
np.bool = np.bool_
|
48
74
|
|
49
|
-
if method == "SelectKBest":
|
50
|
-
from sklearn.feature_selection import SelectKBest, f_regression
|
51
|
-
|
52
|
-
k = 15 # Number of features to select
|
53
|
-
selector = SelectKBest(score_func=f_regression, k=k)
|
54
|
-
|
55
|
-
# Fit the selector to the data and transform the data to select the best features
|
56
|
-
try:
|
57
|
-
X_new = selector.fit_transform(X, y)
|
58
|
-
except:
|
59
|
-
breakpoint()
|
60
|
-
|
61
|
-
# Get the selected feature indices
|
62
|
-
selected_features = selector.get_support(indices=True)
|
63
|
-
|
64
|
-
# Get the selected feature names
|
65
|
-
selected_features = X.columns[selected_features].tolist()
|
66
|
-
elif method == "SHAP":
|
75
|
+
if method == "SHAP":
|
67
76
|
import pandas as pd
|
68
77
|
from catboost import CatBoostRegressor
|
69
78
|
from fasttreeshap import TreeExplainer as FastTreeExplainer
|
@@ -257,6 +266,26 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
|
|
257
266
|
raise ValueError("Method not recognized. Use BorutaPy, Genetic, or RFE")
|
258
267
|
# tentative_features = X.columns[selector.support_weak_].tolist()
|
259
268
|
|
269
|
+
non_eo = are_all_features_non_eo(selected_features)
|
270
|
+
if non_eo or method == "SelectKBest":
|
271
|
+
from sklearn.feature_selection import SelectKBest, f_regression
|
272
|
+
|
273
|
+
k = 15 # Number of features to select
|
274
|
+
selector = SelectKBest(score_func=f_regression, k=k)
|
275
|
+
|
276
|
+
# Fit the selector to the data and transform the data to select the best features
|
277
|
+
try:
|
278
|
+
X_new = selector.fit_transform(X, y)
|
279
|
+
except:
|
280
|
+
breakpoint()
|
281
|
+
|
282
|
+
# Get the selected feature indices
|
283
|
+
selected_features = selector.get_support(indices=True)
|
284
|
+
|
285
|
+
# Get the selected feature names
|
286
|
+
selected_features = X.columns[selected_features].tolist()
|
287
|
+
|
288
|
+
print(selected_features)
|
260
289
|
# Filter the dataset for selected features
|
261
290
|
X_filtered = X.loc[:, selected_features]
|
262
291
|
|
@@ -8,6 +8,7 @@ from tqdm import tqdm
|
|
8
8
|
def explain(df_train, df_test, **kwargs):
|
9
9
|
cluster_strategy = kwargs.get("cluster_strategy", "auto_detect")
|
10
10
|
model = kwargs.get("model")
|
11
|
+
model_name = kwargs.get("model_name")
|
11
12
|
forecast_season = kwargs.get("forecast_season")
|
12
13
|
crop = kwargs.get("crop")
|
13
14
|
country = kwargs.get("country")
|
@@ -48,7 +49,7 @@ def explain(df_train, df_test, **kwargs):
|
|
48
49
|
plt.tight_layout()
|
49
50
|
|
50
51
|
fname = f"beeswarm_{region_name}_{forecast_season}.png"
|
51
|
-
out_dir = analysis_dir / country / crop / str(forecast_season)
|
52
|
+
out_dir = analysis_dir / country / crop / model_name / str(forecast_season)
|
52
53
|
os.makedirs(out_dir, exist_ok=True)
|
53
54
|
plt.savefig(out_dir / fname, dpi=250)
|
54
55
|
plt.close()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|