geocif 0.1.37__tar.gz → 0.1.39__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geocif-0.1.37/geocif.egg-info → geocif-0.1.39}/PKG-INFO +1 -1
- {geocif-0.1.37 → geocif-0.1.39}/geocif/experiments.py +19 -20
- {geocif-0.1.37 → geocif-0.1.39}/geocif/geocif.py +1 -1
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/feature_selection.py +47 -18
- {geocif-0.1.37 → geocif-0.1.39/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.37 → geocif-0.1.39}/setup.py +1 -1
- {geocif-0.1.37 → geocif-0.1.39}/LICENSE +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/MANIFEST.in +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/README.md +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/__init__.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/analysis.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/constants.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/features.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/geo.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/models.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/cei/indices.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/indices_runner.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/indices_runner_v2.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/logger.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/correlations_backup.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/embedding.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/feature_engineering.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/output.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/stages.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/stats.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/trend.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/xai.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/playground/automl.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/playground/misc.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/utils.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif/viz/plot.py +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif.egg-info/SOURCES.txt +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/requirements.txt +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/setup.cfg +0 -0
- {geocif-0.1.37 → geocif-0.1.39}/tests/test_geocif.py +0 -0
@@ -1,4 +1,3 @@
|
|
1
|
-
import configparser
|
2
1
|
from pathlib import Path
|
3
2
|
|
4
3
|
import matplotlib.pyplot as plt
|
@@ -15,7 +14,7 @@ import warnings
|
|
15
14
|
warnings.simplefilter(action="ignore", category=FutureWarning)
|
16
15
|
|
17
16
|
|
18
|
-
def run(inputs, logger, parser, section, item, type, values):
|
17
|
+
def main(inputs, logger, parser, section, item, type, values):
|
19
18
|
if type == "str":
|
20
19
|
original_value = parser.get(section, item)
|
21
20
|
elif type == "bool":
|
@@ -42,7 +41,7 @@ def run(inputs, logger, parser, section, item, type, values):
|
|
42
41
|
return parser
|
43
42
|
|
44
43
|
|
45
|
-
def main(path_config_files=[Path("../config/geocif.txt")]):
|
44
|
+
def run(path_config_files=[Path("../config/geocif.txt")]):
|
46
45
|
logger, parser = log.setup_logger_parser(path_config_files)
|
47
46
|
inputs = gc.gather_inputs(parser)
|
48
47
|
|
@@ -52,17 +51,17 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
|
|
52
51
|
|
53
52
|
# Experiment 1: include_lat_lon
|
54
53
|
logger.info("Experiment 1: include_lat_lon")
|
55
|
-
parser = run(
|
56
|
-
inputs, logger, parser, "
|
54
|
+
parser = main(
|
55
|
+
inputs, logger, parser, "ML", "include_lat_lon", "bool", [True, False]
|
57
56
|
)
|
58
57
|
|
59
58
|
# Experiment 2: feature_selection
|
60
59
|
logger.info("Experiment 2: feature_selection")
|
61
|
-
parser = run(
|
60
|
+
parser = main(
|
62
61
|
inputs,
|
63
62
|
logger,
|
64
63
|
parser,
|
65
|
-
"
|
64
|
+
"ML",
|
66
65
|
"feature_selection",
|
67
66
|
"str",
|
68
67
|
["SelectKBest", "BorutaPy", "Leshy", "RFECV", "RFE"],
|
@@ -70,17 +69,17 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
|
|
70
69
|
|
71
70
|
# Experiment 3: lag_years
|
72
71
|
logger.info("Experiment 3: lag_years")
|
73
|
-
parser = run(
|
74
|
-
inputs, logger, parser, "
|
72
|
+
parser = main(
|
73
|
+
inputs, logger, parser, "ML", "lag_years", "int", [1, 2, 3, 4, 5]
|
75
74
|
)
|
76
75
|
|
77
76
|
# Experiment 4: lag_yield_as_feature
|
78
77
|
logger.info("Experiment 4: lag_yield_as_feature")
|
79
|
-
parser = run(
|
78
|
+
parser = main(
|
80
79
|
inputs,
|
81
80
|
logger,
|
82
81
|
parser,
|
83
|
-
"
|
82
|
+
"ML",
|
84
83
|
"lag_yield_as_feature",
|
85
84
|
"bool",
|
86
85
|
[True, False],
|
@@ -88,17 +87,17 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
|
|
88
87
|
|
89
88
|
# Experiment 5: median_years
|
90
89
|
logger.info("Experiment 5: median_years")
|
91
|
-
parser = run(
|
92
|
-
inputs, logger, parser, "
|
90
|
+
parser = main(
|
91
|
+
inputs, logger, parser, "ML", "median_years", "int", [2, 3, 4, 5]
|
93
92
|
)
|
94
93
|
|
95
94
|
# Experiment 6: median_yield_as_feature
|
96
95
|
logger.info("Experiment 6: median_yield_as_feature")
|
97
|
-
parser = run(
|
96
|
+
parser = main(
|
98
97
|
inputs,
|
99
98
|
logger,
|
100
99
|
parser,
|
101
|
-
"
|
100
|
+
"ML",
|
102
101
|
"median_yield_as_feature",
|
103
102
|
"bool",
|
104
103
|
[True, False],
|
@@ -106,11 +105,11 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
|
|
106
105
|
|
107
106
|
# Experiment 7: analogous_year_yield_as_feature
|
108
107
|
logger.info("Experiment 7: analogous_year_yield_as_feature")
|
109
|
-
parser = run(
|
108
|
+
parser = main(
|
110
109
|
inputs,
|
111
110
|
logger,
|
112
111
|
parser,
|
113
|
-
"
|
112
|
+
"ML",
|
114
113
|
"analogous_year_yield_as_feature",
|
115
114
|
"bool",
|
116
115
|
[True, False],
|
@@ -118,10 +117,10 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
|
|
118
117
|
|
119
118
|
# Experiment 8: optimize
|
120
119
|
logger.info("Experiment 8: optimize")
|
121
|
-
parser = run(
|
122
|
-
inputs, logger, parser, "
|
120
|
+
parser = main(
|
121
|
+
inputs, logger, parser, "DEFAULT", "optimize", "bool", [True, False]
|
123
122
|
)
|
124
123
|
|
125
124
|
|
126
125
|
if __name__ == "__main__":
|
127
|
-
|
126
|
+
run()
|
@@ -535,7 +535,7 @@ class Geocif:
|
|
535
535
|
cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
|
536
536
|
|
537
537
|
# Check if any element of dict_selected_features is in _t
|
538
|
-
for x in selected_features:
|
538
|
+
for x in selected_features["CEI"].values:
|
539
539
|
if x not in cei:
|
540
540
|
continue
|
541
541
|
|
@@ -3,6 +3,31 @@ from tqdm import tqdm
|
|
3
3
|
from sklearn.ensemble import RandomForestRegressor
|
4
4
|
|
5
5
|
|
6
|
+
def are_all_features_non_eo(features):
|
7
|
+
"""
|
8
|
+
Check if all the features non eo features
|
9
|
+
|
10
|
+
Args:
|
11
|
+
feature:
|
12
|
+
|
13
|
+
Returns:
|
14
|
+
|
15
|
+
"""
|
16
|
+
non_eo_features = ['Median Yield (tn per ha)',
|
17
|
+
'Analogous Year',
|
18
|
+
'Analogous Year Yield',
|
19
|
+
'lon',
|
20
|
+
'lat',
|
21
|
+
't -1 Yield (tn per ha)',
|
22
|
+
't -2 Yield (tn per ha)',
|
23
|
+
't -3 Yield (tn per ha)',
|
24
|
+
't -4 Yield (tn per ha)',
|
25
|
+
't -5 Yield (tn per ha)']
|
26
|
+
|
27
|
+
# Check if all features are non-eo features, return True if they are
|
28
|
+
return all(feature in non_eo_features for feature in features)
|
29
|
+
|
30
|
+
|
6
31
|
def select_features(X, y, method="RFE", min_features_to_select=3):
|
7
32
|
"""
|
8
33
|
|
@@ -29,6 +54,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
|
|
29
54
|
# selector = VarianceThreshold(threshold=scaled_data.var().mean())
|
30
55
|
# X = selector.fit_transform(scaled_data)
|
31
56
|
selector = None
|
57
|
+
X_original = X.copy()
|
32
58
|
|
33
59
|
# Fill in columns with median of that column
|
34
60
|
X = X.fillna(X.median())
|
@@ -46,24 +72,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
|
|
46
72
|
np.float = np.float64
|
47
73
|
np.bool = np.bool_
|
48
74
|
|
49
|
-
if method == "SelectKBest":
|
50
|
-
from sklearn.feature_selection import SelectKBest, f_regression
|
51
|
-
|
52
|
-
k = 15 # Number of features to select
|
53
|
-
selector = SelectKBest(score_func=f_regression, k=k)
|
54
|
-
|
55
|
-
# Fit the selector to the data and transform the data to select the best features
|
56
|
-
try:
|
57
|
-
X_new = selector.fit_transform(X, y)
|
58
|
-
except:
|
59
|
-
breakpoint()
|
60
|
-
|
61
|
-
# Get the selected feature indices
|
62
|
-
selected_features = selector.get_support(indices=True)
|
63
|
-
|
64
|
-
# Get the selected feature names
|
65
|
-
selected_features = X.columns[selected_features].tolist()
|
66
|
-
elif method == "SHAP":
|
75
|
+
if method == "SHAP":
|
67
76
|
import pandas as pd
|
68
77
|
from catboost import CatBoostRegressor
|
69
78
|
from fasttreeshap import TreeExplainer as FastTreeExplainer
|
@@ -257,6 +266,26 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
|
|
257
266
|
raise ValueError("Method not recognized. Use BorutaPy, Genetic, or RFE")
|
258
267
|
# tentative_features = X.columns[selector.support_weak_].tolist()
|
259
268
|
|
269
|
+
non_eo = are_all_features_non_eo(selected_features)
|
270
|
+
if non_eo or method == "SelectKBest":
|
271
|
+
from sklearn.feature_selection import SelectKBest, f_regression
|
272
|
+
|
273
|
+
k = 15 # Number of features to select
|
274
|
+
selector = SelectKBest(score_func=f_regression, k=k)
|
275
|
+
|
276
|
+
# Fit the selector to the data and transform the data to select the best features
|
277
|
+
try:
|
278
|
+
X_new = selector.fit_transform(X, y)
|
279
|
+
except:
|
280
|
+
breakpoint()
|
281
|
+
|
282
|
+
# Get the selected feature indices
|
283
|
+
selected_features = selector.get_support(indices=True)
|
284
|
+
|
285
|
+
# Get the selected feature names
|
286
|
+
selected_features = X.columns[selected_features].tolist()
|
287
|
+
|
288
|
+
print(selected_features)
|
260
289
|
# Filter the dataset for selected features
|
261
290
|
X_filtered = X.loc[:, selected_features]
|
262
291
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|