geocif 0.1.35__tar.gz → 0.1.37__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geocif-0.1.35/geocif.egg-info → geocif-0.1.37}/PKG-INFO +1 -1
- {geocif-0.1.35 → geocif-0.1.37}/geocif/analysis.py +3 -5
- geocif-0.1.37/geocif/experiments.py +127 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/geocif.py +20 -8
- {geocif-0.1.35 → geocif-0.1.37}/geocif/indices_runner.py +6 -6
- {geocif-0.1.35 → geocif-0.1.37}/geocif/indices_runner_v2.py +6 -8
- {geocif-0.1.35 → geocif-0.1.37/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.35 → geocif-0.1.37}/geocif.egg-info/SOURCES.txt +1 -0
- {geocif-0.1.35 → geocif-0.1.37}/setup.py +1 -1
- {geocif-0.1.35 → geocif-0.1.37}/LICENSE +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/MANIFEST.in +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/README.md +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/__init__.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/backup/constants.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/backup/features.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/backup/geo.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/backup/models.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/cei/indices.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/logger.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/correlations_backup.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/embedding.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/feature_engineering.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/feature_selection.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/output.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/stages.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/stats.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/trend.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/ml/xai.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/playground/automl.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/playground/misc.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/utils.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif/viz/plot.py +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/requirements.txt +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/setup.cfg +0 -0
- {geocif-0.1.35 → geocif-0.1.37}/tests/test_geocif.py +0 -0
@@ -162,8 +162,8 @@ class Geoanalysis:
|
|
162
162
|
return pd.DataFrame(), pd.DataFrame()
|
163
163
|
|
164
164
|
df_metrics = self._compute_metrics(df)
|
165
|
-
#df_metrics = self._process_metrics(df_metrics)
|
166
|
-
#self._plot_metrics(df_metrics)
|
165
|
+
# df_metrics = self._process_metrics(df_metrics)
|
166
|
+
# self._plot_metrics(df_metrics)
|
167
167
|
|
168
168
|
df_regional_metrics_by_year = self._compute_regional_metrics(
|
169
169
|
df, by="Harvest Year"
|
@@ -173,9 +173,7 @@ class Geoanalysis:
|
|
173
173
|
)
|
174
174
|
df_regional_metrics = self._average_mape(df_regional_metrics_by_year)
|
175
175
|
breakpoint()
|
176
|
-
self._store_results(
|
177
|
-
None, df_regional_metrics, df_regional_metrics_by_year
|
178
|
-
)
|
176
|
+
self._store_results(None, df_regional_metrics, df_regional_metrics_by_year)
|
179
177
|
|
180
178
|
df_national_yield = self._compute_national_yield(df)
|
181
179
|
self._plot_national_yield(df_national_yield)
|
@@ -0,0 +1,127 @@
|
|
1
|
+
import configparser
|
2
|
+
from pathlib import Path
|
3
|
+
|
4
|
+
import matplotlib.pyplot as plt
|
5
|
+
import sklearn
|
6
|
+
|
7
|
+
from geocif import geocif as gc
|
8
|
+
from geocif import logger as log
|
9
|
+
|
10
|
+
plt.style.use("default")
|
11
|
+
sklearn.set_config(transform_output="pandas")
|
12
|
+
|
13
|
+
import warnings
|
14
|
+
|
15
|
+
warnings.simplefilter(action="ignore", category=FutureWarning)
|
16
|
+
|
17
|
+
|
18
|
+
def run(inputs, logger, parser, section, item, type, values, *, execute=None):
    """Run the models once for every candidate value of one config option.

    The option ``item`` in ``section`` is overwritten with each entry of
    ``values`` in turn, the models are executed against the modified
    parser, and the option's original text is put back afterwards.

    Args:
        inputs: Opaque inputs object forwarded unchanged to ``execute``.
        logger: Logger forwarded unchanged to ``execute``.
        parser: ``configparser``-style parser that is mutated in place.
        section: Name of the section that holds the option.
        item: Name of the option to vary.
        type: One of ``"str"``, ``"bool"``, ``"int"`` or ``"float"``; selects
            the getter used to validate the current value. (The name shadows
            the builtin but is kept so existing callers keep working.)
        values: Iterable of candidate values; each is stored as ``str(value)``.
        execute: Optional callable ``execute(inputs, logger, parser)`` invoked
            once per value. Defaults to ``gc.execute_models``.

    Returns:
        The same ``parser`` object, with ``item`` restored to its original text.

    Raises:
        ValueError: If ``type`` is not one of the supported names, or if the
            current value cannot be parsed as the declared type.
    """
    getters = {
        "str": parser.get,
        "bool": parser.getboolean,
        "int": parser.getint,
        "float": parser.getfloat,
    }
    if type not in getters:
        raise ValueError(
            f"Unsupported option type {type!r}; expected one of {sorted(getters)}"
        )

    # Fail early if the option is missing or does not parse as the declared type.
    getters[type](section, item)

    # Keep the raw text: ConfigParser.set() only accepts strings, so putting a
    # parsed bool/int/float back would raise TypeError, and the raw form also
    # preserves any interpolation template stored in the option.
    original_text = parser.get(section, item, raw=True)

    if execute is None:
        execute = gc.execute_models

    try:
        for value in values:
            parser.set(section, item, str(value))
            execute(inputs, logger, parser)
    finally:
        # Restore even when a model run fails so later experiments that reuse
        # this parser do not inherit a leftover candidate value.
        parser.set(section, item, original_text)

    return parser
|
43
|
+
|
44
|
+
|
45
|
+
def main(path_config_files=None):
    """Run the GEOCIF experiment sweep, varying one config option at a time.

    Args:
        path_config_files: List of config file paths handed to
            ``log.setup_logger_parser``. Defaults to
            ``[Path("../config/geocif.txt")]`` when omitted.
    """
    # Build the default inside the call instead of sharing one list object
    # across invocations through a mutable default argument.
    if path_config_files is None:
        path_config_files = [Path("../config/geocif.txt")]

    logger, parser = log.setup_logger_parser(path_config_files)
    inputs = gc.gather_inputs(parser)

    logger.info("=============================")
    logger.info("\tStarting GEOCIF Experiments")
    logger.info("=============================")

    # (option name, declared type, candidate values), in execution order.
    # NOTE(review): every option is varied in the "experiments" section;
    # confirm that is the section the model run reads these options from.
    experiments = (
        ("include_lat_lon", "bool", [True, False]),
        (
            "feature_selection",
            "str",
            ["SelectKBest", "BorutaPy", "Leshy", "RFECV", "RFE"],
        ),
        ("lag_years", "int", [1, 2, 3, 4, 5]),
        ("lag_yield_as_feature", "bool", [True, False]),
        ("median_years", "int", [2, 3, 4, 5]),
        ("median_yield_as_feature", "bool", [True, False]),
        ("analogous_year_yield_as_feature", "bool", [True, False]),
        ("optimize", "bool", [True, False]),
    )

    for number, (item, kind, values) in enumerate(experiments, start=1):
        logger.info(f"Experiment {number}: {item}")
        parser = run(inputs, logger, parser, "experiments", item, kind, values)
|
124
|
+
|
125
|
+
|
126
|
+
if __name__ == "__main__":
|
127
|
+
main()
|
@@ -114,6 +114,7 @@ class Geocif:
|
|
114
114
|
self.analogous_year_yield_as_feature = self.parser.getboolean(
|
115
115
|
"ML", "analogous_year_yield_as_feature"
|
116
116
|
)
|
117
|
+
self.include_lat_lon = self.parser.getboolean("ML", "include_lat_lon")
|
117
118
|
self.spatial_autocorrelation = self.parser.getboolean(
|
118
119
|
"ML", "spatial_autocorrelation"
|
119
120
|
)
|
@@ -187,7 +188,6 @@ class Geocif:
|
|
187
188
|
os.makedirs(self.dir_analysis, exist_ok=True)
|
188
189
|
|
189
190
|
self.db_path = self.dir_db / self.db_forecasts
|
190
|
-
output.config_to_db(self.db_path, self.parser, self.today_full)
|
191
191
|
|
192
192
|
# self.pickle_file = self.base_dir / self.parser.get("outlook", "pickle_file")
|
193
193
|
# obj_pickle = outlook.Outlook(self.pickle_file)
|
@@ -222,9 +222,9 @@ class Geocif:
|
|
222
222
|
self.logger.info(f"Selected features: {self.selected_features}")
|
223
223
|
|
224
224
|
""" Update model to include conformal estimates """
|
225
|
-
if "lat" not in self.selected_features:
|
225
|
+
if "lat" not in self.selected_features and self.include_lat_lon:
|
226
226
|
self.selected_features.append("lat")
|
227
|
-
if "lon" not in self.selected_features:
|
227
|
+
if "lon" not in self.selected_features and self.include_lat_lon:
|
228
228
|
self.selected_features.append("lon")
|
229
229
|
X_train = df_region[self.selected_features + self.cat_features]
|
230
230
|
dir_output = (
|
@@ -358,7 +358,9 @@ class Geocif:
|
|
358
358
|
# Remove any categorical features
|
359
359
|
X_test = X_test.drop(columns=self.cat_features)
|
360
360
|
X = torch.from_numpy(X_test.to_numpy()).float()
|
361
|
-
coord = torch.from_numpy(
|
361
|
+
coord = torch.from_numpy(
|
362
|
+
self.df_test[["lon", "lat"]].to_numpy()
|
363
|
+
).float()
|
362
364
|
|
363
365
|
p = X.shape[1]
|
364
366
|
n = X.shape[0]
|
@@ -367,7 +369,9 @@ class Geocif:
|
|
367
369
|
data = geospaNN.make_graph(X, Y, coord, nn)
|
368
370
|
|
369
371
|
# remove categorical features from df_train
|
370
|
-
data_train = df_region[
|
372
|
+
data_train = df_region[
|
373
|
+
self.selected_features + self.cat_features + [self.target]
|
374
|
+
]
|
371
375
|
w_train = data_train.y - self.estimate(data_train.x)
|
372
376
|
|
373
377
|
else:
|
@@ -401,6 +405,7 @@ class Geocif:
|
|
401
405
|
df = pd.DataFrame(
|
402
406
|
{
|
403
407
|
"Experiment_ID": np.full(shp, experiment_id),
|
408
|
+
"Experiment Name": np.full(shp, self.experiment_name),
|
404
409
|
"Date": np.full(shp, self.today),
|
405
410
|
"Time": np.full(shp, now),
|
406
411
|
"Country": np.full(shp, self.country),
|
@@ -479,6 +484,7 @@ class Geocif:
|
|
479
484
|
|
480
485
|
# Create an index based on following columns
|
481
486
|
index_columns = [
|
487
|
+
"Experiment Name",
|
482
488
|
"Model",
|
483
489
|
"Cluster Strategy",
|
484
490
|
"Country",
|
@@ -557,8 +563,9 @@ class Geocif:
|
|
557
563
|
self.feature_names.append("FCST")
|
558
564
|
|
559
565
|
# Add lat and lon to feature names
|
560
|
-
self.
|
561
|
-
|
566
|
+
if self.include_lat_lon:
|
567
|
+
self.feature_names.append("lat")
|
568
|
+
self.feature_names.append("lon")
|
562
569
|
|
563
570
|
self.selected_features = []
|
564
571
|
|
@@ -820,7 +827,11 @@ class Geocif:
|
|
820
827
|
self.dg_country["lon"] = self.dg_country.centroid.x
|
821
828
|
|
822
829
|
# Add lat and lon columns to df by merging on Country Region column
|
823
|
-
df = df.merge(
|
830
|
+
df = df.merge(
|
831
|
+
self.dg_country[["Country Region", "lat", "lon"]].drop_duplicates(),
|
832
|
+
on="Country Region",
|
833
|
+
how="left",
|
834
|
+
)
|
824
835
|
|
825
836
|
dict_kwargs = {}
|
826
837
|
dict_kwargs["all_stages"] = self.all_stages
|
@@ -917,6 +928,7 @@ class Geocif:
|
|
917
928
|
|
918
929
|
self.forecast_season = forecast_season
|
919
930
|
self.model_name = model
|
931
|
+
self.experiment_name = self.parser.get("ML", "experiment_name")
|
920
932
|
self.ml_model = self.parser.getboolean(self.model_name, "ML_model")
|
921
933
|
self.model_names = ast.literal_eval(self.parser.get(self.country, "models"))
|
922
934
|
self.optimize = self.parser.getboolean(self.country, "optimize")
|
@@ -165,13 +165,13 @@ class cei_runner(base.BaseGeo):
|
|
165
165
|
combinations = [
|
166
166
|
i
|
167
167
|
for i in combinations
|
168
|
-
if "angola_maize" in i[3] or
|
169
|
-
"lesotho_maize" in i[3] or
|
168
|
+
if "angola_maize" in i[3] or "lesotho_maize" in i[3] or
|
170
169
|
# "namibia_" in i[2] or
|
171
|
-
|
172
|
-
|
173
|
-
"
|
174
|
-
"
|
170
|
+
"united_republic_of_tanzania_maize" in i[3]
|
171
|
+
or "zambia_maize" in i[3]
|
172
|
+
or "zimbabwe_maize" in i[3]
|
173
|
+
or "south_africa_maize" in i[3]
|
174
|
+
or "mozambique_maize" in i[3]
|
175
175
|
]
|
176
176
|
# "malawi" in i[2]]
|
177
177
|
|
@@ -46,7 +46,9 @@ class cei_runner(base.BaseGeo):
|
|
46
46
|
self.parse_config()
|
47
47
|
|
48
48
|
self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
|
49
|
-
self.base_dir = Path(
|
49
|
+
self.base_dir = Path(
|
50
|
+
r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\malawi"
|
51
|
+
) # Path(self.parser.get("PATHS", "dir_crop_inputs"))
|
50
52
|
self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
|
51
53
|
|
52
54
|
def collect_files(self):
|
@@ -162,14 +164,10 @@ class cei_runner(base.BaseGeo):
|
|
162
164
|
# Only keep those entries in combinations where the third element is
|
163
165
|
# mozambique, south_africa, angola or dem_people's_rep_of_korea
|
164
166
|
# This is done to test the code for these countries
|
165
|
-
combinations = [
|
166
|
-
i
|
167
|
-
for i in combinations
|
168
|
-
if "malawi_maize_s1" in i[3]
|
169
|
-
]
|
167
|
+
combinations = [i for i in combinations if "malawi_maize_s1" in i[3]]
|
170
168
|
|
171
|
-
if
|
172
|
-
num_cpu = int(cpu_count() * 0.
|
169
|
+
if True:
|
170
|
+
num_cpu = int(cpu_count() * 0.5)
|
173
171
|
with Pool(num_cpu) as p:
|
174
172
|
for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
|
175
173
|
pass
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|