geocif 0.1.33.tar.gz → 0.1.35.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geocif-0.1.33/geocif.egg-info → geocif-0.1.35}/PKG-INFO +1 -1
- {geocif-0.1.33 → geocif-0.1.35}/geocif/analysis.py +5 -5
- {geocif-0.1.33 → geocif-0.1.35}/geocif/cei/indices.py +12 -3
- {geocif-0.1.33 → geocif-0.1.35}/geocif/indices_runner.py +5 -4
- geocif-0.1.35/geocif/indices_runner_v2.py +207 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/correlations.py +35 -16
- geocif-0.1.35/geocif/ml/correlations_backup.py +412 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/stages.py +6 -3
- {geocif-0.1.33 → geocif-0.1.35}/geocif/playground/misc.py +72 -2
- {geocif-0.1.33 → geocif-0.1.35/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.33 → geocif-0.1.35}/geocif.egg-info/SOURCES.txt +2 -0
- {geocif-0.1.33 → geocif-0.1.35}/setup.py +1 -1
- {geocif-0.1.33 → geocif-0.1.35}/LICENSE +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/MANIFEST.in +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/README.md +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/__init__.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/backup/constants.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/backup/features.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/backup/geo.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/backup/models.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/geocif.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/logger.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/embedding.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/feature_engineering.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/feature_selection.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/output.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/stats.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/trend.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/ml/xai.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/playground/automl.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/utils.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif/viz/plot.py +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/requirements.txt +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/setup.cfg +0 -0
- {geocif-0.1.33 → geocif-0.1.35}/tests/test_geocif.py +0 -0
geocif/analysis.py:

```diff
@@ -162,8 +162,8 @@ class Geoanalysis:
             return pd.DataFrame(), pd.DataFrame()

         df_metrics = self._compute_metrics(df)
-        df_metrics = self._process_metrics(df_metrics)
-        self._plot_metrics(df_metrics)
+        #df_metrics = self._process_metrics(df_metrics)
+        #self._plot_metrics(df_metrics)

         df_regional_metrics_by_year = self._compute_regional_metrics(
             df, by="Harvest Year"
```
```diff
@@ -172,9 +172,9 @@ class Geoanalysis:
             df_regional_metrics_by_year
         )
         df_regional_metrics = self._average_mape(df_regional_metrics_by_year)
-
+        breakpoint()
         self._store_results(
-
+            None, df_regional_metrics, df_regional_metrics_by_year
         )

         df_national_yield = self._compute_national_yield(df)
```
```diff
@@ -195,7 +195,7 @@ class Geoanalysis:
             .apply(self.annual_metrics)
             .reset_index()
         )
-
+        breakpoint()
         return df_metrics.pivot_table(
             index=["Country", "Model", "Harvest Year", "Stage Name", "Stage Range"],
             columns="level_5",
```
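Note: both hunks above leave bare `breakpoint()` calls in the released 0.1.35 code (alongside the now-commented-out `_process_metrics`/`_plot_metrics` steps), so running the metrics pipeline will stop in pdb. If that is undesirable, Python's PYTHONBREAKPOINT hook (PEP 553) can neutralize the calls from the caller's side; a minimal sketch, not something geocif itself provides:

```python
import os

# breakpoint() consults PYTHONBREAKPOINT on every call, so setting it to "0"
# before the metrics pipeline runs turns the shipped calls into no-ops.
os.environ["PYTHONBREAKPOINT"] = "0"
```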
geocif/cei/indices.py:

```diff
@@ -393,6 +393,7 @@ class CEIs:
             / self.admin_zone
             / self.country
         )
+
         os.makedirs(self.dir_output, exist_ok=True)
         os.makedirs(self.dir_intermediate, exist_ok=True)

```
```diff
@@ -465,7 +466,7 @@ class CEIs:

         extended_stages_list = []
         if self.method in ["phenological_stages", "fraction_season", "full_season"]:
-            extended_stages_list =
+            extended_stages_list = stages
         elif self.method in ["dekad_r", "biweekly_r", "monthly_r"]:
             # reverse stages
             stages = stages[::-1]
```
```diff
@@ -566,10 +567,10 @@ class CEIs:

         """
         if self.method in ["phenological_stages", "fraction_season"]:
-            mask = df_harvest_year_region[col].isin(stages)
+            mask = df_harvest_year_region[col].isin([stages])
            df_time_period = df_harvest_year_region[mask]

-            mask = df_all_years[col].isin(stages)
+            mask = df_all_years[col].isin([stages])
            df_base_period = df_all_years[mask]
         elif self.method in [
             "dekad",
```
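Note: wrapping `stages` in a list here suggests it is a scalar for these methods; pandas' `isin` only accepts list-likes and raises a TypeError on a bare string. A toy illustration (invented data, not from geocif):

```python
import pandas as pd

col = pd.Series(["sowing", "flowering", "harvest"])

# col.isin("flowering")  # TypeError: only list-like objects are allowed to be passed to isin()
mask = col.isin(["flowering"])  # wrapping the scalar gives an element-wise membership test
print(mask.tolist())  # -> [False, True, False]
```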
```diff
@@ -605,6 +606,10 @@ class CEIs:
         Returns:

         """
+        # If stage is not a list then convert it to a list
+        if not isinstance(stage, list):
+            stage = [stage]
+
         columns = [
             "Description",
             "CEI",
```
```diff
@@ -721,6 +726,10 @@ class CEIs:
         :param index_details:
         :return:
         """
+        # If stage is not a list then convert it to a list
+        if not isinstance(stage, list):
+            stage = [stage]
+
         df = df[df["bounds"] == 1]
         # Exclude lat, lon, time, bounds and time_bounds columns
         df = df.drop(columns=["lat", "lon", "time", "bounds", "time_bounds"])
```
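Note: the same normalization is added in both the @@ -605 and @@ -721 hunks: callers may pass either a single stage or a list of stages, and the code canonicalizes to a list before doing any filtering. The idiom in isolation:

```python
def normalize_stage(stage):
    # If stage is not a list then convert it to a list (mirrors the added code)
    if not isinstance(stage, list):
        stage = [stage]
    return stage

print(normalize_stage("flowering"))            # -> ['flowering']
print(normalize_stage(["sowing", "harvest"]))  # -> ['sowing', 'harvest']
```

Together with the `.isin([stages])` change above, this makes the stage-handling code tolerant of scalar inputs throughout.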
geocif/indices_runner.py:

```diff
@@ -165,11 +165,12 @@ class cei_runner(base.BaseGeo):
         combinations = [
             i
             for i in combinations
-            if "angola_maize" in i[3] or
-
-            #
+            if "angola_maize" in i[3] or
+            "lesotho_maize" in i[3] or
+            # "namibia_" in i[2] or
+            "united_republic_of_tanzania_maize" in i[3] or
             "zambia_maize" in i[3] or "zimbabwe_maize" in i[3] or
-
+            "south_africa_maize" in i[3] or
             "mozambique_maize" in i[3]
         ]
         # "malawi" in i[2]]
```
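Note: the filter tests `i[3]`, which (assuming the same tuple layout as in `indices_runner_v2.py`'s `main`, where the fourth element is the filename) acts as a per-country/crop whitelist; 0.1.35 widens it to more southern-Africa maize runs. A toy illustration with hypothetical filenames:

```python
combinations = [
    ("parser", "processed", "/path/a", "angola_maize_s1.csv"),
    ("parser", "processed", "/path/b", "kenya_wheat_s1.csv"),
]
kept = [i for i in combinations if "angola_maize" in i[3] or "zambia_maize" in i[3]]
print([i[3] for i in kept])  # -> ['angola_maize_s1.csv']
```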
geocif/indices_runner_v2.py (new file, +207 lines):

```python
import itertools
import warnings
from multiprocessing import Pool, cpu_count
from pathlib import Path

import arrow as ar
import pandas as pd
from tqdm import tqdm

warnings.filterwarnings("ignore")

from .cei import indices
from geoprepare import base


def remove_duplicates(lst):
    """

    :param lst:
    :return:
    """
    return list(set([i for i in lst]))


def get_admin_zone(country, dg_shp):
    admin_zone = "admin_1"
    country = country.title().replace(" ", "_")

    # Read in shapefile
    dg_country = dg_shp[dg_shp["ADMIN0"] == country]

    # Is the ADMIN2 column all None? If so, return admin_1 else return admin_2
    if dg_country.empty:
        admin_zone = "admin_1"
    elif not dg_country["ADMIN2"].isna().all():
        admin_zone = "admin_2"

    return admin_zone


class cei_runner(base.BaseGeo):
    def __init__(self, path_config_file):
        super().__init__(path_config_file)

        # Parse configuration files
        self.parse_config()

        self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
        self.base_dir = Path(self.parser.get("PATHS", "dir_crop_inputs"))
        self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")

    def collect_files(self):
        """
        1. Collect all the files which contain EO information
        2. Exclude files from the `processed` directory if it is already in
        processed_include_fall directory
        3. Create a dataframe that contains the following columns:
        - directory: name of directory where file is located
        - path: full path to file
        - filename: name of file
        :return: Return the dataframe created above
        """
        import geopandas as gp

        dg_shp = gp.read_file(
            self.dir_input
            / "Global_Datasets"
            / "Regions"
            / "Shps"
            / "adm_shapefile.shp",
            engine="pyogrio",
        )

        # Collect all the files which contain EO information
        df_files = pd.DataFrame(columns=["directory", "path", "filename", "admin_zone"])
        for filepath in self.base_dir.rglob("*.csv"):
            country = filepath.parents[0].name

            admin_zone = get_admin_zone(country, dg_shp)

            # If country is not in cc.COUNTRIES then skip
            # HACK: Skip korea for now, as it is giving errors
            if country == "republic_of_korea":
                continue

            # Get name of directory one level up
            process_type = filepath.parents[1].name

            # Get name of file
            filename = filepath.name

            # Add to dataframe
            df_files.loc[len(df_files)] = [process_type, filepath, filename, admin_zone]

        # Exclude those rows where directory is processed and file is already in
        # processed_include_fall directory
        no_fall = df_files["directory"] == "processed"
        include_fall = df_files[df_files["directory"] == "processed_include_fall"][
            "filename"
        ]

        df_files = df_files[~(no_fall & (df_files["filename"].isin(include_fall)))]

        return df_files

    def process_combinations(self, df, method):
        """
        Create a list of tuples of the following:
        - directory: name of directory where file is located
        - path: full path to file
        - filename: name of file
        - method: whether to compute indices for phenological stages or not
        This tuple will be used as input to the `process` function
        :param df:
        :param method:
        :return:
        """
        combinations = []

        for index, row in tqdm(df.iterrows()):
            combinations.extend(
                list(
                    itertools.product([row[0]], [row[1]], [row[2]], [row[3]], [method])
                )
            )

        combinations = remove_duplicates(combinations)

        return combinations

    def main(self, method):
        """

        :param method:
        :return:
        """
        # Create a dataframe of the files to be analyzed
        df_files = self.collect_files()

        combinations = self.process_combinations(df_files, method)

        # Add an element to the tuple to indicate the season
        # Last element is redo flag which is True if the analysis is to be redone
        # and False otherwise. Analysis is always redone for the current year
        # and last year whether file exists or not
        combinations = [
            (
                self.parser,
                status,
                path,
                filename,
                admin_zone,
                category,
                year,
                "ndvi",
                False,  # redo
            )
            for year in range(2024, ar.utcnow().year + 1)
            for status, path, filename, admin_zone, category in combinations
        ]

        # Only keep those entries in combinations where the third elemt is
        # mozambique, south_africa, angola or dem_people's_rep_of_korea
        # This is done to test the code for these countries
        combinations = [
            i
            for i in combinations
            if "malawi_maize_s1" in i[3]
        ]

        if False:
            num_cpu = int(cpu_count() * 0.3)
            with Pool(num_cpu) as p:
                for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
                    pass
        else:
            # Use the code below if you want to test without parallelization or
            # if you want to debug by using pdb
            pbar = tqdm(combinations)
            for i, val in enumerate(pbar):
                pbar.set_description(
                    f"Main loop {combinations[i][2]} {combinations[i][5]}"
                )
                indices.process(val)


def run(path_config_files=[]):
    """

    Args:
        path_config_files:

    Returns:

    """
    """ Check dictionary keys to have no spaces"""
    indices.validate_index_definitions()

    for method in [
        "biweekly_r",  # "dekad_r" # "dekad_r"
    ]:  # , "full_season", "phenological_stages", "fraction_season"]:
        obj = cei_runner(path_config_files)
        obj.main(method)


if __name__ == "__main__":
    run()
```
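Note: the new module's `remove_duplicates` is set-based, so the order of `combinations` after de-duplication is arbitrary (the multiprocessing path is also hard-disabled via `if False:`, so only the serial loop runs). If a stable order ever mattered, a drop-in order-preserving variant (an assumption on my part, not something geocif provides) could be:

```python
def remove_duplicates_ordered(lst):
    # dict.fromkeys() keeps first occurrences and preserves insertion order (Python 3.7+)
    return list(dict.fromkeys(lst))

print(remove_duplicates_ordered([3, 1, 3, 2, 1]))  # -> [3, 1, 2]
```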
geocif/ml/correlations.py:

```diff
@@ -246,6 +246,7 @@ def all_correlated_feature_by_time(df, **kwargs):
     Returns:

     """
+    THRESHOLD = 0.1
     national_correlation = kwargs.get("national_correlation")
     group_by = kwargs.get("groupby")
     combined_dict = kwargs.get("combined_dict")
```
```diff
@@ -260,9 +261,20 @@ def all_correlated_feature_by_time(df, **kwargs):
         ):
             df_corr = _all_correlated_feature_by_time(group, **kwargs)

+            # Remove columns with more than 50% NaN values
+            df_corr = df_corr.dropna(thresh=len(df_corr) / 2, axis=1)
+
             if not df_corr.empty:
-                df_tmp = df_corr[df_corr.columns[(df_corr.mean() >
-                dict_selected_features
+                df_tmp = df_corr[df_corr.columns[(abs(df_corr.mean()) > THRESHOLD)]]
+                # Add the columns to dict_selected_features along with the absolute mean value
+                absolute_medians = df_tmp.abs().median()
+
+                # Create a DataFrame to display the column names and their absolute median values
+                absolute_median_df = absolute_medians.reset_index()
+                absolute_median_df.columns = ['CEI', 'Median']
+
+                # Add the CEI and Median value to dict_selected_features
+                dict_selected_features[region_id] = absolute_median_df

                 df_tmp2 = (
                     df_tmp.median(axis=0)
```
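Note: `dropna(thresh=len(df_corr) / 2, axis=1)` keeps only columns with at least half of their values non-NaN, which matches the "more than 50% NaN" comment (`thresh` is the minimum count of non-NA values a column needs to survive). A toy illustration with invented data:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({
    "mostly_ok":  [1.0, 2.0, np.nan, 4.0],        # 3 of 4 non-NaN -> kept
    "mostly_nan": [np.nan, np.nan, np.nan, 4.0],  # 1 of 4 non-NaN -> dropped
})
print(list(df.dropna(thresh=len(df) / 2, axis=1).columns))  # -> ['mostly_ok']
```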
```diff
@@ -290,24 +302,31 @@ def all_correlated_feature_by_time(df, **kwargs):
             else:
                 # HACK
                 df_corr = _all_correlated_feature_by_time(df, **kwargs)
-                dict_selected_features[region_id] = df_corr.columns
-                dict_best_cei[region_id] = {}

-
-                #
-
-
-                #
-
-
-
-                #
-
+                df_tmp = df_corr[df_corr.columns[(abs(df_corr.mean()) > THRESHOLD)]]
+                # Add the columns to dict_selected_features along with the absolute mean value
+                absolute_medians = df_tmp.abs().median()
+
+                # Create a DataFrame to display the column names and their absolute median values
+                absolute_median_df = absolute_medians.reset_index()
+                absolute_median_df.columns = ['CEI', 'Median']
+
+                # Add the CEI and Median value to dict_selected_features
+                dict_selected_features[region_id] = absolute_median_df
+                dict_best_cei[region_id] = {}
     else:
         df_corr = _all_correlated_feature_by_time(df, **kwargs)
-
+        df_tmp = df_corr[df_corr.columns[(abs(df_corr.mean()) > THRESHOLD)]]
+        # Add the columns to dict_selected_features along with the absolute mean value
+        absolute_medians = df_tmp.abs().median()
+
+        # Create a DataFrame to display the column names and their absolute median values
+        absolute_median_df = absolute_medians.reset_index()
+        absolute_median_df.columns = ['CEI', 'Median']
+
+        # Add the CEI and Median value to dict_selected_features
+        dict_selected_features[0] = absolute_median_df

-        df_corr = df_corr[df_corr.columns[(df_corr.mean() > 0.1)]]
         plot_feature_corr_by_time(df_corr, **kwargs)

     return dict_selected_features, dict_best_cei
```
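Note: the same selection block is now repeated across all three branches: keep the CEIs whose mean correlation magnitude exceeds THRESHOLD (absolute value, so strong negative correlations now survive too, unlike the old `mean() > 0.1` test), then summarize each survivor by the median of its absolute correlations. A condensed sketch with toy numbers:

```python
import pandas as pd

THRESHOLD = 0.1
df_corr = pd.DataFrame({"TG": [0.4, 0.5], "DTR": [-0.3, -0.2], "R99p": [0.05, -0.05]})

df_tmp = df_corr[df_corr.columns[abs(df_corr.mean()) > THRESHOLD]]  # keeps TG and DTR
absolute_median_df = df_tmp.abs().median().reset_index()
absolute_median_df.columns = ["CEI", "Median"]
print(absolute_median_df)
#    CEI  Median
# 0   TG    0.45
# 1  DTR    0.25
```

The triplication is a natural candidate for a small helper function, but the release keeps the three copies inline.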
geocif/ml/correlations_backup.py (new file, +412 lines):

```python
import os

import matplotlib.pyplot as plt
import palettable as pal
import pandas as pd
import seaborn as sns
from tqdm import tqdm

from geocif import utils
from geocif.ml import embedding
from geocif.ml import stages


def most_correlated_feature_by_time(df_train, simulation_stages, target_col):
    """

    Args:
        df_train:
        simulation_stages:
        target_col:

    Returns:

    """
    frames = []

    stages = [simulation_stages[: idx + 1] for idx in range(len(simulation_stages))]

    # Only select columns that have been observed till the current stage
    for stage in tqdm(stages, leave=False, desc="Compute most correlated feature"):
        current_feature_set = [
            col for col in df_train.columns if col.endswith(f"_{stage[-1]}")
        ]

        # Get the most correlated feature for each region
        top_feature_by_region, counter = embedding.get_top_correlated_features(
            df_train[current_feature_set + ["Region"]],
            df_train[target_col],
        )

        # Create a dataframe with the most common top feature and number of occurrences over timestep
        _feature = counter.most_common(1)[0][0]
        # Loop through top_feature_by_region and find the average score for _feature
        # Calculate the average score for 'DTR_36'
        _feature_scores = [
            value[1][0]
            for key, value in top_feature_by_region.items()
            if _feature in value[0]
        ]
        average_score = sum(_feature_scores) / len(_feature_scores)
        _feature = utils.remove_last_part(_feature)

        df = pd.DataFrame(
            {
                "Stage": [stage[-1]],
                "Date": [utils.dict_growth_stages[stage[-1]]],
                "Feature with Highest Correlation": [counter.most_common(1)[0][0]],
                "Feature Category": [_feature],
                "Score": [average_score],
                # "Type": [ci.dict_indices[_feature][0]],
                "Number of Occurrences": [counter.most_common(1)[0][1]],
                # "Current Feature Set": [current_feature_set],
            }
        )
        frames.append(df)

    df_most_corr_feature_by_time = pd.concat(frames)


def plot_feature_corr_by_time(df, **kwargs):
    country = kwargs.get("country")
    crop = kwargs.get("crop")
    dir_output = kwargs.get("dir_output")
    forecast_season = kwargs.get("forecast_season")
    national_correlation = kwargs.get("national_correlation")
    group_by = kwargs.get("groupby")

    # Setup the figure and gridspec
    fig = plt.figure(figsize=(10, 5))
    gs = fig.add_gridspec(
        3, 2, height_ratios=[6, 5, 1], width_ratios=[5, 1.5], hspace=0.6, wspace=0.0
    )

    # Assign subplots
    ax_heatmap = fig.add_subplot(gs[0:2, 0])
    ax_map = fig.add_subplot(gs[0, 1])
    cbar_ax = fig.add_subplot(gs[2, 0])
    ax4 = fig.add_subplot(gs[2, 1])

    # Transpose and reverse the columns of the dataframe
    #breakpoint()
    ## Only select foll. columns:

    df = df[
        [
            "TG",
            "TG10p",
            "DTR",
            "vDTR",
            "R99p",
            "RX5day",
            "MEAN_ESI4WK",
        ]
    ]
    df_transpose = df.T
    df = df_transpose[df_transpose.columns[::-1]]

    # Split column names and only use value before space
    df.columns = df.columns.str.split(" ").str[0]
    # In row names, replace ESI4WK by ES
    df.index = df.index.str.replace("MEAN_ESI4WK", "ZScore_ES")
    df.index = df.index.str.replace("R99p", "MEAN_SM")
    df.index = df.index.str.replace("RX5day", "AUC_SM")
    # Remove the last row
    # Select the first, third and fifth column
    df = df[["Dec", "Feb", "Apr"]]
    # Rename Dec to Planting - Early Vegetative
    # Rename Feb to Early Vegetative - Senescence
    # Rename Apr to Senescence - Harvest
    df.columns = ["Planting - Early Vegetative", "Early Vegetative - Senescence", "Senescence - Harvest"]
    ax_heatmap = sns.heatmap(
        df,
        ax=ax_heatmap,
        annot=True,
        cmap=pal.cartocolors.diverging.Earth_5.get_mpl_colormap(),
        fmt=".2f",
        square=False,
        linewidths=0.5,
        linecolor="white",
        cbar_ax=cbar_ax,
        cbar_kws={"orientation": "horizontal"},  # , "shrink": 0.5},
        annot_kws={"size": 6},
        xticklabels=True,
        yticklabels=True,
    )
    ax_heatmap.tick_params(left=False, bottom=False)

    # Plot the map using GeoPandas
    dg_country = kwargs.get("dg_country")

    ax_map = dg_country.plot(
        ax=ax_map,
        color="white",
        edgecolor="black",
        linewidth=1.0,
        facecolor=None,
        legend=False,
    )

    if not national_correlation:
        id = kwargs["region_id"]
        dg_region = dg_country[dg_country[group_by] == id]
        ax_map = dg_region.plot(
            ax=ax_map, color="blue", edgecolor="blue", linewidth=1.0, legend=False
        )
        # Set title with color blue
        ax_map.set_title(f"Region: {id}", color="blue")

    # No colorbar for the map
    ax_map.axis("off")
    # Remove borders
    ax_map.spines["top"].set_visible(False)
    ax_map.spines["right"].set_visible(False)
    ax_map.spines["bottom"].set_visible(False)
    ax_map.spines["left"].set_visible(False)
    # ax4 should not be visible
    ax4.axis("off")

    # Add colorbar label
    # cbar_ax.set_xlabel("Correlation Coefficient", labelpad=3, size="small")
    cbar_ax.set_title("Correlation Coefficient", loc="left", size="small")
    ax_heatmap.set_xticklabels(
        ax_heatmap.get_xticklabels(), size="x-small", rotation=0, fontsize=7
    )
    ax_heatmap.set_yticklabels(ax_heatmap.get_yticklabels(), size="x-small", fontsize=7)
    ax_heatmap.set_xlabel("")
    ax_heatmap.set_ylabel(" ")
    # Reduce font size of ticks of colorbar
    cbar_ax.tick_params(axis="both", which="major", labelsize=6)

    _country = country.title().replace("_", " ")
    _crop = crop.title().replace("_", " ")
    if not national_correlation:
        fname = f"{country}_{crop}_{id}_corr_feature_by_time.png"
    else:
        fname = f"{country}_{crop}_corr_feature_by_time.png"
    ax_heatmap.set_title(f"{_country}\n{_crop}")

    # plt.tight_layout()
    os.makedirs(dir_output, exist_ok=True)
    plt.savefig(dir_output / fname, dpi=250)
    plt.close()


def _all_correlated_feature_by_time(df, **kwargs):
    """

    Args:
        df:
        **kwargs:

    Returns:

    """
    frames = []
    all_stages = kwargs.get("all_stages")
    target_col = kwargs.get("target_col")
    method = kwargs.get("method")

    longest_stage = max(all_stages, key=len)

    # Split the original string into a list of its parts
    longest_stage = longest_stage.split("_")

    # Generate the list of strings as described by the user, removing one element from the start each time
    stages_features = ["_".join(longest_stage[i:]) for i in range(len(longest_stage))]

    # Drop columns with no yield information
    df = df.dropna(subset=[target_col])

    # Only select columns that have been observed till the current stage
    pbar = tqdm(stages_features, total=len(stages_features), leave=False)
    for stage in pbar:
        pbar.set_description(f"Calculating correlations")
        pbar.update()

        stage_name = stages.get_stage_information_dict(f"GD4_{stage}", method)[
            "Stage Name"
        ]
        # starting_stage = stage_name.split("-")[0]
        current_feature_set = [col for col in df.columns if stage_name in col]

        # Get the most correlated feature for each region
        df_tmp = embedding.get_all_features_correlation(
            df[current_feature_set + ["Region"]], df[target_col], method
        )

        frames.append(df_tmp)

    df_results = pd.concat(frames)
    if not df_results.empty:
        # Exclude Region column
        df_results = df_results.drop(columns="Region")
        # Groupby Dekad and compute mean of all columns apart from Region
        df_results = df_results.groupby(method).mean()

        all_stage_names = []
        for stage in stages_features:
            _tmp = stages.get_stage_information_dict(f"GD4_{stage}", method)[
                "Stage Name"
            ]
            all_stage_names.append(_tmp)

        df_results = df_results.reindex(all_stage_names)

        # Drop rows with all NaN values
        df_results = df_results.dropna(how="all")

        # Split the index based on - and only keep the first element
        df_results.index = df_results.index.str.split("-").str[0]

        return df_results
    else:
        return pd.DataFrame()


def all_correlated_feature_by_time(df, **kwargs):
    """

    Args:
        df:
        **kwargs:

    Returns:

    """
    THRESHOLD = 0.1
    national_correlation = kwargs.get("national_correlation")
    group_by = kwargs.get("groupby")
    combined_dict = kwargs.get("combined_dict")

    dict_selected_features = {}
    dict_best_cei = {}

    if not national_correlation:
        groups = df.groupby(group_by)
        for region_id, group in tqdm(
            groups, desc=f"Compute all correlated feature by {group_by}", leave=False
        ):
            df_corr = _all_correlated_feature_by_time(group, **kwargs)

            # Remove columns with more than 50% NaN values
            df_corr = df_corr.dropna(thresh=len(df_corr) / 2, axis=1)

            if not df_corr.empty:
                df_tmp = df_corr[df_corr.columns[(abs(df_corr.mean()) > THRESHOLD)]]
                # Add the columns to dict_selected_features along with the absolute mean value
                absolute_medians = df_tmp.abs().median()

                # Create a DataFrame to display the column names and their absolute median values
                absolute_median_df = absolute_medians.reset_index()
                absolute_median_df.columns = ['CEI', 'Median']

                # Add the CEI and Median value to dict_selected_features
                dict_selected_features[region_id] = absolute_median_df

                df_tmp2 = (
                    df_tmp.median(axis=0)
                    .abs()
                    .sort_values(ascending=False)
                    .reset_index()
                )
                df_tmp2.columns = ["Metric", "Value"]
                # Add another column based on Type of Metric
                for idx, row in df_tmp2.iterrows():
                    df_tmp2.loc[idx, "Type"] = combined_dict[row[0]][0]

                # Compute median of each CEI and sort the dataframe based on the absolute value of the median
                dict_best_cei[region_id] = (
                    df_tmp2.groupby("Type")
                    .max()
                    .reset_index()
                    .sort_values("Value", ascending=False)["Metric"]
                    .values
                )

                kwargs["region_id"] = region_id
                plot_feature_corr_by_time(df_tmp, **kwargs)
                # For each element in dict_best_cei, add the type of the cei
            else:
                # HACK
                df_corr = _all_correlated_feature_by_time(df, **kwargs)

                df_tmp = df_corr[df_corr.columns[(abs(df_corr.mean()) > THRESHOLD)]]
                # Add the columns to dict_selected_features along with the absolute mean value
                absolute_medians = df_tmp.abs().median()

                # Create a DataFrame to display the column names and their absolute median values
                absolute_median_df = absolute_medians.reset_index()
                absolute_median_df.columns = ['CEI', 'Median']

                # Add the CEI and Median value to dict_selected_features
                dict_selected_features[region_id] = absolute_median_df
                dict_best_cei[region_id] = {}
    else:
        df_corr = _all_correlated_feature_by_time(df, **kwargs)
        df_tmp = df_corr[df_corr.columns[(abs(df_corr.mean()) > THRESHOLD)]]
        # Add the columns to dict_selected_features along with the absolute mean value
        absolute_medians = df_tmp.abs().median()

        # Create a DataFrame to display the column names and their absolute median values
        absolute_median_df = absolute_medians.reset_index()
        absolute_median_df.columns = ['CEI', 'Median']

        # Add the CEI and Median value to dict_selected_features
        dict_selected_features[0] = absolute_median_df

        plot_feature_corr_by_time(df_corr, **kwargs)

    return dict_selected_features, dict_best_cei


def feature_correlation_by_time(**kwargs):
    raise NotImplementedError()

    frames = []
    simulation_stages = kwargs.get("simulation_stages")
    df_train = kwargs.get("df_train")
    target_col = kwargs.get("target_col")

    stages = [simulation_stages[: idx + 1] for idx in range(len(simulation_stages))]

    # Only select columns that have been observed till the current stage
    for stage in tqdm(stages, leave=False, desc="Compute feature correlation by time"):
        current_feature_set = [
            col for col in df_train.columns if col.endswith(f"_{stage[-1]}")
        ]

        # Get the most correlated feature for each region
        top_feature_by_region, counter = embedding.compute_feature_correlations(
            df_train[current_feature_set + ["Region"]],
            df_train[target_col],
            "all",
        )

        # Create a dataframe with the most common top feature and number of occurrences over timestep
        _feature = counter.most_common(1)[0][0]
        # Loop through top_feature_by_region and find the average score for _feature
        # Calculate the average score for 'DTR_36'
        _feature_scores = [
            value[1][0]
            for key, value in top_feature_by_region.items()
            if _feature in value[0]
        ]
        average_score = sum(_feature_scores) / len(_feature_scores)
        _feature = utils.remove_last_part(_feature)

        df = pd.DataFrame(
            {
                "Stage": [stage[-1]],
                "Date": [utils.dict_growth_stages[stage[-1]]],
                "Feature with Highest Correlation": [counter.most_common(1)[0][0]],
                "Feature Category": [_feature],
                "Score": [average_score],
                # "Type": [ci.dict_indices[_feature][0]],
                "Number of Occurrences": [counter.most_common(1)[0][1]],
                # "Current Feature Set": [current_feature_set],
            }
        )
        frames.append(df)

    df_corr_feature_by_time = pd.concat(frames)
```
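Note: `plot_feature_corr_by_time` lays its figure out with a 3x2 gridspec: the heatmap spans the left two rows, the country map sits top right, and a horizontal colorbar strip runs along the bottom left. A minimal standalone sketch of that layout, using the same ratios as the file:

```python
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(10, 5))
gs = fig.add_gridspec(
    3, 2, height_ratios=[6, 5, 1], width_ratios=[5, 1.5], hspace=0.6, wspace=0.0
)
ax_heatmap = fig.add_subplot(gs[0:2, 0])  # heatmap: rows 0-1, left column
ax_map = fig.add_subplot(gs[0, 1])        # country map: top right
cbar_ax = fig.add_subplot(gs[2, 0])       # horizontal colorbar strip: bottom left
ax4 = fig.add_subplot(gs[2, 1])           # unused corner, hidden in the real code
plt.close(fig)
```

Consistent with this being a parked backup module, `most_correlated_feature_by_time` assembles `df_most_corr_feature_by_time` without returning it, and `feature_correlation_by_time` raises NotImplementedError before its body can run.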
geocif/ml/stages.py:

```diff
@@ -144,10 +144,13 @@ def select_stages_for_ml(stages_features, method="latest", n=100):

     selected_stages = []
     if method == "latest":
+        # Find the longest array in the list of arrays
+        selected_stages = [max(stages_features, key=len)]
+
         # Only select those arrays in the list of arrays that are starting with latest_stage
-        for stage in stages_features:
-
-
+        # for stage in stages_features:
+        #     if stage[0] == latest_stage[0]:
+        #         selected_stages.append(stage)
     elif method == "fraction":
         # Filter arrays with exactly 2 elements
         two_element_arrays = []
```
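Note: with this change, `method="latest"` no longer loops over the candidates; it selects exactly one entry, the longest stage sequence. The new behavior in isolation (toy stage tuples):

```python
stages_features = [(37,), (37, 36), (37, 36, 35)]
# max with key=len returns the longest sequence, i.e. the most complete stage history
selected_stages = [max(stages_features, key=len)]
print(selected_stages)  # -> [(37, 36, 35)]
```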
geocif/playground/misc.py:

```diff
@@ -1,6 +1,76 @@
-import
+import geopandas as gpd
+import pygmt
 import matplotlib.pyplot as plt
-
+from matplotlib.lines import Line2D
+import matplotlib.patches as mpatches
+import os
+filtered_shapefile_path = r"D:\Users\ritvik\projects\GEOGLAM\Input\Global_Datasets\Regions\Shps\filtered_shapefile5.shp"
+
+if not os.path.isfile(filtered_shapefile_path):
+
+    # Load the shapefile using GeoPandas
+    shapefile_path = r"D:\Users\ritvik\projects\GEOGLAM\Input\Global_Datasets\Regions\Shps\adm_shapefile.shp"
+    gdf = gpd.read_file(shapefile_path, engine="pyogrio")
+
+    # Only keep one row per ADMIN0
+    gdf = gdf.drop_duplicates(subset="ADMIN0")
+
+    sh2_path = r"D:\Users\ritvik\projects\GEOGLAM\Input\Global_Datasets\Regions\Shps\Level_1.shp"
+    gdf2 = gpd.read_file(sh2_path, engine="pyogrio")
+
+    # Subset gdf2 to USA, Pakistan and Afghanistan
+    gdf2 = gdf2[gdf2["ADM0_NAME"].isin(["United States of America"])]
+
+    # Exclude Alska and Hawaii from the USA
+    gdf2 = gdf2[~gdf2["ADM1_NAME"].isin(["Alaska", "Hawaii"])]
+
+    # Now combine all the states into one polygon
+    gdf2 = gdf2.dissolve(by="ADM0_NAME")
+    gdf2 = gdf2.reset_index()
+
+    # Rename ADM0_NAME to ADMIN0 for consistency
+    gdf2.rename(columns={"ADM0_NAME": "ADMIN0"}, inplace=True)
+
+    # Only keep ADMIN0 and geometry columns in gdf and gdf2
+    gdf = gdf[["ADMIN0", "geometry"]]
+    gdf2 = gdf2[["ADMIN0", "geometry"]]
+
+    # Merge gdf and gdf2
+    import pandas as pd
+    gdf = pd.concat([gdf, gdf2], ignore_index=True)
+
+    # Save the filtered shapefile as a temporary file
+
+    gdf.to_file(filtered_shapefile_path)
+else:
+    gdf = gpd.read_file(filtered_shapefile_path, engine="pyogrio")
+
+# Create the global map with highlighted countries
+fig = pygmt.Figure()
+
+# Define the region of interest and projection
+# fig.basemap(region="g", projection="R12c/20", frame=True)
+fig.basemap(region=[-135, 60, -35, 53], projection="Q12c", frame=True)
+
+# Use the coast function to draw land and water
+fig.coast(land="lightgray", water="lightcyan")
+
+# Highlight the countries using the filtered shapefile
+fig.plot(data=filtered_shapefile_path, pen="0.35p,black")
+
+# Add hatches to Pakistan and Afghanistan
+gdf_filled = gdf[gdf["ADMIN0"].isin(["Pakistan", "Afghanistan"])]
+for _, row in gdf_filled.iterrows():
+    fill_gdf = gpd.GeoDataFrame([row], columns=gdf.columns)
+    with pygmt.helpers.GMTTempFile() as tmpfile:
+        fill_gdf.to_file(tmpfile.name, driver="GeoJSON")
+        fig.plot(data=tmpfile.name, pen="0.35p,black", fill="black@50+h")
+
+# Save the figure
+fig.savefig("global_choropleth_highlighted_v1.png", dpi=1000)
+
+# Show the figure
+fig.show()

 import matplotlib.pyplot as plt
 import cartopy.crs as ccrs
```
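Note: the new script's `dissolve(by="ADM0_NAME")` unions all US state polygons into a single country geometry before concatenating with the ADMIN0 layer. A toy sketch of dissolve semantics, with invented geometries:

```python
import geopandas as gpd
from shapely.geometry import box

gdf = gpd.GeoDataFrame(
    {"ADM0_NAME": ["US", "US"], "geometry": [box(0, 0, 1, 1), box(1, 0, 2, 1)]}
)
dissolved = gdf.dissolve(by="ADM0_NAME").reset_index()
print(len(dissolved), dissolved.geometry.iloc[0].area)  # -> 1 2.0 (one unioned polygon)
```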
geocif.egg-info/SOURCES.txt:

```diff
@@ -8,6 +8,7 @@ geocif/__init__.py
 geocif/analysis.py
 geocif/geocif.py
 geocif/indices_runner.py
+geocif/indices_runner_v2.py
 geocif/logger.py
 geocif/utils.py
 geocif.egg-info/PKG-INFO
```
```diff
@@ -31,6 +32,7 @@ geocif/cei/definitions.py
 geocif/cei/indices.py
 geocif/ml/__init__.py
 geocif/ml/correlations.py
+geocif/ml/correlations_backup.py
 geocif/ml/embedding.py
 geocif/ml/feature_engineering.py
 geocif/ml/feature_selection.py
```
All remaining files listed above with +0 -0 are unchanged between 0.1.33 and 0.1.35.