geocif 0.1.54__tar.gz → 0.1.56__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geocif-0.1.54/geocif.egg-info → geocif-0.1.56}/PKG-INFO +1 -1
- {geocif-0.1.54 → geocif-0.1.56}/geocif/analysis.py +12 -12
- {geocif-0.1.54 → geocif-0.1.56}/geocif/geocif.py +4 -2
- geocif-0.1.54/geocif/indices_runner_v2.py → geocif-0.1.56/geocif/indices_runner_angola.py +12 -6
- geocif-0.1.56/geocif/indices_runner_madagascar.py +212 -0
- geocif-0.1.54/geocif/indices_runner_v3.py → geocif-0.1.56/geocif/indices_runner_malawi.py +13 -6
- geocif-0.1.56/geocif/indices_runner_mozambique.py +212 -0
- geocif-0.1.56/geocif/indices_runner_south_africa.py +212 -0
- geocif-0.1.56/geocif/indices_runner_zambia.py +212 -0
- geocif-0.1.56/geocif/indices_runner_zimbabwe.py +212 -0
- geocif-0.1.56/geocif/playground/eval.py +241 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/gamtest.py +8 -0
- geocif-0.1.56/geocif/playground/gee_access.py +566 -0
- geocif-0.1.56/geocif/playground/play_xagg.py +87 -0
- geocif-0.1.56/geocif/playground/reg.py +36 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/viz/plot.py +5 -3
- {geocif-0.1.54 → geocif-0.1.56/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.54 → geocif-0.1.56}/geocif.egg-info/SOURCES.txt +11 -2
- {geocif-0.1.54 → geocif-0.1.56}/setup.py +1 -1
- {geocif-0.1.54 → geocif-0.1.56}/LICENSE +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/MANIFEST.in +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/README.md +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/__init__.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/backup/constants.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/backup/features.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/backup/geo.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/backup/models.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/cei/indices.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/experiments.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/geocif_runner.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/indices_runner.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/logger.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/embedding.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/feature_engineering.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/feature_selection.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/output.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/stages.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/stats.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/trend.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/ml/xai.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/mm.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/aa.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/automl.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/download_esi.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/enso.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/misc.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/sustain.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/test_catboost.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/tmp.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/tmp2.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/tmp3.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/tmp4.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/playground/tmp5.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/risk/__init__.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/risk/impact_assessment.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/utils.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/requirements.txt +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/setup.cfg +0 -0
- {geocif-0.1.54 → geocif-0.1.56}/tests/test_geocif.py +0 -0
@@ -1085,18 +1085,18 @@ class RegionalMapper(Geoanalysis):
|
|
1085
1085
|
plt.minorticks_on()
|
1086
1086
|
plt.xlabel("Mean Absolute Percentage Error (%)")
|
1087
1087
|
plt.ylabel("Frequency")
|
1088
|
-
plt.legend(title="Country", title_fontsize="
|
1088
|
+
plt.legend(title="Country", title_fontsize="16")
|
1089
1089
|
|
1090
1090
|
# Adding the title at the top-right corner
|
1091
|
-
plt.text(
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
)
|
1091
|
+
# plt.text(
|
1092
|
+
# 0.95, 0.95, # Coordinates in axes fraction
|
1093
|
+
# f"Model: {model}",
|
1094
|
+
# transform=plt.gca().transAxes,
|
1095
|
+
# fontsize=14,
|
1096
|
+
# verticalalignment="top",
|
1097
|
+
# horizontalalignment="right",
|
1098
|
+
# bbox=dict(facecolor="white", alpha=0.6, edgecolor="none")
|
1099
|
+
# )
|
1100
1100
|
|
1101
1101
|
plt.tight_layout()
|
1102
1102
|
plt.savefig(self.dir_analysis / f"histogram_region_{model}_mape.png", dpi=250)
|
@@ -1208,8 +1208,8 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
|
|
1208
1208
|
df = pd.concat(frames)
|
1209
1209
|
|
1210
1210
|
""" Map regional error metrics """
|
1211
|
-
mapper = RegionalMapper(path_config_files, logger, parser)
|
1212
|
-
mapper.map_regional()
|
1211
|
+
#mapper = RegionalMapper(path_config_files, logger, parser)
|
1212
|
+
#mapper.map_regional()
|
1213
1213
|
|
1214
1214
|
""" For each country, plot yields, conditions, anomalies, etc. """
|
1215
1215
|
obj.map(df)
|
@@ -798,6 +798,7 @@ class Geocif:
|
|
798
798
|
[self.target, self.target_class]
|
799
799
|
+ self.statistics_columns
|
800
800
|
+ self.feature_names
|
801
|
+
+ [f"Median {self.target}"]
|
801
802
|
+ ["Region_ID"]
|
802
803
|
)
|
803
804
|
if self.check_yield_trend:
|
@@ -1141,6 +1142,9 @@ class Geocif:
|
|
1141
1142
|
group_by = ["Region"]
|
1142
1143
|
groups = self.df_train.groupby(group_by)
|
1143
1144
|
for key, group in groups:
|
1145
|
+
if group.empty:
|
1146
|
+
continue
|
1147
|
+
|
1144
1148
|
if self.check_yield_trend:
|
1145
1149
|
if group[self.target].any():
|
1146
1150
|
detrended_data = trend.detrend_dataframe(
|
@@ -1156,8 +1160,6 @@ class Geocif:
|
|
1156
1160
|
group.index, "Detrended Model Type"
|
1157
1161
|
] = detrended_data.model_type
|
1158
1162
|
|
1159
|
-
if group.empty:
|
1160
|
-
breakpoint()
|
1161
1163
|
# Create categorical classes for target column
|
1162
1164
|
group, new_target_column, bins = fe.classify_target(
|
1163
1165
|
group, self.target, self.number_classes
|
@@ -12,6 +12,7 @@ warnings.filterwarnings("ignore")
|
|
12
12
|
from .cei import indices
|
13
13
|
from geoprepare import base
|
14
14
|
|
15
|
+
# Country handled by this runner: it names the output directory that cei_runner
# scans and the "<country>_maize_s1" files that main() keeps.
country = "angola"
|
15
16
|
|
16
17
|
def remove_duplicates(lst):
|
17
18
|
"""
|
@@ -46,10 +47,15 @@ class cei_runner(base.BaseGeo):
|
|
46
47
|
self.parse_config()
|
47
48
|
|
48
49
|
self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
50
|
+
import platform
|
51
|
+
if platform.system() == "Linux":
|
52
|
+
self.base_dir = Path(
|
53
|
+
rf"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/{country}"
|
54
|
+
)
|
55
|
+
else:
|
56
|
+
self.base_dir = Path(
|
57
|
+
rf"D:\Users\ritvik\projects\GEOGLAM\Output\countries\{country}"
|
58
|
+
) # Path(self.parser.get("PATHS", "dir_crop_inputs"))
|
53
59
|
self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
|
54
60
|
|
55
61
|
def collect_files(self):
|
@@ -165,10 +171,10 @@ class cei_runner(base.BaseGeo):
|
|
165
171
|
# Only keep those entries in combinations where the third element is
|
166
172
|
# mozambique, south_africa, angola or dem_people's_rep_of_korea
|
167
173
|
# This is done to test the code for these countries
|
168
|
-
combinations = [i for i in combinations if "
|
174
|
+
combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
|
169
175
|
|
170
176
|
if True:
|
171
|
-
num_cpu = int(cpu_count() * 0.
|
177
|
+
num_cpu = int(cpu_count() * 0.1)
|
172
178
|
with Pool(num_cpu) as p:
|
173
179
|
for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
|
174
180
|
pass
|
@@ -0,0 +1,212 @@
|
|
1
|
+
import itertools
|
2
|
+
import warnings
|
3
|
+
from multiprocessing import Pool, cpu_count
|
4
|
+
from pathlib import Path
|
5
|
+
|
6
|
+
import arrow as ar
|
7
|
+
import pandas as pd
|
8
|
+
from tqdm import tqdm
|
9
|
+
|
10
|
+
warnings.filterwarnings("ignore")
|
11
|
+
|
12
|
+
from .cei import indices
|
13
|
+
from geoprepare import base
|
14
|
+
|
15
|
+
# Country handled by this runner: it names the output directory that cei_runner
# scans and the "<country>_maize_s1" files that main() keeps.
country = "madagascar"
|
16
|
+
|
17
|
+
def remove_duplicates(lst):
    """
    Return the unique items of ``lst`` in the order they were first seen.

    :param lst: iterable of hashable items
    :return: list holding each distinct item exactly once
    """
    # dict.fromkeys de-duplicates exactly like set() does, but keeps insertion
    # order, so the result is deterministic from one run to the next.
    return list(dict.fromkeys(lst))
|
24
|
+
|
25
|
+
|
26
|
+
def get_admin_zone(country, dg_shp):
    """
    Decide which administrative level is available for ``country``.

    :param country: country name; it is title-cased and spaces are replaced by
        underscores before being compared with the ``ADMIN0`` column
    :param dg_shp: table (dataframe) with an ``ADMIN0`` column and, normally,
        an ``ADMIN2`` column
    :return: ``"admin_2"`` when at least one row of the country has a non-null
        ``ADMIN2`` value, ``"admin_1"`` otherwise (country not present,
        ``ADMIN2`` column missing, or ``ADMIN2`` entirely null)
    """
    admin0_name = country.title().replace(" ", "_")

    # Keep only the rows that belong to this country
    dg_country = dg_shp[dg_shp["ADMIN0"] == admin0_name]

    # No rows, or no ADMIN2 information at all: fall back to admin_1
    if dg_country.empty or "ADMIN2" not in dg_country.columns:
        return "admin_1"

    return "admin_2" if dg_country["ADMIN2"].notna().any() else "admin_1"
|
40
|
+
|
41
|
+
|
42
|
+
class cei_runner(base.BaseGeo):
    """
    Collect the csv files of one country and run ``indices.process`` on them.

    The runner scans ``self.base_dir`` for csv files, expands every file into
    one argument tuple per year and passes those tuples to ``indices.process``.
    """

    def __init__(self, path_config_file):
        """
        :param path_config_file: configuration file path(s); forwarded
            unchanged to ``base.BaseGeo.__init__``
        """
        super().__init__(path_config_file)

        # Parse configuration files
        self.parse_config()

        self.dir_input = Path(self.parser.get("PATHS", "dir_input"))

        import platform

        # NOTE(review): both branches are hard-coded, machine-specific
        # locations; the trailing comment suggests the value once came from
        # the PATHS/dir_crop_inputs config entry.
        if platform.system() == "Linux":
            self.base_dir = Path(
                rf"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/{country}"
            )
        else:
            self.base_dir = Path(
                rf"D:\Users\ritvik\projects\GEOGLAM\Output\countries\{country}"
            )  # Path(self.parser.get("PATHS", "dir_crop_inputs"))
        self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")

    def collect_files(self):
        """
        1. Collect every csv file found (recursively) below ``self.base_dir``
        2. Drop files of the `processed` directory when a file with the same
           name also exists in the `processed_include_fall` directory
        3. Create a dataframe that contains the following columns:
            - directory: name of the directory two levels above the file
            - path: full path to file
            - filename: name of file
            - admin_zone: result of ``get_admin_zone`` for the file's folder
        :return: Return the dataframe created above
        """
        import geopandas as gp

        dg_shp = gp.read_file(
            self.dir_input
            / "Global_Datasets"
            / "Regions"
            / "Shps"
            / "adm_shapefile.shp",
            engine="pyogrio",
        )

        columns = ["directory", "path", "filename", "admin_zone"]
        rows = []
        # The admin zone only depends on the folder name, so look it up once
        # per folder instead of once per file.
        zone_by_folder = {}

        for filepath in self.base_dir.rglob("*.csv"):
            # Name of the folder holding the file (treated as the country)
            country_dir = filepath.parents[0].name

            # HACK: Skip korea for now, as it is giving errors
            if country_dir == "republic_of_korea":
                continue

            if country_dir not in zone_by_folder:
                zone_by_folder[country_dir] = get_admin_zone(country_dir, dg_shp)

            rows.append(
                [
                    filepath.parents[1].name,  # directory one level further up
                    filepath,
                    filepath.name,
                    zone_by_folder[country_dir],
                ]
            )

        # Build the frame in one go; appending row by row with .loc copies
        # the data on every insertion.
        df_files = pd.DataFrame(rows, columns=columns)

        # Exclude those rows where directory is processed and file is already in
        # processed_include_fall directory
        no_fall = df_files["directory"] == "processed"
        include_fall = df_files[df_files["directory"] == "processed_include_fall"][
            "filename"
        ]

        df_files = df_files[~(no_fall & (df_files["filename"].isin(include_fall)))]

        return df_files

    def process_combinations(self, df, method):
        """
        Create a list of unique tuples of the following:
            - directory: name of directory where file is located
            - path: full path to file
            - filename: name of file
            - admin_zone: administrative level stored for the file
            - method: passed through unchanged as the last element
        The tuples are expanded further in ``main`` before being handed to
        ``indices.process``.
        :param df: dataframe whose first four columns are the fields above
        :param method: value appended to every tuple
        :return: list of tuples without duplicates
        """
        # itertuples yields plain positional tuples; indexing a row Series by
        # integer position (row[0]) is deprecated in recent pandas.
        combinations = [
            (*row[:4], method)
            for row in tqdm(df.itertuples(index=False, name=None))
        ]

        return remove_duplicates(combinations)

    def main(self, method):
        """
        Build the argument tuples for every file/year and process them.

        :param method: forwarded to ``process_combinations``
        :return: None
        """
        # Create a dataframe of the files to be analyzed
        df_files = self.collect_files()

        combinations = self.process_combinations(df_files, method)

        # Expand every entry into one tuple per year (2001 .. current year).
        # Layout: (parser, directory, path, filename, admin_zone, method,
        #          year, "ndvi", redo)
        # The redo flag is passed as False here. The original note says the
        # analysis is always redone for the current and last year; that logic
        # is not visible in this file (presumably inside indices.process).
        combinations = [
            (
                self.parser,
                status,
                path,
                filename,
                admin_zone,
                category,
                year,
                "ndvi",
                False,  # redo
            )
            for year in range(2001, ar.utcnow().year + 1)
            for status, path, filename, admin_zone, category in combinations
        ]

        # Only keep the entries whose filename (index 3) contains
        # "<country>_maize_s1" for the module-level country
        combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]

        # NOTE: the parallel path is hard-wired; self.do_parallel is not used.
        if True:
            # Use ~10% of the cores, but never fewer than one worker:
            # Pool(0) raises ValueError on machines with less than 10 cores.
            num_cpu = max(1, int(cpu_count() * 0.1))
            with Pool(num_cpu) as p:
                # Drain the iterator so that every task is executed
                for _ in p.imap_unordered(indices.process, combinations):
                    pass
        else:
            # Use the code below if you want to test without parallelization or
            # if you want to debug by using pdb
            pbar = tqdm(combinations)
            for val in pbar:
                pbar.set_description(f"Main loop {val[2]} {val[5]}")
                indices.process(val)
|
190
|
+
|
191
|
+
|
192
|
+
def run(path_config_files=None):
    """
    Validate the index definitions, then run the CEI runner once per method.

    Args:
        path_config_files: configuration file path(s) handed to ``cei_runner``;
            defaults to an empty list

    Returns:
        None
    """
    # A fresh list per call: a mutable default would be shared between calls
    if path_config_files is None:
        path_config_files = []

    # Check dictionary keys to have no spaces
    indices.validate_index_definitions()

    # Other method names mentioned in the original: "dekad_r", "full_season",
    # "phenological_stages", "fraction_season"
    for method in ["monthly_r"]:
        obj = cei_runner(path_config_files)
        obj.main(method)


# Script entry point: run with the default (empty) list of config files
if __name__ == "__main__":
    run()
|
@@ -12,6 +12,7 @@ warnings.filterwarnings("ignore")
|
|
12
12
|
from .cei import indices
|
13
13
|
from geoprepare import base
|
14
14
|
|
15
|
+
# Country handled by this runner: it names the output directory that cei_runner
# scans and the "<country>_maize_s1" files that main() keeps.
country = "malawi"
|
15
16
|
|
16
17
|
def remove_duplicates(lst):
|
17
18
|
"""
|
@@ -46,9 +47,15 @@ class cei_runner(base.BaseGeo):
|
|
46
47
|
self.parse_config()
|
47
48
|
|
48
49
|
self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
|
49
|
-
|
50
|
-
|
51
|
-
|
50
|
+
import platform
|
51
|
+
if platform.system() == "Linux":
|
52
|
+
self.base_dir = Path(
|
53
|
+
rf"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/{country}"
|
54
|
+
)
|
55
|
+
else:
|
56
|
+
self.base_dir = Path(
|
57
|
+
rf"D:\Users\ritvik\projects\GEOGLAM\Output\countries\{country}"
|
58
|
+
) # Path(self.parser.get("PATHS", "dir_crop_inputs"))
|
52
59
|
self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
|
53
60
|
|
54
61
|
def collect_files(self):
|
@@ -164,10 +171,10 @@ class cei_runner(base.BaseGeo):
|
|
164
171
|
# Only keep those entries in combinations where the third element is
|
165
172
|
# mozambique, south_africa, angola or dem_people's_rep_of_korea
|
166
173
|
# This is done to test the code for these countries
|
167
|
-
combinations = [i for i in combinations if "
|
174
|
+
combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
|
168
175
|
|
169
|
-
if
|
170
|
-
num_cpu = int(cpu_count() * 0.
|
176
|
+
if True:
|
177
|
+
num_cpu = int(cpu_count() * 0.1)
|
171
178
|
with Pool(num_cpu) as p:
|
172
179
|
for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
|
173
180
|
pass
|
@@ -0,0 +1,212 @@
|
|
1
|
+
import itertools
|
2
|
+
import warnings
|
3
|
+
from multiprocessing import Pool, cpu_count
|
4
|
+
from pathlib import Path
|
5
|
+
|
6
|
+
import arrow as ar
|
7
|
+
import pandas as pd
|
8
|
+
from tqdm import tqdm
|
9
|
+
|
10
|
+
warnings.filterwarnings("ignore")
|
11
|
+
|
12
|
+
from .cei import indices
|
13
|
+
from geoprepare import base
|
14
|
+
|
15
|
+
# Country handled by this runner: it names the output directory that cei_runner
# scans and the "<country>_maize_s1" files that main() keeps.
country = "mozambique"
|
16
|
+
|
17
|
+
def remove_duplicates(lst):
    """
    Return the unique items of ``lst`` in the order they were first seen.

    :param lst: iterable of hashable items
    :return: list holding each distinct item exactly once
    """
    # dict.fromkeys de-duplicates exactly like set() does, but keeps insertion
    # order, so the result is deterministic from one run to the next.
    return list(dict.fromkeys(lst))
|
24
|
+
|
25
|
+
|
26
|
+
def get_admin_zone(country, dg_shp):
    """
    Decide which administrative level is available for ``country``.

    :param country: country name; it is title-cased and spaces are replaced by
        underscores before being compared with the ``ADMIN0`` column
    :param dg_shp: table (dataframe) with an ``ADMIN0`` column and, normally,
        an ``ADMIN2`` column
    :return: ``"admin_2"`` when at least one row of the country has a non-null
        ``ADMIN2`` value, ``"admin_1"`` otherwise (country not present,
        ``ADMIN2`` column missing, or ``ADMIN2`` entirely null)
    """
    admin0_name = country.title().replace(" ", "_")

    # Keep only the rows that belong to this country
    dg_country = dg_shp[dg_shp["ADMIN0"] == admin0_name]

    # No rows, or no ADMIN2 information at all: fall back to admin_1
    if dg_country.empty or "ADMIN2" not in dg_country.columns:
        return "admin_1"

    return "admin_2" if dg_country["ADMIN2"].notna().any() else "admin_1"
|
40
|
+
|
41
|
+
|
42
|
+
class cei_runner(base.BaseGeo):
    """
    Collect the csv files of one country and run ``indices.process`` on them.

    The runner scans ``self.base_dir`` for csv files, expands every file into
    one argument tuple per year and passes those tuples to ``indices.process``.
    """

    def __init__(self, path_config_file):
        """
        :param path_config_file: configuration file path(s); forwarded
            unchanged to ``base.BaseGeo.__init__``
        """
        super().__init__(path_config_file)

        # Parse configuration files
        self.parse_config()

        self.dir_input = Path(self.parser.get("PATHS", "dir_input"))

        import platform

        # NOTE(review): both branches are hard-coded, machine-specific
        # locations; the trailing comment suggests the value once came from
        # the PATHS/dir_crop_inputs config entry.
        if platform.system() == "Linux":
            self.base_dir = Path(
                rf"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/{country}"
            )
        else:
            self.base_dir = Path(
                rf"D:\Users\ritvik\projects\GEOGLAM\Output\countries\{country}"
            )  # Path(self.parser.get("PATHS", "dir_crop_inputs"))
        self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")

    def collect_files(self):
        """
        1. Collect every csv file found (recursively) below ``self.base_dir``
        2. Drop files of the `processed` directory when a file with the same
           name also exists in the `processed_include_fall` directory
        3. Create a dataframe that contains the following columns:
            - directory: name of the directory two levels above the file
            - path: full path to file
            - filename: name of file
            - admin_zone: result of ``get_admin_zone`` for the file's folder
        :return: Return the dataframe created above
        """
        import geopandas as gp

        dg_shp = gp.read_file(
            self.dir_input
            / "Global_Datasets"
            / "Regions"
            / "Shps"
            / "adm_shapefile.shp",
            engine="pyogrio",
        )

        columns = ["directory", "path", "filename", "admin_zone"]
        rows = []
        # The admin zone only depends on the folder name, so look it up once
        # per folder instead of once per file.
        zone_by_folder = {}

        for filepath in self.base_dir.rglob("*.csv"):
            # Name of the folder holding the file (treated as the country)
            country_dir = filepath.parents[0].name

            # HACK: Skip korea for now, as it is giving errors
            if country_dir == "republic_of_korea":
                continue

            if country_dir not in zone_by_folder:
                zone_by_folder[country_dir] = get_admin_zone(country_dir, dg_shp)

            rows.append(
                [
                    filepath.parents[1].name,  # directory one level further up
                    filepath,
                    filepath.name,
                    zone_by_folder[country_dir],
                ]
            )

        # Build the frame in one go; appending row by row with .loc copies
        # the data on every insertion.
        df_files = pd.DataFrame(rows, columns=columns)

        # Exclude those rows where directory is processed and file is already in
        # processed_include_fall directory
        no_fall = df_files["directory"] == "processed"
        include_fall = df_files[df_files["directory"] == "processed_include_fall"][
            "filename"
        ]

        df_files = df_files[~(no_fall & (df_files["filename"].isin(include_fall)))]

        return df_files

    def process_combinations(self, df, method):
        """
        Create a list of unique tuples of the following:
            - directory: name of directory where file is located
            - path: full path to file
            - filename: name of file
            - admin_zone: administrative level stored for the file
            - method: passed through unchanged as the last element
        The tuples are expanded further in ``main`` before being handed to
        ``indices.process``.
        :param df: dataframe whose first four columns are the fields above
        :param method: value appended to every tuple
        :return: list of tuples without duplicates
        """
        # itertuples yields plain positional tuples; indexing a row Series by
        # integer position (row[0]) is deprecated in recent pandas.
        combinations = [
            (*row[:4], method)
            for row in tqdm(df.itertuples(index=False, name=None))
        ]

        return remove_duplicates(combinations)

    def main(self, method):
        """
        Build the argument tuples for every file/year and process them.

        :param method: forwarded to ``process_combinations``
        :return: None
        """
        # Create a dataframe of the files to be analyzed
        df_files = self.collect_files()

        combinations = self.process_combinations(df_files, method)

        # Expand every entry into one tuple per year (2001 .. current year).
        # Layout: (parser, directory, path, filename, admin_zone, method,
        #          year, "ndvi", redo)
        # The redo flag is passed as False here. The original note says the
        # analysis is always redone for the current and last year; that logic
        # is not visible in this file (presumably inside indices.process).
        combinations = [
            (
                self.parser,
                status,
                path,
                filename,
                admin_zone,
                category,
                year,
                "ndvi",
                False,  # redo
            )
            for year in range(2001, ar.utcnow().year + 1)
            for status, path, filename, admin_zone, category in combinations
        ]

        # Only keep the entries whose filename (index 3) contains
        # "<country>_maize_s1" for the module-level country
        combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]

        # NOTE: the parallel path is hard-wired; self.do_parallel is not used.
        if True:
            # Use ~10% of the cores, but never fewer than one worker:
            # Pool(0) raises ValueError on machines with less than 10 cores.
            num_cpu = max(1, int(cpu_count() * 0.1))
            with Pool(num_cpu) as p:
                # Drain the iterator so that every task is executed
                for _ in p.imap_unordered(indices.process, combinations):
                    pass
        else:
            # Use the code below if you want to test without parallelization or
            # if you want to debug by using pdb
            pbar = tqdm(combinations)
            for val in pbar:
                pbar.set_description(f"Main loop {val[2]} {val[5]}")
                indices.process(val)
|
190
|
+
|
191
|
+
|
192
|
+
def run(path_config_files=None):
    """
    Validate the index definitions, then run the CEI runner once per method.

    Args:
        path_config_files: configuration file path(s) handed to ``cei_runner``;
            defaults to an empty list

    Returns:
        None
    """
    # A fresh list per call: a mutable default would be shared between calls
    if path_config_files is None:
        path_config_files = []

    # Check dictionary keys to have no spaces
    indices.validate_index_definitions()

    # Other method names mentioned in the original: "dekad_r", "full_season",
    # "phenological_stages", "fraction_season"
    for method in ["monthly_r"]:
        obj = cei_runner(path_config_files)
        obj.main(method)


# Script entry point: run with the default (empty) list of config files
if __name__ == "__main__":
    run()
|