geocif 0.1.55__tar.gz → 0.1.56__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. {geocif-0.1.55/geocif.egg-info → geocif-0.1.56}/PKG-INFO +1 -1
  2. {geocif-0.1.55 → geocif-0.1.56}/geocif/analysis.py +2 -2
  3. {geocif-0.1.55 → geocif-0.1.56}/geocif/geocif.py +1 -0
  4. geocif-0.1.55/geocif/indices_runner_v2.py → geocif-0.1.56/geocif/indices_runner_angola.py +12 -6
  5. geocif-0.1.56/geocif/indices_runner_madagascar.py +212 -0
  6. geocif-0.1.55/geocif/indices_runner_v3.py → geocif-0.1.56/geocif/indices_runner_malawi.py +13 -6
  7. geocif-0.1.56/geocif/indices_runner_mozambique.py +212 -0
  8. geocif-0.1.56/geocif/indices_runner_south_africa.py +212 -0
  9. geocif-0.1.56/geocif/indices_runner_zambia.py +212 -0
  10. geocif-0.1.56/geocif/indices_runner_zimbabwe.py +212 -0
  11. geocif-0.1.56/geocif/playground/eval.py +241 -0
  12. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/gamtest.py +8 -0
  13. geocif-0.1.56/geocif/playground/gee_access.py +566 -0
  14. geocif-0.1.56/geocif/playground/play_xagg.py +87 -0
  15. geocif-0.1.56/geocif/playground/reg.py +36 -0
  16. {geocif-0.1.55 → geocif-0.1.56}/geocif/viz/plot.py +5 -3
  17. {geocif-0.1.55 → geocif-0.1.56/geocif.egg-info}/PKG-INFO +1 -1
  18. {geocif-0.1.55 → geocif-0.1.56}/geocif.egg-info/SOURCES.txt +11 -2
  19. {geocif-0.1.55 → geocif-0.1.56}/setup.py +1 -1
  20. {geocif-0.1.55 → geocif-0.1.56}/LICENSE +0 -0
  21. {geocif-0.1.55 → geocif-0.1.56}/MANIFEST.in +0 -0
  22. {geocif-0.1.55 → geocif-0.1.56}/README.md +0 -0
  23. {geocif-0.1.55 → geocif-0.1.56}/geocif/__init__.py +0 -0
  24. {geocif-0.1.55 → geocif-0.1.56}/geocif/agmet/__init__.py +0 -0
  25. {geocif-0.1.55 → geocif-0.1.56}/geocif/agmet/geoagmet.py +0 -0
  26. {geocif-0.1.55 → geocif-0.1.56}/geocif/agmet/plot.py +0 -0
  27. {geocif-0.1.55 → geocif-0.1.56}/geocif/agmet/utils.py +0 -0
  28. {geocif-0.1.55 → geocif-0.1.56}/geocif/backup/__init__.py +0 -0
  29. {geocif-0.1.55 → geocif-0.1.56}/geocif/backup/constants.py +0 -0
  30. {geocif-0.1.55 → geocif-0.1.56}/geocif/backup/features.py +0 -0
  31. {geocif-0.1.55 → geocif-0.1.56}/geocif/backup/geo.py +0 -0
  32. {geocif-0.1.55 → geocif-0.1.56}/geocif/backup/geocif.py +0 -0
  33. {geocif-0.1.55 → geocif-0.1.56}/geocif/backup/metadata.py +0 -0
  34. {geocif-0.1.55 → geocif-0.1.56}/geocif/backup/models.py +0 -0
  35. {geocif-0.1.55 → geocif-0.1.56}/geocif/cei/__init__.py +0 -0
  36. {geocif-0.1.55 → geocif-0.1.56}/geocif/cei/definitions.py +0 -0
  37. {geocif-0.1.55 → geocif-0.1.56}/geocif/cei/indices.py +0 -0
  38. {geocif-0.1.55 → geocif-0.1.56}/geocif/experiments.py +0 -0
  39. {geocif-0.1.55 → geocif-0.1.56}/geocif/geocif_runner.py +0 -0
  40. {geocif-0.1.55 → geocif-0.1.56}/geocif/indices_runner.py +0 -0
  41. {geocif-0.1.55 → geocif-0.1.56}/geocif/logger.py +0 -0
  42. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/__init__.py +0 -0
  43. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/correlations.py +0 -0
  44. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/embedding.py +0 -0
  45. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/feature_engineering.py +0 -0
  46. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/feature_selection.py +0 -0
  47. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/outliers.py +0 -0
  48. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/outlook.py +0 -0
  49. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/output.py +0 -0
  50. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/spatial_autocorrelation.py +0 -0
  51. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/stages.py +0 -0
  52. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/stats.py +0 -0
  53. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/trainers.py +0 -0
  54. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/trend.py +0 -0
  55. {geocif-0.1.55 → geocif-0.1.56}/geocif/ml/xai.py +0 -0
  56. {geocif-0.1.55 → geocif-0.1.56}/geocif/mm.py +0 -0
  57. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/__init__.py +0 -0
  58. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/aa.py +0 -0
  59. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/automl.py +0 -0
  60. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/download_esi.py +0 -0
  61. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/enso.py +0 -0
  62. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/misc.py +0 -0
  63. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/sustain.py +0 -0
  64. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/test_catboost.py +0 -0
  65. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/tmp.py +0 -0
  66. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/tmp2.py +0 -0
  67. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/tmp3.py +0 -0
  68. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/tmp4.py +0 -0
  69. {geocif-0.1.55 → geocif-0.1.56}/geocif/playground/tmp5.py +0 -0
  70. {geocif-0.1.55 → geocif-0.1.56}/geocif/risk/__init__.py +0 -0
  71. {geocif-0.1.55 → geocif-0.1.56}/geocif/risk/impact_assessment.py +0 -0
  72. {geocif-0.1.55 → geocif-0.1.56}/geocif/utils.py +0 -0
  73. {geocif-0.1.55 → geocif-0.1.56}/geocif/viz/__init__.py +0 -0
  74. {geocif-0.1.55 → geocif-0.1.56}/geocif.egg-info/dependency_links.txt +0 -0
  75. {geocif-0.1.55 → geocif-0.1.56}/geocif.egg-info/not-zip-safe +0 -0
  76. {geocif-0.1.55 → geocif-0.1.56}/geocif.egg-info/top_level.txt +0 -0
  77. {geocif-0.1.55 → geocif-0.1.56}/requirements.txt +0 -0
  78. {geocif-0.1.55 → geocif-0.1.56}/setup.cfg +0 -0
  79. {geocif-0.1.55 → geocif-0.1.56}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.55
3
+ Version: 0.1.56
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -1208,8 +1208,8 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
1208
1208
  df = pd.concat(frames)
1209
1209
 
1210
1210
  """ Map regional error metrics """
1211
- mapper = RegionalMapper(path_config_files, logger, parser)
1212
- mapper.map_regional()
1211
+ #mapper = RegionalMapper(path_config_files, logger, parser)
1212
+ #mapper.map_regional()
1213
1213
 
1214
1214
  """ For each country, plot yields, conditions, anomalies, etc. """
1215
1215
  obj.map(df)
@@ -798,6 +798,7 @@ class Geocif:
798
798
  [self.target, self.target_class]
799
799
  + self.statistics_columns
800
800
  + self.feature_names
801
+ + [f"Median {self.target}"]
801
802
  + ["Region_ID"]
802
803
  )
803
804
  if self.check_yield_trend:
@@ -12,6 +12,7 @@ warnings.filterwarnings("ignore")
12
12
  from .cei import indices
13
13
  from geoprepare import base
14
14
 
15
+ country = "angola"
15
16
 
16
17
  def remove_duplicates(lst):
17
18
  """
@@ -46,10 +47,15 @@ class cei_runner(base.BaseGeo):
46
47
  self.parse_config()
47
48
 
48
49
  self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
49
- self.base_dir = Path(
50
- r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\nepal"
51
- #r"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/nepal"
52
- ) # Path(self.parser.get("PATHS", "dir_crop_inputs"))
50
+ import platform
51
+ if platform.system() == "Linux":
52
+ self.base_dir = Path(
53
+ rf"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/{country}"
54
+ )
55
+ else:
56
+ self.base_dir = Path(
57
+ rf"D:\Users\ritvik\projects\GEOGLAM\Output\countries\{country}"
58
+ ) # Path(self.parser.get("PATHS", "dir_crop_inputs"))
53
59
  self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
54
60
 
55
61
  def collect_files(self):
@@ -165,10 +171,10 @@ class cei_runner(base.BaseGeo):
165
171
  # Only keep those entries in combinations where the third element is
166
172
  # mozambique, south_africa, angola or dem_people's_rep_of_korea
167
173
  # This is done to test the code for these countries
168
- combinations = [i for i in combinations if "nepal_rice_s1" in i[3]]
174
+ combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
169
175
 
170
176
  if True:
171
- num_cpu = int(cpu_count() * 0.5)
177
+ num_cpu = int(cpu_count() * 0.1)
172
178
  with Pool(num_cpu) as p:
173
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
174
180
  pass
@@ -0,0 +1,212 @@
1
+ import itertools
2
+ import warnings
3
+ from multiprocessing import Pool, cpu_count
4
+ from pathlib import Path
5
+
6
+ import arrow as ar
7
+ import pandas as pd
8
+ from tqdm import tqdm
9
+
10
+ warnings.filterwarnings("ignore")
11
+
12
+ from .cei import indices
13
+ from geoprepare import base
14
+
15
+ country = "madagascar"
16
+
17
+ def remove_duplicates(lst):
18
+ """
19
+
20
+ :param lst:
21
+ :return:
22
+ """
23
+ return list(set([i for i in lst]))
24
+
25
+
26
+ def get_admin_zone(country, dg_shp):
27
+ admin_zone = "admin_1"
28
+ country = country.title().replace(" ", "_")
29
+
30
+ # Read in shapefile
31
+ dg_country = dg_shp[dg_shp["ADMIN0"] == country]
32
+
33
+ # Is the ADMIN2 column all None? If so, return admin_1 else return admin_2
34
+ if dg_country.empty:
35
+ admin_zone = "admin_1"
36
+ elif not dg_country["ADMIN2"].isna().all():
37
+ admin_zone = "admin_2"
38
+
39
+ return admin_zone
40
+
41
+
42
+ class cei_runner(base.BaseGeo):
43
+ def __init__(self, path_config_file):
44
+ super().__init__(path_config_file)
45
+
46
+ # Parse configuration files
47
+ self.parse_config()
48
+
49
+ self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
50
+ import platform
51
+ if platform.system() == "Linux":
52
+ self.base_dir = Path(
53
+ rf"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/{country}"
54
+ )
55
+ else:
56
+ self.base_dir = Path(
57
+ rf"D:\Users\ritvik\projects\GEOGLAM\Output\countries\{country}"
58
+ ) # Path(self.parser.get("PATHS", "dir_crop_inputs"))
59
+ self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
60
+
61
+ def collect_files(self):
62
+ """
63
+ 1. Collect all the files which contain EO information
64
+ 2. Exclude files from the `processed` directory if it is already in
65
+ processed_include_fall directory
66
+ 3. Create a dataframe that contains the following columns:
67
+ - directory: name of directory where file is located
68
+ - path: full path to file
69
+ - filename: name of file
70
+ :return: Return the dataframe created above
71
+ """
72
+ import geopandas as gp
73
+
74
+ dg_shp = gp.read_file(
75
+ self.dir_input
76
+ / "Global_Datasets"
77
+ / "Regions"
78
+ / "Shps"
79
+ / "adm_shapefile.shp",
80
+ engine="pyogrio",
81
+ )
82
+
83
+ # Collect all the files which contain EO information
84
+ df_files = pd.DataFrame(columns=["directory", "path", "filename", "admin_zone"])
85
+ for filepath in self.base_dir.rglob("*.csv"):
86
+ country = filepath.parents[0].name
87
+
88
+ admin_zone = get_admin_zone(country, dg_shp)
89
+
90
+ # If country is not in cc.COUNTRIES then skip
91
+ # HACK: Skip korea for now, as it is giving errors
92
+ if country == "republic_of_korea":
93
+ continue
94
+
95
+ # Get name of directory one level up
96
+ process_type = filepath.parents[1].name
97
+
98
+ # Get name of file
99
+ filename = filepath.name
100
+
101
+ # Add to dataframe
102
+ df_files.loc[len(df_files)] = [process_type, filepath, filename, admin_zone]
103
+
104
+ # Exclude those rows where directory is processed and file is already in
105
+ # processed_include_fall directory
106
+ no_fall = df_files["directory"] == "processed"
107
+ include_fall = df_files[df_files["directory"] == "processed_include_fall"][
108
+ "filename"
109
+ ]
110
+
111
+ df_files = df_files[~(no_fall & (df_files["filename"].isin(include_fall)))]
112
+
113
+ return df_files
114
+
115
+ def process_combinations(self, df, method):
116
+ """
117
+ Create a list of tuples of the following:
118
+ - directory: name of directory where file is located
119
+ - path: full path to file
120
+ - filename: name of file
121
+ - method: whether to compute indices for phenological stages or not
122
+ This tuple will be used as input to the `process` function
123
+ :param df:
124
+ :param method:
125
+ :return:
126
+ """
127
+ combinations = []
128
+
129
+ for index, row in tqdm(df.iterrows()):
130
+ combinations.extend(
131
+ list(
132
+ itertools.product([row[0]], [row[1]], [row[2]], [row[3]], [method])
133
+ )
134
+ )
135
+
136
+ combinations = remove_duplicates(combinations)
137
+
138
+ return combinations
139
+
140
+ def main(self, method):
141
+ """
142
+
143
+ :param method:
144
+ :return:
145
+ """
146
+ # Create a dataframe of the files to be analyzed
147
+ df_files = self.collect_files()
148
+
149
+ combinations = self.process_combinations(df_files, method)
150
+
151
+ # Add an element to the tuple to indicate the season
152
+ # Last element is redo flag which is True if the analysis is to be redone
153
+ # and False otherwise. Analysis is always redone for the current year
154
+ # and last year whether file exists or not
155
+ combinations = [
156
+ (
157
+ self.parser,
158
+ status,
159
+ path,
160
+ filename,
161
+ admin_zone,
162
+ category,
163
+ year,
164
+ "ndvi",
165
+ False, # redo
166
+ )
167
+ for year in range(2001, ar.utcnow().year + 1)
168
+ for status, path, filename, admin_zone, category in combinations
169
+ ]
170
+
171
+ # Only keep those entries in combinations where the third element is
172
+ # mozambique, south_africa, angola or dem_people's_rep_of_korea
173
+ # This is done to test the code for these countries
174
+ combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
+
176
+ if True:
177
+ num_cpu = int(cpu_count() * 0.1)
178
+ with Pool(num_cpu) as p:
179
+ for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
+ pass
181
+ else:
182
+ # Use the code below if you want to test without parallelization or
183
+ # if you want to debug by using pdb
184
+ pbar = tqdm(combinations)
185
+ for i, val in enumerate(pbar):
186
+ pbar.set_description(
187
+ f"Main loop {combinations[i][2]} {combinations[i][5]}"
188
+ )
189
+ indices.process(val)
190
+
191
+
192
+ def run(path_config_files=[]):
193
+ """
194
+
195
+ Args:
196
+ path_config_files:
197
+
198
+ Returns:
199
+
200
+ """
201
+ """ Check dictionary keys to have no spaces"""
202
+ indices.validate_index_definitions()
203
+
204
+ for method in [
205
+ "monthly_r", # "dekad_r" # "dekad_r"
206
+ ]: # , "full_season", "phenological_stages", "fraction_season"]:
207
+ obj = cei_runner(path_config_files)
208
+ obj.main(method)
209
+
210
+
211
+ if __name__ == "__main__":
212
+ run()
@@ -12,6 +12,7 @@ warnings.filterwarnings("ignore")
12
12
  from .cei import indices
13
13
  from geoprepare import base
14
14
 
15
+ country = "malawi"
15
16
 
16
17
  def remove_duplicates(lst):
17
18
  """
@@ -46,9 +47,15 @@ class cei_runner(base.BaseGeo):
46
47
  self.parse_config()
47
48
 
48
49
  self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
49
- self.base_dir = Path(
50
- r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\nepal"
51
- ) # Path(self.parser.get("PATHS", "dir_crop_inputs"))
50
+ import platform
51
+ if platform.system() == "Linux":
52
+ self.base_dir = Path(
53
+ rf"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/{country}"
54
+ )
55
+ else:
56
+ self.base_dir = Path(
57
+ rf"D:\Users\ritvik\projects\GEOGLAM\Output\countries\{country}"
58
+ ) # Path(self.parser.get("PATHS", "dir_crop_inputs"))
52
59
  self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
53
60
 
54
61
  def collect_files(self):
@@ -164,10 +171,10 @@ class cei_runner(base.BaseGeo):
164
171
  # Only keep those entries in combinations where the third element is
165
172
  # mozambique, south_africa, angola or dem_people's_rep_of_korea
166
173
  # This is done to test the code for these countries
167
- combinations = [i for i in combinations if "nepal_rice_s1" in i[3]]
174
+ combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
168
175
 
169
- if False:
170
- num_cpu = int(cpu_count() * 0.5)
176
+ if True:
177
+ num_cpu = int(cpu_count() * 0.1)
171
178
  with Pool(num_cpu) as p:
172
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
173
180
  pass
@@ -0,0 +1,212 @@
1
+ import itertools
2
+ import warnings
3
+ from multiprocessing import Pool, cpu_count
4
+ from pathlib import Path
5
+
6
+ import arrow as ar
7
+ import pandas as pd
8
+ from tqdm import tqdm
9
+
10
+ warnings.filterwarnings("ignore")
11
+
12
+ from .cei import indices
13
+ from geoprepare import base
14
+
15
+ country = "mozambique"
16
+
17
+ def remove_duplicates(lst):
18
+ """
19
+
20
+ :param lst:
21
+ :return:
22
+ """
23
+ return list(set([i for i in lst]))
24
+
25
+
26
+ def get_admin_zone(country, dg_shp):
27
+ admin_zone = "admin_1"
28
+ country = country.title().replace(" ", "_")
29
+
30
+ # Read in shapefile
31
+ dg_country = dg_shp[dg_shp["ADMIN0"] == country]
32
+
33
+ # Is the ADMIN2 column all None? If so, return admin_1 else return admin_2
34
+ if dg_country.empty:
35
+ admin_zone = "admin_1"
36
+ elif not dg_country["ADMIN2"].isna().all():
37
+ admin_zone = "admin_2"
38
+
39
+ return admin_zone
40
+
41
+
42
+ class cei_runner(base.BaseGeo):
43
+ def __init__(self, path_config_file):
44
+ super().__init__(path_config_file)
45
+
46
+ # Parse configuration files
47
+ self.parse_config()
48
+
49
+ self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
50
+ import platform
51
+ if platform.system() == "Linux":
52
+ self.base_dir = Path(
53
+ rf"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/{country}"
54
+ )
55
+ else:
56
+ self.base_dir = Path(
57
+ rf"D:\Users\ritvik\projects\GEOGLAM\Output\countries\{country}"
58
+ ) # Path(self.parser.get("PATHS", "dir_crop_inputs"))
59
+ self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
60
+
61
+ def collect_files(self):
62
+ """
63
+ 1. Collect all the files which contain EO information
64
+ 2. Exclude files from the `processed` directory if it is already in
65
+ processed_include_fall directory
66
+ 3. Create a dataframe that contains the following columns:
67
+ - directory: name of directory where file is located
68
+ - path: full path to file
69
+ - filename: name of file
70
+ :return: Return the dataframe created above
71
+ """
72
+ import geopandas as gp
73
+
74
+ dg_shp = gp.read_file(
75
+ self.dir_input
76
+ / "Global_Datasets"
77
+ / "Regions"
78
+ / "Shps"
79
+ / "adm_shapefile.shp",
80
+ engine="pyogrio",
81
+ )
82
+
83
+ # Collect all the files which contain EO information
84
+ df_files = pd.DataFrame(columns=["directory", "path", "filename", "admin_zone"])
85
+ for filepath in self.base_dir.rglob("*.csv"):
86
+ country = filepath.parents[0].name
87
+
88
+ admin_zone = get_admin_zone(country, dg_shp)
89
+
90
+ # If country is not in cc.COUNTRIES then skip
91
+ # HACK: Skip korea for now, as it is giving errors
92
+ if country == "republic_of_korea":
93
+ continue
94
+
95
+ # Get name of directory one level up
96
+ process_type = filepath.parents[1].name
97
+
98
+ # Get name of file
99
+ filename = filepath.name
100
+
101
+ # Add to dataframe
102
+ df_files.loc[len(df_files)] = [process_type, filepath, filename, admin_zone]
103
+
104
+ # Exclude those rows where directory is processed and file is already in
105
+ # processed_include_fall directory
106
+ no_fall = df_files["directory"] == "processed"
107
+ include_fall = df_files[df_files["directory"] == "processed_include_fall"][
108
+ "filename"
109
+ ]
110
+
111
+ df_files = df_files[~(no_fall & (df_files["filename"].isin(include_fall)))]
112
+
113
+ return df_files
114
+
115
+ def process_combinations(self, df, method):
116
+ """
117
+ Create a list of tuples of the following:
118
+ - directory: name of directory where file is located
119
+ - path: full path to file
120
+ - filename: name of file
121
+ - method: whether to compute indices for phenological stages or not
122
+ This tuple will be used as input to the `process` function
123
+ :param df:
124
+ :param method:
125
+ :return:
126
+ """
127
+ combinations = []
128
+
129
+ for index, row in tqdm(df.iterrows()):
130
+ combinations.extend(
131
+ list(
132
+ itertools.product([row[0]], [row[1]], [row[2]], [row[3]], [method])
133
+ )
134
+ )
135
+
136
+ combinations = remove_duplicates(combinations)
137
+
138
+ return combinations
139
+
140
+ def main(self, method):
141
+ """
142
+
143
+ :param method:
144
+ :return:
145
+ """
146
+ # Create a dataframe of the files to be analyzed
147
+ df_files = self.collect_files()
148
+
149
+ combinations = self.process_combinations(df_files, method)
150
+
151
+ # Add an element to the tuple to indicate the season
152
+ # Last element is redo flag which is True if the analysis is to be redone
153
+ # and False otherwise. Analysis is always redone for the current year
154
+ # and last year whether file exists or not
155
+ combinations = [
156
+ (
157
+ self.parser,
158
+ status,
159
+ path,
160
+ filename,
161
+ admin_zone,
162
+ category,
163
+ year,
164
+ "ndvi",
165
+ False, # redo
166
+ )
167
+ for year in range(2001, ar.utcnow().year + 1)
168
+ for status, path, filename, admin_zone, category in combinations
169
+ ]
170
+
171
+ # Only keep those entries in combinations where the third elemt is
172
+ # mozambique, south_africa, angola or dem_people's_rep_of_korea
173
+ # This is done to test the code for these countries
174
+ combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
+
176
+ if True:
177
+ num_cpu = int(cpu_count() * 0.1)
178
+ with Pool(num_cpu) as p:
179
+ for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
+ pass
181
+ else:
182
+ # Use the code below if you want to test without parallelization or
183
+ # if you want to debug by using pdb
184
+ pbar = tqdm(combinations)
185
+ for i, val in enumerate(pbar):
186
+ pbar.set_description(
187
+ f"Main loop {combinations[i][2]} {combinations[i][5]}"
188
+ )
189
+ indices.process(val)
190
+
191
+
192
+ def run(path_config_files=[]):
193
+ """
194
+
195
+ Args:
196
+ path_config_files:
197
+
198
+ Returns:
199
+
200
+ """
201
+ """ Check dictionary keys to have no spaces"""
202
+ indices.validate_index_definitions()
203
+
204
+ for method in [
205
+ "monthly_r", # "dekad_r" # "dekad_r"
206
+ ]: # , "full_season", "phenological_stages", "fraction_season"]:
207
+ obj = cei_runner(path_config_files)
208
+ obj.main(method)
209
+
210
+
211
+ if __name__ == "__main__":
212
+ run()