geocif 0.1.67__tar.gz → 0.1.69__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. {geocif-0.1.67/geocif.egg-info → geocif-0.1.69}/PKG-INFO +1 -1
  2. {geocif-0.1.67 → geocif-0.1.69}/geocif/cei/definitions.py +8 -8
  3. {geocif-0.1.67 → geocif-0.1.69}/geocif/geocif.py +12 -9
  4. {geocif-0.1.67 → geocif-0.1.69}/geocif/geocif_runner.py +0 -1
  5. geocif-0.1.69/geocif/indices_runner_angola.py +212 -0
  6. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/correlations.py +10 -7
  7. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/embedding.py +11 -8
  8. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/feature_engineering.py +6 -5
  9. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/feature_selection.py +15 -1
  10. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/stats.py +1 -1
  11. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/trainers.py +1 -1
  12. geocif-0.1.69/geocif/playground/wolayita_maize_mask.py +156 -0
  13. geocif-0.1.69/geocif/viz/gt.py +69 -0
  14. {geocif-0.1.67 → geocif-0.1.69}/geocif/viz/tmp.py +20 -7
  15. {geocif-0.1.67 → geocif-0.1.69/geocif.egg-info}/PKG-INFO +1 -1
  16. {geocif-0.1.67 → geocif-0.1.69}/geocif.egg-info/SOURCES.txt +2 -0
  17. {geocif-0.1.67 → geocif-0.1.69}/setup.py +1 -1
  18. geocif-0.1.67/geocif/indices_runner_angola.py +0 -212
  19. {geocif-0.1.67 → geocif-0.1.69}/LICENSE +0 -0
  20. {geocif-0.1.67 → geocif-0.1.69}/MANIFEST.in +0 -0
  21. {geocif-0.1.67 → geocif-0.1.69}/README.md +0 -0
  22. {geocif-0.1.67 → geocif-0.1.69}/geocif/__init__.py +0 -0
  23. {geocif-0.1.67 → geocif-0.1.69}/geocif/agmet/__init__.py +0 -0
  24. {geocif-0.1.67 → geocif-0.1.69}/geocif/agmet/geoagmet.py +0 -0
  25. {geocif-0.1.67 → geocif-0.1.69}/geocif/agmet/plot.py +0 -0
  26. {geocif-0.1.67 → geocif-0.1.69}/geocif/agmet/utils.py +0 -0
  27. {geocif-0.1.67 → geocif-0.1.69}/geocif/analysis.py +0 -0
  28. {geocif-0.1.67 → geocif-0.1.69}/geocif/backup/__init__.py +0 -0
  29. {geocif-0.1.67 → geocif-0.1.69}/geocif/backup/constants.py +0 -0
  30. {geocif-0.1.67 → geocif-0.1.69}/geocif/backup/features.py +0 -0
  31. {geocif-0.1.67 → geocif-0.1.69}/geocif/backup/geo.py +0 -0
  32. {geocif-0.1.67 → geocif-0.1.69}/geocif/backup/geocif.py +0 -0
  33. {geocif-0.1.67 → geocif-0.1.69}/geocif/backup/metadata.py +0 -0
  34. {geocif-0.1.67 → geocif-0.1.69}/geocif/backup/models.py +0 -0
  35. {geocif-0.1.67 → geocif-0.1.69}/geocif/cei/__init__.py +0 -0
  36. {geocif-0.1.67 → geocif-0.1.69}/geocif/cei/indices.py +0 -0
  37. {geocif-0.1.67 → geocif-0.1.69}/geocif/experiments.py +0 -0
  38. {geocif-0.1.67 → geocif-0.1.69}/geocif/indices_runner.py +0 -0
  39. {geocif-0.1.67 → geocif-0.1.69}/geocif/indices_runner_madagascar.py +0 -0
  40. {geocif-0.1.67 → geocif-0.1.69}/geocif/indices_runner_malawi.py +0 -0
  41. {geocif-0.1.67 → geocif-0.1.69}/geocif/indices_runner_mozambique.py +0 -0
  42. {geocif-0.1.67 → geocif-0.1.69}/geocif/indices_runner_south_africa.py +0 -0
  43. {geocif-0.1.67 → geocif-0.1.69}/geocif/indices_runner_zambia.py +0 -0
  44. {geocif-0.1.67 → geocif-0.1.69}/geocif/indices_runner_zimbabwe.py +0 -0
  45. {geocif-0.1.67 → geocif-0.1.69}/geocif/logger.py +0 -0
  46. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/__init__.py +0 -0
  47. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/outliers.py +0 -0
  48. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/outlook.py +0 -0
  49. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/output.py +0 -0
  50. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/spatial_autocorrelation.py +0 -0
  51. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/stages.py +0 -0
  52. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/trend.py +0 -0
  53. {geocif-0.1.67 → geocif-0.1.69}/geocif/ml/xai.py +0 -0
  54. {geocif-0.1.67 → geocif-0.1.69}/geocif/mm.py +0 -0
  55. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/__init__.py +0 -0
  56. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/aa.py +0 -0
  57. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/area.py +0 -0
  58. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/automl.py +0 -0
  59. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/download_esi.py +0 -0
  60. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/enso.py +0 -0
  61. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/eval.py +0 -0
  62. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/gamtest.py +0 -0
  63. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/gee_access.py +0 -0
  64. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/misc.py +0 -0
  65. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/play_xagg.py +0 -0
  66. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/reg.py +0 -0
  67. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/sustain.py +0 -0
  68. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/test_catboost.py +0 -0
  69. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/tmp.py +0 -0
  70. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/tmp2.py +0 -0
  71. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/tmp3.py +0 -0
  72. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/tmp4.py +0 -0
  73. {geocif-0.1.67 → geocif-0.1.69}/geocif/playground/tmp5.py +0 -0
  74. {geocif-0.1.67 → geocif-0.1.69}/geocif/risk/__init__.py +0 -0
  75. {geocif-0.1.67 → geocif-0.1.69}/geocif/risk/impact_assessment.py +0 -0
  76. {geocif-0.1.67 → geocif-0.1.69}/geocif/utils.py +0 -0
  77. {geocif-0.1.67 → geocif-0.1.69}/geocif/viz/__init__.py +0 -0
  78. {geocif-0.1.67 → geocif-0.1.69}/geocif/viz/plot.py +0 -0
  79. {geocif-0.1.67 → geocif-0.1.69}/geocif.egg-info/dependency_links.txt +0 -0
  80. {geocif-0.1.67 → geocif-0.1.69}/geocif.egg-info/not-zip-safe +0 -0
  81. {geocif-0.1.67 → geocif-0.1.69}/geocif.egg-info/top_level.txt +0 -0
  82. {geocif-0.1.67 → geocif-0.1.69}/requirements.txt +0 -0
  83. {geocif-0.1.67 → geocif-0.1.69}/setup.cfg +0 -0
  84. {geocif-0.1.67 → geocif-0.1.69}/tests/test_geocif.py +0 -0
{geocif-0.1.67/geocif.egg-info → geocif-0.1.69}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.67
+Version: 0.1.69
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal
{geocif-0.1.67 → geocif-0.1.69}/geocif/cei/definitions.py
@@ -1,11 +1,11 @@
 PHENOLOGICAL_STAGES = [1, 2, 3]
 dict_indices = {
     "GD4": ["Cold", "Growing degree days (sum of Tmean > 4 C)"],
-    #"CFD": ["Cold", "Maximum number of consecutive frost days (Tmin < 0 C)"],
-    #"FD": ["Cold", "Number of Frost Days (Tmin < 0C)"],
+    "CFD": ["Cold", "Maximum number of consecutive frost days (Tmin < 0 C)"],
+    "FD": ["Cold", "Number of Frost Days (Tmin < 0C)"],
     "HD17": ["Cold", "Heating degree days (sum of Tmean < 17 C)"],
-    #"ID": ["Cold", "Number of sharp Ice Days (Tmax < 0C)"],
-    #"CSDI": ["Cold", "Cold-spell duration index"],
+    "ID": ["Cold", "Number of sharp Ice Days (Tmax < 0C)"],
+    "CSDI": ["Cold", "Cold-spell duration index"],
     "TG10p": ["Cold", "Percentage of days when Tmean < 10th percentile"],
     "TN10p": ["Cold", "Percentage of days when Tmin < 10th percentile"],
     "TXn": ["Cold", "Minimum daily maximum temperature"],
@@ -70,10 +70,10 @@ dict_indices = {
         "Compound",
         "Days with TG > 75th percentile of daily mean temperature and RR >75th percentile of daily precipitation sum",
     ],
-    # "SD": ["Snow", "Mean of daily snow depth"],
-    # "SD1": ["Snow", "Number of days with snow depth >= 1 cm"],
-    # "SD5cm": ["Snow", "Number of days with snow depth >= 5 cm"],
-    # "SD50cm": ["Snow", "Number of days with snow depth >= 50 cm"],
+    "SD": ["Snow", "Mean of daily snow depth"],
+    "SD1": ["Snow", "Number of days with snow depth >= 1 cm"],
+    "SD5cm": ["Snow", "Number of days with snow depth >= 5 cm"],
+    "SD50cm": ["Snow", "Number of days with snow depth >= 50 cm"],
 }

 dict_ndvi = {
{geocif-0.1.67 → geocif-0.1.69}/geocif/geocif.py
@@ -243,11 +243,11 @@ class Geocif:
                 if any(cei in column for cei in self.use_ceis)
             ]
         else:
-            # self.logger.info(f"Selecting features for {self.country} {self.crop}")
+            self.logger.info(f"Selecting features for {self.country} {self.crop}")
             selector, _, self.selected_features = fs.select_features(
                 X_train, y_train, method=self.feature_selection
             )
-            # self.logger.info(f"Selected features: {self.selected_features}")
+            self.logger.info(f"Selected features: {self.selected_features}")

         """ Update model to include conformal estimates """
         if "lat" not in self.selected_features and self.include_lat_lon_as_feature:
@@ -306,7 +306,7 @@ class Geocif:
                 X_train,
                 y_train,
                 cat_features=self.cat_features,
-                verbose=False,
+                verbose=True,
             )
         elif self.model_name in ["ngboost", "oblique", "tabpfn"]:
             X_train = X_train.drop(
@@ -598,9 +598,9 @@ class Geocif:
             df_region[f"Median {self.target}"].values, 3
         )

-        if f"Median {self.target} (2014-2018)" in df_region.columns:
-            df.loc[:, f"Median {self.target} (2014-2018)"] = np.around(
-                df_region[f"Median {self.target} (2014-2018)"].values, 3
+        if f"Median {self.target} (2018-2022)" in df_region.columns:
+            df.loc[:, f"Median {self.target} (2018-2022)"] = np.around(
+                df_region[f"Median {self.target} (2018-2022)"].values, 3
             )

         if f"Median {self.target} (2013-2017)" in df_region.columns:
@@ -820,7 +820,7 @@ class Geocif:
            + self.statistics_columns
            + self.feature_names
            + [f"Median {self.target}"]
-           + [f"Median {self.target} (2014-2018)"]
+           + [f"Median {self.target} (2018-2022)"]
            + [f"Median {self.target} (2013-2017)"]
            + ["Region_ID"]
         )
@@ -1012,11 +1012,11 @@ class Geocif:
         )

         df = fe.compute_user_median_statistics(
-            df, [2014, 2015, 2016, 2017, 2018]
+            df, range(2018, 2023)
         )

         df = fe.compute_user_median_statistics(
-            df, [2013, 2014, 2015, 2016, 2017]
+            df, range(2013, 2018)
         )

         if self.median_area_as_feature:
@@ -1393,6 +1393,9 @@ class Geocif:
             self.dg["ADM0_NAME"].str.lower().str.replace(" ", "_") == self.country
         ]

+        # Drop any duplicates based on Country Region column
+        self.dg_country = self.dg_country.drop_duplicates(subset=["Country Region"])
+
     def read_data(self, country, crop, season):
         """

{geocif-0.1.67 → geocif-0.1.69}/geocif/geocif_runner.py
@@ -36,7 +36,6 @@ def loop_execute(inputs):
     # config.trace_filter = GlobbingFilter(
     #     exclude=[
     #         "pycallgraph.*",
-    #         "torch*",
     #     ]
     # )
     #
geocif-0.1.69/geocif/indices_runner_angola.py (new file)
@@ -0,0 +1,212 @@
+import itertools
+import warnings
+from multiprocessing import Pool, cpu_count
+from pathlib import Path
+
+import arrow as ar
+import pandas as pd
+from tqdm import tqdm
+
+warnings.filterwarnings("ignore")
+
+from .cei import indices
+from geoprepare import base
+
+country = "angola"
+
+def remove_duplicates(lst):
+    """
+
+    :param lst:
+    :return:
+    """
+    return list(set([i for i in lst]))
+
+
+def get_admin_zone(country, dg_shp):
+    admin_zone = "admin_1"
+    country = country.title().replace(" ", "_")
+
+    # Read in shapefile
+    dg_country = dg_shp[dg_shp["ADMIN0"] == country]
+
+    # Is the ADMIN2 column all None? If so, return admin_1 else return admin_2
+    if dg_country.empty:
+        admin_zone = "admin_1"
+    elif not dg_country["ADMIN2"].isna().all():
+        admin_zone = "admin_2"
+
+    return admin_zone
+
+
+class cei_runner(base.BaseGeo):
+    def __init__(self, path_config_file):
+        super().__init__(path_config_file)
+
+        # Parse configuration files
+        self.parse_config()
+
+        self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
+        import platform
+        if platform.system() == "Linux":
+            self.base_dir = Path(
+                rf"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/{country}"
+            )
+        else:
+            self.base_dir = Path(
+                rf"D:\Users\ritvik\projects\GEOGLAM\Output\countries\{country}"
+            )  # Path(self.parser.get("PATHS", "dir_crop_inputs"))
+        self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
+
+    def collect_files(self):
+        """
+        1. Collect all the files which contain EO information
+        2. Exclude files from the `processed` directory if it is already in
+           processed_include_fall directory
+        3. Create a dataframe that contains the following columns:
+           - directory: name of directory where file is located
+           - path: full path to file
+           - filename: name of file
+        :return: Return the dataframe created above
+        """
+        import geopandas as gp
+
+        dg_shp = gp.read_file(
+            self.dir_input
+            / "Global_Datasets"
+            / "Regions"
+            / "Shps"
+            / "adm_shapefile.shp",
+            engine="pyogrio",
+        )
+
+        # Collect all the files which contain EO information
+        df_files = pd.DataFrame(columns=["directory", "path", "filename", "admin_zone"])
+        for filepath in self.base_dir.rglob("*.csv"):
+            country = filepath.parents[0].name
+
+            admin_zone = get_admin_zone(country, dg_shp)
+
+            # If country is not in cc.COUNTRIES then skip
+            # HACK: Skip korea for now, as it is giving errors
+            if country == "republic_of_korea":
+                continue
+
+            # Get name of directory one level up
+            process_type = filepath.parents[1].name
+
+            # Get name of file
+            filename = filepath.name
+
+            # Add to dataframe
+            df_files.loc[len(df_files)] = [process_type, filepath, filename, admin_zone]
+
+        # Exclude those rows where directory is processed and file is already in
+        # processed_include_fall directory
+        no_fall = df_files["directory"] == "processed"
+        include_fall = df_files[df_files["directory"] == "processed_include_fall"][
+            "filename"
+        ]
+
+        df_files = df_files[~(no_fall & (df_files["filename"].isin(include_fall)))]
+
+        return df_files
+
+    def process_combinations(self, df, method):
+        """
+        Create a list of tuples of the following:
+        - directory: name of directory where file is located
+        - path: full path to file
+        - filename: name of file
+        - method: whether to compute indices for phenological stages or not
+        This tuple will be used as input to the `process` function
+        :param df:
+        :param method:
+        :return:
+        """
+        combinations = []
+
+        for index, row in tqdm(df.iterrows()):
+            combinations.extend(
+                list(
+                    itertools.product([row[0]], [row[1]], [row[2]], [row[3]], [method])
+                )
+            )
+
+        combinations = remove_duplicates(combinations)
+
+        return combinations
+
+    def main(self, method):
+        """
+
+        :param method:
+        :return:
+        """
+        # Create a dataframe of the files to be analyzed
+        df_files = self.collect_files()
+
+        combinations = self.process_combinations(df_files, method)
+
+        # Add an element to the tuple to indicate the season
+        # Last element is redo flag which is True if the analysis is to be redone
+        # and False otherwise. Analysis is always redone for the current year
+        # and last year whether file exists or not
+        combinations = [
+            (
+                self.parser,
+                status,
+                path,
+                filename,
+                admin_zone,
+                category,
+                year,
+                "ndvi",
+                False,  # redo
+            )
+            for year in range(2001, ar.utcnow().year + 1)
+            for status, path, filename, admin_zone, category in combinations
+        ]
+
+        # Only keep those entries in combinations where the third elemt is
+        # mozambique, south_africa, angola or dem_people's_rep_of_korea
+        # This is done to test the code for these countries
+        combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
+
+        if True:
+            num_cpu = int(cpu_count() * 0.9)
+            with Pool(num_cpu) as p:
+                for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
+                    pass
+        else:
+            # Use the code below if you want to test without parallelization or
+            # if you want to debug by using pdb
+            pbar = tqdm(combinations)
+            for i, val in enumerate(pbar):
+                pbar.set_description(
+                    f"Main loop {combinations[i][2]} {combinations[i][5]}"
+                )
+                indices.process(val)
+
+
+def run(path_config_files=[]):
+    """
+
+    Args:
+        path_config_files:
+
+    Returns:
+
+    """
+    """ Check dictionary keys to have no spaces"""
+    indices.validate_index_definitions()
+
+    for method in [
+        "monthly_r",  # "dekad_r" # "dekad_r"
+    ]:  # , "full_season", "phenological_stages", "fraction_season"]:
+        obj = cei_runner(path_config_files)
+        obj.main(method)
+
+
+if __name__ == "__main__":
+    run()
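
Note on the filter near the end of main(): only EO files whose name contains "angola_maize_s1" are kept. A minimal sketch with hypothetical filenames (not taken from the package; the real names come from self.base_dir.rglob("*.csv") in collect_files()) illustrates the behaviour:

    # Hypothetical filenames, for illustration only
    country = "angola"
    filenames = ["angola_maize_s1_eo.csv", "angola_millet_s1_eo.csv", "angola_maize_s2_eo.csv"]
    print([f for f in filenames if f"{country}_maize_s1" in f])
    # -> ['angola_maize_s1_eo.csv']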
{geocif-0.1.67 → geocif-0.1.69}/geocif/ml/correlations.py
@@ -295,13 +295,16 @@ def all_correlated_feature_by_time(df, **kwargs):
            df_tmp2.loc[idx, "Type"] = combined_dict[row[0]][0]

        # Compute median of each CEI and sort the dataframe based on the absolute value of the median
-       dict_best_cei[region_id] = (
-           df_tmp2.groupby("Type")
-           .max()
-           .reset_index()
-           .sort_values("Value", ascending=False)["Metric"]
-           .values
-       )
+       try:
+           dict_best_cei[region_id] = (
+               df_tmp2.groupby("Type")
+               .max()
+               .reset_index()
+               .sort_values("Value", ascending=False)["Metric"]
+               .values
+           )
+       except:
+           breakpoint()

        kwargs["region_id"] = region_id
        _region_names = ", ".join([str(x) for x in group['Region'].unique()])
{geocif-0.1.67 → geocif-0.1.69}/geocif/ml/embedding.py
@@ -25,29 +25,32 @@ def _compute_correlations(X, y):
     feature_correlations = {}

     for feature in X.columns:
-        # Ignore columns that are object or categorical type
+        # Ignore object or categorical type columns
         if X[feature].dtypes.name in ["object", "category"]:
             continue

         f_series = X[feature]

-        # Ignore NaN values in either y and f_series
-        mask = ~ (np.isnan(y) | np.isnan(f_series))
-        y = y[mask]
-        f_series = f_series[mask]
+        # Ignore NaN values in either y or f_series
+        mask = ~(np.isnan(y) | np.isnan(f_series))
+        y_filtered = y[mask]
+        f_series_filtered = f_series[mask]

-        if np.std(f_series) == 0 or np.std(y) == 0:
+        # Handle cases where std is zero
+        if np.std(f_series_filtered) == 0 or np.std(y_filtered) == 0:
             feature_correlations[feature] = np.nan
         else:
             try:
-                r = pearsonr(y, f_series)[0]
+                r = pearsonr(y_filtered, f_series_filtered)[0]
                 feature_correlations[feature] = round(r, 3)
-            except:
+            except Exception as e:
+                print(f"Error computing correlation for {feature}: {e}")
                 feature_correlations[feature] = np.nan

     return feature_correlations


+
 def find_most_common_top_feature(top_feature_by_region):
     """
     Find the most common top feature and number of occurences
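
A small, self-contained sketch (synthetic arrays, not package data) of the masking pattern used above: NaNs are now filtered into new variables, so y itself is never shortened between features.

    import numpy as np
    from scipy.stats import pearsonr

    y = np.array([1.0, 2.0, np.nan, 4.0, 5.0])
    f_series = np.array([1.1, 1.9, 3.0, np.nan, 5.2])

    # Keep only positions where both arrays are valid, without overwriting y
    mask = ~(np.isnan(y) | np.isnan(f_series))
    r = pearsonr(y[mask], f_series[mask])[0]  # correlation over the jointly valid points
    print(round(r, 3))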
{geocif-0.1.67 → geocif-0.1.69}/geocif/ml/feature_engineering.py
@@ -39,10 +39,10 @@ def compute_last_year_yield(df, target_col="Yield (tn per ha)"):

     return df

-
 def compute_closest_years(all_years, harvest_year, number_lag_years):
     """
-    Finds the years closest to a given harvest year, excluding the harvest year itself.
+    Finds the historical years closest to a given harvest year,
+    excluding any future year (harvest_year itself and beyond).

     Args:
         all_years (array-like): List or array of all years to consider.
@@ -50,7 +50,8 @@ def compute_closest_years(all_years, harvest_year, number_lag_years):
         number_lag_years (int): Number of closest years to return.

     Returns:
-        list: Years closest to the given harvest year.
+        list: The historical years closest to the given harvest year.
+            Returns an empty list if no historical years exist.
     """
     # Exclude the harvest year before computation to simplify logic
     filtered_years = [year for year in all_years if year != harvest_year]
@@ -96,7 +97,7 @@ def compute_median_statistics(
         mask = (group["Harvest Year"].isin(closest_years)) & (
             group["Region"] == region
         )
-        median_yield = group.loc[mask, target_col].median()
+        median_yield = group.loc[mask, target_col].mean()
         df.loc[
             (df["Region"] == region) & (df["Harvest Year"] == harvest_year),
             f"Median {target_col}",
@@ -186,7 +187,7 @@ def compute_lag_yield(
        else:
            # Add median yield
            mask_group_median = group["Harvest Year"].isin(closest_years)
-           median_yield = group.loc[mask_group_median, target_col].median()
+           median_yield = group.loc[mask_group_median, target_col].mean()

            df.loc[mask_region, col] = median_yield

{geocif-0.1.67 → geocif-0.1.69}/geocif/ml/feature_selection.py
@@ -188,10 +188,24 @@ def select_features(X, y, method="RFE", min_features_to_select=3, threshold_nan=

        # Get the selected feature names
        selected_features = X.columns[selected_features].tolist()
+    elif method == "lasso":
+        from sklearn.linear_model import LassoLarsCV
+        from sklearn.feature_selection import SelectFromModel
+
+        # Fit Lasso model (L1 regularization) to perform feature selection
+        lasso = LassoLarsCV(cv=5)
+        lasso.fit(X, y)
+
+        # Use SelectFromModel to remove features with zero coefficients
+        selector = SelectFromModel(lasso, prefit=True)
+
+        # Get the selected features
+        selected_features = X.columns[selector.get_support()].tolist()
+        print(selected_features)
    elif method == "BorutaPy":
        from boruta import BorutaPy

-        selector = BorutaPy(forest, n_estimators="auto", random_state=42)
+        selector = BorutaPy(forest, n_estimators="auto", random_state=42, verbose=0)
        selector.fit(X.values, y.values)
        selected_features_mask = selector.support_
        selected_features = X.columns[selected_features_mask].tolist()
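
A minimal, standalone sketch of what the new "lasso" branch does, on synthetic data (the column names below are made up); it mirrors the LassoLarsCV + SelectFromModel calls added above.

    import numpy as np
    import pandas as pd
    from sklearn.linear_model import LassoLarsCV
    from sklearn.feature_selection import SelectFromModel

    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.normal(size=(200, 6)), columns=[f"cei_{i}" for i in range(6)])
    y = 2.0 * X["cei_0"] - 1.5 * X["cei_4"] + rng.normal(scale=0.1, size=200)

    lasso = LassoLarsCV(cv=5).fit(X, y)             # L1-regularized fit, alpha chosen by CV
    selector = SelectFromModel(lasso, prefit=True)  # keeps features with non-zero coefficients
    print(X.columns[selector.get_support()].tolist())  # typically ['cei_0', 'cei_4']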
{geocif-0.1.67 → geocif-0.1.69}/geocif/ml/stats.py
@@ -203,7 +203,7 @@ def add_statistics(
        fn = "illinois.csv"
    elif country == "Ethiopia":
        # HACK
-        fn = "ethiopia_wheat_summary_formatted.csv"
+        fn = "adm_crop_production.csv"
    else:
        fn = "adm_crop_production.csv"
    df_fewsnet = pd.read_csv(dir_stats / fn, low_memory=False)
{geocif-0.1.67 → geocif-0.1.69}/geocif/ml/trainers.py
@@ -278,7 +278,7 @@ def auto_train(
        "loss_function": loss_function,
        "early_stopping_rounds": 20,
        "random_seed": seed,
-        "verbose": False,
+        "verbose": True,
    }

    if model_name == "catboost":
geocif-0.1.69/geocif/playground/wolayita_maize_mask.py (new file)
@@ -0,0 +1,156 @@
+import rasterio
+from rasterio.warp import calculate_default_transform, reproject, Resampling
+import numpy as np
+import matplotlib.pyplot as plt
+import math
+
+# Input / Output paths
+input_path = r"D:\Users\ritvik\projects\GEOGLAM\Input\Global_Datasets\Masks\wolayita_maize.tif"
+output_path = r"D:\Users\ritvik\projects\GEOGLAM\Input\Global_Datasets\Masks\wolayita_maize_5km_percentage.tif"
+
+import rasterio
+from rasterio.warp import calculate_default_transform, reproject, Resampling
+from math import ceil
+import numpy as np
+
+input_path = r"D:\Users\ritvik\projects\GEOGLAM\Input\Global_Datasets\Masks\wolayita_maize.tif"
+output_path = r"D:\Users\ritvik\projects\GEOGLAM\Input\Global_Datasets\Masks\wolayita_maize_5km_percentage.tif"
+
+with rasterio.open(input_path) as src:
+    # 1) If needed, assign correct CRS
+    # Example: if you know it's actually EPSG:32637 but isn't set
+    # src_crs = rasterio.crs.CRS.from_epsg(32637)
+    # else if it's already correct, do:
+    src_crs = src.crs
+
+    # 2) Decide your pixel size.
+    #    If src_crs is lat/lon (EPSG:4326), use ~0.045 deg for ~5 km.
+    #    If src_crs is UTM in meters, use 5000 for 5 km.
+    pixel_size = 0.045  # or 5000 if in meters
+
+    transform, width, height = calculate_default_transform(
+        src_crs,  # source crs
+        src_crs,  # target crs (same if you just want coarser in place)
+        src.width,
+        src.height,
+        *src.bounds,
+        resolution=pixel_size
+    )
+
+    # Prepare output fraction array
+    fraction_array = np.full((height, width), -9999, dtype=np.float32)
+
+    # Reproject with average -> fraction
+    reproject(
+        source=rasterio.band(src, 1),
+        destination=fraction_array,
+        src_transform=src.transform,
+        src_crs=src_crs,
+        dst_transform=transform,
+        dst_crs=src_crs,
+        resampling=Resampling.average,
+        dst_nodata=-9999
+    )
+
+# Now fraction_array should have values in [0..1], with -9999 for nodata.
+valid_mask = (fraction_array != -9999)
+
+if not np.any(valid_mask):
+    print("No valid cells at all (everything is nodata). This indicates a bounding box or CRS mismatch.")
+else:
+    frac_min = fraction_array[valid_mask].min()
+    frac_max = fraction_array[valid_mask].max()
+    print("Fraction min:", frac_min)
+    print("Fraction max:", frac_max)
+
+    # If both min and max are 0.0, it means there's truly no coverage or it's extremely small.
+    # Otherwise you might see something like 0.0, 0.01, 0.5, etc.
+
+    # Then let's see if maybe they're all below 0.005:
+    below_005 = (fraction_array[valid_mask] < 0.005).all()
+    print("All fractions < 0.5%?", below_005)
+
+breakpoint()
+with rasterio.open(input_path) as src:
+    # If src.crs is None but you KNOW it's EPSG:4326, assign it:
+    # src_crs = rasterio.crs.CRS.from_epsg(4326)
+    # Otherwise, just use what's in the file:
+    src_crs = src.crs
+
+    # Let's assume the file is already lat/lon (EPSG:4326).
+    # We'll define ~0.045° as "5 km" at the equator.
+    new_res = 0.045
+
+    # Calculate a new transform and new shape
+    # for coarser resolution in the SAME EPSG:4326.
+    transform, width, height = calculate_default_transform(
+        src_crs,  # src CRS
+        src_crs,  # dst CRS (same if you want to stay in lat/lon)
+        src.width,
+        src.height,
+        *src.bounds,
+        resolution=new_res  # sets pixel size to 0.045 degrees
+    )
+
+    # Read full data for histogram plotting
+    data_in = src.read(1, masked=True)
+    in_profile = src.profile.copy()
+
+# Plot input histogram (0 or 1)
+arr_in = data_in.compressed()
+plt.figure()
+plt.hist(arr_in, bins=[-0.5, 0.5, 1.5], edgecolor='black')
+plt.title("Input (0/1)")
+plt.xlabel("Value")
+plt.ylabel("Frequency")
+plt.show()
+
+# Prepare output array, float32 with sentinel -9999
+out_array = np.full((height, width), -9999, dtype=np.float32)
+
+with rasterio.open(input_path) as src:
+    reproject(
+        source=rasterio.band(src, 1),
+        destination=out_array,
+        src_transform=src.transform,
+        src_crs=src.crs,
+        dst_transform=transform,
+        dst_crs=src_crs,  # same
+        resampling=Resampling.average,
+        dst_nodata=-9999
+    )
+
+# Now out_array has fraction in [0..1]. Convert to % (0..100).
+breakpoint()
+mask_valid = (out_array != -9999)
+out_array[mask_valid] *= 100.0
+out_array[mask_valid] = np.rint(out_array[mask_valid])  # round
+out_array = out_array.astype(np.int32)
+
+# Update profile
+out_profile = in_profile.copy()
+out_profile.update({
+    'driver': 'GTiff',
+    'width': width,
+    'height': height,
+    'transform': transform,
+    'crs': src_crs,
+    'dtype': 'int32',
+    'nodata': -9999
+})
+
+# Write out
+with rasterio.open(output_path, 'w', **out_profile) as dst:
+    dst.write(out_array, 1)
+
+print("Wrote:", output_path)
+
+# Plot histogram of output (ignore -9999)
+out_data = np.where(out_array == -9999, np.nan, out_array)
+valid_data = out_data[~np.isnan(out_data)]
+plt.figure()
+plt.hist(valid_data, bins=50, edgecolor="black")
+plt.title("5km Percentage (0-100)")
+plt.xlabel("Percent cropped")
+plt.ylabel("Frequency")
+plt.show()
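
The core idea of this script is that Resampling.average applied to a 0/1 crop mask yields, for each coarse (~5 km) cell, the fraction of fine pixels that are cropped, which is then scaled to a percentage. A tiny pure-NumPy sketch of the same arithmetic (synthetic values, no rasterio):

    import numpy as np

    # One coarse cell covering a 4x4 block of fine binary mask pixels
    block = np.array([
        [1, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 0, 0, 0],
        [1, 0, 0, 0],
    ], dtype=float)

    fraction = block.mean()          # what Resampling.average produces for this cell
    print(fraction)                  # 0.3125
    print(round(fraction * 100.0))   # 31 -> "percent cropped" written to the output raster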
geocif-0.1.69/geocif/viz/gt.py (new file)
@@ -0,0 +1,69 @@
+from great_tables import GT, md, system_fonts
+import pandas as pd
+
+# Your data as a pandas DataFrame
+data = [
+    [2, "10<sup>th</sup>", "<=14; >14", 89.2, "2010 - 2021"],
+    [2, "25<sup>th</sup>", "<=18.7; >18.7", 82.2, "2010 - 2021"],
+    [2, "50<sup>th</sup>", "<=24.6; >24.6", 83.7, "2010 - 2021"],
+    [2, "75<sup>th</sup>", "<=31; >31", 88.3, "2010 - 2021"],
+    [2, "90<sup>th</sup>", "<=38.9; >38.9", 96.9, "2010 - 2021"],
+    [3, "33<sup>rd</sup>, 67<sup>th</sup>", "<=20.3; 20.3 - 29.6; >29.6", 60.5, "2010 - 2021"],
+    [4, "25<sup>th</sup>, 50<sup>th</sup>, 75<sup>th</sup>",
+     "<=18.7; 18.7-24.6; 24.6-31; >31", 64.4, "2010 - 2021"]
+]
+cols = ["Number of classes", "Percentile(s)", "Yield categories", "Accuracy (%)", "Years"]
+
+df = pd.DataFrame(data, columns=cols)
+
+# Create a Great Tables object
+gt_tbl = GT(data=df)
+
+# Example formatting, coloring, and styling
+gt_tbl = (gt_tbl
+    # Format the "Accuracy (%)" column to show one decimal place
+    .fmt_number(
+        columns=["Accuracy (%)"],
+        decimals=1
+    )
+    # Color-scale the "Accuracy (%)" column (optional)
+    #.data_color(
+    #    columns=["Accuracy (%)"],
+    #    palette=["tomato", "gold", "palegreen"],
+    #    domain=[50, 100]  # Range from the lowest to highest accuracy
+    #)
+    # Set column widths
+    .cols_width({
+        "Number classes": "60px",
+        "Percentile(s)": "140px",
+        "Yield categories": "220px",
+        "Accuracy (%)": "100px",
+        "Years": "90px"
+    })
+    # Add a table header/title
+    .tab_header(
+        title=md("**Accuracy of Model for Different Yield Categories**")
+    )
+    # Add a source note (optional)
+    # .tab_source_note(
+    #     md(
+    #         "**Source**: Internal records<br>"
+    #         "**Note**: Data from 2010-2021"
+    #     )
+    # )
+    # Customize general table options
+    .tab_options(
+        heading_background_color='antiquewhite',
+        column_labels_background_color='antiquewhite',
+        source_notes_background_color='antiquewhite',
+        table_background_color='snow',
+        table_font_names=system_fonts("humanist"),
+        data_row_padding='2px'
+    )
+    # Align all columns center except "Yield categories", which might be longer text
+    .cols_align(align="center")
+    .cols_align(align="left", columns=["Yield categories"])
+)
+
+# Display the table
+GT.save(gt_tbl, file="aa.png")
{geocif-0.1.67 → geocif-0.1.69}/geocif/viz/tmp.py
@@ -1,6 +1,4 @@
 import geopandas as gpd
-import pandas as pd
-import matplotlib.pyplot as plt
 import palettable as pal
 import matplotlib.colors as mcolors

@@ -9,7 +7,7 @@ import glob
 import os

 # 1. Specify the directory containing your .dta files:
-data_dir = r"C:\Users\ritvik\Downloads\maize_yield\maize_yield"
+data_dir = r"C:\Users\ritvik\Downloads\maize_yield (2)\maize_yield"

 # 2. Use glob to find all .dta files in that directory:
 dta_files = glob.glob(os.path.join(data_dir, "*.dta"))
@@ -20,6 +18,13 @@ dataframes = [pd.read_stata(f) for f in dta_files]
 # 4. Concatenate them all into one DataFrame (row-wise):
 merged_df = pd.concat(dataframes, ignore_index=True)

+# Replace null values in PROD98CQ with those in PROD columns
+merged_df['PROD98CQ'] = merged_df['PROD98CQ'].fillna(merged_df['PROD'])
+merged_df['YEAR'] = merged_df['YEAR'].fillna(merged_df['year'])
+
+# Drop rows where AREAH is 0
+merged_df = merged_df[merged_df['AREAH'] != 0]
+
 merged_df['ZONE'] = merged_df['ZONE'].astype(int)
 merged_df['DIST'] = merged_df['DIST'].astype(int)

@@ -36,7 +41,7 @@ merged_df['W_CODE'] = '7' + merged_df['W_CODE']
 merged_df['W_CODE'] = merged_df['W_CODE'].str.replace('.0', '')
 merged_df['W_CODE'] = merged_df['W_CODE'].astype(int)

-dg = gpd.read_file(r"D:\Users\ritvik\projects\GEOGLAM\Input\countries\wolayita\wolayita_dissolved.shp")
+dg = gpd.read_file(r"wolayita_dissolved.shp")
 dg = dg[['W_CODE', 'W_NAME']]

 # Merge the two dataframes on W_CODE
@@ -48,8 +53,8 @@ merged_df = merged_df.dropna(subset=['PROD98CQ', 'AREAH'])
 # Compte yield column
 merged_df['yield'] = merged_df['PROD98CQ'] / merged_df['AREAH']

-# create a new dataframe which computes average yield by W_NAME for each year
-df_avg_yield = merged_df.groupby(['W_NAME', 'YEAR'])['yield'].mean().reset_index()
+# create a new dataframe which computes average yield by W_NAME for each year, do a weighted average using FWEIGHT column
+df_avg_yield = merged_df.groupby(['W_NAME', 'YEAR']).apply(lambda x: np.average(x['yield'], weights=x['FWEIGHT'])).reset_index(name='yield')

 # Change W_NAME column to title case
 df_avg_yield['W_NAME'] = df_avg_yield['W_NAME'].str.title()
@@ -64,7 +69,15 @@ df_avg_yield = df_avg_yield.pivot(index='W_NAME', columns='YEAR', values='yield'
 df_avg_yield.index.name = None
 df_avg_yield.columns.name = None

-df_avg_yield.to_csv('wolayita_yields.csv')
+df_avg_yield.to_csv('wolayita_yields_v8.csv')
+breakpoint()
+# Compare wolayita_yields_v2.csv with wolayita_yields.csv
+# 1. Load the two CSV files
+df_v1 = pd.read_csv('wolayita_yields.csv')
+df_v2 = pd.read_csv('wolayita_yields_v2.csv')
+
+# 2. Check if the two DataFrames are equal
+print(df_v1.equals(df_v2))

 breakpoint()
 # 5. (Optional) Inspect the merged DataFrame
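
A short sketch (synthetic rows; the woreda name is a placeholder) of the new weighted-average line: yields are now averaged per (W_NAME, YEAR) using the FWEIGHT sampling weights instead of a plain mean.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        "W_NAME": ["Damot", "Damot", "Damot"],
        "YEAR": [2020, 2020, 2020],
        "yield": [1.0, 3.0, 2.0],
        "FWEIGHT": [3.0, 1.0, 0.0],
    })

    out = (df.groupby(["W_NAME", "YEAR"])
             .apply(lambda x: np.average(x["yield"], weights=x["FWEIGHT"]))
             .reset_index(name="yield"))
    print(out)  # weighted yield = (1*3 + 3*1 + 2*0) / 4 = 1.5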
{geocif-0.1.67 → geocif-0.1.69/geocif.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.67
+Version: 0.1.69
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal
{geocif-0.1.67 → geocif-0.1.69}/geocif.egg-info/SOURCES.txt
@@ -72,9 +72,11 @@ geocif/playground/tmp2.py
 geocif/playground/tmp3.py
 geocif/playground/tmp4.py
 geocif/playground/tmp5.py
+geocif/playground/wolayita_maize_mask.py
 geocif/risk/__init__.py
 geocif/risk/impact_assessment.py
 geocif/viz/__init__.py
+geocif/viz/gt.py
 geocif/viz/plot.py
 geocif/viz/tmp.py
 tests/test_geocif.py
{geocif-0.1.67 → geocif-0.1.69}/setup.py
@@ -50,6 +50,6 @@ setup(
     test_suite="tests",
     tests_require=test_requirements,
     url="https://ritviksahajpal.github.io/yield_forecasting/",
-    version="0.1.67",
+    version="0.1.69",
     zip_safe=False,
 )
geocif-0.1.67/geocif/indices_runner_angola.py (removed file)
@@ -1,212 +0,0 @@
-import itertools
-import warnings
-from multiprocessing import Pool, cpu_count
-from pathlib import Path
-
-import arrow as ar
-import pandas as pd
-from tqdm import tqdm
-
-warnings.filterwarnings("ignore")
-
-from .cei import indices
-from geoprepare import base
-
-country = "ethiopia"
-
-def remove_duplicates(lst):
-    """
-
-    :param lst:
-    :return:
-    """
-    return list(set([i for i in lst]))
-
-
-def get_admin_zone(country, dg_shp):
-    admin_zone = "admin_1"
-    country = country.title().replace(" ", "_")
-
-    # Read in shapefile
-    dg_country = dg_shp[dg_shp["ADMIN0"] == country]
-
-    # Is the ADMIN2 column all None? If so, return admin_1 else return admin_2
-    if dg_country.empty:
-        admin_zone = "admin_1"
-    elif not dg_country["ADMIN2"].isna().all():
-        admin_zone = "admin_2"
-
-    return admin_zone
-
-
-class cei_runner(base.BaseGeo):
-    def __init__(self, path_config_file):
-        super().__init__(path_config_file)
-
-        # Parse configuration files
-        self.parse_config()
-
-        self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
-        import platform
-        if platform.system() == "Linux":
-            self.base_dir = Path(
-                rf"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/{country}"
-            )
-        else:
-            self.base_dir = Path(
-                rf"D:\Users\ritvik\projects\GEOGLAM\Output\countries\{country}"
-            )  # Path(self.parser.get("PATHS", "dir_crop_inputs"))
-        self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
-
-    def collect_files(self):
-        """
-        1. Collect all the files which contain EO information
-        2. Exclude files from the `processed` directory if it is already in
-           processed_include_fall directory
-        3. Create a dataframe that contains the following columns:
-           - directory: name of directory where file is located
-           - path: full path to file
-           - filename: name of file
-        :return: Return the dataframe created above
-        """
-        import geopandas as gp
-
-        dg_shp = gp.read_file(
-            self.dir_input
-            / "Global_Datasets"
-            / "Regions"
-            / "Shps"
-            / "adm_shapefile.shp",
-            engine="pyogrio",
-        )
-
-        # Collect all the files which contain EO information
-        df_files = pd.DataFrame(columns=["directory", "path", "filename", "admin_zone"])
-        for filepath in self.base_dir.rglob("*.csv"):
-            country = filepath.parents[0].name
-
-            admin_zone = get_admin_zone(country, dg_shp)
-
-            # If country is not in cc.COUNTRIES then skip
-            # HACK: Skip korea for now, as it is giving errors
-            if country == "republic_of_korea":
-                continue
-
-            # Get name of directory one level up
-            process_type = filepath.parents[1].name
-
-            # Get name of file
-            filename = filepath.name
-
-            # Add to dataframe
-            df_files.loc[len(df_files)] = [process_type, filepath, filename, admin_zone]
-
-        # Exclude those rows where directory is processed and file is already in
-        # processed_include_fall directory
-        no_fall = df_files["directory"] == "processed"
-        include_fall = df_files[df_files["directory"] == "processed_include_fall"][
-            "filename"
-        ]
-
-        df_files = df_files[~(no_fall & (df_files["filename"].isin(include_fall)))]
-
-        return df_files
-
-    def process_combinations(self, df, method):
-        """
-        Create a list of tuples of the following:
-        - directory: name of directory where file is located
-        - path: full path to file
-        - filename: name of file
-        - method: whether to compute indices for phenological stages or not
-        This tuple will be used as input to the `process` function
-        :param df:
-        :param method:
-        :return:
-        """
-        combinations = []
-
-        for index, row in tqdm(df.iterrows()):
-            combinations.extend(
-                list(
-                    itertools.product([row[0]], [row[1]], [row[2]], [row[3]], [method])
-                )
-            )
-
-        combinations = remove_duplicates(combinations)
-
-        return combinations
-
-    def main(self, method):
-        """
-
-        :param method:
-        :return:
-        """
-        # Create a dataframe of the files to be analyzed
-        df_files = self.collect_files()
-
-        combinations = self.process_combinations(df_files, method)
-
-        # Add an element to the tuple to indicate the season
-        # Last element is redo flag which is True if the analysis is to be redone
-        # and False otherwise. Analysis is always redone for the current year
-        # and last year whether file exists or not
-        combinations = [
-            (
-                self.parser,
-                status,
-                path,
-                filename,
-                admin_zone,
-                category,
-                year,
-                "ndvi",
-                False,  # redo
-            )
-            for year in range(2001, ar.utcnow().year + 1)
-            for status, path, filename, admin_zone, category in combinations
-        ]
-
-        # Only keep those entries in combinations where the third elemt is
-        # mozambique, south_africa, angola or dem_people's_rep_of_korea
-        # This is done to test the code for these countries
-        combinations = [i for i in combinations if f"{country}_winter_wheat_s1" in i[3]]
-
-        if True:
-            num_cpu = int(cpu_count() * 0.9)
-            with Pool(num_cpu) as p:
-                for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
-                    pass
-        else:
-            # Use the code below if you want to test without parallelization or
-            # if you want to debug by using pdb
-            pbar = tqdm(combinations)
-            for i, val in enumerate(pbar):
-                pbar.set_description(
-                    f"Main loop {combinations[i][2]} {combinations[i][5]}"
-                )
-                indices.process(val)
-
-
-def run(path_config_files=[]):
-    """
-
-    Args:
-        path_config_files:
-
-    Returns:
-
-    """
-    """ Check dictionary keys to have no spaces"""
-    indices.validate_index_definitions()
-
-    for method in [
-        "monthly_r",  # "dekad_r" # "dekad_r"
-    ]:  # , "full_season", "phenological_stages", "fraction_season"]:
-        obj = cei_runner(path_config_files)
-        obj.main(method)
-
-
-if __name__ == "__main__":
-    run()