geocif 0.2.25__tar.gz → 0.2.27__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {geocif-0.2.25/geocif.egg-info → geocif-0.2.27}/PKG-INFO +1 -1
  2. {geocif-0.2.25 → geocif-0.2.27}/geocif/geocif.py +64 -62
  3. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/feature_selection.py +9 -8
  4. {geocif-0.2.25 → geocif-0.2.27/geocif.egg-info}/PKG-INFO +1 -1
  5. {geocif-0.2.25 → geocif-0.2.27}/setup.py +1 -1
  6. {geocif-0.2.25 → geocif-0.2.27}/LICENSE +0 -0
  7. {geocif-0.2.25 → geocif-0.2.27}/MANIFEST.in +0 -0
  8. {geocif-0.2.25 → geocif-0.2.27}/README.md +0 -0
  9. {geocif-0.2.25 → geocif-0.2.27}/geocif/__init__.py +0 -0
  10. {geocif-0.2.25 → geocif-0.2.27}/geocif/agmet/__init__.py +0 -0
  11. {geocif-0.2.25 → geocif-0.2.27}/geocif/agmet/geoagmet.py +0 -0
  12. {geocif-0.2.25 → geocif-0.2.27}/geocif/agmet/plot.py +0 -0
  13. {geocif-0.2.25 → geocif-0.2.27}/geocif/agmet/utils.py +0 -0
  14. {geocif-0.2.25 → geocif-0.2.27}/geocif/analysis.py +0 -0
  15. {geocif-0.2.25 → geocif-0.2.27}/geocif/backup/__init__.py +0 -0
  16. {geocif-0.2.25 → geocif-0.2.27}/geocif/backup/constants.py +0 -0
  17. {geocif-0.2.25 → geocif-0.2.27}/geocif/backup/features.py +0 -0
  18. {geocif-0.2.25 → geocif-0.2.27}/geocif/backup/geo.py +0 -0
  19. {geocif-0.2.25 → geocif-0.2.27}/geocif/backup/geocif.py +0 -0
  20. {geocif-0.2.25 → geocif-0.2.27}/geocif/backup/metadata.py +0 -0
  21. {geocif-0.2.25 → geocif-0.2.27}/geocif/backup/models.py +0 -0
  22. {geocif-0.2.25 → geocif-0.2.27}/geocif/cei/__init__.py +0 -0
  23. {geocif-0.2.25 → geocif-0.2.27}/geocif/cei/definitions.py +0 -0
  24. {geocif-0.2.25 → geocif-0.2.27}/geocif/cei/indices.py +0 -0
  25. {geocif-0.2.25 → geocif-0.2.27}/geocif/experiments.py +0 -0
  26. {geocif-0.2.25 → geocif-0.2.27}/geocif/geocif_runner.py +0 -0
  27. {geocif-0.2.25 → geocif-0.2.27}/geocif/indices_runner.py +0 -0
  28. {geocif-0.2.25 → geocif-0.2.27}/geocif/indices_runner_angola.py +0 -0
  29. {geocif-0.2.25 → geocif-0.2.27}/geocif/indices_runner_madagascar.py +0 -0
  30. {geocif-0.2.25 → geocif-0.2.27}/geocif/indices_runner_malawi.py +0 -0
  31. {geocif-0.2.25 → geocif-0.2.27}/geocif/indices_runner_mozambique.py +0 -0
  32. {geocif-0.2.25 → geocif-0.2.27}/geocif/indices_runner_south_africa.py +0 -0
  33. {geocif-0.2.25 → geocif-0.2.27}/geocif/indices_runner_zambia.py +0 -0
  34. {geocif-0.2.25 → geocif-0.2.27}/geocif/indices_runner_zimbabwe.py +0 -0
  35. {geocif-0.2.25 → geocif-0.2.27}/geocif/logger.py +0 -0
  36. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/__init__.py +0 -0
  37. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/correlations.py +0 -0
  38. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/embedding.py +0 -0
  39. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/feature_engineering.py +0 -0
  40. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/outliers.py +0 -0
  41. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/outlook.py +0 -0
  42. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/output.py +0 -0
  43. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/spatial_autocorrelation.py +0 -0
  44. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/stages.py +0 -0
  45. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/stats.py +0 -0
  46. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/trainers.py +0 -0
  47. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/trend.py +0 -0
  48. {geocif-0.2.25 → geocif-0.2.27}/geocif/ml/xai.py +0 -0
  49. {geocif-0.2.25 → geocif-0.2.27}/geocif/mm.py +0 -0
  50. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/__init__.py +0 -0
  51. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/aa.py +0 -0
  52. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/area.py +0 -0
  53. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/automl.py +0 -0
  54. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/download_esi.py +0 -0
  55. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/enso.py +0 -0
  56. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/eval.py +0 -0
  57. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/gamtest.py +0 -0
  58. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/gee_access.py +0 -0
  59. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/misc.py +0 -0
  60. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/play_xagg.py +0 -0
  61. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/reg.py +0 -0
  62. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/sustain.py +0 -0
  63. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/test_catboost.py +0 -0
  64. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/tmp.py +0 -0
  65. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/tmp2.py +0 -0
  66. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/tmp3.py +0 -0
  67. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/tmp4.py +0 -0
  68. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/tmp5.py +0 -0
  69. {geocif-0.2.25 → geocif-0.2.27}/geocif/playground/wolayita_maize_mask.py +0 -0
  70. {geocif-0.2.25 → geocif-0.2.27}/geocif/risk/__init__.py +0 -0
  71. {geocif-0.2.25 → geocif-0.2.27}/geocif/risk/impact_assessment.py +0 -0
  72. {geocif-0.2.25 → geocif-0.2.27}/geocif/utils.py +0 -0
  73. {geocif-0.2.25 → geocif-0.2.27}/geocif/viz/__init__.py +0 -0
  74. {geocif-0.2.25 → geocif-0.2.27}/geocif/viz/gt.py +0 -0
  75. {geocif-0.2.25 → geocif-0.2.27}/geocif/viz/plot.py +0 -0
  76. {geocif-0.2.25 → geocif-0.2.27}/geocif/viz/tmp.py +0 -0
  77. {geocif-0.2.25 → geocif-0.2.27}/geocif.egg-info/SOURCES.txt +0 -0
  78. {geocif-0.2.25 → geocif-0.2.27}/geocif.egg-info/dependency_links.txt +0 -0
  79. {geocif-0.2.25 → geocif-0.2.27}/geocif.egg-info/not-zip-safe +0 -0
  80. {geocif-0.2.25 → geocif-0.2.27}/geocif.egg-info/top_level.txt +0 -0
  81. {geocif-0.2.25 → geocif-0.2.27}/requirements.txt +0 -0
  82. {geocif-0.2.25 → geocif-0.2.27}/setup.cfg +0 -0
  83. {geocif-0.2.25 → geocif-0.2.27}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.2.25
3
+ Version: 0.2.27
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -179,6 +179,13 @@ class Geocif:
179
179
  "Production (tn)",
180
180
  ]
181
181
 
182
+ if self.model_type == "REGRESSION":
183
+ self.target_column = (
184
+ f"Detrended {self.target}" if self.check_yield_trend else self.target
185
+ )
186
+ elif self.model_type == "CLASSIFICATION":
187
+ self.target_column = self.target_class
188
+
182
189
  self.combined_dict = {
183
190
  **di.dict_indices,
184
191
  **di.dict_ndvi,
@@ -204,7 +211,34 @@ class Geocif:
204
211
  # obj_pickle = outlook.Outlook(self.pickle_file)
205
212
  # self.df_outlook = obj_pickle.read_outlook_file()
206
213
 
207
- def train(self, df_region, scaler=None):
214
+ def apply_feature_selector(self, region, dir_output):
215
+ if self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
216
+ all_features = self.X_train.columns
217
+
218
+ # Select the columns with use_ceis in it
219
+ self.selected_features = [
220
+ column
221
+ for column in all_features
222
+ if any(cei in column for cei in self.use_ceis)
223
+ ]
224
+ else:
225
+ self.logger.info(f"Selecting features for {self.country} {self.crop}")
226
+ selector, _, self.selected_features = fs.select_features(
227
+ self.X_train,
228
+ self.y_train,
229
+ method=self.feature_selection,
230
+ dir_output=dir_output,
231
+ region=region
232
+ )
233
+ self.logger.info(f"Selected features: {self.selected_features}")
234
+
235
+ """ Update model to include conformal estimates """
236
+ if "lat" not in self.selected_features and self.include_lat_lon_as_feature:
237
+ self.selected_features.append("lat")
238
+ if "lon" not in self.selected_features and self.include_lat_lon_as_feature:
239
+ self.selected_features.append("lon")
240
+
241
+ def train_model(self, df_region, scaler=None):
208
242
  """
209
243
 
210
244
  Args:
@@ -214,55 +248,9 @@ class Geocif:
214
248
  Returns:
215
249
 
216
250
  """
217
-
218
- """ Perform feature selection """
219
- if self.model_type == "REGRESSION":
220
- target_column = (
221
- f"Detrended {self.target}" if self.check_yield_trend else self.target
222
- )
223
- elif self.model_type == "CLASSIFICATION":
224
- target_column = self.target_class
225
-
226
- # Drop rows where target_column is NaN
227
- df_region = df_region.dropna(subset=[target_column])
228
-
229
- X_train = df_region[self.feature_names]
230
- # Drop any columns with NaNs
231
- X_train = X_train.dropna(axis=1, how="any")
232
- y_train = df_region[target_column]
233
-
234
251
  if self.ml_model:
235
- if self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
236
- all_features = X_train.columns
237
-
238
- # Select the columns with use_ceis in it
239
- self.selected_features = [
240
- column
241
- for column in all_features
242
- if any(cei in column for cei in self.use_ceis)
243
- ]
244
- else:
245
- self.logger.info(f"Selecting features for {self.country} {self.crop}")
246
- selector, _, self.selected_features = fs.select_features(
247
- X_train, y_train, method=self.feature_selection
248
- )
249
- self.logger.info(f"Selected features: {self.selected_features}")
250
-
251
- """ Update model to include conformal estimates """
252
- if "lat" not in self.selected_features and self.include_lat_lon_as_feature:
253
- self.selected_features.append("lat")
254
- if "lon" not in self.selected_features and self.include_lat_lon_as_feature:
255
- self.selected_features.append("lon")
256
252
  X_train = df_region[self.selected_features + self.cat_features]
257
253
 
258
- dir_output = (
259
- self.dir_analysis
260
- / self.country
261
- / self.crop
262
- / self.model_name
263
- / str(self.forecast_season)
264
- )
265
-
266
254
  region_id = df_region["Region_ID"].unique()[0]
267
255
  X_train.to_csv(dir_output / f"X_train_{region_id}.csv", index=False)
268
256
  if scaler:
@@ -284,9 +272,9 @@ class Geocif:
284
272
  "Harvest Year",
285
273
  df_region[self.selected_features + self.cat_features + [self.target]],
286
274
  X_train_scaled,
287
- y_train,
275
+ self.y_train,
288
276
  feature_names=self.selected_features,
289
- target_col=target_column,
277
+ target_col=self.target_column,
290
278
  optimize=self.optimize,
291
279
  fraction_loocv=self.fraction_loocv,
292
280
  cat_features=self.cat_features,
@@ -303,7 +291,7 @@ class Geocif:
303
291
  if self.model_name == "catboost":
304
292
  self.model.fit(
305
293
  X_train,
306
- y_train,
294
+ self.y_train,
307
295
  cat_features=self.cat_features,
308
296
  verbose=True,
309
297
  )
@@ -313,16 +301,16 @@ class Geocif:
313
301
  item for item in self.cat_features if item != "Harvest Year"
314
302
  ]
315
303
  )
316
- self.model.fit(X_train, y_train)
304
+ self.model.fit(X_train, self.y_train)
317
305
  elif self.model_name == "ydf":
318
306
  # Combine X_train and y_train
319
- df_train = pd.concat([X_train, y_train], axis=1)
307
+ df_train = pd.concat([X_train, self.y_train], axis=1)
320
308
 
321
309
  self.model = self.model.train(df_train)
322
310
  elif self.model_name == "geospaNN":
323
311
  self.model.fit(
324
312
  X_train,
325
- y_train,
313
+ self.y_train,
326
314
  # callbacks=[TQDMCallback(self.best_hyperparams["iterations"])],
327
315
  )
328
316
  elif self.model_name == "merf":
@@ -334,15 +322,15 @@ class Geocif:
334
322
  X_train,
335
323
  Z_train,
336
324
  clusters_train.astype("object"),
337
- y_train.values,
325
+ self.y_train.values,
338
326
  )
339
327
  elif self.model_name == "linear":
340
- self.model.fit(X_train_scaled, y_train)
328
+ self.model.fit(X_train_scaled, self.y_train)
341
329
  elif self.model_name == "gam":
342
- self.model.fit(X_train_scaled.values, y_train.values)
330
+ self.model.fit(X_train_scaled.values, self.y_train.values)
343
331
  self.best_hyperparams = {}
344
332
  elif self.model_name in ["cubist"]:
345
- self.model.fit(X_train, y_train)
333
+ self.model.fit(X_train, self.y_train)
346
334
  elif self.model_name in [
347
335
  "cumulative_1",
348
336
  "cumulative_2",
@@ -377,7 +365,7 @@ class Geocif:
377
365
  # Combine scaled numeric features and encoded region
378
366
  X_train_scaled = pd.concat([X_scaled_numeric, X_region], axis=1)
379
367
 
380
- self.model.fit(X_train_scaled, y_train)
368
+ self.model.fit(X_train_scaled, self.y_train)
381
369
  except Exception as e:
382
370
  self.logger.error(
383
371
  f"Error fitting model for {self.country} {self.crop} {e}"
@@ -782,6 +770,14 @@ class Geocif:
782
770
  Returns:
783
771
 
784
772
  """
773
+ dir_output = (
774
+ self.dir_analysis
775
+ / self.country
776
+ / self.crop
777
+ / self.model_name
778
+ / str(self.forecast_season)
779
+ )
780
+
785
781
  from sklearn.preprocessing import StandardScaler
786
782
 
787
783
  scaler = StandardScaler() if self.model_name in ["linear", "gam"] else None
@@ -807,8 +803,6 @@ class Geocif:
807
803
  mask_train = self.df_train["Region_ID"] == region
808
804
  mask_test = self.df_test["Region_ID"] == region
809
805
 
810
- num_regions_in_cluster = self.df_train[mask_train]["Region"].unique()
811
-
812
806
  if self.cluster_strategy == "individual":
813
807
  region_name = self.df_train["Region"].unique()[idx]
814
808
  pbar.set_description(f"Fit/Predict for {region_name}")
@@ -836,12 +830,20 @@ class Geocif:
836
830
  if self.last_year_yield_as_feature:
837
831
  common_columns += [f"Last Year {self.target}"]
838
832
 
839
- """ Train """
833
+ """ Feature selection and then Train """
840
834
  # Filter dataframe based on region and self.feature_names
841
835
  df_region_train = self.df_train[mask_train]
842
836
  df_region_train = df_region_train[self.fixed_columns + common_columns]
843
837
  df_region_train.reset_index(drop=True, inplace=True)
844
- self.train(df_region_train, scaler)
838
+ df_region_train = df_region_train.dropna(subset=[self.target_column])
839
+
840
+ self.X_train = df_region_train[self.feature_names]
841
+ # Drop any columns with NaNs
842
+ self.X_train.dropna(axis=1, how="any", inplace=True)
843
+ self.y_train = df_region_train[self.target_column]
844
+
845
+ self.apply_feature_selector(region, dir_output)
846
+ self.train_model(df_region_train, scaler, dir_output)
845
847
 
846
848
  """ Predict """
847
849
  if self.check_yield_trend:
@@ -34,10 +34,12 @@ def are_all_features_non_eo(features):
34
34
 
35
35
  def select_features(
36
36
  X, y,
37
- method="RFE",
37
+ method="multi",
38
38
  min_features_to_select=3,
39
39
  threshold_nan=0.2,
40
- threshold_unique=0.6
40
+ threshold_unique=0.6,
41
+ dir_output=".",
42
+ region=None
41
43
  ):
42
44
  """
43
45
  Feature-selection wrapper supporting many methods plus a new 'multi' option.
@@ -75,8 +77,9 @@ def select_features(
75
77
  # --- multi-method ensemble -------------------------------
76
78
  if method == "multi":
77
79
  counter = Counter()
80
+ models = ["BorutaPy", "mrmr"]
78
81
  # run three selectors and count feature picks
79
- for sub_m in ["BorutaPy", "mrmr"]:
82
+ for sub_m in models:
80
83
  _, _, feats = select_features(
81
84
  X_clean, y,
82
85
  method=sub_m,
@@ -84,7 +87,6 @@ def select_features(
84
87
  threshold_nan=threshold_nan,
85
88
  threshold_unique=threshold_unique
86
89
  )
87
- print(sub_m, feats)
88
90
  counter.update(feats)
89
91
 
90
92
  # union of all features
@@ -97,12 +99,11 @@ def select_features(
97
99
  fig = freq.plot(kind="bar", width=0.9).get_figure()
98
100
  plt.title("Feature selection frequency across methods")
99
101
  plt.xlabel("Feature")
100
- plt.ylabel("Times selected (out of 3)")
102
+ plt.ylabel(f"Times selected (out of {len(models)})")
101
103
  plt.tight_layout()
102
104
 
103
- out_dir = Path("feature_selection_multi")
104
- out_dir.mkdir(parents=True, exist_ok=True)
105
- fig.savefig(out_dir / "feature_selection_frequency.png", dpi=300)
105
+ dir_output = dir_output / Path("feature_selection")
106
+ fig.savefig(dir_output / f"feature_selection_frequency_{region}.png", dpi=300)
106
107
  plt.close(fig)
107
108
 
108
109
  return None, X_out, combined
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.2.25
3
+ Version: 0.2.27
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.2.25",
53
+ version="0.2.27",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes