geocif 0.2.24__tar.gz → 0.2.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {geocif-0.2.24/geocif.egg-info → geocif-0.2.26}/PKG-INFO +1 -1
  2. {geocif-0.2.24 → geocif-0.2.26}/geocif/geocif.py +60 -62
  3. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/feature_selection.py +8 -8
  4. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/trainers.py +2 -2
  5. {geocif-0.2.24 → geocif-0.2.26/geocif.egg-info}/PKG-INFO +1 -1
  6. {geocif-0.2.24 → geocif-0.2.26}/setup.py +1 -1
  7. {geocif-0.2.24 → geocif-0.2.26}/LICENSE +0 -0
  8. {geocif-0.2.24 → geocif-0.2.26}/MANIFEST.in +0 -0
  9. {geocif-0.2.24 → geocif-0.2.26}/README.md +0 -0
  10. {geocif-0.2.24 → geocif-0.2.26}/geocif/__init__.py +0 -0
  11. {geocif-0.2.24 → geocif-0.2.26}/geocif/agmet/__init__.py +0 -0
  12. {geocif-0.2.24 → geocif-0.2.26}/geocif/agmet/geoagmet.py +0 -0
  13. {geocif-0.2.24 → geocif-0.2.26}/geocif/agmet/plot.py +0 -0
  14. {geocif-0.2.24 → geocif-0.2.26}/geocif/agmet/utils.py +0 -0
  15. {geocif-0.2.24 → geocif-0.2.26}/geocif/analysis.py +0 -0
  16. {geocif-0.2.24 → geocif-0.2.26}/geocif/backup/__init__.py +0 -0
  17. {geocif-0.2.24 → geocif-0.2.26}/geocif/backup/constants.py +0 -0
  18. {geocif-0.2.24 → geocif-0.2.26}/geocif/backup/features.py +0 -0
  19. {geocif-0.2.24 → geocif-0.2.26}/geocif/backup/geo.py +0 -0
  20. {geocif-0.2.24 → geocif-0.2.26}/geocif/backup/geocif.py +0 -0
  21. {geocif-0.2.24 → geocif-0.2.26}/geocif/backup/metadata.py +0 -0
  22. {geocif-0.2.24 → geocif-0.2.26}/geocif/backup/models.py +0 -0
  23. {geocif-0.2.24 → geocif-0.2.26}/geocif/cei/__init__.py +0 -0
  24. {geocif-0.2.24 → geocif-0.2.26}/geocif/cei/definitions.py +0 -0
  25. {geocif-0.2.24 → geocif-0.2.26}/geocif/cei/indices.py +0 -0
  26. {geocif-0.2.24 → geocif-0.2.26}/geocif/experiments.py +0 -0
  27. {geocif-0.2.24 → geocif-0.2.26}/geocif/geocif_runner.py +0 -0
  28. {geocif-0.2.24 → geocif-0.2.26}/geocif/indices_runner.py +0 -0
  29. {geocif-0.2.24 → geocif-0.2.26}/geocif/indices_runner_angola.py +0 -0
  30. {geocif-0.2.24 → geocif-0.2.26}/geocif/indices_runner_madagascar.py +0 -0
  31. {geocif-0.2.24 → geocif-0.2.26}/geocif/indices_runner_malawi.py +0 -0
  32. {geocif-0.2.24 → geocif-0.2.26}/geocif/indices_runner_mozambique.py +0 -0
  33. {geocif-0.2.24 → geocif-0.2.26}/geocif/indices_runner_south_africa.py +0 -0
  34. {geocif-0.2.24 → geocif-0.2.26}/geocif/indices_runner_zambia.py +0 -0
  35. {geocif-0.2.24 → geocif-0.2.26}/geocif/indices_runner_zimbabwe.py +0 -0
  36. {geocif-0.2.24 → geocif-0.2.26}/geocif/logger.py +0 -0
  37. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/__init__.py +0 -0
  38. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/correlations.py +0 -0
  39. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/embedding.py +0 -0
  40. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/feature_engineering.py +0 -0
  41. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/outliers.py +0 -0
  42. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/outlook.py +0 -0
  43. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/output.py +0 -0
  44. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/spatial_autocorrelation.py +0 -0
  45. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/stages.py +0 -0
  46. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/stats.py +0 -0
  47. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/trend.py +0 -0
  48. {geocif-0.2.24 → geocif-0.2.26}/geocif/ml/xai.py +0 -0
  49. {geocif-0.2.24 → geocif-0.2.26}/geocif/mm.py +0 -0
  50. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/__init__.py +0 -0
  51. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/aa.py +0 -0
  52. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/area.py +0 -0
  53. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/automl.py +0 -0
  54. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/download_esi.py +0 -0
  55. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/enso.py +0 -0
  56. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/eval.py +0 -0
  57. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/gamtest.py +0 -0
  58. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/gee_access.py +0 -0
  59. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/misc.py +0 -0
  60. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/play_xagg.py +0 -0
  61. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/reg.py +0 -0
  62. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/sustain.py +0 -0
  63. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/test_catboost.py +0 -0
  64. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/tmp.py +0 -0
  65. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/tmp2.py +0 -0
  66. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/tmp3.py +0 -0
  67. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/tmp4.py +0 -0
  68. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/tmp5.py +0 -0
  69. {geocif-0.2.24 → geocif-0.2.26}/geocif/playground/wolayita_maize_mask.py +0 -0
  70. {geocif-0.2.24 → geocif-0.2.26}/geocif/risk/__init__.py +0 -0
  71. {geocif-0.2.24 → geocif-0.2.26}/geocif/risk/impact_assessment.py +0 -0
  72. {geocif-0.2.24 → geocif-0.2.26}/geocif/utils.py +0 -0
  73. {geocif-0.2.24 → geocif-0.2.26}/geocif/viz/__init__.py +0 -0
  74. {geocif-0.2.24 → geocif-0.2.26}/geocif/viz/gt.py +0 -0
  75. {geocif-0.2.24 → geocif-0.2.26}/geocif/viz/plot.py +0 -0
  76. {geocif-0.2.24 → geocif-0.2.26}/geocif/viz/tmp.py +0 -0
  77. {geocif-0.2.24 → geocif-0.2.26}/geocif.egg-info/SOURCES.txt +0 -0
  78. {geocif-0.2.24 → geocif-0.2.26}/geocif.egg-info/dependency_links.txt +0 -0
  79. {geocif-0.2.24 → geocif-0.2.26}/geocif.egg-info/not-zip-safe +0 -0
  80. {geocif-0.2.24 → geocif-0.2.26}/geocif.egg-info/top_level.txt +0 -0
  81. {geocif-0.2.24 → geocif-0.2.26}/requirements.txt +0 -0
  82. {geocif-0.2.24 → geocif-0.2.26}/setup.cfg +0 -0
  83. {geocif-0.2.24 → geocif-0.2.26}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.2.24
3
+ Version: 0.2.26
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -179,6 +179,13 @@ class Geocif:
179
179
  "Production (tn)",
180
180
  ]
181
181
 
182
+ if self.model_type == "REGRESSION":
183
+ self.target_column = (
184
+ f"Detrended {self.target}" if self.check_yield_trend else self.target
185
+ )
186
+ elif self.model_type == "CLASSIFICATION":
187
+ self.target_column = self.target_class
188
+
182
189
  self.combined_dict = {
183
190
  **di.dict_indices,
184
191
  **di.dict_ndvi,
@@ -204,7 +211,30 @@ class Geocif:
204
211
  # obj_pickle = outlook.Outlook(self.pickle_file)
205
212
  # self.df_outlook = obj_pickle.read_outlook_file()
206
213
 
207
- def train(self, df_region, scaler=None):
214
+ def apply_feature_selector(self, dir_output):
215
+ if self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
216
+ all_features = self.X_train.columns
217
+
218
+ # Select the columns with use_ceis in it
219
+ self.selected_features = [
220
+ column
221
+ for column in all_features
222
+ if any(cei in column for cei in self.use_ceis)
223
+ ]
224
+ else:
225
+ self.logger.info(f"Selecting features for {self.country} {self.crop}")
226
+ selector, _, self.selected_features = fs.select_features(
227
+ self.X_train, self.y_train, method=self.feature_selection, dir_output=dir_output
228
+ )
229
+ self.logger.info(f"Selected features: {self.selected_features}")
230
+
231
+ """ Update model to include conformal estimates """
232
+ if "lat" not in self.selected_features and self.include_lat_lon_as_feature:
233
+ self.selected_features.append("lat")
234
+ if "lon" not in self.selected_features and self.include_lat_lon_as_feature:
235
+ self.selected_features.append("lon")
236
+
237
+ def train_model(self, df_region, scaler=None):
208
238
  """
209
239
 
210
240
  Args:
@@ -214,55 +244,9 @@ class Geocif:
214
244
  Returns:
215
245
 
216
246
  """
217
-
218
- """ Perform feature selection """
219
- if self.model_type == "REGRESSION":
220
- target_column = (
221
- f"Detrended {self.target}" if self.check_yield_trend else self.target
222
- )
223
- elif self.model_type == "CLASSIFICATION":
224
- target_column = self.target_class
225
-
226
- # Drop rows where target_column is NaN
227
- df_region = df_region.dropna(subset=[target_column])
228
-
229
- X_train = df_region[self.feature_names]
230
- # Drop any columns with NaNs
231
- X_train = X_train.dropna(axis=1, how="any")
232
- y_train = df_region[target_column]
233
-
234
247
  if self.ml_model:
235
- if self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
236
- all_features = X_train.columns
237
-
238
- # Select the columns with use_ceis in it
239
- self.selected_features = [
240
- column
241
- for column in all_features
242
- if any(cei in column for cei in self.use_ceis)
243
- ]
244
- else:
245
- self.logger.info(f"Selecting features for {self.country} {self.crop}")
246
- selector, _, self.selected_features = fs.select_features(
247
- X_train, y_train, method=self.feature_selection
248
- )
249
- self.logger.info(f"Selected features: {self.selected_features}")
250
-
251
- """ Update model to include conformal estimates """
252
- if "lat" not in self.selected_features and self.include_lat_lon_as_feature:
253
- self.selected_features.append("lat")
254
- if "lon" not in self.selected_features and self.include_lat_lon_as_feature:
255
- self.selected_features.append("lon")
256
248
  X_train = df_region[self.selected_features + self.cat_features]
257
249
 
258
- dir_output = (
259
- self.dir_analysis
260
- / self.country
261
- / self.crop
262
- / self.model_name
263
- / str(self.forecast_season)
264
- )
265
-
266
250
  region_id = df_region["Region_ID"].unique()[0]
267
251
  X_train.to_csv(dir_output / f"X_train_{region_id}.csv", index=False)
268
252
  if scaler:
@@ -284,9 +268,9 @@ class Geocif:
284
268
  "Harvest Year",
285
269
  df_region[self.selected_features + self.cat_features + [self.target]],
286
270
  X_train_scaled,
287
- y_train,
271
+ self.y_train,
288
272
  feature_names=self.selected_features,
289
- target_col=target_column,
273
+ target_col=self.target_column,
290
274
  optimize=self.optimize,
291
275
  fraction_loocv=self.fraction_loocv,
292
276
  cat_features=self.cat_features,
@@ -303,7 +287,7 @@ class Geocif:
303
287
  if self.model_name == "catboost":
304
288
  self.model.fit(
305
289
  X_train,
306
- y_train,
290
+ self.y_train,
307
291
  cat_features=self.cat_features,
308
292
  verbose=True,
309
293
  )
@@ -313,16 +297,16 @@ class Geocif:
313
297
  item for item in self.cat_features if item != "Harvest Year"
314
298
  ]
315
299
  )
316
- self.model.fit(X_train, y_train)
300
+ self.model.fit(X_train, self.y_train)
317
301
  elif self.model_name == "ydf":
318
302
  # Combine X_train and y_train
319
- df_train = pd.concat([X_train, y_train], axis=1)
303
+ df_train = pd.concat([X_train, self.y_train], axis=1)
320
304
 
321
305
  self.model = self.model.train(df_train)
322
306
  elif self.model_name == "geospaNN":
323
307
  self.model.fit(
324
308
  X_train,
325
- y_train,
309
+ self.y_train,
326
310
  # callbacks=[TQDMCallback(self.best_hyperparams["iterations"])],
327
311
  )
328
312
  elif self.model_name == "merf":
@@ -334,15 +318,15 @@ class Geocif:
334
318
  X_train,
335
319
  Z_train,
336
320
  clusters_train.astype("object"),
337
- y_train.values,
321
+ self.y_train.values,
338
322
  )
339
323
  elif self.model_name == "linear":
340
- self.model.fit(X_train_scaled, y_train)
324
+ self.model.fit(X_train_scaled, self.y_train)
341
325
  elif self.model_name == "gam":
342
- self.model.fit(X_train_scaled.values, y_train.values)
326
+ self.model.fit(X_train_scaled.values, self.y_train.values)
343
327
  self.best_hyperparams = {}
344
328
  elif self.model_name in ["cubist"]:
345
- self.model.fit(X_train, y_train)
329
+ self.model.fit(X_train, self.y_train)
346
330
  elif self.model_name in [
347
331
  "cumulative_1",
348
332
  "cumulative_2",
@@ -377,7 +361,7 @@ class Geocif:
377
361
  # Combine scaled numeric features and encoded region
378
362
  X_train_scaled = pd.concat([X_scaled_numeric, X_region], axis=1)
379
363
 
380
- self.model.fit(X_train_scaled, y_train)
364
+ self.model.fit(X_train_scaled, self.y_train)
381
365
  except Exception as e:
382
366
  self.logger.error(
383
367
  f"Error fitting model for {self.country} {self.crop} {e}"
@@ -782,6 +766,14 @@ class Geocif:
782
766
  Returns:
783
767
 
784
768
  """
769
+ dir_output = (
770
+ self.dir_analysis
771
+ / self.country
772
+ / self.crop
773
+ / self.model_name
774
+ / str(self.forecast_season)
775
+ )
776
+
785
777
  from sklearn.preprocessing import StandardScaler
786
778
 
787
779
  scaler = StandardScaler() if self.model_name in ["linear", "gam"] else None
@@ -807,8 +799,6 @@ class Geocif:
807
799
  mask_train = self.df_train["Region_ID"] == region
808
800
  mask_test = self.df_test["Region_ID"] == region
809
801
 
810
- num_regions_in_cluster = self.df_train[mask_train]["Region"].unique()
811
-
812
802
  if self.cluster_strategy == "individual":
813
803
  region_name = self.df_train["Region"].unique()[idx]
814
804
  pbar.set_description(f"Fit/Predict for {region_name}")
@@ -836,12 +826,20 @@ class Geocif:
836
826
  if self.last_year_yield_as_feature:
837
827
  common_columns += [f"Last Year {self.target}"]
838
828
 
839
- """ Train """
829
+ """ Feature selection and then Train """
840
830
  # Filter dataframe based on region and self.feature_names
841
831
  df_region_train = self.df_train[mask_train]
842
832
  df_region_train = df_region_train[self.fixed_columns + common_columns]
843
833
  df_region_train.reset_index(drop=True, inplace=True)
844
- self.train(df_region_train, scaler)
834
+ df_region_train = df_region_train.dropna(subset=[self.target_column])
835
+
836
+ self.X_train = df_region_train[self.feature_names]
837
+ # Drop any columns with NaNs
838
+ self.X_train.dropna(axis=1, how="any", inplace=True)
839
+ self.y_train = df_region_train[self.target_column]
840
+ breakpoint()
841
+ self.apply_feature_selector(dir_output)
842
+ self.train_model(df_region_train, scaler, dir_output)
845
843
 
846
844
  """ Predict """
847
845
  if self.check_yield_trend:
@@ -34,10 +34,11 @@ def are_all_features_non_eo(features):
34
34
 
35
35
  def select_features(
36
36
  X, y,
37
- method="RFE",
37
+ method="multi",
38
38
  min_features_to_select=3,
39
39
  threshold_nan=0.2,
40
- threshold_unique=0.6
40
+ threshold_unique=0.6,
41
+ dir_output="."
41
42
  ):
42
43
  """
43
44
  Feature-selection wrapper supporting many methods plus a new 'multi' option.
@@ -75,8 +76,9 @@ def select_features(
75
76
  # --- multi-method ensemble -------------------------------
76
77
  if method == "multi":
77
78
  counter = Counter()
79
+ models = ["BorutaPy", "mrmr"]
78
80
  # run three selectors and count feature picks
79
- for sub_m in ["BorutaPy", "mrmr"]:
81
+ for sub_m in models:
80
82
  _, _, feats = select_features(
81
83
  X_clean, y,
82
84
  method=sub_m,
@@ -84,7 +86,6 @@ def select_features(
84
86
  threshold_nan=threshold_nan,
85
87
  threshold_unique=threshold_unique
86
88
  )
87
- print(sub_m, feats)
88
89
  counter.update(feats)
89
90
 
90
91
  # union of all features
@@ -97,12 +98,11 @@ def select_features(
97
98
  fig = freq.plot(kind="bar", width=0.9).get_figure()
98
99
  plt.title("Feature selection frequency across methods")
99
100
  plt.xlabel("Feature")
100
- plt.ylabel("Times selected (out of 3)")
101
+ plt.ylabel(f"Times selected (out of {len(models)})")
101
102
  plt.tight_layout()
102
103
 
103
- out_dir = Path("feature_selection_multi")
104
- out_dir.mkdir(parents=True, exist_ok=True)
105
- fig.savefig(out_dir / "feature_selection_frequency.png", dpi=300)
104
+ dir_output = dir_output / Path("feature_selection")
105
+ fig.savefig(dir_output / "feature_selection_frequency.png", dpi=300)
106
106
  plt.close(fig)
107
107
 
108
108
  return None, X_out, combined
@@ -268,7 +268,7 @@ def auto_train(
268
268
  loss_function = "MAPE" if model_type == "REGRESSION" else "MultiClass"
269
269
  bootstrap_type = "Bernoulli" if model_type == "CLASSIFICATION" else "MVS"
270
270
  hyperparams = {
271
- "iterations": 2500,
271
+ "iterations": 1500,
272
272
  "learning_rate": 0.025,
273
273
  "depth": 6,
274
274
  "subsample": 1.0,
@@ -278,7 +278,7 @@ def auto_train(
278
278
  "loss_function": loss_function,
279
279
  "early_stopping_rounds": 20,
280
280
  "random_seed": seed,
281
- "verbose": True,
281
+ "verbose": False,
282
282
  }
283
283
 
284
284
  if model_name == "catboost":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.2.24
3
+ Version: 0.2.26
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.2.24",
53
+ version="0.2.26",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes