geocif 0.2.59__tar.gz → 0.2.60__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. {geocif-0.2.59/geocif.egg-info → geocif-0.2.60}/PKG-INFO +1 -1
  2. {geocif-0.2.59 → geocif-0.2.60}/geocif/geocif.py +141 -125
  3. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/trainers.py +15 -1
  4. {geocif-0.2.59 → geocif-0.2.60/geocif.egg-info}/PKG-INFO +1 -1
  5. {geocif-0.2.59 → geocif-0.2.60}/setup.py +1 -1
  6. {geocif-0.2.59 → geocif-0.2.60}/LICENSE +0 -0
  7. {geocif-0.2.59 → geocif-0.2.60}/MANIFEST.in +0 -0
  8. {geocif-0.2.59 → geocif-0.2.60}/README.md +0 -0
  9. {geocif-0.2.59 → geocif-0.2.60}/geocif/__init__.py +0 -0
  10. {geocif-0.2.59 → geocif-0.2.60}/geocif/agmet/__init__.py +0 -0
  11. {geocif-0.2.59 → geocif-0.2.60}/geocif/agmet/geoagmet.py +0 -0
  12. {geocif-0.2.59 → geocif-0.2.60}/geocif/agmet/plot.py +0 -0
  13. {geocif-0.2.59 → geocif-0.2.60}/geocif/agmet/utils.py +0 -0
  14. {geocif-0.2.59 → geocif-0.2.60}/geocif/analysis.py +0 -0
  15. {geocif-0.2.59 → geocif-0.2.60}/geocif/backup/__init__.py +0 -0
  16. {geocif-0.2.59 → geocif-0.2.60}/geocif/backup/constants.py +0 -0
  17. {geocif-0.2.59 → geocif-0.2.60}/geocif/backup/features.py +0 -0
  18. {geocif-0.2.59 → geocif-0.2.60}/geocif/backup/geo.py +0 -0
  19. {geocif-0.2.59 → geocif-0.2.60}/geocif/backup/geocif.py +0 -0
  20. {geocif-0.2.59 → geocif-0.2.60}/geocif/backup/metadata.py +0 -0
  21. {geocif-0.2.59 → geocif-0.2.60}/geocif/backup/models.py +0 -0
  22. {geocif-0.2.59 → geocif-0.2.60}/geocif/cei/__init__.py +0 -0
  23. {geocif-0.2.59 → geocif-0.2.60}/geocif/cei/definitions.py +0 -0
  24. {geocif-0.2.59 → geocif-0.2.60}/geocif/cei/indices.py +0 -0
  25. {geocif-0.2.59 → geocif-0.2.60}/geocif/experiments.py +0 -0
  26. {geocif-0.2.59 → geocif-0.2.60}/geocif/geocif_runner.py +0 -0
  27. {geocif-0.2.59 → geocif-0.2.60}/geocif/indices_runner.py +0 -0
  28. {geocif-0.2.59 → geocif-0.2.60}/geocif/indices_runner_algeria.py +0 -0
  29. {geocif-0.2.59 → geocif-0.2.60}/geocif/indices_runner_angola.py +0 -0
  30. {geocif-0.2.59 → geocif-0.2.60}/geocif/indices_runner_madagascar.py +0 -0
  31. {geocif-0.2.59 → geocif-0.2.60}/geocif/indices_runner_malawi.py +0 -0
  32. {geocif-0.2.59 → geocif-0.2.60}/geocif/indices_runner_mozambique.py +0 -0
  33. {geocif-0.2.59 → geocif-0.2.60}/geocif/indices_runner_south_africa.py +0 -0
  34. {geocif-0.2.59 → geocif-0.2.60}/geocif/indices_runner_zambia.py +0 -0
  35. {geocif-0.2.59 → geocif-0.2.60}/geocif/indices_runner_zimbabwe.py +0 -0
  36. {geocif-0.2.59 → geocif-0.2.60}/geocif/logger.py +0 -0
  37. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/__init__.py +0 -0
  38. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/correlations.py +0 -0
  39. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/embedding.py +0 -0
  40. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/feature_engineering.py +0 -0
  41. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/feature_selection.py +0 -0
  42. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/outliers.py +0 -0
  43. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/outlook.py +0 -0
  44. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/output.py +0 -0
  45. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/spatial_autocorrelation.py +0 -0
  46. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/stages.py +0 -0
  47. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/stats.py +0 -0
  48. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/trend.py +0 -0
  49. {geocif-0.2.59 → geocif-0.2.60}/geocif/ml/xai.py +0 -0
  50. {geocif-0.2.59 → geocif-0.2.60}/geocif/mm.py +0 -0
  51. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/__init__.py +0 -0
  52. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/aa.py +0 -0
  53. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/aaaa.py +0 -0
  54. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/area.py +0 -0
  55. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/automl.py +0 -0
  56. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/download_esi.py +0 -0
  57. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/enso.py +0 -0
  58. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/eval.py +0 -0
  59. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/gamtest.py +0 -0
  60. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/gee_access.py +0 -0
  61. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/misc.py +0 -0
  62. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/play_xagg.py +0 -0
  63. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/reg.py +0 -0
  64. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/sustain.py +0 -0
  65. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/test_catboost.py +0 -0
  66. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/tmp.py +0 -0
  67. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/tmp2.py +0 -0
  68. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/tmp3.py +0 -0
  69. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/tmp4.py +0 -0
  70. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/tmp5.py +0 -0
  71. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/wolayita.py +0 -0
  72. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/wolayita_maize_mask.py +0 -0
  73. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/wolayita_v2.py +0 -0
  74. {geocif-0.2.59 → geocif-0.2.60}/geocif/playground/wolayita_v3.py +0 -0
  75. {geocif-0.2.59 → geocif-0.2.60}/geocif/risk/__init__.py +0 -0
  76. {geocif-0.2.59 → geocif-0.2.60}/geocif/risk/impact_assessment.py +0 -0
  77. {geocif-0.2.59 → geocif-0.2.60}/geocif/utils.py +0 -0
  78. {geocif-0.2.59 → geocif-0.2.60}/geocif/viz/__init__.py +0 -0
  79. {geocif-0.2.59 → geocif-0.2.60}/geocif/viz/gt.py +0 -0
  80. {geocif-0.2.59 → geocif-0.2.60}/geocif/viz/plot.py +0 -0
  81. {geocif-0.2.59 → geocif-0.2.60}/geocif/viz/tmp.py +0 -0
  82. {geocif-0.2.59 → geocif-0.2.60}/geocif/viz/viz_ml.py +0 -0
  83. {geocif-0.2.59 → geocif-0.2.60}/geocif.egg-info/SOURCES.txt +0 -0
  84. {geocif-0.2.59 → geocif-0.2.60}/geocif.egg-info/dependency_links.txt +0 -0
  85. {geocif-0.2.59 → geocif-0.2.60}/geocif.egg-info/not-zip-safe +0 -0
  86. {geocif-0.2.59 → geocif-0.2.60}/geocif.egg-info/top_level.txt +0 -0
  87. {geocif-0.2.59 → geocif-0.2.60}/requirements.txt +0 -0
  88. {geocif-0.2.59 → geocif-0.2.60}/setup.cfg +0 -0
  89. {geocif-0.2.59 → geocif-0.2.60}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.2.59
3
+ Version: 0.2.60
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -249,140 +249,155 @@ class Geocif:
249
249
  Returns:
250
250
 
251
251
  """
252
- if self.ml_model:
253
- X_train = df_region[self.selected_features + self.cat_features]
252
+ X_train = df_region[self.selected_features + self.cat_features]
253
+
254
+ # Drop columns in X_train that have any NaNs, log the number of columns dropped
255
+ initial_columns = X_train.shape[1]
256
+ X_train = X_train.dropna(axis=1, how="any")
257
+ dropped_columns = initial_columns - X_train.shape[1]
258
+ if dropped_columns > 0:
259
+ self.logger.info(
260
+ f"Dropped {dropped_columns} columns with NaNs from X_train for {self.country} {self.crop}"
261
+ )
262
+ # Reset index of X_train
263
+ X_train.reset_index(drop=True, inplace=True)
264
+
265
+ region_id = df_region["Region_ID"].unique()[0]
266
+ X_train.to_csv(dir_output / f"X_train_{region_id}.csv", index=False)
267
+ if scaler:
268
+ X_train_nocat = X_train.drop(
269
+ columns=[
270
+ item for item in self.cat_features if item != "Harvest Year"
271
+ ]
272
+ )
273
+ X_train_scaled = scaler.fit_transform(X_train_nocat)
274
+ else:
275
+ X_train_scaled = X_train
276
+
277
+ """ Train model """
278
+ self.best_hyperparams, self.model = trainers.auto_train(
279
+ self.cluster_strategy,
280
+ self.model_name,
281
+ self.model_type,
282
+ False,
283
+ "Harvest Year",
284
+ df_region[self.selected_features + self.cat_features + [self.target]],
285
+ X_train_scaled,
286
+ self.y_train,
287
+ feature_names=self.selected_features,
288
+ target_col=self.target_column,
289
+ optimize=self.optimize,
290
+ fraction_loocv=self.fraction_loocv,
291
+ cat_features=self.cat_features,
292
+ )
293
+
294
+ """ Estimate CI only if flag is True """
295
+ if self.estimate_ci:
296
+ if self.estimate_ci_for_all or self.forecast_season == self.today_year:
297
+ self.model = trainers.estimate_ci(
298
+ self.model_type, self.model_name, self.model
299
+ )
254
300
 
255
- region_id = df_region["Region_ID"].unique()[0]
256
- X_train.to_csv(dir_output / f"X_train_{region_id}.csv", index=False)
257
- if scaler:
258
- X_train_nocat = X_train.drop(
301
+ try:
302
+ if self.model_name == "catboost":
303
+ self.model.fit(
304
+ X_train,
305
+ self.y_train,
306
+ cat_features=self.cat_features,
307
+ verbose=False,
308
+ )
309
+ elif self.model_name in ["tabpfn"]:
310
+ # Identify the column indices for cat_features in X_train
311
+ if self.cat_features is None:
312
+ cat_feature_indices = []
313
+ cat_feature_indices = [X_train.columns.get_loc(col) for col in self.cat_features if
314
+ col in X_train.columns]
315
+
316
+ self.model.fit(X_train, self.y_train, categorical_feature_indices=cat_feature_indices)
317
+ elif self.model_name in ["ngboost", "oblique"]:
318
+ X_train = X_train.drop(
259
319
  columns=[
260
320
  item for item in self.cat_features if item != "Harvest Year"
261
321
  ]
262
322
  )
263
- X_train_scaled = scaler.fit_transform(X_train_nocat)
264
- else:
265
- X_train_scaled = X_train
266
-
267
- """ Train model """
268
- self.best_hyperparams, self.model = trainers.auto_train(
269
- self.cluster_strategy,
270
- self.model_name,
271
- self.model_type,
272
- False,
273
- "Harvest Year",
274
- df_region[self.selected_features + self.cat_features + [self.target]],
275
- X_train_scaled,
276
- self.y_train,
277
- feature_names=self.selected_features,
278
- target_col=self.target_column,
279
- optimize=self.optimize,
280
- fraction_loocv=self.fraction_loocv,
281
- cat_features=self.cat_features,
282
- )
283
323
 
284
- """ Estimate CI only if flag is True """
285
- if self.estimate_ci:
286
- if self.estimate_ci_for_all or self.forecast_season == self.today_year:
287
- self.model = trainers.estimate_ci(
288
- self.model_type, self.model_name, self.model
289
- )
290
-
291
- try:
292
- if self.model_name == "catboost":
293
- self.model.fit(
294
- X_train,
295
- self.y_train,
296
- cat_features=self.cat_features,
297
- verbose=False,
298
- )
299
- elif self.model_name in ["tabpfn"]:
300
- # Identify the column indices for cat_features in X_train
301
- if self.cat_features is None:
302
- cat_feature_indices = []
303
- cat_feature_indices = [X_train.columns.get_loc(col) for col in self.cat_features if
304
- col in X_train.columns]
305
-
306
- self.model.fit(X_train, self.y_train, categorical_feature_indices=cat_feature_indices)
307
- elif self.model_name in ["ngboost", "oblique"]:
308
- X_train = X_train.drop(
309
- columns=[
310
- item for item in self.cat_features if item != "Harvest Year"
311
- ]
312
- )
324
+ self.model.fit(X_train, self.y_train)
325
+ elif self.model_name == "ydf":
326
+ # Combine X_train and y_train
327
+ df_train = pd.concat([X_train, self.y_train], axis=1)
313
328
 
314
- self.model.fit(X_train, self.y_train)
315
- elif self.model_name == "ydf":
316
- # Combine X_train and y_train
317
- df_train = pd.concat([X_train, self.y_train], axis=1)
318
-
319
- self.model = self.model.train(df_train)
320
- elif self.model_name == "geospaNN":
321
- self.model.fit(
322
- X_train,
323
- self.y_train,
324
- # callbacks=[TQDMCallback(self.best_hyperparams["iterations"])],
325
- )
326
- elif self.model_name == "merf":
327
- Z_train = np.ones((len(X_train), 1))
328
- clusters_train = df_region["Region"]
329
- clusters_train.reset_index(drop=True, inplace=True)
330
-
331
- self.model.fit(
332
- X_train,
333
- Z_train,
334
- clusters_train.astype("object"),
335
- self.y_train.values,
336
- )
337
- elif self.model_name == "linear":
338
- self.model.fit(X_train_scaled, self.y_train)
339
- elif self.model_name == "gam":
340
- self.model.fit(X_train_scaled, self.y_train.values)
341
- self.best_hyperparams = {}
342
- elif self.model_name in ["cubist"]:
343
- self.model.fit(X_train, self.y_train)
344
- elif self.model_name in [
345
- "cumulative_1",
346
- "cumulative_2",
347
- "cumulative_3",
348
- ]:
349
- from sklearn.preprocessing import StandardScaler, LabelEncoder
350
-
351
- if self.model_name == "cumulative_1":
352
- num_columns = 1
353
- elif self.model_name == "cumulative_2":
354
- num_columns = 2
355
- elif self.model_name == "cumulative_3":
356
- num_columns = 3
357
-
358
- # Standardize the numeric features
359
- scaler = StandardScaler()
360
- X_numeric = X_train.iloc[:, :num_columns]
361
- X_scaled_numeric = pd.DataFrame(
362
- scaler.fit_transform(X_numeric),
363
- columns=X_numeric.columns,
364
- index=X_train.index,
365
- )
329
+ self.model = self.model.train(df_train)
330
+ elif self.model_name == "geospaNN":
331
+ self.model.fit(
332
+ X_train,
333
+ self.y_train,
334
+ # callbacks=[TQDMCallback(self.best_hyperparams["iterations"])],
335
+ )
336
+ elif self.model_name == "merf":
337
+ Z_train = np.ones((len(X_train), 1))
338
+ clusters_train = df_region["Region"]
339
+ clusters_train.reset_index(drop=True, inplace=True)
340
+
341
+ self.model.fit(
342
+ X_train,
343
+ Z_train,
344
+ clusters_train.astype("object"),
345
+ self.y_train.values,
346
+ )
347
+ elif self.model_name == "linear":
348
+ self.model.fit(X_train_scaled, self.y_train)
349
+ elif self.model_name == "gam":
350
+ self.model.fit(X_train_scaled, self.y_train.values)
351
+ self.best_hyperparams = {}
352
+ elif self.model_name in ["cubist"]:
353
+ self.model.fit(X_train, self.y_train)
354
+ elif self.model_name in [
355
+ "cumulative_1",
356
+ "cumulative_2",
357
+ "cumulative_3",
358
+ ]:
359
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
366
360
 
367
- # Encode the Region as categorical
368
- le = LabelEncoder()
369
- X_region = pd.Series(
370
- le.fit_transform(X_train["Region"]),
371
- name="Region",
372
- index=X_train.index,
373
- )
361
+ if self.model_name == "cumulative_1":
362
+ num_columns = 1
363
+ elif self.model_name == "cumulative_2":
364
+ num_columns = 2
365
+ elif self.model_name == "cumulative_3":
366
+ num_columns = 3
374
367
 
375
- # Combine scaled numeric features and encoded region
376
- X_train_scaled = pd.concat([X_scaled_numeric, X_region], axis=1)
368
+ # Standardize the numeric features
369
+ scaler = StandardScaler()
370
+ X_numeric = X_train.iloc[:, :num_columns]
371
+ X_scaled_numeric = pd.DataFrame(
372
+ scaler.fit_transform(X_numeric),
373
+ columns=X_numeric.columns,
374
+ index=X_train.index,
375
+ )
377
376
 
378
- self.model.fit(X_train_scaled, self.y_train)
379
- elif self.model_name in ["desreg"]:
380
- self.model.fit(X_train, self.y_train)
381
- except Exception as e:
382
- self.logger.error(
383
- f"Error fitting model for {self.country} {self.crop} {e}"
377
+ # Encode the Region as categorical
378
+ le = LabelEncoder()
379
+ X_region = pd.Series(
380
+ le.fit_transform(X_train["Region"]),
381
+ name="Region",
382
+ index=X_train.index,
384
383
  )
385
384
 
385
+ # Combine scaled numeric features and encoded region
386
+ X_train_scaled = pd.concat([X_scaled_numeric, X_region], axis=1)
387
+
388
+ self.model.fit(X_train_scaled, self.y_train)
389
+ elif self.model_name in ["desreg"]:
390
+ # Convert any string columns to categorical
391
+
392
+ # Fit the model
393
+ breakpoint()
394
+ self.model.fit(X_train, self.y_train)
395
+ except Exception as e:
396
+ self.logger.error(
397
+ f"Error fitting model for {self.country} {self.crop} {e}"
398
+ )
399
+ breakpoint()
400
+
386
401
  def predict(self, df_region, scaler=None):
387
402
  """
388
403
  Predict yield for the current stage
@@ -864,7 +879,8 @@ class Geocif:
864
879
  elif self.cluster_strategy in ["auto_detect", "single"]:
865
880
  pbar.set_description(f"Fit/Predict for group {idx + 1}")
866
881
  pbar.update()
867
- self.train_model(df_region_train, dir_output, scaler)
882
+ if self.ml_model:
883
+ self.train_model(df_region_train, dir_output, scaler)
868
884
 
869
885
  """ Predict """
870
886
  if self.check_yield_trend:
@@ -1057,9 +1073,9 @@ class Geocif:
1057
1073
  df, self.all_seasons_with_yield, self.number_median_years, self.target
1058
1074
  )
1059
1075
 
1060
- df = fe.compute_user_median_statistics(df, range(2018, 2023))
1076
+ df = fe.compute_user_median_statistics(df, range(2018, 2023), self.target)
1061
1077
 
1062
- df = fe.compute_user_median_statistics(df, range(2013, 2018))
1078
+ df = fe.compute_user_median_statistics(df, range(2013, 2018), self.target)
1063
1079
 
1064
1080
  if self.median_area_as_feature:
1065
1081
  df = fe.compute_median_statistics(
@@ -329,7 +329,21 @@ def auto_train(
329
329
  model_tabpfn = AutoTabPFNRegressor(max_time=600,
330
330
  # categorical_feature_indices=cat_feature_indices,
331
331
  ignore_pretraining_limits=True)
332
- model = DESRegression(regressors_list=[model_catboost, model_tabpfn])
332
+
333
+ import ydf
334
+ templates = ydf.GradientBoostedTreesLearner.hyperparameter_templates()
335
+ task = ydf.Task.REGRESSION if model_type == "REGRESSION" else ydf.Task.CLASSIFICATION
336
+ model_ydf = ydf.GradientBoostedTreesLearner(
337
+ label=target_col, task=task,
338
+ growing_strategy='BEST_FIRST_GLOBAL',
339
+ categorical_algorithm='RANDOM',
340
+ split_axis='SPARSE_OBLIQUE',
341
+ sparse_oblique_normalization='MIN_MAX',
342
+ sparse_oblique_num_projections_exponent=2.0
343
+ )
344
+ hyperparams = templates["benchmark_rank1v1"]
345
+
346
+ model = DESRegression(regressors_list=[model_catboost, model_ydf])
333
347
  elif model_name == "ngboost":
334
348
  if model_type == "REGRESSION":
335
349
  from ngboost import NGBRegressor
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.2.59
3
+ Version: 0.2.60
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.2.59",
53
+ version="0.2.60",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes