geocif 0.1.38__tar.gz → 0.1.40__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {geocif-0.1.38/geocif.egg-info → geocif-0.1.40}/PKG-INFO +1 -1
  2. {geocif-0.1.38 → geocif-0.1.40}/geocif/experiments.py +15 -11
  3. {geocif-0.1.38 → geocif-0.1.40}/geocif/geocif.py +3 -1
  4. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/feature_selection.py +47 -18
  5. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/xai.py +2 -1
  6. {geocif-0.1.38 → geocif-0.1.40/geocif.egg-info}/PKG-INFO +1 -1
  7. {geocif-0.1.38 → geocif-0.1.40}/setup.py +1 -1
  8. {geocif-0.1.38 → geocif-0.1.40}/LICENSE +0 -0
  9. {geocif-0.1.38 → geocif-0.1.40}/MANIFEST.in +0 -0
  10. {geocif-0.1.38 → geocif-0.1.40}/README.md +0 -0
  11. {geocif-0.1.38 → geocif-0.1.40}/geocif/__init__.py +0 -0
  12. {geocif-0.1.38 → geocif-0.1.40}/geocif/agmet/__init__.py +0 -0
  13. {geocif-0.1.38 → geocif-0.1.40}/geocif/agmet/geoagmet.py +0 -0
  14. {geocif-0.1.38 → geocif-0.1.40}/geocif/agmet/plot.py +0 -0
  15. {geocif-0.1.38 → geocif-0.1.40}/geocif/agmet/utils.py +0 -0
  16. {geocif-0.1.38 → geocif-0.1.40}/geocif/analysis.py +0 -0
  17. {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/__init__.py +0 -0
  18. {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/constants.py +0 -0
  19. {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/features.py +0 -0
  20. {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/geo.py +0 -0
  21. {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/geocif.py +0 -0
  22. {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/metadata.py +0 -0
  23. {geocif-0.1.38 → geocif-0.1.40}/geocif/backup/models.py +0 -0
  24. {geocif-0.1.38 → geocif-0.1.40}/geocif/cei/__init__.py +0 -0
  25. {geocif-0.1.38 → geocif-0.1.40}/geocif/cei/definitions.py +0 -0
  26. {geocif-0.1.38 → geocif-0.1.40}/geocif/cei/indices.py +0 -0
  27. {geocif-0.1.38 → geocif-0.1.40}/geocif/indices_runner.py +0 -0
  28. {geocif-0.1.38 → geocif-0.1.40}/geocif/indices_runner_v2.py +0 -0
  29. {geocif-0.1.38 → geocif-0.1.40}/geocif/logger.py +0 -0
  30. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/__init__.py +0 -0
  31. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/correlations.py +0 -0
  32. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/correlations_backup.py +0 -0
  33. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/embedding.py +0 -0
  34. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/feature_engineering.py +0 -0
  35. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/outliers.py +0 -0
  36. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/outlook.py +0 -0
  37. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/output.py +0 -0
  38. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/spatial_autocorrelation.py +0 -0
  39. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/stages.py +0 -0
  40. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/stats.py +0 -0
  41. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/trainers.py +0 -0
  42. {geocif-0.1.38 → geocif-0.1.40}/geocif/ml/trend.py +0 -0
  43. {geocif-0.1.38 → geocif-0.1.40}/geocif/playground/__init__.py +0 -0
  44. {geocif-0.1.38 → geocif-0.1.40}/geocif/playground/automl.py +0 -0
  45. {geocif-0.1.38 → geocif-0.1.40}/geocif/playground/misc.py +0 -0
  46. {geocif-0.1.38 → geocif-0.1.40}/geocif/utils.py +0 -0
  47. {geocif-0.1.38 → geocif-0.1.40}/geocif/viz/__init__.py +0 -0
  48. {geocif-0.1.38 → geocif-0.1.40}/geocif/viz/plot.py +0 -0
  49. {geocif-0.1.38 → geocif-0.1.40}/geocif.egg-info/SOURCES.txt +0 -0
  50. {geocif-0.1.38 → geocif-0.1.40}/geocif.egg-info/dependency_links.txt +0 -0
  51. {geocif-0.1.38 → geocif-0.1.40}/geocif.egg-info/not-zip-safe +0 -0
  52. {geocif-0.1.38 → geocif-0.1.40}/geocif.egg-info/top_level.txt +0 -0
  53. {geocif-0.1.38 → geocif-0.1.40}/requirements.txt +0 -0
  54. {geocif-0.1.38 → geocif-0.1.40}/setup.cfg +0 -0
  55. {geocif-0.1.38 → geocif-0.1.40}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.38
3
+ Version: 0.1.40
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -14,7 +14,11 @@ import warnings
14
14
  warnings.simplefilter(action="ignore", category=FutureWarning)
15
15
 
16
16
 
17
- def run(inputs, logger, parser, section, item, type, values):
17
+ def main(inputs, logger, parser, section, item, type, values):
18
+ # Set experiment_name
19
+ experiment_name = f"{section}_{item}"
20
+ parser.set("DEFAULT", "experiment_name", experiment_name)
21
+
18
22
  if type == "str":
19
23
  original_value = parser.get(section, item)
20
24
  elif type == "bool":
@@ -41,7 +45,7 @@ def run(inputs, logger, parser, section, item, type, values):
41
45
  return parser
42
46
 
43
47
 
44
- def main(path_config_files=[Path("../config/geocif.txt")]):
48
+ def run(path_config_files=[Path("../config/geocif.txt")]):
45
49
  logger, parser = log.setup_logger_parser(path_config_files)
46
50
  inputs = gc.gather_inputs(parser)
47
51
 
@@ -51,13 +55,13 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
51
55
 
52
56
  # Experiment 1: include_lat_lon
53
57
  logger.info("Experiment 1: include_lat_lon")
54
- parser = run(
58
+ parser = main(
55
59
  inputs, logger, parser, "ML", "include_lat_lon", "bool", [True, False]
56
60
  )
57
61
 
58
62
  # Experiment 2: feature_selection
59
63
  logger.info("Experiment 2: feature_selection")
60
- parser = run(
64
+ parser = main(
61
65
  inputs,
62
66
  logger,
63
67
  parser,
@@ -69,13 +73,13 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
69
73
 
70
74
  # Experiment 3: lag_years
71
75
  logger.info("Experiment 3: lag_years")
72
- parser = run(
76
+ parser = main(
73
77
  inputs, logger, parser, "ML", "lag_years", "int", [1, 2, 3, 4, 5]
74
78
  )
75
79
 
76
80
  # Experiment 4: lag_yield_as_feature
77
81
  logger.info("Experiment 4: lag_yield_as_feature")
78
- parser = run(
82
+ parser = main(
79
83
  inputs,
80
84
  logger,
81
85
  parser,
@@ -87,13 +91,13 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
87
91
 
88
92
  # Experiment 5: median_years
89
93
  logger.info("Experiment 5: median_years")
90
- parser = run(
94
+ parser = main(
91
95
  inputs, logger, parser, "ML", "median_years", "int", [2, 3, 4, 5]
92
96
  )
93
97
 
94
98
  # Experiment 6: median_yield_as_feature
95
99
  logger.info("Experiment 6: median_yield_as_feature")
96
- parser = run(
100
+ parser = main(
97
101
  inputs,
98
102
  logger,
99
103
  parser,
@@ -105,7 +109,7 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
105
109
 
106
110
  # Experiment 7: analogous_year_yield_as_feature
107
111
  logger.info("Experiment 7: analogous_year_yield_as_feature")
108
- parser = run(
112
+ parser = main(
109
113
  inputs,
110
114
  logger,
111
115
  parser,
@@ -117,10 +121,10 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
117
121
 
118
122
  # Experiment 8: optimize
119
123
  logger.info("Experiment 8: optimize")
120
- parser = run(
124
+ parser = main(
121
125
  inputs, logger, parser, "DEFAULT", "optimize", "bool", [True, False]
122
126
  )
123
127
 
124
128
 
125
129
  if __name__ == "__main__":
126
- main()
130
+ run()
@@ -535,7 +535,7 @@ class Geocif:
535
535
  cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
536
536
 
537
537
  # Check if any element of dict_selected_features is in _t
538
- for x in selected_features:
538
+ for x in selected_features["CEI"].values:
539
539
  if x not in cei:
540
540
  continue
541
541
 
@@ -644,6 +644,7 @@ class Geocif:
644
644
  kwargs = {
645
645
  "cluster_strategy": self.cluster_strategy,
646
646
  "model": self.model,
647
+ "model_name": self.model_name,
647
648
  "forecast_season": self.forecast_season,
648
649
  "crop": self.crop,
649
650
  "country": self.country,
@@ -659,6 +660,7 @@ class Geocif:
659
660
  model = self.model.estimator_
660
661
  else:
661
662
  model = self.model
663
+
662
664
  output.store(self.db_path, experiment_id, df, model, self.model_name)
663
665
 
664
666
  def get_cei_column_names(self, df):
@@ -3,6 +3,31 @@ from tqdm import tqdm
3
3
  from sklearn.ensemble import RandomForestRegressor
4
4
 
5
5
 
6
+ def are_all_features_non_eo(features):
7
+ """
8
+ Check if all the features non eo features
9
+
10
+ Args:
11
+ feature:
12
+
13
+ Returns:
14
+
15
+ """
16
+ non_eo_features = ['Median Yield (tn per ha)',
17
+ 'Analogous Year',
18
+ 'Analogous Year Yield',
19
+ 'lon',
20
+ 'lat',
21
+ 't -1 Yield (tn per ha)',
22
+ 't -2 Yield (tn per ha)',
23
+ 't -3 Yield (tn per ha)',
24
+ 't -4 Yield (tn per ha)',
25
+ 't -5 Yield (tn per ha)']
26
+
27
+ # Check if all features are non-eo features, return True if they are
28
+ return all(feature in non_eo_features for feature in features)
29
+
30
+
6
31
  def select_features(X, y, method="RFE", min_features_to_select=3):
7
32
  """
8
33
 
@@ -29,6 +54,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
29
54
  # selector = VarianceThreshold(threshold=scaled_data.var().mean())
30
55
  # X = selector.fit_transform(scaled_data)
31
56
  selector = None
57
+ X_original = X.copy()
32
58
 
33
59
  # Fill in columns with median of that column
34
60
  X = X.fillna(X.median())
@@ -46,24 +72,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
46
72
  np.float = np.float64
47
73
  np.bool = np.bool_
48
74
 
49
- if method == "SelectKBest":
50
- from sklearn.feature_selection import SelectKBest, f_regression
51
-
52
- k = 15 # Number of features to select
53
- selector = SelectKBest(score_func=f_regression, k=k)
54
-
55
- # Fit the selector to the data and transform the data to select the best features
56
- try:
57
- X_new = selector.fit_transform(X, y)
58
- except:
59
- breakpoint()
60
-
61
- # Get the selected feature indices
62
- selected_features = selector.get_support(indices=True)
63
-
64
- # Get the selected feature names
65
- selected_features = X.columns[selected_features].tolist()
66
- elif method == "SHAP":
75
+ if method == "SHAP":
67
76
  import pandas as pd
68
77
  from catboost import CatBoostRegressor
69
78
  from fasttreeshap import TreeExplainer as FastTreeExplainer
@@ -257,6 +266,26 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
257
266
  raise ValueError("Method not recognized. Use BorutaPy, Genetic, or RFE")
258
267
  # tentative_features = X.columns[selector.support_weak_].tolist()
259
268
 
269
+ non_eo = are_all_features_non_eo(selected_features)
270
+ if non_eo or method == "SelectKBest":
271
+ from sklearn.feature_selection import SelectKBest, f_regression
272
+
273
+ k = 15 # Number of features to select
274
+ selector = SelectKBest(score_func=f_regression, k=k)
275
+
276
+ # Fit the selector to the data and transform the data to select the best features
277
+ try:
278
+ X_new = selector.fit_transform(X, y)
279
+ except:
280
+ breakpoint()
281
+
282
+ # Get the selected feature indices
283
+ selected_features = selector.get_support(indices=True)
284
+
285
+ # Get the selected feature names
286
+ selected_features = X.columns[selected_features].tolist()
287
+
288
+ print(selected_features)
260
289
  # Filter the dataset for selected features
261
290
  X_filtered = X.loc[:, selected_features]
262
291
 
@@ -8,6 +8,7 @@ from tqdm import tqdm
8
8
  def explain(df_train, df_test, **kwargs):
9
9
  cluster_strategy = kwargs.get("cluster_strategy", "auto_detect")
10
10
  model = kwargs.get("model")
11
+ model_name = kwargs.get("model_name")
11
12
  forecast_season = kwargs.get("forecast_season")
12
13
  crop = kwargs.get("crop")
13
14
  country = kwargs.get("country")
@@ -48,7 +49,7 @@ def explain(df_train, df_test, **kwargs):
48
49
  plt.tight_layout()
49
50
 
50
51
  fname = f"beeswarm_{region_name}_{forecast_season}.png"
51
- out_dir = analysis_dir / country / crop / str(forecast_season)
52
+ out_dir = analysis_dir / country / crop / model_name / str(forecast_season)
52
53
  os.makedirs(out_dir, exist_ok=True)
53
54
  plt.savefig(out_dir / fname, dpi=250)
54
55
  plt.close()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.38
3
+ Version: 0.1.40
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.1.38",
53
+ version="0.1.40",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes