geocif 0.1.38__tar.gz → 0.1.39__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {geocif-0.1.38/geocif.egg-info → geocif-0.1.39}/PKG-INFO +1 -1
  2. {geocif-0.1.38 → geocif-0.1.39}/geocif/experiments.py +11 -11
  3. {geocif-0.1.38 → geocif-0.1.39}/geocif/geocif.py +1 -1
  4. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/feature_selection.py +47 -18
  5. {geocif-0.1.38 → geocif-0.1.39/geocif.egg-info}/PKG-INFO +1 -1
  6. {geocif-0.1.38 → geocif-0.1.39}/setup.py +1 -1
  7. {geocif-0.1.38 → geocif-0.1.39}/LICENSE +0 -0
  8. {geocif-0.1.38 → geocif-0.1.39}/MANIFEST.in +0 -0
  9. {geocif-0.1.38 → geocif-0.1.39}/README.md +0 -0
  10. {geocif-0.1.38 → geocif-0.1.39}/geocif/__init__.py +0 -0
  11. {geocif-0.1.38 → geocif-0.1.39}/geocif/agmet/__init__.py +0 -0
  12. {geocif-0.1.38 → geocif-0.1.39}/geocif/agmet/geoagmet.py +0 -0
  13. {geocif-0.1.38 → geocif-0.1.39}/geocif/agmet/plot.py +0 -0
  14. {geocif-0.1.38 → geocif-0.1.39}/geocif/agmet/utils.py +0 -0
  15. {geocif-0.1.38 → geocif-0.1.39}/geocif/analysis.py +0 -0
  16. {geocif-0.1.38 → geocif-0.1.39}/geocif/backup/__init__.py +0 -0
  17. {geocif-0.1.38 → geocif-0.1.39}/geocif/backup/constants.py +0 -0
  18. {geocif-0.1.38 → geocif-0.1.39}/geocif/backup/features.py +0 -0
  19. {geocif-0.1.38 → geocif-0.1.39}/geocif/backup/geo.py +0 -0
  20. {geocif-0.1.38 → geocif-0.1.39}/geocif/backup/geocif.py +0 -0
  21. {geocif-0.1.38 → geocif-0.1.39}/geocif/backup/metadata.py +0 -0
  22. {geocif-0.1.38 → geocif-0.1.39}/geocif/backup/models.py +0 -0
  23. {geocif-0.1.38 → geocif-0.1.39}/geocif/cei/__init__.py +0 -0
  24. {geocif-0.1.38 → geocif-0.1.39}/geocif/cei/definitions.py +0 -0
  25. {geocif-0.1.38 → geocif-0.1.39}/geocif/cei/indices.py +0 -0
  26. {geocif-0.1.38 → geocif-0.1.39}/geocif/indices_runner.py +0 -0
  27. {geocif-0.1.38 → geocif-0.1.39}/geocif/indices_runner_v2.py +0 -0
  28. {geocif-0.1.38 → geocif-0.1.39}/geocif/logger.py +0 -0
  29. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/__init__.py +0 -0
  30. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/correlations.py +0 -0
  31. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/correlations_backup.py +0 -0
  32. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/embedding.py +0 -0
  33. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/feature_engineering.py +0 -0
  34. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/outliers.py +0 -0
  35. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/outlook.py +0 -0
  36. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/output.py +0 -0
  37. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/spatial_autocorrelation.py +0 -0
  38. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/stages.py +0 -0
  39. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/stats.py +0 -0
  40. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/trainers.py +0 -0
  41. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/trend.py +0 -0
  42. {geocif-0.1.38 → geocif-0.1.39}/geocif/ml/xai.py +0 -0
  43. {geocif-0.1.38 → geocif-0.1.39}/geocif/playground/__init__.py +0 -0
  44. {geocif-0.1.38 → geocif-0.1.39}/geocif/playground/automl.py +0 -0
  45. {geocif-0.1.38 → geocif-0.1.39}/geocif/playground/misc.py +0 -0
  46. {geocif-0.1.38 → geocif-0.1.39}/geocif/utils.py +0 -0
  47. {geocif-0.1.38 → geocif-0.1.39}/geocif/viz/__init__.py +0 -0
  48. {geocif-0.1.38 → geocif-0.1.39}/geocif/viz/plot.py +0 -0
  49. {geocif-0.1.38 → geocif-0.1.39}/geocif.egg-info/SOURCES.txt +0 -0
  50. {geocif-0.1.38 → geocif-0.1.39}/geocif.egg-info/dependency_links.txt +0 -0
  51. {geocif-0.1.38 → geocif-0.1.39}/geocif.egg-info/not-zip-safe +0 -0
  52. {geocif-0.1.38 → geocif-0.1.39}/geocif.egg-info/top_level.txt +0 -0
  53. {geocif-0.1.38 → geocif-0.1.39}/requirements.txt +0 -0
  54. {geocif-0.1.38 → geocif-0.1.39}/setup.cfg +0 -0
  55. {geocif-0.1.38 → geocif-0.1.39}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.38
3
+ Version: 0.1.39
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -14,7 +14,7 @@ import warnings
14
14
  warnings.simplefilter(action="ignore", category=FutureWarning)
15
15
 
16
16
 
17
- def run(inputs, logger, parser, section, item, type, values):
17
+ def main(inputs, logger, parser, section, item, type, values):
18
18
  if type == "str":
19
19
  original_value = parser.get(section, item)
20
20
  elif type == "bool":
@@ -41,7 +41,7 @@ def run(inputs, logger, parser, section, item, type, values):
41
41
  return parser
42
42
 
43
43
 
44
- def main(path_config_files=[Path("../config/geocif.txt")]):
44
+ def run(path_config_files=[Path("../config/geocif.txt")]):
45
45
  logger, parser = log.setup_logger_parser(path_config_files)
46
46
  inputs = gc.gather_inputs(parser)
47
47
 
@@ -51,13 +51,13 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
51
51
 
52
52
  # Experiment 1: include_lat_lon
53
53
  logger.info("Experiment 1: include_lat_lon")
54
- parser = run(
54
+ parser = main(
55
55
  inputs, logger, parser, "ML", "include_lat_lon", "bool", [True, False]
56
56
  )
57
57
 
58
58
  # Experiment 2: feature_selection
59
59
  logger.info("Experiment 2: feature_selection")
60
- parser = run(
60
+ parser = main(
61
61
  inputs,
62
62
  logger,
63
63
  parser,
@@ -69,13 +69,13 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
69
69
 
70
70
  # Experiment 3: lag_years
71
71
  logger.info("Experiment 3: lag_years")
72
- parser = run(
72
+ parser = main(
73
73
  inputs, logger, parser, "ML", "lag_years", "int", [1, 2, 3, 4, 5]
74
74
  )
75
75
 
76
76
  # Experiment 4: lag_yield_as_feature
77
77
  logger.info("Experiment 4: lag_yield_as_feature")
78
- parser = run(
78
+ parser = main(
79
79
  inputs,
80
80
  logger,
81
81
  parser,
@@ -87,13 +87,13 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
87
87
 
88
88
  # Experiment 5: median_years
89
89
  logger.info("Experiment 5: median_years")
90
- parser = run(
90
+ parser = main(
91
91
  inputs, logger, parser, "ML", "median_years", "int", [2, 3, 4, 5]
92
92
  )
93
93
 
94
94
  # Experiment 6: median_yield_as_feature
95
95
  logger.info("Experiment 6: median_yield_as_feature")
96
- parser = run(
96
+ parser = main(
97
97
  inputs,
98
98
  logger,
99
99
  parser,
@@ -105,7 +105,7 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
105
105
 
106
106
  # Experiment 7: analogous_year_yield_as_feature
107
107
  logger.info("Experiment 7: analogous_year_yield_as_feature")
108
- parser = run(
108
+ parser = main(
109
109
  inputs,
110
110
  logger,
111
111
  parser,
@@ -117,10 +117,10 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
117
117
 
118
118
  # Experiment 8: optimize
119
119
  logger.info("Experiment 8: optimize")
120
- parser = run(
120
+ parser = main(
121
121
  inputs, logger, parser, "DEFAULT", "optimize", "bool", [True, False]
122
122
  )
123
123
 
124
124
 
125
125
  if __name__ == "__main__":
126
- main()
126
+ run()
@@ -535,7 +535,7 @@ class Geocif:
535
535
  cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
536
536
 
537
537
  # Check if any element of dict_selected_features is in _t
538
- for x in selected_features:
538
+ for x in selected_features["CEI"].values:
539
539
  if x not in cei:
540
540
  continue
541
541
 
@@ -3,6 +3,31 @@ from tqdm import tqdm
3
3
  from sklearn.ensemble import RandomForestRegressor
4
4
 
5
5
 
6
+ def are_all_features_non_eo(features):
7
+ """
8
+ Check if all the features non eo features
9
+
10
+ Args:
11
+ feature:
12
+
13
+ Returns:
14
+
15
+ """
16
+ non_eo_features = ['Median Yield (tn per ha)',
17
+ 'Analogous Year',
18
+ 'Analogous Year Yield',
19
+ 'lon',
20
+ 'lat',
21
+ 't -1 Yield (tn per ha)',
22
+ 't -2 Yield (tn per ha)',
23
+ 't -3 Yield (tn per ha)',
24
+ 't -4 Yield (tn per ha)',
25
+ 't -5 Yield (tn per ha)']
26
+
27
+ # Check if all features are non-eo features, return True if they are
28
+ return all(feature in non_eo_features for feature in features)
29
+
30
+
6
31
  def select_features(X, y, method="RFE", min_features_to_select=3):
7
32
  """
8
33
 
@@ -29,6 +54,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
29
54
  # selector = VarianceThreshold(threshold=scaled_data.var().mean())
30
55
  # X = selector.fit_transform(scaled_data)
31
56
  selector = None
57
+ X_original = X.copy()
32
58
 
33
59
  # Fill in columns with median of that column
34
60
  X = X.fillna(X.median())
@@ -46,24 +72,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
46
72
  np.float = np.float64
47
73
  np.bool = np.bool_
48
74
 
49
- if method == "SelectKBest":
50
- from sklearn.feature_selection import SelectKBest, f_regression
51
-
52
- k = 15 # Number of features to select
53
- selector = SelectKBest(score_func=f_regression, k=k)
54
-
55
- # Fit the selector to the data and transform the data to select the best features
56
- try:
57
- X_new = selector.fit_transform(X, y)
58
- except:
59
- breakpoint()
60
-
61
- # Get the selected feature indices
62
- selected_features = selector.get_support(indices=True)
63
-
64
- # Get the selected feature names
65
- selected_features = X.columns[selected_features].tolist()
66
- elif method == "SHAP":
75
+ if method == "SHAP":
67
76
  import pandas as pd
68
77
  from catboost import CatBoostRegressor
69
78
  from fasttreeshap import TreeExplainer as FastTreeExplainer
@@ -257,6 +266,26 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
257
266
  raise ValueError("Method not recognized. Use BorutaPy, Genetic, or RFE")
258
267
  # tentative_features = X.columns[selector.support_weak_].tolist()
259
268
 
269
+ non_eo = are_all_features_non_eo(selected_features)
270
+ if non_eo or method == "SelectKBest":
271
+ from sklearn.feature_selection import SelectKBest, f_regression
272
+
273
+ k = 15 # Number of features to select
274
+ selector = SelectKBest(score_func=f_regression, k=k)
275
+
276
+ # Fit the selector to the data and transform the data to select the best features
277
+ try:
278
+ X_new = selector.fit_transform(X, y)
279
+ except:
280
+ breakpoint()
281
+
282
+ # Get the selected feature indices
283
+ selected_features = selector.get_support(indices=True)
284
+
285
+ # Get the selected feature names
286
+ selected_features = X.columns[selected_features].tolist()
287
+
288
+ print(selected_features)
260
289
  # Filter the dataset for selected features
261
290
  X_filtered = X.loc[:, selected_features]
262
291
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.38
3
+ Version: 0.1.39
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.1.38",
53
+ version="0.1.39",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes