geocif 0.1.37__tar.gz → 0.1.39__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {geocif-0.1.37/geocif.egg-info → geocif-0.1.39}/PKG-INFO +1 -1
  2. {geocif-0.1.37 → geocif-0.1.39}/geocif/experiments.py +19 -20
  3. {geocif-0.1.37 → geocif-0.1.39}/geocif/geocif.py +1 -1
  4. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/feature_selection.py +47 -18
  5. {geocif-0.1.37 → geocif-0.1.39/geocif.egg-info}/PKG-INFO +1 -1
  6. {geocif-0.1.37 → geocif-0.1.39}/setup.py +1 -1
  7. {geocif-0.1.37 → geocif-0.1.39}/LICENSE +0 -0
  8. {geocif-0.1.37 → geocif-0.1.39}/MANIFEST.in +0 -0
  9. {geocif-0.1.37 → geocif-0.1.39}/README.md +0 -0
  10. {geocif-0.1.37 → geocif-0.1.39}/geocif/__init__.py +0 -0
  11. {geocif-0.1.37 → geocif-0.1.39}/geocif/agmet/__init__.py +0 -0
  12. {geocif-0.1.37 → geocif-0.1.39}/geocif/agmet/geoagmet.py +0 -0
  13. {geocif-0.1.37 → geocif-0.1.39}/geocif/agmet/plot.py +0 -0
  14. {geocif-0.1.37 → geocif-0.1.39}/geocif/agmet/utils.py +0 -0
  15. {geocif-0.1.37 → geocif-0.1.39}/geocif/analysis.py +0 -0
  16. {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/__init__.py +0 -0
  17. {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/constants.py +0 -0
  18. {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/features.py +0 -0
  19. {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/geo.py +0 -0
  20. {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/geocif.py +0 -0
  21. {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/metadata.py +0 -0
  22. {geocif-0.1.37 → geocif-0.1.39}/geocif/backup/models.py +0 -0
  23. {geocif-0.1.37 → geocif-0.1.39}/geocif/cei/__init__.py +0 -0
  24. {geocif-0.1.37 → geocif-0.1.39}/geocif/cei/definitions.py +0 -0
  25. {geocif-0.1.37 → geocif-0.1.39}/geocif/cei/indices.py +0 -0
  26. {geocif-0.1.37 → geocif-0.1.39}/geocif/indices_runner.py +0 -0
  27. {geocif-0.1.37 → geocif-0.1.39}/geocif/indices_runner_v2.py +0 -0
  28. {geocif-0.1.37 → geocif-0.1.39}/geocif/logger.py +0 -0
  29. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/__init__.py +0 -0
  30. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/correlations.py +0 -0
  31. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/correlations_backup.py +0 -0
  32. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/embedding.py +0 -0
  33. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/feature_engineering.py +0 -0
  34. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/outliers.py +0 -0
  35. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/outlook.py +0 -0
  36. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/output.py +0 -0
  37. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/spatial_autocorrelation.py +0 -0
  38. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/stages.py +0 -0
  39. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/stats.py +0 -0
  40. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/trainers.py +0 -0
  41. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/trend.py +0 -0
  42. {geocif-0.1.37 → geocif-0.1.39}/geocif/ml/xai.py +0 -0
  43. {geocif-0.1.37 → geocif-0.1.39}/geocif/playground/__init__.py +0 -0
  44. {geocif-0.1.37 → geocif-0.1.39}/geocif/playground/automl.py +0 -0
  45. {geocif-0.1.37 → geocif-0.1.39}/geocif/playground/misc.py +0 -0
  46. {geocif-0.1.37 → geocif-0.1.39}/geocif/utils.py +0 -0
  47. {geocif-0.1.37 → geocif-0.1.39}/geocif/viz/__init__.py +0 -0
  48. {geocif-0.1.37 → geocif-0.1.39}/geocif/viz/plot.py +0 -0
  49. {geocif-0.1.37 → geocif-0.1.39}/geocif.egg-info/SOURCES.txt +0 -0
  50. {geocif-0.1.37 → geocif-0.1.39}/geocif.egg-info/dependency_links.txt +0 -0
  51. {geocif-0.1.37 → geocif-0.1.39}/geocif.egg-info/not-zip-safe +0 -0
  52. {geocif-0.1.37 → geocif-0.1.39}/geocif.egg-info/top_level.txt +0 -0
  53. {geocif-0.1.37 → geocif-0.1.39}/requirements.txt +0 -0
  54. {geocif-0.1.37 → geocif-0.1.39}/setup.cfg +0 -0
  55. {geocif-0.1.37 → geocif-0.1.39}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.37
3
+ Version: 0.1.39
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -1,4 +1,3 @@
1
- import configparser
2
1
  from pathlib import Path
3
2
 
4
3
  import matplotlib.pyplot as plt
@@ -15,7 +14,7 @@ import warnings
15
14
  warnings.simplefilter(action="ignore", category=FutureWarning)
16
15
 
17
16
 
18
- def run(inputs, logger, parser, section, item, type, values):
17
+ def main(inputs, logger, parser, section, item, type, values):
19
18
  if type == "str":
20
19
  original_value = parser.get(section, item)
21
20
  elif type == "bool":
@@ -42,7 +41,7 @@ def run(inputs, logger, parser, section, item, type, values):
42
41
  return parser
43
42
 
44
43
 
45
- def main(path_config_files=[Path("../config/geocif.txt")]):
44
+ def run(path_config_files=[Path("../config/geocif.txt")]):
46
45
  logger, parser = log.setup_logger_parser(path_config_files)
47
46
  inputs = gc.gather_inputs(parser)
48
47
 
@@ -52,17 +51,17 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
52
51
 
53
52
  # Experiment 1: include_lat_lon
54
53
  logger.info("Experiment 1: include_lat_lon")
55
- parser = run(
56
- inputs, logger, parser, "experiments", "include_lat_lon", "bool", [True, False]
54
+ parser = main(
55
+ inputs, logger, parser, "ML", "include_lat_lon", "bool", [True, False]
57
56
  )
58
57
 
59
58
  # Experiment 2: feature_selection
60
59
  logger.info("Experiment 2: feature_selection")
61
- parser = run(
60
+ parser = main(
62
61
  inputs,
63
62
  logger,
64
63
  parser,
65
- "experiments",
64
+ "ML",
66
65
  "feature_selection",
67
66
  "str",
68
67
  ["SelectKBest", "BorutaPy", "Leshy", "RFECV", "RFE"],
@@ -70,17 +69,17 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
70
69
 
71
70
  # Experiment 3: lag_years
72
71
  logger.info("Experiment 3: lag_years")
73
- parser = run(
74
- inputs, logger, parser, "experiments", "lag_years", "int", [1, 2, 3, 4, 5]
72
+ parser = main(
73
+ inputs, logger, parser, "ML", "lag_years", "int", [1, 2, 3, 4, 5]
75
74
  )
76
75
 
77
76
  # Experiment 4: lag_yield_as_feature
78
77
  logger.info("Experiment 4: lag_yield_as_feature")
79
- parser = run(
78
+ parser = main(
80
79
  inputs,
81
80
  logger,
82
81
  parser,
83
- "experiments",
82
+ "ML",
84
83
  "lag_yield_as_feature",
85
84
  "bool",
86
85
  [True, False],
@@ -88,17 +87,17 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
88
87
 
89
88
  # Experiment 5: median_years
90
89
  logger.info("Experiment 5: median_years")
91
- parser = run(
92
- inputs, logger, parser, "experiments", "median_years", "int", [2, 3, 4, 5]
90
+ parser = main(
91
+ inputs, logger, parser, "ML", "median_years", "int", [2, 3, 4, 5]
93
92
  )
94
93
 
95
94
  # Experiment 6: median_yield_as_feature
96
95
  logger.info("Experiment 6: median_yield_as_feature")
97
- parser = run(
96
+ parser = main(
98
97
  inputs,
99
98
  logger,
100
99
  parser,
101
- "experiments",
100
+ "ML",
102
101
  "median_yield_as_feature",
103
102
  "bool",
104
103
  [True, False],
@@ -106,11 +105,11 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
106
105
 
107
106
  # Experiment 7: analogous_year_yield_as_feature
108
107
  logger.info("Experiment 7: analogous_year_yield_as_feature")
109
- parser = run(
108
+ parser = main(
110
109
  inputs,
111
110
  logger,
112
111
  parser,
113
- "experiments",
112
+ "ML",
114
113
  "analogous_year_yield_as_feature",
115
114
  "bool",
116
115
  [True, False],
@@ -118,10 +117,10 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
118
117
 
119
118
  # Experiment 8: optimize
120
119
  logger.info("Experiment 8: optimize")
121
- parser = run(
122
- inputs, logger, parser, "experiments", "optimize", "bool", [True, False]
120
+ parser = main(
121
+ inputs, logger, parser, "DEFAULT", "optimize", "bool", [True, False]
123
122
  )
124
123
 
125
124
 
126
125
  if __name__ == "__main__":
127
- main()
126
+ run()
@@ -535,7 +535,7 @@ class Geocif:
535
535
  cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
536
536
 
537
537
  # Check if any element of dict_selected_features is in _t
538
- for x in selected_features:
538
+ for x in selected_features["CEI"].values:
539
539
  if x not in cei:
540
540
  continue
541
541
 
@@ -3,6 +3,31 @@ from tqdm import tqdm
3
3
  from sklearn.ensemble import RandomForestRegressor
4
4
 
5
5
 
6
+ def are_all_features_non_eo(features):
7
+ """
8
+ Check if all the features non eo features
9
+
10
+ Args:
11
+ feature:
12
+
13
+ Returns:
14
+
15
+ """
16
+ non_eo_features = ['Median Yield (tn per ha)',
17
+ 'Analogous Year',
18
+ 'Analogous Year Yield',
19
+ 'lon',
20
+ 'lat',
21
+ 't -1 Yield (tn per ha)',
22
+ 't -2 Yield (tn per ha)',
23
+ 't -3 Yield (tn per ha)',
24
+ 't -4 Yield (tn per ha)',
25
+ 't -5 Yield (tn per ha)']
26
+
27
+ # Check if all features are non-eo features, return True if they are
28
+ return all(feature in non_eo_features for feature in features)
29
+
30
+
6
31
  def select_features(X, y, method="RFE", min_features_to_select=3):
7
32
  """
8
33
 
@@ -29,6 +54,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
29
54
  # selector = VarianceThreshold(threshold=scaled_data.var().mean())
30
55
  # X = selector.fit_transform(scaled_data)
31
56
  selector = None
57
+ X_original = X.copy()
32
58
 
33
59
  # Fill in columns with median of that column
34
60
  X = X.fillna(X.median())
@@ -46,24 +72,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
46
72
  np.float = np.float64
47
73
  np.bool = np.bool_
48
74
 
49
- if method == "SelectKBest":
50
- from sklearn.feature_selection import SelectKBest, f_regression
51
-
52
- k = 15 # Number of features to select
53
- selector = SelectKBest(score_func=f_regression, k=k)
54
-
55
- # Fit the selector to the data and transform the data to select the best features
56
- try:
57
- X_new = selector.fit_transform(X, y)
58
- except:
59
- breakpoint()
60
-
61
- # Get the selected feature indices
62
- selected_features = selector.get_support(indices=True)
63
-
64
- # Get the selected feature names
65
- selected_features = X.columns[selected_features].tolist()
66
- elif method == "SHAP":
75
+ if method == "SHAP":
67
76
  import pandas as pd
68
77
  from catboost import CatBoostRegressor
69
78
  from fasttreeshap import TreeExplainer as FastTreeExplainer
@@ -257,6 +266,26 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
257
266
  raise ValueError("Method not recognized. Use BorutaPy, Genetic, or RFE")
258
267
  # tentative_features = X.columns[selector.support_weak_].tolist()
259
268
 
269
+ non_eo = are_all_features_non_eo(selected_features)
270
+ if non_eo or method == "SelectKBest":
271
+ from sklearn.feature_selection import SelectKBest, f_regression
272
+
273
+ k = 15 # Number of features to select
274
+ selector = SelectKBest(score_func=f_regression, k=k)
275
+
276
+ # Fit the selector to the data and transform the data to select the best features
277
+ try:
278
+ X_new = selector.fit_transform(X, y)
279
+ except:
280
+ breakpoint()
281
+
282
+ # Get the selected feature indices
283
+ selected_features = selector.get_support(indices=True)
284
+
285
+ # Get the selected feature names
286
+ selected_features = X.columns[selected_features].tolist()
287
+
288
+ print(selected_features)
260
289
  # Filter the dataset for selected features
261
290
  X_filtered = X.loc[:, selected_features]
262
291
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.37
3
+ Version: 0.1.39
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.1.37",
53
+ version="0.1.39",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes