PyPI - geocif - Versions diffs - 0.1.38__tar.gz → 0.1.40__tar.gz - Mend

geocif 0.1.38.tar.gz → 0.1.40.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

{geocif-0.1.38/geocif.egg-info → geocif-0.1.40}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.38
+Version: 0.1.40
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.38 → geocif-0.1.40}/geocif/experiments.py RENAMED Viewed

@@ -14,7 +14,11 @@ import warnings
 warnings.simplefilter(action="ignore", category=FutureWarning)
-def run(inputs, logger, parser, section, item, type, values):
+def main(inputs, logger, parser, section, item, type, values):
+    # Set experiment_name
+    experiment_name = f"{section}_{item}"
+    parser.set("DEFAULT", "experiment_name", experiment_name)
     if type == "str":
         original_value = parser.get(section, item)
     elif type == "bool":
@@ -41,7 +45,7 @@ def run(inputs, logger, parser, section, item, type, values):
     return parser
-def main(path_config_files=[Path("../config/geocif.txt")]):
+def run(path_config_files=[Path("../config/geocif.txt")]):
     logger, parser = log.setup_logger_parser(path_config_files)
     inputs = gc.gather_inputs(parser)
@@ -51,13 +55,13 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
     # Experiment 1: include_lat_lon
     logger.info("Experiment 1: include_lat_lon")
-    parser = run(
+    parser = main(
         inputs, logger, parser, "ML", "include_lat_lon", "bool", [True, False]
     )
     # Experiment 2: feature_selection
     logger.info("Experiment 2: feature_selection")
-    parser = run(
+    parser = main(
         inputs,
         logger,
         parser,
@@ -69,13 +73,13 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
     # Experiment 3: lag_years
     logger.info("Experiment 3: lag_years")
-    parser = run(
+    parser = main(
         inputs, logger, parser, "ML", "lag_years", "int", [1, 2, 3, 4, 5]
     )
     # Experiment 4: lag_yield_as_feature
     logger.info("Experiment 4: lag_yield_as_feature")
-    parser = run(
+    parser = main(
         inputs,
         logger,
         parser,
@@ -87,13 +91,13 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
     # Experiment 5: median_years
     logger.info("Experiment 5: median_years")
-    parser = run(
+    parser = main(
         inputs, logger, parser, "ML", "median_years", "int", [2, 3, 4, 5]
     )
     # Experiment 6: median_yield_as_feature
     logger.info("Experiment 6: median_yield_as_feature")
-    parser = run(
+    parser = main(
         inputs,
         logger,
         parser,
@@ -105,7 +109,7 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
     # Experiment 7: analogous_year_yield_as_feature
     logger.info("Experiment 7: analogous_year_yield_as_feature")
-    parser = run(
+    parser = main(
         inputs,
         logger,
         parser,
@@ -117,10 +121,10 @@ def main(path_config_files=[Path("../config/geocif.txt")]):
     # Experiment 8: optimize
     logger.info("Experiment 8: optimize")
-    parser = run(
+    parser = main(
         inputs, logger, parser, "DEFAULT", "optimize", "bool", [True, False]
     )
 if __name__ == "__main__":
-    main()
+    run()

{geocif-0.1.38 → geocif-0.1.40}/geocif/geocif.py RENAMED Viewed

@@ -535,7 +535,7 @@ class Geocif:
                 cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
                 # Check if any element of dict_selected_features is in _t
-                for x in selected_features:
+                for x in selected_features["CEI"].values:
                     if x not in cei:
                         continue
@@ -644,6 +644,7 @@ class Geocif:
                 kwargs = {
                     "cluster_strategy": self.cluster_strategy,
                     "model": self.model,
+                    "model_name": self.model_name,
                     "forecast_season": self.forecast_season,
                     "crop": self.crop,
                     "country": self.country,
@@ -659,6 +660,7 @@ class Geocif:
                 model = self.model.estimator_
             else:
                 model = self.model
             output.store(self.db_path, experiment_id, df, model, self.model_name)
     def get_cei_column_names(self, df):

{geocif-0.1.38 → geocif-0.1.40}/geocif/ml/feature_selection.py RENAMED Viewed

@@ -3,6 +3,31 @@ from tqdm import tqdm
 from sklearn.ensemble import RandomForestRegressor
+def are_all_features_non_eo(features):
+    """
+    Check if all the features non eo features
+    Args:
+        feature:
+    Returns:
+    """
+    non_eo_features = ['Median Yield (tn per ha)',
+                       'Analogous Year',
+                       'Analogous Year Yield',
+                       'lon',
+                       'lat',
+                       't -1 Yield (tn per ha)',
+                       't -2 Yield (tn per ha)',
+                       't -3 Yield (tn per ha)',
+                       't -4 Yield (tn per ha)',
+                       't -5 Yield (tn per ha)']
+    # Check if all features are non-eo features, return True if they are
+    return all(feature in non_eo_features for feature in features)
 def select_features(X, y, method="RFE", min_features_to_select=3):
     """
@@ -29,6 +54,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
     # selector = VarianceThreshold(threshold=scaled_data.var().mean())
     # X = selector.fit_transform(scaled_data)
     selector = None
+    X_original = X.copy()
     # Fill in columns with median of that column
     X = X.fillna(X.median())
@@ -46,24 +72,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
     np.float = np.float64
     np.bool = np.bool_
-    if method == "SelectKBest":
-        from sklearn.feature_selection import SelectKBest, f_regression
-        k = 15  # Number of features to select
-        selector = SelectKBest(score_func=f_regression, k=k)
-        # Fit the selector to the data and transform the data to select the best features
-        try:
-            X_new = selector.fit_transform(X, y)
-        except:
-            breakpoint()
-        # Get the selected feature indices
-        selected_features = selector.get_support(indices=True)
-        # Get the selected feature names
-        selected_features = X.columns[selected_features].tolist()
-    elif method == "SHAP":
+    if method == "SHAP":
         import pandas as pd
         from catboost import CatBoostRegressor
         from fasttreeshap import TreeExplainer as FastTreeExplainer
@@ -257,6 +266,26 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
         raise ValueError("Method not recognized. Use BorutaPy, Genetic, or RFE")
     # tentative_features = X.columns[selector.support_weak_].tolist()
+    non_eo = are_all_features_non_eo(selected_features)
+    if non_eo or method == "SelectKBest":
+        from sklearn.feature_selection import SelectKBest, f_regression
+        k = 15  # Number of features to select
+        selector = SelectKBest(score_func=f_regression, k=k)
+        # Fit the selector to the data and transform the data to select the best features
+        try:
+            X_new = selector.fit_transform(X, y)
+        except:
+            breakpoint()
+        # Get the selected feature indices
+        selected_features = selector.get_support(indices=True)
+        # Get the selected feature names
+        selected_features = X.columns[selected_features].tolist()
+    print(selected_features)
     # Filter the dataset for selected features
     X_filtered = X.loc[:, selected_features]

{geocif-0.1.38 → geocif-0.1.40}/geocif/ml/xai.py RENAMED Viewed

@@ -8,6 +8,7 @@ from tqdm import tqdm
 def explain(df_train, df_test, **kwargs):
     cluster_strategy = kwargs.get("cluster_strategy", "auto_detect")
     model = kwargs.get("model")
+    model_name = kwargs.get("model_name")
     forecast_season = kwargs.get("forecast_season")
     crop = kwargs.get("crop")
     country = kwargs.get("country")
@@ -48,7 +49,7 @@ def explain(df_train, df_test, **kwargs):
     plt.tight_layout()
     fname = f"beeswarm_{region_name}_{forecast_season}.png"
-    out_dir = analysis_dir / country / crop / str(forecast_season)
+    out_dir = analysis_dir / country / crop / model_name / str(forecast_season)
     os.makedirs(out_dir, exist_ok=True)
     plt.savefig(out_dir / fname, dpi=250)
     plt.close()

{geocif-0.1.38 → geocif-0.1.40/geocif.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.38
+Version: 0.1.40
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.38 → geocif-0.1.40}/setup.py RENAMED Viewed

@@ -50,6 +50,6 @@ setup(
     test_suite="tests",
     tests_require=test_requirements,
     url="https://ritviksahajpal.github.io/yield_forecasting/",
-    version="0.1.38",
+    version="0.1.40",
     zip_safe=False,
 )