PyPI - geocif - Versions diffs - 0.1.39__tar.gz → 0.1.41__tar.gz - Mend

geocif 0.1.39.tar.gz → 0.1.41.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

{geocif-0.1.39/geocif.egg-info → geocif-0.1.41}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.39
+Version: 0.1.41
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.39 → geocif-0.1.41}/geocif/experiments.py RENAMED Viewed

@@ -15,6 +15,10 @@ warnings.simplefilter(action="ignore", category=FutureWarning)
 def main(inputs, logger, parser, section, item, type, values):
+    # Set experiment_name
+    experiment_name = f"{section}_{item}"
+    parser.set("DEFAULT", "experiment_name", experiment_name)
     if type == "str":
         original_value = parser.get(section, item)
     elif type == "bool":
@@ -49,13 +53,25 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
     logger.info("\tStarting GEOCIF Experiments")
     logger.info("=============================")
-    # Experiment 1: include_lat_lon
+    # Experiment: Models
+    logger.info("Experiment 0: Models")
+    parser = main(
+        inputs,
+        logger,
+        parser,
+        "DEFAULT",
+        "model",
+        "str",
+        ["catboost", "merf", "linear"],
+    )
+    # Experiment: include_lat_lon
     logger.info("Experiment 1: include_lat_lon")
     parser = main(
         inputs, logger, parser, "ML", "include_lat_lon", "bool", [True, False]
     )
-    # Experiment 2: feature_selection
+    # Experiment: feature_selection
     logger.info("Experiment 2: feature_selection")
     parser = main(
         inputs,
@@ -67,13 +83,13 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
         ["SelectKBest", "BorutaPy", "Leshy", "RFECV", "RFE"],
     )
-    # Experiment 3: lag_years
+    # Experiment: lag_years
     logger.info("Experiment 3: lag_years")
     parser = main(
         inputs, logger, parser, "ML", "lag_years", "int", [1, 2, 3, 4, 5]
     )
-    # Experiment 4: lag_yield_as_feature
+    # Experiment: lag_yield_as_feature
     logger.info("Experiment 4: lag_yield_as_feature")
     parser = main(
         inputs,
@@ -85,13 +101,13 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
         [True, False],
     )
-    # Experiment 5: median_years
+    # Experiment: median_years
     logger.info("Experiment 5: median_years")
     parser = main(
         inputs, logger, parser, "ML", "median_years", "int", [2, 3, 4, 5]
     )
-    # Experiment 6: median_yield_as_feature
+    # Experiment: median_yield_as_feature
     logger.info("Experiment 6: median_yield_as_feature")
     parser = main(
         inputs,
@@ -103,7 +119,7 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
         [True, False],
     )
-    # Experiment 7: analogous_year_yield_as_feature
+    # Experiment: analogous_year_yield_as_feature
     logger.info("Experiment 7: analogous_year_yield_as_feature")
     parser = main(
         inputs,
@@ -115,7 +131,7 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
         [True, False],
     )
-    # Experiment 8: optimize
+    # Experiment: optimize
     logger.info("Experiment 8: optimize")
     parser = main(
         inputs, logger, parser, "DEFAULT", "optimize", "bool", [True, False]

{geocif-0.1.39 → geocif-0.1.41}/geocif/geocif.py RENAMED Viewed

@@ -132,6 +132,9 @@ class Geocif:
         self.median_yield_as_feature = self.parser.getboolean(
             "ML", "median_yield_as_feature"
         )
+        self.median_area_as_feature = self.parser.getboolean(
+            "ML", "median_area_as_feature"
+        )
         self.number_lag_years = self.parser.getint("ML", "lag_years")
         self.cluster_strategy = self.parser.get("ML", "cluster_strategy")
         self.feature_selection = self.parser.get("ML", "feature_selection")
@@ -644,6 +647,7 @@ class Geocif:
                 kwargs = {
                     "cluster_strategy": self.cluster_strategy,
                     "model": self.model,
+                    "model_name": self.model_name,
                     "forecast_season": self.forecast_season,
                     "crop": self.crop,
                     "country": self.country,
@@ -659,6 +663,7 @@ class Geocif:
                 model = self.model.estimator_
             else:
                 model = self.model
             output.store(self.db_path, experiment_id, df, model, self.model_name)
     def get_cei_column_names(self, df):
@@ -734,10 +739,15 @@ class Geocif:
             df = fe.compute_last_year_yield(df)
         if self.median_yield_as_feature:
-            df = fe.compute_median_yield(
+            df = fe.compute_median_statistics(
                 df, self.all_seasons_with_yield, self.number_median_years
             )
+        if self.median_area_as_feature:
+            df = fe.compute_median_statistics(
+                df, self.all_seasons_with_area, self.number_median_years, "Area (ha)"
+            )
         if self.lag_yield_as_feature:
             df = fe.compute_lag_yield(
                 df, self.all_seasons_with_yield, self.number_lag_years
@@ -1105,6 +1115,10 @@ def loop_execute(inputs):
     with PyCallGraph(output=graphviz, config=config):
         country, crop, season, model, logger, parser = inputs
+        logger.info("=====================================================")
+        logger.info(f"\tStarting GEOCIF: {country} {crop} {season} {model}")
+        logger.info("=====================================================")
         obj = Geocif(logger=logger, parser=parser)
         obj.read_data(country, crop, season)
@@ -1176,10 +1190,6 @@ def main(logger, parser):
     Returns:
     """
-    logger.info("===========================")
-    logger.info("\tStarting GEOCIF")
-    logger.info("===========================")
     inputs = gather_inputs(parser)
     execute_models(inputs, logger, parser)

{geocif-0.1.39 → geocif-0.1.41}/geocif/indices_runner_v2.py RENAMED Viewed

@@ -157,7 +157,7 @@ class cei_runner(base.BaseGeo):
                 "ndvi",
                 False,  # redo
             )
-            for year in range(2024, ar.utcnow().year + 1)
+            for year in range(2001, ar.utcnow().year + 1)
             for status, path, filename, admin_zone, category in combinations
         ]

{geocif-0.1.39 → geocif-0.1.41}/geocif/ml/feature_engineering.py RENAMED Viewed

@@ -62,7 +62,7 @@ def compute_closest_years(all_years, harvest_year, number_lag_years):
     return closest_years.tolist()
-def compute_median_yield(
+def compute_median_statistics(
     df, all_seasons_with_yield, number_median_years, target_col="Yield (tn per ha)"
 ):
     """

{geocif-0.1.39 → geocif-0.1.41}/geocif/ml/output.py RENAMED Viewed

@@ -107,6 +107,7 @@ def store(db_path, experiment_id, df, model, model_name):
     try:
         utils.to_db(db_path, experiment_id, df)
     except Exception as e:
+        breakpoint()
         print(f"Error: {e}")
     index_columns = ["Country", "Region", "Crop", "Harvest Year", "Stages"]
@@ -127,6 +128,7 @@ def store(db_path, experiment_id, df, model, model_name):
         df_model.index.set_names(["Index"], inplace=True)
         utils.to_db(db_path, "models", df_model)
     except Exception as e:
+        breakpoint()
         print(f"Error: {e}")
     con.commit()

{geocif-0.1.39 → geocif-0.1.41}/geocif/ml/xai.py RENAMED Viewed

@@ -8,6 +8,7 @@ from tqdm import tqdm
 def explain(df_train, df_test, **kwargs):
     cluster_strategy = kwargs.get("cluster_strategy", "auto_detect")
     model = kwargs.get("model")
+    model_name = kwargs.get("model_name")
     forecast_season = kwargs.get("forecast_season")
     crop = kwargs.get("crop")
     country = kwargs.get("country")
@@ -48,7 +49,7 @@ def explain(df_train, df_test, **kwargs):
     plt.tight_layout()
     fname = f"beeswarm_{region_name}_{forecast_season}.png"
-    out_dir = analysis_dir / country / crop / str(forecast_season)
+    out_dir = analysis_dir / country / crop / model_name / str(forecast_season)
     os.makedirs(out_dir, exist_ok=True)
     plt.savefig(out_dir / fname, dpi=250)
     plt.close()

{geocif-0.1.39 → geocif-0.1.41}/geocif/utils.py RENAMED Viewed

@@ -332,7 +332,7 @@ def create_output_directory(method, admin_zone, country, crop, path_output):
     :return:
     """
-    dir_output = path_output / "fao" / "indices" / method / admin_zone / country / crop
+    dir_output = path_output / "cei" / "indices" / method / admin_zone / country / crop
     os.makedirs(dir_output, exist_ok=True)
     return dir_output

{geocif-0.1.39 → geocif-0.1.41/geocif.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.39
+Version: 0.1.41
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.39 → geocif-0.1.41}/setup.py RENAMED Viewed

@@ -50,6 +50,6 @@ setup(
     test_suite="tests",
     tests_require=test_requirements,
     url="https://ritviksahajpal.github.io/yield_forecasting/",
-    version="0.1.39",
+    version="0.1.41",
     zip_safe=False,
 )