PyPI - geocif - Versions diffs - 0.1.45__tar.gz → 0.1.47__tar.gz - Mend

geocif 0.1.45__tar.gz → 0.1.47__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

{geocif-0.1.45/geocif.egg-info → geocif-0.1.47}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.45
+Version: 0.1.47
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.45 → geocif-0.1.47}/geocif/analysis.py RENAMED Viewed

@@ -162,8 +162,8 @@ class Geoanalysis:
             return pd.DataFrame(), pd.DataFrame()
         df_metrics = self._compute_metrics(df)
-        # df_metrics = self._process_metrics(df_metrics)
-        # self._plot_metrics(df_metrics)
+        df_metrics = self._process_metrics(df_metrics)
+        self._plot_metrics(df_metrics)
         df_regional_metrics_by_year = self._compute_regional_metrics(
             df, by="Harvest Year"
@@ -172,8 +172,10 @@ class Geoanalysis:
             df_regional_metrics_by_year
         )
         df_regional_metrics = self._average_mape(df_regional_metrics_by_year)
-        breakpoint()
-        self._store_results(None, df_regional_metrics, df_regional_metrics_by_year)
+        self._store_results(
+            df_metrics, df_regional_metrics, df_regional_metrics_by_year
+        )
         df_national_yield = self._compute_national_yield(df)
         self._plot_national_yield(df_national_yield)
@@ -193,7 +195,7 @@ class Geoanalysis:
             .apply(self.annual_metrics)
             .reset_index()
         )
-        breakpoint()
         return df_metrics.pivot_table(
             index=["Country", "Model", "Harvest Year", "Stage Name", "Stage Range"],
             columns="level_5",

{geocif-0.1.45 → geocif-0.1.47}/geocif/experiments.py RENAMED Viewed

@@ -85,9 +85,7 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
     # Experiment: lag_years
     logger.info("Experiment 3: lag_years")
-    parser = main(
-        inputs, logger, parser, "ML", "lag_years", "int", [1, 2, 3, 4, 5]
-    )
+    parser = main(inputs, logger, parser, "ML", "lag_years", "int", [1, 2, 3, 4, 5])
     # Experiment: lag_yield_as_feature
     logger.info("Experiment 4: lag_yield_as_feature")
@@ -103,9 +101,7 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
     # Experiment: median_years
     logger.info("Experiment 5: median_years")
-    parser = main(
-        inputs, logger, parser, "ML", "median_years", "int", [2, 3, 4, 5]
-    )
+    parser = main(inputs, logger, parser, "ML", "median_years", "int", [2, 3, 4, 5])
     # Experiment: median_yield_as_feature
     logger.info("Experiment 6: median_yield_as_feature")
@@ -133,9 +129,7 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
     # Experiment: optimize
     logger.info("Experiment 8: optimize")
-    parser = main(
-        inputs, logger, parser, "DEFAULT", "optimize", "bool", [True, False]
-    )
+    parser = main(inputs, logger, parser, "DEFAULT", "optimize", "bool", [True, False])
 if __name__ == "__main__":

{geocif-0.1.45 → geocif-0.1.47}/geocif/geocif.py RENAMED Viewed

@@ -108,13 +108,18 @@ class Geocif:
                                 Config file: ML
         ====================================================================
         """
-        self.use_ceis = ast.literal_eval(self.parser.get("ML", "use_ceis"))
         self.model_type = self.parser.get("ML", "model_type")
         self.fraction_simulate = self.parser.getint("ML", "fraction_simulate")
         self.analogous_year_yield_as_feature = self.parser.getboolean(
             "ML", "analogous_year_yield_as_feature"
         )
-        self.include_lat_lon = self.parser.getboolean("ML", "include_lat_lon")
+        self.plot_map_for_correlation_plot = self.parser.getboolean(
+            "ML", "plot_map_for_correlation_plot"
+        )
+        self.correlation_threshold = self.parser.getfloat("ML", "correlation_threshold")
+        self.include_lat_lon_as_feature = self.parser.getboolean(
+            "ML", "include_lat_lon_as_feature"
+        )
         self.spatial_autocorrelation = self.parser.getboolean(
             "ML", "spatial_autocorrelation"
         )
@@ -147,6 +152,9 @@ class Geocif:
             self.parser.get("ML", "cat_features")
         )
+        self.use_cumulative_features = self.parser.getboolean(
+            "DEFAULT", "use_cumulative_features"
+        )
         """
         ====================================================================
                                 Variables, Paths
@@ -192,6 +200,9 @@ class Geocif:
         self.db_path = self.dir_db / self.db_forecasts
+        # Store config file in database
+        output.config_to_db(self.db_path, self.parser, self.today)
         # self.pickle_file = self.base_dir / self.parser.get("outlook", "pickle_file")
         # obj_pickle = outlook.Outlook(self.pickle_file)
         # self.df_outlook = obj_pickle.read_outlook_file()
@@ -218,18 +229,29 @@ class Geocif:
         y_train = df_region[target_col]
         if self.ml_model:
-            self.logger.info(f"Selecting features for {self.country} {self.crop}")
-            selector, _, self.selected_features = fs.select_features(
-                X_train, y_train, method=self.feature_selection
-            )
-            self.logger.info(f"Selected features: {self.selected_features}")
+            if self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
+                all_features = X_train.columns
+                # Select the columns with use_ceis in it
+                self.selected_features = [
+                    column
+                    for column in all_features
+                    if any(cei in column for cei in self.use_ceis)
+                ]
+            else:
+                self.logger.info(f"Selecting features for {self.country} {self.crop}")
+                selector, _, self.selected_features = fs.select_features(
+                    X_train, y_train, method=self.feature_selection
+                )
+                self.logger.info(f"Selected features: {self.selected_features}")
             """ Update model to include conformal estimates """
-            if "lat" not in self.selected_features and self.include_lat_lon:
+            if "lat" not in self.selected_features and self.include_lat_lon_as_feature:
                 self.selected_features.append("lat")
-            if "lon" not in self.selected_features and self.include_lat_lon:
+            if "lon" not in self.selected_features and self.include_lat_lon_as_feature:
                 self.selected_features.append("lon")
             X_train = df_region[self.selected_features + self.cat_features]
             dir_output = (
                 self.dir_analysis
                 / self.country
@@ -306,8 +328,38 @@ class Geocif:
                     self.best_hyperparams = {}
                 elif self.model_name in ["cubist"]:
                     self.model.fit(X_train, y_train)
-            except:
-                self.logger.error(f"Error fitting model for {self.country} {self.crop}")
+                elif self.model_name in [
+                    "cumulative_1",
+                    "cumulative_2",
+                    "cumulative_3",
+                ]:
+                    from sklearn.preprocessing import StandardScaler, LabelEncoder
+                    # Standardize the numeric features
+                    scaler = StandardScaler()
+                    X_numeric = X_train.iloc[:, :3]
+                    X_scaled_numeric = pd.DataFrame(
+                        scaler.fit_transform(X_numeric),
+                        columns=X_numeric.columns,
+                        index=X_train.index,
+                    )
+                    # Encode the Region as categorical
+                    le = LabelEncoder()
+                    X_region = pd.Series(
+                        le.fit_transform(X_train["Region"]),
+                        name="Region",
+                        index=X_train.index,
+                    )
+                    # Combine scaled numeric features and encoded region
+                    X_train_scaled = pd.concat([X_scaled_numeric, X_region], axis=1)
+                    self.model.fit(X_train_scaled, y_train)
+            except Exception as e:
+                self.logger.error(
+                    f"Error fitting model for {self.country} {self.crop} {e}"
+                )
     def predict(self, df_region, scaler=None):
         """
@@ -354,6 +406,33 @@ class Geocif:
                     X_test, Z_test, clusters_test.astype("object")
                 )
                 best_hyperparameters = self.model.fe_model.get_params().copy()
+            elif self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
+                from sklearn.preprocessing import StandardScaler, LabelEncoder
+                # Standardize the numeric features
+                scaler = StandardScaler()
+                X_numeric = X_test.iloc[:, :3]
+                try:
+                    X_scaled_numeric = pd.DataFrame(
+                        scaler.fit_transform(X_numeric),
+                        columns=X_numeric.columns,
+                        index=X_test.index,
+                    )
+                except:
+                    breakpoint()
+                # Encode the Region as categorical
+                le = LabelEncoder()
+                X_region = pd.Series(
+                    le.fit_transform(X_test["Region"]),
+                    name="Region",
+                    index=X_test.index,
+                )
+                # Combine scaled numeric features and encoded region
+                X_test_scaled = pd.concat([X_scaled_numeric, X_region], axis=1)
+                y_pred = self.model.predict(X_test_scaled)
+                best_hyperparameters = {}  # self.model.get_params().copy()
             elif self.model_name == "geospaNN":
                 import torch
                 import geospaNN
@@ -495,7 +574,9 @@ class Geocif:
             "Crop",
             "Harvest Year",
             "Stage Name",
+            "Time",
         ]
         df.index = df.apply(
             lambda row: "_".join([str(row[col]) for col in index_columns]), axis=1
         )
@@ -507,28 +588,37 @@ class Geocif:
     def create_feature_names(self, stages_features, selected_features):
         """
+        Create feature names for machine learning stages.
         Args:
-            stages_features:
-            selected_features:
+            stages_features (list): List of features for different stages.
+            selected_features (dict): Dictionary of selected features.
         Returns:
+            None
         """
+        # Assert stages_features is a list
+        assert isinstance(stages_features, list), "stages_features should be a list"
         # Clear out feature names
         self.feature_names = []
-        """ Select stages that will be used for ML
+        """
+        Select stages that will be used for ML
          1. method = "latest" - Select the latest stage
          2. method = "fraction" - Select a fraction (1-100) of all stages
         """
+        method = "fraction"
+        if self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
+            method = "latest"
         stages_features = stages.select_stages_for_ml(
-            stages_features, method="fraction", n=60
+            stages_features, method=method, n=60
         )
         for stage in stages_features:
             # Convert each element of stage to str and join with _
-            _stage = "_".join([str(x) for x in stage])
+            _stage = "_".join(map(str, stage))
             # Create a list appending _stage to each element of combined_keys
             _tmp = [f"{col}_{_stage}" for col in self.combined_keys]
@@ -537,17 +627,33 @@ class Geocif:
                 parts = _t.split("_")
                 cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
-                # Check if any element of dict_selected_features is in _t
-                for x in selected_features["CEI"].values:
-                    if x not in cei:
-                        continue
-                    dict_fn = stages.get_stage_information_dict(_t, self.method)
-                    tmp_col = dict_fn["CEI"] + " " + dict_fn["Stage Name"]
-                    if tmp_col in self.df_train.columns:
-                        self.feature_names.append(tmp_col)
+                try:
+                    if self.model_name in [
+                        "cumulative_1",
+                        "cumulative_2",
+                        "cumulative_3",
+                    ]:
+                        dict_fn = stages.get_stage_information_dict(_t, self.method)
+                        tmp_col = f"{dict_fn['CEI']}"
+                        if tmp_col in self.df_train.columns:
+                            self.feature_names.append(tmp_col)
+                    else:
+                        # Check if any element of dict_selected_features is in _t
+                        if selected_features["CEI"].any():
+                            for x in selected_features["CEI"].values:
+                                if x not in cei:
+                                    continue
+                                dict_fn = stages.get_stage_information_dict(
+                                    _t, self.method
+                                )
+                                tmp_col = f"{dict_fn['CEI']} {dict_fn['Stage Name']}"
+                                if tmp_col in self.df_train.columns:
+                                    self.feature_names.append(tmp_col)
+                except:
+                    breakpoint()
         self.feature_names = list(set(self.feature_names))
         if self.median_yield_as_feature:
@@ -559,16 +665,14 @@ class Geocif:
                 self.feature_names.append(f"t -{i} {self.target}")
         if self.analogous_year_yield_as_feature:
-            self.feature_names.append("Analogous Year")
-            self.feature_names.append("Analogous Year Yield")
+            self.feature_names.extend(["Analogous Year", "Analogous Year Yield"])
         if self.use_outlook_as_feature:
             self.feature_names.append("FCST")
         # Add lat and lon to feature names
-        if self.include_lat_lon:
-            self.feature_names.append("lat")
-            self.feature_names.append("lon")
+        if self.include_lat_lon_as_feature:
+            self.feature_names.extend(["lat", "lon"])
         self.selected_features = []
@@ -592,6 +696,8 @@ class Geocif:
         for idx, region in enumerate(pbar):
             if self.model_name in ["linear", "gam"]:
                 self.create_feature_names(stages, dict_best_cei[region][0:3].tolist())
+            elif self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
+                self.create_feature_names(stages, {})
             elif self.ml_model:
                 self.create_feature_names(stages, dict_selected_features[region])
             elif self.model_name in ["median"]:
@@ -721,11 +827,52 @@ class Geocif:
         parts = all_cei_columns[-1].split("_")
         cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
-        # HACK: Get feature name with GD4 in it to extract first and last stage id and name
-        cei_column = df[df.columns[df.columns.str.contains(cei)]].columns
-        # Select the longest string in cei_column
-        cei_col = max(cei_column, key=len)
-        self.stage_info = stages.get_stage_information_dict(cei_col, self.method)
+        # For each region, find the column with the longest string in cei_column
+        group_by = ["Region"]
+        groups = df.groupby(group_by)
+        if self.use_cumulative_features:
+            frames = []
+            for name, group in groups:
+                # Drop columns with all NaNs
+                group.dropna(axis=1, how="all", inplace=True)
+                cei_column = group[
+                    group.columns[group.columns.str.contains(cei)]
+                ].columns
+                max_cei_col = max(cei_column, key=len)
+                self.stage_info = stages.get_stage_information_dict(
+                    max_cei_col, self.method
+                )
+                # Subset dataframes to columns that contain self.stage_info["Stage_ID"]
+                all_columns = group.columns[
+                    group.columns.str.contains(self.stage_info["Stage_ID"])
+                ].tolist()
+                group = group[
+                    self.fixed_columns
+                    + [self.target]
+                    + self.statistics_columns
+                    + all_columns
+                ]
+                # rename all_columns to self.stage_info["CEI"]
+                group.rename(
+                    columns={
+                        col: stages.get_stage_information_dict(col, self.method)["CEI"]
+                        for col in all_columns
+                    },
+                    inplace=True,
+                )
+                frames.append(group)
+            df = pd.concat(frames)
+        else:
+            # HACK: Get feature name with GD4 in it to extract first and last stage id and name
+            cei_column = df[df.columns[df.columns.str.contains(cei)]].columns
+            # Select the longest string in cei_column
+            cei_col = max(cei_column, key=len)
+            self.stage_info = stages.get_stage_information_dict(cei_col, self.method)
         # Change column name
         # e.g. 'vDTR_7_6_5_4_3_2_1_37_36_35_34_33_32_31' to 'vDTR Mar 1-Oct 27'
@@ -789,12 +936,14 @@ class Geocif:
         mask = self.df_results["Stage_ID"].isin(_stages)
         df = self.df_results[mask]
         """ Select which CEI categories to use for ML """
         if "all" in self.use_ceis:
             pass
         else:
-            df = df[df["Type"].isin(self.use_ceis)]
+            if self.select_cei_by == "Type":
+                df = df[df["Type"].isin(self.use_ceis)]
+            elif self.select_cei_by == "Index":
+                df = df[df["Index"].isin(self.use_ceis)]
         """ Convert this dataframe into an ML ready format and save to disk """
         df = self.create_ml_dataframe(df)
@@ -859,12 +1008,17 @@ class Geocif:
         dict_kwargs["method"] = self.method
         dict_kwargs["national_correlation"] = self.national_correlation
         dict_kwargs["groupby"] = self.correlation_plot_groupby
+        dict_kwargs["cluster_strategy"] = self.cluster_strategy
         dict_kwargs["dg_country"] = self.dg_country
         dict_kwargs["combined_dict"] = self.combined_dict
+        dict_kwargs["plot_map"] = self.plot_map_for_correlation_plot
+        dict_kwargs["correlation_threshold"] = self.correlation_threshold
         if self.spatial_autocorrelation:
             sa.compute_spatial_autocorrelation(self.df_results, **dict_kwargs)
+        dict_selected_features = {}
+        dict_best_cei = {}
         if self.correlation_plots:
             self.logger.info(f"Correlation plot for {self.country} {self.crop}")
             (
@@ -940,6 +1094,8 @@ class Geocif:
         self.model_name = model
         self.experiment_name = self.parser.get("ML", "experiment_name")
         self.ml_model = self.parser.getboolean(self.model_name, "ML_model")
+        self.select_cei_by = self.parser.get(self.model_name, "select_cei_by")
+        self.use_ceis = ast.literal_eval(self.parser.get(self.model_name, "use_ceis"))
         self.model_names = ast.literal_eval(self.parser.get(self.country, "models"))
         self.optimize = self.parser.getboolean(self.country, "optimize")
         self.fraction_loocv = self.parser.getfloat(self.country, "fraction_loocv")
@@ -951,6 +1107,21 @@ class Geocif:
             self.estimate_ci = False
             self.check_yield_trend = False
             self.estimate_ci_for_all = False
+        elif self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
+            self.correlation_plots = False
+            self.lag_yield_as_feature = False
+            self.median_yield_as_feature = False
+            self.median_area_as_feature = False
+            self.analogous_year_yield_as_feature = False
+            self.last_year_yield_as_feature = False
+            self.include_lat_lon_as_feature = False
+            self.do_xai = False
+            self.estimate_ci = False
+            self.estimate_ci_for_all = False
+            self.check_yield_trend = False
+            self.cluster_strategy = "single"
+            self.select_cei_by = "Index"
+            self.use_cumulative_features = True
         else:
             self.do_xai = self.parser.getboolean("ML", "do_xai")
             self.estimate_ci = self.parser.getboolean("ML", "estimate_ci")

{geocif-0.1.45 → geocif-0.1.47}/geocif/indices_runner.py RENAMED Viewed

@@ -173,8 +173,8 @@ class cei_runner(base.BaseGeo):
             or "south_africa_maize" in i[3]
             or "mozambique_maize" in i[3]
             or "united_states_of_america" in i[3]
-               or "russian_federation" in i[3]
-               or "ukraine" in i[3]
+            or "russian_federation" in i[3]
+            or "ukraine" in i[3]
         ]
         #                 "malawi" in i[2]]

{geocif-0.1.45 → geocif-0.1.47}/geocif/indices_runner_v2.py RENAMED Viewed

@@ -47,7 +47,7 @@ class cei_runner(base.BaseGeo):
         self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
         self.base_dir = Path(
-            r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\afghanistan"
+            r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\illinois"
         )  # Path(self.parser.get("PATHS", "dir_crop_inputs"))
         self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
@@ -164,7 +164,7 @@ class cei_runner(base.BaseGeo):
         # Only keep those entries in combinations where the third elemt is
         # mozambique, south_africa, angola or dem_people's_rep_of_korea
         # This is done to test the code for these countries
-        combinations = [i for i in combinations if "afghanistan_maize_s1" in i[3]]
+        combinations = [i for i in combinations if "illinois_maize_s1" in i[3]]
         if True:
             num_cpu = int(cpu_count() * 0.5)

{geocif-0.1.45 → geocif-0.1.47}/geocif/ml/correlations.py RENAMED Viewed

@@ -74,18 +74,24 @@ def plot_feature_corr_by_time(df, **kwargs):
     forecast_season = kwargs.get("forecast_season")
     national_correlation = kwargs.get("national_correlation")
     group_by = kwargs.get("groupby")
+    plot_map = kwargs.get("plot_map")
+    region_name = kwargs.get("region_name")
     # Setup the figure and gridspec
     fig = plt.figure(figsize=(10, 5))
-    gs = fig.add_gridspec(
-        3, 2, height_ratios=[6, 5, 1], width_ratios=[5, 1.5], hspace=0.6, wspace=0.0
-    )
+    if plot_map:
+        gs = fig.add_gridspec(
+            3, 2, height_ratios=[6, 5, 1], width_ratios=[5, 1.5], hspace=0.6, wspace=0.0
+        )
+    else:
+        gs = fig.add_gridspec(3, 1, height_ratios=[6, 5, 1], hspace=0.6, wspace=0.0)
     # Assign subplots
     ax_heatmap = fig.add_subplot(gs[0:2, 0])
-    ax_map = fig.add_subplot(gs[0, 1])
     cbar_ax = fig.add_subplot(gs[2, 0])
-    ax4 = fig.add_subplot(gs[2, 1])
+    if plot_map:
+        ax_map = fig.add_subplot(gs[0, 1])
+        ax4 = fig.add_subplot(gs[2, 1])
     # Transpose and reverse the columns of the dataframe
     df_transpose = df.T
@@ -107,43 +113,43 @@ def plot_feature_corr_by_time(df, **kwargs):
     )
     ax_heatmap.tick_params(left=False, bottom=False)
-    # Plot the map using GeoPandas
-    dg_country = kwargs.get("dg_country")
-    ax_map = dg_country.plot(
-        ax=ax_map,
-        color="white",
-        edgecolor="black",
-        linewidth=1.0,
-        facecolor=None,
-        legend=False,
-    )
-    if not national_correlation:
-        id = kwargs["region_id"]
-        dg_region = dg_country[dg_country[group_by] == id]
-        ax_map = dg_region.plot(
-            ax=ax_map, color="blue", edgecolor="blue", linewidth=1.0, legend=False
+    if plot_map:
+        # Plot the map using GeoPandas
+        dg_country = kwargs.get("dg_country")
+        ax_map = dg_country.plot(
+            ax=ax_map,
+            color="white",
+            edgecolor="black",
+            linewidth=1.0,
+            facecolor=None,
+            legend=False,
         )
-        # Set title with color blue
-        ax_map.set_title(f"Region: {id}", color="blue")
-    # No colorbar for the map
-    ax_map.axis("off")
-    # Remove borders
-    ax_map.spines["top"].set_visible(False)
-    ax_map.spines["right"].set_visible(False)
-    ax_map.spines["bottom"].set_visible(False)
-    ax_map.spines["left"].set_visible(False)
-    # ax4 should not be visible
-    ax4.axis("off")
+    id = kwargs["region_id"]
+    if plot_map:
+        if not national_correlation:
+            dg_region = dg_country[dg_country[group_by] == id]
+            ax_map = dg_region.plot(
+                ax=ax_map, color="blue", edgecolor="blue", linewidth=1.0, legend=False
+            )
+            # Set title with color blue
+            ax_map.set_title(f"Region: {id}", color="blue")
+        # No colorbar for the map
+        ax_map.axis("off")
+        # Remove borders
+        ax_map.spines["top"].set_visible(False)
+        ax_map.spines["right"].set_visible(False)
+        ax_map.spines["bottom"].set_visible(False)
+        ax_map.spines["left"].set_visible(False)
+        # ax4 should not be visible
+        ax4.axis("off")
     # Add colorbar label
     # cbar_ax.set_xlabel("Correlation Coefficient", labelpad=3, size="small")
     cbar_ax.set_title("Correlation Coefficient", loc="left", size="small")
-    ax_heatmap.set_xticklabels(
-        ax_heatmap.get_xticklabels(), size="x-small", rotation=0, fontsize=5
-    )
+    ax_heatmap.set_xticklabels(ax_heatmap.get_xticklabels(), size="x-small", rotation=0, fontsize=5)
     ax_heatmap.set_yticklabels(ax_heatmap.get_yticklabels(), size="x-small", fontsize=5)
     ax_heatmap.set_xlabel("")
     ax_heatmap.set_ylabel(" ")
@@ -151,12 +157,13 @@ def plot_feature_corr_by_time(df, **kwargs):
     cbar_ax.tick_params(axis="both", which="major", labelsize=5)
     _country = country.title().replace("_", " ")
+    _region_name = region_name if not national_correlation else ""
     _crop = crop.title().replace("_", " ")
     if not national_correlation:
         fname = f"{country}_{crop}_{id}_corr_feature_by_time.png"
     else:
         fname = f"{country}_{crop}_corr_feature_by_time.png"
-    ax_heatmap.set_title(f"{_country}\n{_crop}")
+    ax_heatmap.set_title(f"{_country}, {_crop}\n{_region_name}")
     # plt.tight_layout()
     os.makedirs(dir_output, exist_ok=True)
@@ -246,14 +253,14 @@ def all_correlated_feature_by_time(df, **kwargs):
     Returns:
     """
-    THRESHOLD = 0.1
     national_correlation = kwargs.get("national_correlation")
     group_by = kwargs.get("groupby")
     combined_dict = kwargs.get("combined_dict")
+    THRESHOLD = kwargs.get("correlation_threshold")
     dict_selected_features = {}
     dict_best_cei = {}
-    breakpoint()
     if not national_correlation:
         groups = df.groupby(group_by)
         for region_id, group in tqdm(
@@ -297,6 +304,8 @@ def all_correlated_feature_by_time(df, **kwargs):
                 )
                 kwargs["region_id"] = region_id
+                _region_names = ", ".join([str(x) for x in group['Region'].unique()])
+                kwargs["region_name"] = _region_names
                 plot_feature_corr_by_time(df_tmp, **kwargs)
                 # For each element in dict_best_cei, add the type of the cei
             else:

geocif 0.1.45__tar.gz → 0.1.47__tar.gz

geocif 0.1.45__tar.gz → 0.1.47__tar.gz