PyPI - geocif - Versions diffs - 0.1.46__tar.gz → 0.1.48__tar.gz - Mend

geocif 0.1.46__tar.gz → 0.1.48__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

{geocif-0.1.46/geocif.egg-info → geocif-0.1.48}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.46
+Version: 0.1.48
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.46 → geocif-0.1.48}/geocif/analysis.py RENAMED Viewed

@@ -162,8 +162,8 @@ class Geoanalysis:
             return pd.DataFrame(), pd.DataFrame()
         df_metrics = self._compute_metrics(df)
-        # df_metrics = self._process_metrics(df_metrics)
-        # self._plot_metrics(df_metrics)
+        df_metrics = self._process_metrics(df_metrics)
+        self._plot_metrics(df_metrics)
         df_regional_metrics_by_year = self._compute_regional_metrics(
             df, by="Harvest Year"
@@ -172,8 +172,10 @@ class Geoanalysis:
             df_regional_metrics_by_year
         )
         df_regional_metrics = self._average_mape(df_regional_metrics_by_year)
-        breakpoint()
-        self._store_results(None, df_regional_metrics, df_regional_metrics_by_year)
+        self._store_results(
+            df_metrics, df_regional_metrics, df_regional_metrics_by_year
+        )
         df_national_yield = self._compute_national_yield(df)
         self._plot_national_yield(df_national_yield)
@@ -193,7 +195,7 @@ class Geoanalysis:
             .apply(self.annual_metrics)
             .reset_index()
         )
-        breakpoint()
         return df_metrics.pivot_table(
             index=["Country", "Model", "Harvest Year", "Stage Name", "Stage Range"],
             columns="level_5",

{geocif-0.1.46 → geocif-0.1.48}/geocif/experiments.py RENAMED Viewed

@@ -85,9 +85,7 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
     # Experiment: lag_years
     logger.info("Experiment 3: lag_years")
-    parser = main(
-        inputs, logger, parser, "ML", "lag_years", "int", [1, 2, 3, 4, 5]
-    )
+    parser = main(inputs, logger, parser, "ML", "lag_years", "int", [1, 2, 3, 4, 5])
     # Experiment: lag_yield_as_feature
     logger.info("Experiment 4: lag_yield_as_feature")
@@ -103,9 +101,7 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
     # Experiment: median_years
     logger.info("Experiment 5: median_years")
-    parser = main(
-        inputs, logger, parser, "ML", "median_years", "int", [2, 3, 4, 5]
-    )
+    parser = main(inputs, logger, parser, "ML", "median_years", "int", [2, 3, 4, 5])
     # Experiment: median_yield_as_feature
     logger.info("Experiment 6: median_yield_as_feature")
@@ -133,9 +129,7 @@ def run(path_config_files=[Path("../config/geocif.txt")]):
     # Experiment: optimize
     logger.info("Experiment 8: optimize")
-    parser = main(
-        inputs, logger, parser, "DEFAULT", "optimize", "bool", [True, False]
-    )
+    parser = main(inputs, logger, parser, "DEFAULT", "optimize", "bool", [True, False])
 if __name__ == "__main__":

{geocif-0.1.46 → geocif-0.1.48}/geocif/geocif.py RENAMED Viewed

@@ -11,7 +11,6 @@ import geopandas as gp
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-import sklearn
 from tqdm import tqdm
 from geocif import logger as log
@@ -28,7 +27,6 @@ from .ml import trend
 from .ml import xai
 plt.style.use("default")
-sklearn.set_config(transform_output="pandas")
 import warnings
@@ -108,7 +106,6 @@ class Geocif:
                                 Config file: ML
         ====================================================================
         """
-        self.use_ceis = ast.literal_eval(self.parser.get("ML", "use_ceis"))
         self.model_type = self.parser.get("ML", "model_type")
         self.fraction_simulate = self.parser.getint("ML", "fraction_simulate")
         self.analogous_year_yield_as_feature = self.parser.getboolean(
@@ -117,10 +114,10 @@ class Geocif:
         self.plot_map_for_correlation_plot = self.parser.getboolean(
             "ML", "plot_map_for_correlation_plot"
         )
-        self.correlation_threshold = self.parser.getfloat(
-            "ML", "correlation_threshold"
+        self.correlation_threshold = self.parser.getfloat("ML", "correlation_threshold")
+        self.include_lat_lon_as_feature = self.parser.getboolean(
+            "ML", "include_lat_lon_as_feature"
         )
-        self.include_lat_lon = self.parser.getboolean("ML", "include_lat_lon")
         self.spatial_autocorrelation = self.parser.getboolean(
             "ML", "spatial_autocorrelation"
         )
@@ -153,6 +150,9 @@ class Geocif:
             self.parser.get("ML", "cat_features")
         )
+        self.use_cumulative_features = self.parser.getboolean(
+            "DEFAULT", "use_cumulative_features"
+        )
         """
         ====================================================================
                                 Variables, Paths
@@ -198,6 +198,9 @@ class Geocif:
         self.db_path = self.dir_db / self.db_forecasts
+        # Store config file in database
+        output.config_to_db(self.db_path, self.parser, self.today)
         # self.pickle_file = self.base_dir / self.parser.get("outlook", "pickle_file")
         # obj_pickle = outlook.Outlook(self.pickle_file)
         # self.df_outlook = obj_pickle.read_outlook_file()
@@ -224,18 +227,29 @@ class Geocif:
         y_train = df_region[target_col]
         if self.ml_model:
-            self.logger.info(f"Selecting features for {self.country} {self.crop}")
-            selector, _, self.selected_features = fs.select_features(
-                X_train, y_train, method=self.feature_selection
-            )
-            self.logger.info(f"Selected features: {self.selected_features}")
+            if self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
+                all_features = X_train.columns
+                # Select the columns with use_ceis in it
+                self.selected_features = [
+                    column
+                    for column in all_features
+                    if any(cei in column for cei in self.use_ceis)
+                ]
+            else:
+                self.logger.info(f"Selecting features for {self.country} {self.crop}")
+                selector, _, self.selected_features = fs.select_features(
+                    X_train, y_train, method=self.feature_selection
+                )
+                self.logger.info(f"Selected features: {self.selected_features}")
             """ Update model to include conformal estimates """
-            if "lat" not in self.selected_features and self.include_lat_lon:
+            if "lat" not in self.selected_features and self.include_lat_lon_as_feature:
                 self.selected_features.append("lat")
-            if "lon" not in self.selected_features and self.include_lat_lon:
+            if "lon" not in self.selected_features and self.include_lat_lon_as_feature:
                 self.selected_features.append("lon")
             X_train = df_region[self.selected_features + self.cat_features]
             dir_output = (
                 self.dir_analysis
                 / self.country
@@ -288,6 +302,8 @@ class Geocif:
                         verbose=False,
                         # callbacks=[TQDMCallback(self.best_hyperparams["iterations"])],
                     )
+                elif self.model_name == "oblique":
+                    self.model.fit(X_train, y_train)
                 elif self.model_name == "geospaNN":
                     self.model.fit(
                         X_train,
@@ -312,8 +328,38 @@ class Geocif:
                     self.best_hyperparams = {}
                 elif self.model_name in ["cubist"]:
                     self.model.fit(X_train, y_train)
-            except:
-                self.logger.error(f"Error fitting model for {self.country} {self.crop}")
+                elif self.model_name in [
+                    "cumulative_1",
+                    "cumulative_2",
+                    "cumulative_3",
+                ]:
+                    from sklearn.preprocessing import StandardScaler, LabelEncoder
+                    # Standardize the numeric features
+                    scaler = StandardScaler()
+                    X_numeric = X_train.iloc[:, :3]
+                    X_scaled_numeric = pd.DataFrame(
+                        scaler.fit_transform(X_numeric),
+                        columns=X_numeric.columns,
+                        index=X_train.index,
+                    )
+                    # Encode the Region as categorical
+                    le = LabelEncoder()
+                    X_region = pd.Series(
+                        le.fit_transform(X_train["Region"]),
+                        name="Region",
+                        index=X_train.index,
+                    )
+                    # Combine scaled numeric features and encoded region
+                    X_train_scaled = pd.concat([X_scaled_numeric, X_region], axis=1)
+                    self.model.fit(X_train_scaled, y_train)
+            except Exception as e:
+                self.logger.error(
+                    f"Error fitting model for {self.country} {self.crop} {e}"
+                )
     def predict(self, df_region, scaler=None):
         """
@@ -360,6 +406,33 @@ class Geocif:
                     X_test, Z_test, clusters_test.astype("object")
                 )
                 best_hyperparameters = self.model.fe_model.get_params().copy()
+            elif self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
+                from sklearn.preprocessing import StandardScaler, LabelEncoder
+                # Standardize the numeric features
+                scaler = StandardScaler()
+                X_numeric = X_test.iloc[:, :3]
+                try:
+                    X_scaled_numeric = pd.DataFrame(
+                        scaler.fit_transform(X_numeric),
+                        columns=X_numeric.columns,
+                        index=X_test.index,
+                    )
+                except:
+                    breakpoint()
+                # Encode the Region as categorical
+                le = LabelEncoder()
+                X_region = pd.Series(
+                    le.fit_transform(X_test["Region"]),
+                    name="Region",
+                    index=X_test.index,
+                )
+                # Combine scaled numeric features and encoded region
+                X_test_scaled = pd.concat([X_scaled_numeric, X_region], axis=1)
+                y_pred = self.model.predict(X_test_scaled)
+                best_hyperparameters = {}  # self.model.get_params().copy()
             elif self.model_name == "geospaNN":
                 import torch
                 import geospaNN
@@ -501,7 +574,9 @@ class Geocif:
             "Crop",
             "Harvest Year",
             "Stage Name",
+            "Time",
         ]
         df.index = df.apply(
             lambda row: "_".join([str(row[col]) for col in index_columns]), axis=1
         )
@@ -513,28 +588,37 @@ class Geocif:
     def create_feature_names(self, stages_features, selected_features):
         """
+        Create feature names for machine learning stages.
         Args:
-            stages_features:
-            selected_features:
+            stages_features (list): List of features for different stages.
+            selected_features (dict): Dictionary of selected features.
         Returns:
+            None
         """
+        # Assert stages_features is a list
+        assert isinstance(stages_features, list), "stages_features should be a list"
         # Clear out feature names
         self.feature_names = []
-        """ Select stages that will be used for ML
+        """
+        Select stages that will be used for ML
          1. method = "latest" - Select the latest stage
          2. method = "fraction" - Select a fraction (1-100) of all stages
         """
+        method = "fraction"
+        if self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
+            method = "latest"
         stages_features = stages.select_stages_for_ml(
-            stages_features, method="fraction", n=60
+            stages_features, method=method, n=60
         )
         for stage in stages_features:
             # Convert each element of stage to str and join with _
-            _stage = "_".join([str(x) for x in stage])
+            _stage = "_".join(map(str, stage))
             # Create a list appending _stage to each element of combined_keys
             _tmp = [f"{col}_{_stage}" for col in self.combined_keys]
@@ -543,17 +627,33 @@ class Geocif:
                 parts = _t.split("_")
                 cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
-                # Check if any element of dict_selected_features is in _t
-                for x in selected_features["CEI"].values:
-                    if x not in cei:
-                        continue
-                    dict_fn = stages.get_stage_information_dict(_t, self.method)
-                    tmp_col = dict_fn["CEI"] + " " + dict_fn["Stage Name"]
-                    if tmp_col in self.df_train.columns:
-                        self.feature_names.append(tmp_col)
+                try:
+                    if self.model_name in [
+                        "cumulative_1",
+                        "cumulative_2",
+                        "cumulative_3",
+                    ]:
+                        dict_fn = stages.get_stage_information_dict(_t, self.method)
+                        tmp_col = f"{dict_fn['CEI']}"
+                        if tmp_col in self.df_train.columns:
+                            self.feature_names.append(tmp_col)
+                    else:
+                        # Check if any element of dict_selected_features is in _t
+                        if selected_features["CEI"].any():
+                            for x in selected_features["CEI"].values:
+                                if x not in cei:
+                                    continue
+                                dict_fn = stages.get_stage_information_dict(
+                                    _t, self.method
+                                )
+                                tmp_col = f"{dict_fn['CEI']} {dict_fn['Stage Name']}"
+                                if tmp_col in self.df_train.columns:
+                                    self.feature_names.append(tmp_col)
+                except:
+                    breakpoint()
         self.feature_names = list(set(self.feature_names))
         if self.median_yield_as_feature:
@@ -565,16 +665,14 @@ class Geocif:
                 self.feature_names.append(f"t -{i} {self.target}")
         if self.analogous_year_yield_as_feature:
-            self.feature_names.append("Analogous Year")
-            self.feature_names.append("Analogous Year Yield")
+            self.feature_names.extend(["Analogous Year", "Analogous Year Yield"])
         if self.use_outlook_as_feature:
             self.feature_names.append("FCST")
         # Add lat and lon to feature names
-        if self.include_lat_lon:
-            self.feature_names.append("lat")
-            self.feature_names.append("lon")
+        if self.include_lat_lon_as_feature:
+            self.feature_names.extend(["lat", "lon"])
         self.selected_features = []
@@ -598,6 +696,8 @@ class Geocif:
         for idx, region in enumerate(pbar):
             if self.model_name in ["linear", "gam"]:
                 self.create_feature_names(stages, dict_best_cei[region][0:3].tolist())
+            elif self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
+                self.create_feature_names(stages, {})
             elif self.ml_model:
                 self.create_feature_names(stages, dict_selected_features[region])
             elif self.model_name in ["median"]:
@@ -727,11 +827,52 @@ class Geocif:
         parts = all_cei_columns[-1].split("_")
         cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
-        # HACK: Get feature name with GD4 in it to extract first and last stage id and name
-        cei_column = df[df.columns[df.columns.str.contains(cei)]].columns
-        # Select the longest string in cei_column
-        cei_col = max(cei_column, key=len)
-        self.stage_info = stages.get_stage_information_dict(cei_col, self.method)
+        # For each region, find the column with the longest string in cei_column
+        group_by = ["Region"]
+        groups = df.groupby(group_by)
+        if self.use_cumulative_features:
+            frames = []
+            for name, group in groups:
+                # Drop columns with all NaNs
+                group.dropna(axis=1, how="all", inplace=True)
+                cei_column = group[
+                    group.columns[group.columns.str.contains(cei)]
+                ].columns
+                max_cei_col = max(cei_column, key=len)
+                self.stage_info = stages.get_stage_information_dict(
+                    max_cei_col, self.method
+                )
+                # Subset dataframes to columns that contain self.stage_info["Stage_ID"]
+                all_columns = group.columns[
+                    group.columns.str.contains(self.stage_info["Stage_ID"])
+                ].tolist()
+                group = group[
+                    self.fixed_columns
+                    + [self.target]
+                    + self.statistics_columns
+                    + all_columns
+                ]
+                # rename all_columns to self.stage_info["CEI"]
+                group.rename(
+                    columns={
+                        col: stages.get_stage_information_dict(col, self.method)["CEI"]
+                        for col in all_columns
+                    },
+                    inplace=True,
+                )
+                frames.append(group)
+            df = pd.concat(frames)
+        else:
+            # HACK: Get feature name with GD4 in it to extract first and last stage id and name
+            cei_column = df[df.columns[df.columns.str.contains(cei)]].columns
+            # Select the longest string in cei_column
+            cei_col = max(cei_column, key=len)
+            self.stage_info = stages.get_stage_information_dict(cei_col, self.method)
         # Change column name
         # e.g. 'vDTR_7_6_5_4_3_2_1_37_36_35_34_33_32_31' to 'vDTR Mar 1-Oct 27'
@@ -795,12 +936,14 @@ class Geocif:
         mask = self.df_results["Stage_ID"].isin(_stages)
         df = self.df_results[mask]
         """ Select which CEI categories to use for ML """
         if "all" in self.use_ceis:
             pass
         else:
-            df = df[df["Type"].isin(self.use_ceis)]
+            if self.select_cei_by == "Type":
+                df = df[df["Type"].isin(self.use_ceis)]
+            elif self.select_cei_by == "Index":
+                df = df[df["Index"].isin(self.use_ceis)]
         """ Convert this dataframe into an ML ready format and save to disk """
         df = self.create_ml_dataframe(df)
@@ -874,6 +1017,8 @@ class Geocif:
         if self.spatial_autocorrelation:
             sa.compute_spatial_autocorrelation(self.df_results, **dict_kwargs)
+        dict_selected_features = {}
+        dict_best_cei = {}
         if self.correlation_plots:
             self.logger.info(f"Correlation plot for {self.country} {self.crop}")
             (
@@ -949,6 +1094,8 @@ class Geocif:
         self.model_name = model
         self.experiment_name = self.parser.get("ML", "experiment_name")
         self.ml_model = self.parser.getboolean(self.model_name, "ML_model")
+        self.select_cei_by = self.parser.get(self.model_name, "select_cei_by")
+        self.use_ceis = ast.literal_eval(self.parser.get(self.model_name, "use_ceis"))
         self.model_names = ast.literal_eval(self.parser.get(self.country, "models"))
         self.optimize = self.parser.getboolean(self.country, "optimize")
         self.fraction_loocv = self.parser.getfloat(self.country, "fraction_loocv")
@@ -960,6 +1107,21 @@ class Geocif:
             self.estimate_ci = False
             self.check_yield_trend = False
             self.estimate_ci_for_all = False
+        elif self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
+            self.correlation_plots = False
+            self.lag_yield_as_feature = False
+            self.median_yield_as_feature = False
+            self.median_area_as_feature = False
+            self.analogous_year_yield_as_feature = False
+            self.last_year_yield_as_feature = False
+            self.include_lat_lon_as_feature = False
+            self.do_xai = False
+            self.estimate_ci = False
+            self.estimate_ci_for_all = False
+            self.check_yield_trend = False
+            self.cluster_strategy = "single"
+            self.select_cei_by = "Index"
+            self.use_cumulative_features = True
         else:
             self.do_xai = self.parser.getboolean("ML", "do_xai")
             self.estimate_ci = self.parser.getboolean("ML", "estimate_ci")

{geocif-0.1.46 → geocif-0.1.48}/geocif/indices_runner.py RENAMED Viewed

@@ -173,8 +173,8 @@ class cei_runner(base.BaseGeo):
             or "south_africa_maize" in i[3]
             or "mozambique_maize" in i[3]
             or "united_states_of_america" in i[3]
-               or "russian_federation" in i[3]
-               or "ukraine" in i[3]
+            or "russian_federation" in i[3]
+            or "ukraine" in i[3]
         ]
         #                 "malawi" in i[2]]

{geocif-0.1.46 → geocif-0.1.48}/geocif/indices_runner_v2.py RENAMED Viewed

@@ -47,7 +47,7 @@ class cei_runner(base.BaseGeo):
         self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
         self.base_dir = Path(
-            r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\afghanistan"
+            r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\illinois"
         )  # Path(self.parser.get("PATHS", "dir_crop_inputs"))
         self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
@@ -164,7 +164,7 @@ class cei_runner(base.BaseGeo):
         # Only keep those entries in combinations where the third elemt is
         # mozambique, south_africa, angola or dem_people's_rep_of_korea
         # This is done to test the code for these countries
-        combinations = [i for i in combinations if "afghanistan_maize_s1" in i[3]]
+        combinations = [i for i in combinations if "illinois_maize_s1" in i[3]]
         if True:
             num_cpu = int(cpu_count() * 0.5)

{geocif-0.1.46 → geocif-0.1.48}/geocif/ml/correlations.py RENAMED Viewed

@@ -157,8 +157,8 @@ def plot_feature_corr_by_time(df, **kwargs):
     cbar_ax.tick_params(axis="both", which="major", labelsize=5)
     _country = country.title().replace("_", " ")
-    _region_name = region_name.replace("_", " ") if not national_correlation else ""
-    _crop = "Poppy"  # crop.title().replace("_", " ")
+    _region_name = region_name if not national_correlation else ""
+    _crop = crop.title().replace("_", " ")
     if not national_correlation:
         fname = f"{country}_{crop}_{id}_corr_feature_by_time.png"
     else:
@@ -304,7 +304,7 @@ def all_correlated_feature_by_time(df, **kwargs):
                 )
                 kwargs["region_id"] = region_id
-                _region_names = "_".join([str(x) for x in group['Region'].unique()])
+                _region_names = ", ".join([str(x) for x in group['Region'].unique()])
                 kwargs["region_name"] = _region_names
                 plot_feature_corr_by_time(df_tmp, **kwargs)
                 # For each element in dict_best_cei, add the type of the cei

geocif-0.1.48/geocif/ml/misc.py ADDED Viewed

@@ -0,0 +1,33 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from taipy.gui import Gui
+# Load the dataset
+file_path = r'D:\Users\ritvik\projects\GEOGLAM\Output\ml\analysis\July_05_2024\russian_federation\maize\cumulative_1\2010\X_train_1.csv'  # Update with the correct file path
+df = pd.read_csv(file_path)
+print(df.head())
+# Define a function to create the plot
+def plot_auc_ndvi(data):
+    fig, ax = plt.subplots(figsize=(14, 8))
+    sns.lineplot(data=data, x="Harvest Year", y="AUC_NDVI Oct 7-Mar 25", hue="Region", marker="o", ax=ax)
+    ax.set_title("Trends of AUC_NDVI by Region (Oct 7 - Mar 25)")
+    ax.set_xlabel("Harvest Year")
+    ax.set_ylabel("AUC_NDVI Oct 7 - Mar 25")
+    ax.legend(title="Region", bbox_to_anchor=(1.05, 1), loc='upper left')
+    plt.show()
+    return fig
+# Create the plot and save it
+plot_fig = plot_auc_ndvi(df)
+# Define the Taipy page with the plot
+page = """
+# Trends of AUC_NDVI by Region
+<|{plot_fig}|chart|>
+"""
+# Create and run the GUI
+gui = Gui(page)
+gui.run()

{geocif-0.1.46 → geocif-0.1.48}/geocif/ml/output.py RENAMED Viewed

@@ -107,7 +107,6 @@ def store(db_path, experiment_id, df, model, model_name):
     try:
         utils.to_db(db_path, experiment_id, df)
     except Exception as e:
-        breakpoint()
         print(f"Error: {e}")
     index_columns = ["Country", "Region", "Crop", "Harvest Year", "Stages"]
@@ -128,7 +127,6 @@ def store(db_path, experiment_id, df, model, model_name):
         df_model.index.set_names(["Index"], inplace=True)
         utils.to_db(db_path, "models", df_model)
     except Exception as e:
-        breakpoint()
         print(f"Error: {e}")
     con.commit()

{geocif-0.1.46 → geocif-0.1.48}/geocif/ml/stages.py RENAMED Viewed

@@ -1,4 +1,5 @@
 import numpy as np
+from typing import Union
 from geocif import utils
@@ -277,23 +278,31 @@ def update_feature_names(df, method):
     return df
-def convert_stage_string(stage_info, to_array=True):
+def convert_stage_string(stage_info: Union[str, np.ndarray], to_array: bool = True) -> Union[np.ndarray, str]:
     """
-    e.g. input: '13_12_11'
-    output: array([13, 12, 11])
-    or vice versa if to_array = False
+    Converts a string of stage information to a numpy array or vice versa.
     Args:
-        stage_info:
-        to_array:
+        stage_info: A string of stages separated by underscores or a numpy array of stages e.g. '13_12_11'
+        to_array: A boolean indicating the direction of conversion. If True, converts string to numpy array e.g. array([13, 12, 11])
+                  If False, converts numpy array to string.
     Returns:
+        A numpy array of stages if to_array is True, or a string of stages if to_array is False.
+    Raises:
+        ValueError: If the input format is incorrect.
     """
     if to_array:
-        stages = stage_info.split("_")
-        stages = np.array([int(stage) for stage in stages])
+        if not isinstance(stage_info, str):
+            raise ValueError("Expected a string for stage_info when to_array is True.")
+        try:
+            stages = np.array([int(stage) for stage in stage_info.split("_")])
+        except ValueError:
+            raise ValueError("Stage info string should contain integers separated by underscores.")
     else:
-        stages = "_".join(stage_info.astype(str))
+        if not isinstance(stage_info, np.ndarray):
+            raise ValueError("Expected a numpy array for stage_info when to_array is False.")
+        stages = "_".join(map(str, stage_info))
     return stages

geocif 0.1.46__tar.gz → 0.1.48__tar.gz

geocif 0.1.46__tar.gz → 0.1.48__tar.gz