geocif 0.1.48__tar.gz → 0.1.50__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geocif-0.1.48/geocif.egg-info → geocif-0.1.50}/PKG-INFO +1 -1
- {geocif-0.1.48 → geocif-0.1.50}/geocif/analysis.py +12 -5
- {geocif-0.1.48 → geocif-0.1.50}/geocif/geocif.py +84 -28
- {geocif-0.1.48 → geocif-0.1.50}/geocif/logger.py +24 -1
- geocif-0.1.50/geocif/ml/aa.py +28 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/feature_selection.py +10 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/output.py +13 -13
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/stats.py +7 -1
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/trainers.py +27 -22
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/trend.py +32 -11
- geocif-0.1.50/geocif/viz/misc.py +55 -0
- {geocif-0.1.48 → geocif-0.1.50/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.48 → geocif-0.1.50}/geocif.egg-info/SOURCES.txt +2 -0
- {geocif-0.1.48 → geocif-0.1.50}/setup.py +1 -1
- {geocif-0.1.48 → geocif-0.1.50}/LICENSE +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/MANIFEST.in +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/README.md +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/__init__.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/backup/constants.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/backup/features.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/backup/geo.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/backup/models.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/cei/indices.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/experiments.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/indices_runner.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/indices_runner_v2.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/embedding.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/feature_engineering.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/misc.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/stages.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/ml/xai.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/playground/automl.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/playground/misc.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/utils.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif/viz/plot.py +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/requirements.txt +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/setup.cfg +0 -0
- {geocif-0.1.48 → geocif-0.1.50}/tests/test_geocif.py +0 -0
geocif/analysis.py
@@ -163,6 +163,7 @@ class Geoanalysis:
 
         df_metrics = self._compute_metrics(df)
         df_metrics = self._process_metrics(df_metrics)
+
         self._plot_metrics(df_metrics)
 
         df_regional_metrics_by_year = self._compute_regional_metrics(
@@ -183,6 +184,11 @@ class Geoanalysis:
         return df_metrics, df_regional_metrics, df_national_yield
 
     def _clean_data(self):
+        # Hack exclude 2012 if country == "illinois"
+        if self.country == "illinois":
+            self.df_analysis = self.df_analysis[
+                self.df_analysis["Harvest Year"] != 2012
+            ]
         # Remove rows with missing values in Observed Yield (tn per ha)
         return self.df_analysis.dropna(subset=["Observed Yield (tn per ha)"])
 
@@ -196,11 +202,12 @@ class Geoanalysis:
             .reset_index()
         )
 
-        return df_metrics.pivot_table(
-            index=["Country", "Model", "Harvest Year", "Stage Name", "Stage Range"],
-            columns="level_5",
-            values=0,
-        ).reset_index()
+        # return df_metrics.pivot_table(
+        #     index=["Country", "Model", "Harvest Year", "Stage Name", "Stage Range"],
+        #     columns="level_5",
+        #     values=0,
+        # ).reset_index()
+        return df_metrics
 
     def _process_metrics(self, df_metrics):
         # Assign each unique Stage Name a unique integer identifier
geocif/geocif.py
@@ -82,6 +82,13 @@ class Geocif:
         self.today_full = self._date.format("MMMM_DD_YYYY_HH_mm")
 
         self.df_forecast = pd.DataFrame()
+        """
+        ====================================================================
+        Config file: Logging
+        ====================================================================
+        """
+        self.log_level = self.parser.get("LOGGING", "log_level")
+
         """
         ====================================================================
         Config file: Default
@@ -198,9 +205,6 @@ class Geocif:
 
         self.db_path = self.dir_db / self.db_forecasts
 
-        # Store config file in database
-        output.config_to_db(self.db_path, self.parser, self.today)
-
         # self.pickle_file = self.base_dir / self.parser.get("outlook", "pickle_file")
         # obj_pickle = outlook.Outlook(self.pickle_file)
         # self.df_outlook = obj_pickle.read_outlook_file()
@@ -221,6 +225,9 @@ class Geocif:
             f"Detrended {self.target}" if self.check_yield_trend else self.target
         )
 
+        # Drop rows where target_col is NaN
+        df_region = df_region.dropna(subset=[target_col])
+
         X_train = df_region[self.feature_names]
         # Drop any columns with NaNs
         X_train = X_train.dropna(axis=1, how="any")
@@ -280,7 +287,7 @@ class Geocif:
             X_train_scaled,
             y_train,
             feature_names=self.selected_features,
-            target_col=
+            target_col=target_col,
             optimize=self.optimize,
             fraction_loocv=self.fraction_loocv,
             cat_features=self.cat_features,
@@ -302,8 +309,13 @@ class Geocif:
                 verbose=False,
                 # callbacks=[TQDMCallback(self.best_hyperparams["iterations"])],
             )
-        elif self.model_name
+        elif self.model_name in ["oblique"]:
             self.model.fit(X_train, y_train)
+        elif self.model_name == "ydf":
+            # Combine X_train and y_train
+            df_train = pd.concat([X_train, y_train], axis=1)
+
+            self.model = self.model.train(df_train)
        elif self.model_name == "geospaNN":
             self.model.fit(
                 X_train,
@@ -335,9 +347,16 @@ class Geocif:
         ]:
             from sklearn.preprocessing import StandardScaler, LabelEncoder
 
+            if self.model_name == "cumulative_1":
+                num_columns = 1
+            elif self.model_name == "cumulative_2":
+                num_columns = 2
+            elif self.model_name == "cumulative_3":
+                num_columns = 3
+
             # Standardize the numeric features
             scaler = StandardScaler()
-            X_numeric = X_train.iloc[:, :
+            X_numeric = X_train.iloc[:, :num_columns]
             X_scaled_numeric = pd.DataFrame(
                 scaler.fit_transform(X_numeric),
                 columns=X_numeric.columns,
@@ -409,9 +428,16 @@ class Geocif:
         elif self.model_name in ["cumulative_1", "cumulative_2", "cumulative_3"]:
             from sklearn.preprocessing import StandardScaler, LabelEncoder
 
+            if self.model_name == "cumulative_1":
+                num_columns = 1
+            elif self.model_name == "cumulative_2":
+                num_columns = 2
+            elif self.model_name == "cumulative_3":
+                num_columns = 3
+
             # Standardize the numeric features
             scaler = StandardScaler()
-            X_numeric = X_test.iloc[:, :
+            X_numeric = X_test.iloc[:, :num_columns]
             try:
                 X_scaled_numeric = pd.DataFrame(
                     scaler.fit_transform(X_numeric),
@@ -455,7 +481,9 @@ class Geocif:
                 self.selected_features + self.cat_features + [self.target]
             ]
             w_train = data_train.y - self.estimate(data_train.x)
-
+        elif self.model_name == "ydf":
+            y_pred = self.model.evaluate(X_test)
+            best_hyperparameters = {}
         else:
             y_pred = self.model.predict(X_test)
             best_hyperparameters = self.model.get_params().copy()
@@ -468,8 +496,8 @@ class Geocif:
 
             obj_trend = trend.DetrendedData(
                 df_tmp[f"Detrended {self.target}"],
-                df_tmp["
-                df_tmp["
+                df_tmp["Detrended Model"],
+                df_tmp["Detrended Model Type"],
             )
 
             # Retrend the predicted yield
@@ -477,6 +505,8 @@ class Geocif:
                 obj_trend, df_region.iloc[idx][["Harvest Year"]]
             )[0]
 
+            df_region.loc[idx, "Detrended Model Type"] = obj_trend.model_type.unique()[0]
+
         # Create a dataframe with forecast results
         shp = len(X_test)
         experiment_id = f"{self.country}_{self.crop}"
@@ -530,7 +560,6 @@ class Geocif:
 
         if self.check_yield_trend:
             df.loc[:, "Detrended Model Type"] = df_region["Detrended Model Type"].values
-            df.loc[:, "Detrended Model"] = df_region["Detrended Model"].values
 
         if self.last_year_yield_as_feature:
             # Add last year yield to dataframe
@@ -729,7 +758,7 @@ class Geocif:
             + ["Region_ID"]
         )
         if self.check_yield_trend:
-            common_columns += ["Detrended Model Type", "Detrended Model"]
+            common_columns += [f"Detrended {self.target}", "Detrended Model Type", "Detrended Model"]
 
         if self.last_year_yield_as_feature:
             common_columns += [f"Last Year {self.target}"]
@@ -738,11 +767,15 @@ class Geocif:
         # Filter dataframe based on region and self.feature_names
         df_region_train = self.df_train[mask_train]
         df_region_train = df_region_train[self.fixed_columns + common_columns]
+        df_region_train.reset_index(drop=True, inplace=True)
         self.train(df_region_train, scaler)
 
         """ Predict """
+        if self.check_yield_trend:
+            common_columns = common_columns[:-3]
         df_region_test = self.df_test[mask_test]
         df_region_test = df_region_test[self.fixed_columns + common_columns]
+        df_region_test.reset_index(drop=True, inplace=True)
         experiment_id, df = self.predict(df_region_test, scaler)
         # df.reset_index(inplace=True)
 
@@ -849,12 +882,15 @@ class Geocif:
                 group.columns.str.contains(self.stage_info["Stage_ID"])
             ].tolist()
 
-            group = group[
-                self.fixed_columns
-                + [self.target]
-                + self.statistics_columns
-                + all_columns
-            ]
+            try:
+                group = group[
+                    self.fixed_columns
+                    + [self.target]
+                    + self.statistics_columns
+                    + all_columns
+                ]
+            except:
+                continue
             # rename all_columns to self.stage_info["CEI"]
             group.rename(
                 columns={
@@ -897,12 +933,12 @@ class Geocif:
 
         if self.lag_yield_as_feature:
             df = fe.compute_lag_yield(
-                df, self.all_seasons_with_yield, self.number_lag_years
+                df, self.all_seasons_with_yield, self.number_lag_years, self.target
             )
 
         if self.analogous_year_yield_as_feature:
             df = fe.compute_analogous_yield(
-                df, self.all_seasons_with_yield, self.number_median_years
+                df, self.all_seasons_with_yield, self.number_median_years, self.target
             )
 
         # Create Region_ID column based on Region column category code
@@ -912,7 +948,7 @@ class Geocif:
         elif self.cluster_strategy == "individual":
             df["Region_ID"] = df["Region"].cat.codes
         elif self.cluster_strategy == "auto_detect":
-            clusters_assigned = fe.detect_clusters(df)
+            clusters_assigned = fe.detect_clusters(df, self.target)
             # Merge the cluster labels with the original DataFrame
             df = df.merge(clusters_assigned, on="Region")
 
@@ -1036,8 +1072,8 @@ class Geocif:
 
         """ Groupby Region column and compute detrended yield """
         self.df_train[f"Detrended {self.target}"] = np.NaN
-        self.df_train["
-        self.df_train["
+        self.df_train["Detrended Model"] = np.NaN
+        self.df_train["Detrended Model Type"] = np.NaN
         if self.check_yield_trend:
             group_by = ["Region"]
             groups = self.df_train.groupby(group_by)
@@ -1050,10 +1086,10 @@ class Geocif:
                     group.index, f"Detrended {self.target}"
                 ] = detrended_data.detrended_series
                 self.df_train.loc[
-                    group.index, "
+                    group.index, "Detrended Model"
                 ] = detrended_data.trend_model
                 self.df_train.loc[
-                    group.index, "
+                    group.index, "Detrended Model Type"
                 ] = detrended_data.model_type
 
         # 6. Exclude years without yields from df_train
@@ -1118,10 +1154,19 @@ class Geocif:
             self.do_xai = False
             self.estimate_ci = False
             self.estimate_ci_for_all = False
-            self.check_yield_trend =
+            self.check_yield_trend = True
             self.cluster_strategy = "single"
             self.select_cei_by = "Index"
             self.use_cumulative_features = True
+        elif self.model_name in ["oblique", "ydf"]:
+            self.do_xai = False
+            self.estimate_ci = False
+            # Remove Region from cat_features as it is object type
+            self.cat_features = [col for col in self.cat_features if col != "Region"]
+            # if self.model_name == "ydf":
+            #     # HACK, for ydf model, target_col is Yield
+            #     self.df_results.rename(columns={self.target: "Yield"}, inplace=True)
+            #     self.target = "Yield"
         else:
             self.do_xai = self.parser.getboolean("ML", "do_xai")
             self.estimate_ci = self.parser.getboolean("ML", "estimate_ci")
@@ -1188,6 +1233,9 @@ class Geocif:
             self.dg["Country Region"] = (
                 self.dg["ADM0_NAME"] + " " + self.dg["ADM1_NAME"]
             )
+        elif self.country == "illinois":
+            self.dg["ADM0_NAME"] = "illinois"
+            self.dg["Country Region"] = self.dg["ADM0_NAME"] + " " + self.dg["NAME"]
         else:
             self.dg["Country Region"] = (
                 self.dg["ADM0_NAME"] + " " + self.dg["ADM2_NAME"]
@@ -1240,6 +1288,9 @@ class Geocif:
         # TODO ignore file with _2000 in its name
         all_files = [f for f in all_files if "_2000" not in f.name]
 
+        # Assert that all_files is not empty
+        assert all_files, f"No files found in {_dir_country} with {file_name}"
+
         self.df_results = pd.concat(
             (pd.read_csv(f) for f in all_files), ignore_index=True
         )
@@ -1284,7 +1335,7 @@ def loop_execute(inputs):
     )
 
     with PyCallGraph(output=graphviz, config=config):
-        country, crop, season, model, logger, parser = inputs
+        country, crop, season, model, logger, parser, index = inputs
 
         logger.info("=====================================================")
        logger.info(f"\tStarting GEOCIF: {country} {crop} {season} {model}")
@@ -1293,6 +1344,11 @@ def loop_execute(inputs):
         obj = Geocif(logger=logger, parser=parser)
         obj.read_data(country, crop, season)
 
+        # Store config file in database, only execute this for
+        # the first iteration of the loop
+        if index == 0:
+            output.config_to_db(obj.db_path, obj.parser, obj.today)
+
         # Setup metadata and run ML code
         obj.setup(season, model)
         if obj.simulation_stages:
@@ -1336,7 +1392,7 @@ def execute_models(inputs, logger, parser):
     do_parallel = parser.getboolean("DEFAULT", "do_parallel")
 
     # Add logger and parser to each element in inputs
-    inputs = [item + [logger, parser] for item in inputs]
+    inputs = [item + [logger, parser, idx] for idx, item in enumerate(inputs)]
 
     if do_parallel:
         cpu_count = int(mp.cpu_count() * 0.3)
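Note on the new "ydf" branch above: ydf learners train on a single DataFrame that bundles the features and the label column, which is why the diff concatenates X_train and y_train before calling train(). A minimal sketch of that calling convention, assuming the ydf package and hypothetical toy data:

import pandas as pd
import ydf

# Hypothetical toy frame; ydf expects features and the label column together
df_train = pd.DataFrame({"cei_1": [0.2, 0.5, 0.9, 1.3], "yield": [2.1, 2.9, 4.2, 5.1]})

learner = ydf.GradientBoostedTreesLearner(label="yield", task=ydf.Task.REGRESSION)
model = learner.train(df_train)

preds = model.predict(df_train)    # array of per-row predictions
report = model.evaluate(df_train)  # an evaluation report object, not raw predictions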
geocif/logger.py
@@ -71,8 +71,29 @@ class Logger:
         self.logger.error(msg)
 
 
+def get_logging_level(level):
+    """
+
+    Args:
+        level:
+
+    Returns:
+
+    """
+    if level == "DEBUG":
+        return logging.DEBUG
+    elif level == "INFO":
+        return logging.INFO
+    elif level == "WARNING":
+        return logging.WARNING
+    elif level == "ERROR":
+        return logging.ERROR
+    else:
+        return logging.INFO
+
+
 def setup_logger_parser(
-    path_config_file, name_project="geocif", name_file="ml"
+        path_config_file, name_project="geocif", name_file="ml"
 ):
     """
 
@@ -87,6 +108,8 @@ def setup_logger_parser(
     """
     parser = read_config(path_config_file)
    dir_log = parser.get("PATHS", "dir_log")
+    level = parser.get("LOGGING", "log_level")
+    level = get_logging_level(level)
 
     logger = Logger(
         dir_log=dir_log,
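Aside: the new get_logging_level helper maps config strings to logging constants with an if/elif chain; the stdlib can do the same lookup directly. A one-line alternative (hypothetical sketch, not part of the release):

import logging

def get_logging_level(level):
    # Unknown strings fall back to INFO, matching the helper's else branch
    return getattr(logging, level.upper(), logging.INFO)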
geocif/ml/aa.py (new file)
@@ -0,0 +1,28 @@
+import ydf
+import pandas as pd
+
+# Load dataset with Pandas
+ds_path = "https://raw.githubusercontent.com/google/yggdrasil-decision-forests/main/yggdrasil_decision_forests/test_data/dataset/"
+train_ds = pd.read_csv(ds_path + "adult_train.csv")
+test_ds = pd.read_csv(ds_path + "adult_test.csv")
+
+# Train a Gradient Boosted Trees model
+model = ydf.GradientBoostedTreesLearner(label="income").train(train_ds)
+
+# Look at a model (input features, training logs, structure, etc.)
+model.describe()
+
+# Evaluate a model (e.g. roc, accuracy, confusion matrix, confidence intervals)
+model.evaluate(test_ds)
+
+# Generate predictions
+model.predict(test_ds)
+
+# Analyse a model (e.g. partial dependence plot, variable importance)
+model.analyze(test_ds)
+
+# Benchmark the inference speed of a model
+model.benchmark(test_ds)
+
+# Save the model
+model.save("/tmp/my_model")
geocif/ml/feature_selection.py
@@ -131,6 +131,16 @@ def select_features(X, y, method="RFE", min_features_to_select=3):
 
         X_filtered = selector.fit_transform(X, y)
         selected_features = X_filtered.columns.tolist()
+    elif method == "mrmr":
+        from mrmr import mrmr_regression
+
+        try:
+            selected_features = mrmr_regression(X=X, y=y, K=10)
+        except:
+            breakpoint()
+        # combine X and y into a dataframe
+        # df = pd.concat([X, y], axis=1)
+
     elif method == "RFECV":
         from sklearn.feature_selection import RFECV
         from sklearn.model_selection import KFold
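For context, the new "mrmr" branch ranks features by minimum-redundancy maximum-relevance. A minimal usage sketch of mrmr_regression, assuming the mrmr_selection package and synthetic data:

import numpy as np
import pandas as pd
from mrmr import mrmr_regression

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(100, 20)), columns=[f"f{i}" for i in range(20)])
y = pd.Series(2 * X["f0"] - X["f3"] + rng.normal(size=100))

# Returns a list of K column names, best-ranked first
selected_features = mrmr_regression(X=X, y=y, K=10)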
geocif/ml/output.py
@@ -109,21 +109,21 @@ def store(db_path, experiment_id, df, model, model_name):
     except Exception as e:
         print(f"Error: {e}")
 
-    index_columns = ["Country", "Region", "Crop", "Harvest Year", "Stages"]
-    # Output model pickle as a blob to database
-    df_model = pd.DataFrame(
-        {
-            "Experiment_ID": [experiment_id],
-            "Model": [model_name],
-            "Model_Blob": [pickle.dumps(model)],
-        }
-    )
-    # df_model.index = df_model.apply(
-    #     lambda row: "_".join([str(row[col]) for col in index_columns]), axis=1
-    # )
-
     # name the index level
     try:
+        index_columns = ["Country", "Region", "Crop", "Harvest Year", "Stages"]
+        # Output model pickle as a blob to database
+        df_model = pd.DataFrame(
+            {
+                "Experiment_ID": [experiment_id],
+                "Model": [model_name],
+                "Model_Blob": [pickle.dumps(model)],
+            }
+        )
+        # df_model.index = df_model.apply(
+        #     lambda row: "_".join([str(row[col]) for col in index_columns]), axis=1
+        # )
+
         df_model.index.set_names(["Index"], inplace=True)
         utils.to_db(db_path, "models", df_model)
     except Exception as e:
geocif/ml/stats.py
@@ -191,7 +191,12 @@ def add_statistics(
 
     """
     # First check if country and crop are in the admin_crop_production.csv file
-    fn = "adm_crop_production.csv"
+    if country == "Afghanistan":
+        fn = "afghanistan.csv"
+    elif country == "Illinois":
+        fn = "illinois.csv"
+    else:
+        fn = "adm_crop_production.csv"
     df_fewsnet = pd.read_csv(dir_stats / fn, low_memory=False)
 
     # HACK
@@ -206,6 +211,7 @@ def add_statistics(
         df = add_GEOGLAM_statistics(dir_stats, df, stats, method, admin_zone)
     else:
         group_by = ["Region", "Harvest Year"]
+
         groups = df.groupby(group_by)
 
         # Define processing for each group
geocif/ml/trainers.py
@@ -264,8 +264,7 @@ def auto_train(
     if model_name in ["catboost", "merf"]:
         hyperparams = {
             "depth": 6,
-
-            "iterations": 5000,
+
             "subsample": 1.0,
             "random_strength": 0.5,
             "reg_lambda": 0.001,
@@ -283,18 +282,33 @@ def auto_train(
         regr = CatBoostRegressor(**hyperparams, cat_features=cat_features)
         model = MERF(regr, max_iterations=10)
     elif model_name == "oblique":
-
-
+        from treeple import ExtraObliqueRandomForestRegressor
+
+        # https://docs.neurodata.io/treeple/dev/modules/supervised_tree.html#oblique-trees
+        n_features = X_train.shape[1]
 
-
-
-
-
+        model = ExtraObliqueRandomForestRegressor(
+            n_estimators=1500,
+            max_depth=20,
+            max_features=n_features**2,
+            feature_combinations=n_features,
             n_jobs=-1,
-            verbose=2,
             random_state=42,
         )
-
+    elif model_name == "ydf":
+        import ydf
+        templates = ydf.GradientBoostedTreesLearner.hyperparameter_templates()
+
+        model = ydf.GradientBoostedTreesLearner(
+            label=target_col,
+            task=ydf.Task.REGRESSION,
+            growing_strategy='BEST_FIRST_GLOBAL',
+            categorical_algorithm='RANDOM',
+            split_axis='SPARSE_OBLIQUE',
+            sparse_oblique_normalization='MIN_MAX',
+            sparse_oblique_num_projections_exponent=2.0)
+
+        hyperparams = templates["benchmark_rank1v1"]
     elif model_name == "linear":
         from sklearn.linear_model import LassoCV
 
@@ -308,24 +322,15 @@ def auto_train(
     elif model_name == "cumulative_1":
         from pygam import GAM, s, f, te
 
-
-        region_idx = X_train.columns.get_loc("Region")
-
-        model = GAM(s(0) + f(region_idx))
+        model = GAM(s(0) + f(1))
     elif model_name == "cumulative_2":
         from pygam import GAM, s, f, te
 
-
-        region_idx = X_train.columns.get_loc("Region")
-
-        model = GAM(s(0) + s(1) + te(0, 1) + f(region_idx))
+        model = GAM(s(0) + s(1) + te(0, 1) + f(2))
     elif model_name == "cumulative_3":
         from pygam import GAM, s, f, te
 
-
-        region_idx = X_train.columns.get_loc("Region")
-
-        model = GAM(s(0) + s(1) + s(2) + te(0, 1) + te(0, 2) + te(1, 2) + f(region_idx))
+        model = GAM(s(0) + s(1) + s(2) + te(0, 1) + te(0, 2) + te(1, 2) + f(3))
     elif model_name == "geospaNN":
         import torch
         import geospaNN
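The cumulative GAM changes above replace the get_loc("Region") lookup with fixed column indices, which assumes the design matrix is laid out as cumulative features first, then the encoded Region factor. A minimal sketch of the cumulative_2 term structure under that assumption, with hypothetical toy arrays:

import numpy as np
from pygam import GAM, s, f, te

rng = np.random.default_rng(42)
X = np.column_stack([
    rng.normal(size=200),          # column 0: first cumulative feature
    rng.normal(size=200),          # column 1: second cumulative feature
    rng.integers(0, 5, size=200),  # column 2: label-encoded Region factor
])
y = X[:, 0] + 0.5 * X[:, 1] + rng.normal(scale=0.1, size=200)

# s() = spline term, te() = tensor-product interaction, f() = categorical factor
gam = GAM(s(0) + s(1) + te(0, 1) + f(2)).fit(X, y)
preds = gam.predict(X)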
geocif/ml/trend.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pandas as pd
 from statsmodels.regression.linear_model import OLS
 from statsmodels.tools.tools import add_constant
 
@@ -6,7 +7,7 @@ from statsmodels.tools.tools import add_constant
 class DetrendedData:
     """
     A class to store the detrended series, the model used for detrending,
-    and the type of model ('mean', 'linear', 'quadratic').
+    and the type of model ('mean', 'linear', 'quadratic', 'difference').
     """
 
     def __init__(self, detrended_series, trend_model, model_type):
@@ -15,14 +16,16 @@ class DetrendedData:
         self.model_type = model_type
 
 
-def detrend_dataframe(df, column_name="y"):
+def detrend_dataframe(df, column_name="y", model_type="best"):
     """
-    Removes the trend from the specified column of a DataFrame using the method
-    (mean, linear, quadratic) that results in the lowest AIC value.
+    Removes the trend from the specified column of a DataFrame using the specified method
+    (mean, linear, quadratic, difference) or the method that results in the lowest AIC value.
 
     Parameters:
     - df: pandas DataFrame containing the time series data.
     - column_name: string name of the column to detrend.
+    - model_type: string specifying which model to use for detrending ('mean', 'linear',
+      'quadratic', 'difference', or 'best' for automatic selection based on AIC).
 
     Returns:
     - DetrendedData object containing the detrended series, the statistical model,
@@ -41,16 +44,32 @@ def detrend_dataframe(df, column_name="y"):
     X_quad = add_constant(np.column_stack((df["t"], df["t"] ** 2)))
     quad_model = OLS(df[column_name], X_quad).fit()
 
-
-
+    # Differencing method
+    diff_series = df[column_name].diff().dropna()
+    diff_model = OLS(diff_series, np.ones(len(diff_series))).fit()
+
+    models = {
+        "mean": mean_model,
+        "linear": linear_model,
+        "quadratic": quad_model,
+        "difference": diff_model
+    }
+
+    if model_type == "best":
+        best_model_type = min(models, key=lambda x: models[x].aic)
+    else:
+        best_model_type = model_type
+
     best_model = models[best_model_type]
 
     if best_model_type == "mean":
         detrended = df[column_name] - mean_model.predict(np.ones(len(df)))
     elif best_model_type == "linear":
         detrended = df[column_name] - linear_model.predict(X_linear)
-
+    elif best_model_type == "quadratic":
         detrended = df[column_name] - quad_model.predict(X_quad)
+    else:  # difference
+        detrended = df[column_name].diff().dropna()
 
     return DetrendedData(detrended, best_model, best_model_type)
 
@@ -67,11 +86,10 @@ def compute_trend(detrended_data, future_time_points=None):
     Returns:
     - The retrended series as a pandas Series.
     """
-    # if future_time_points is not of type pandas dataframe then convert it to one
     future_time_points = np.array(future_time_points)
 
-    model_type = detrended_data.model_type[0]
-    model = detrended_data.trend_model[0]
+    model_type = detrended_data.model_type.unique()[0]
+    model = detrended_data.trend_model.unique()[0]
 
     if model_type == "mean":
         trend_component = model.predict(
@@ -80,11 +98,14 @@ def compute_trend(detrended_data, future_time_points=None):
     elif model_type == "linear":
         X_linear = add_constant(future_time_points, has_constant="add")
         trend_component = model.predict(X_linear)
-
+    elif model_type == "quadratic":
         X_quad = add_constant(
             np.column_stack((future_time_points, future_time_points**2)),
             has_constant="add",
         )
         trend_component = model.predict(X_quad)
+    else:  # difference
+        trend_component = pd.Series(np.nan, index=future_time_points)
+        trend_component.iloc[0] = model.params[0]  # Add mean of differenced series
 
     return trend_component
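The "best" path in detrend_dataframe keeps whichever candidate trend model has the lowest AIC. A worked sketch of just that selection step, using statsmodels on hypothetical data:

import numpy as np
import pandas as pd
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant

rng = np.random.default_rng(1)
t = np.arange(20)
y = pd.Series(2.0 + 0.3 * t + rng.normal(scale=0.2, size=20))

mean_model = OLS(y, np.ones(len(y))).fit()
linear_model = OLS(y, add_constant(t)).fit()
quad_model = OLS(y, add_constant(np.column_stack((t, t ** 2)))).fit()

models = {"mean": mean_model, "linear": linear_model, "quadratic": quad_model}
best_model_type = min(models, key=lambda k: models[k].aic)  # lowest AIC wins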
geocif/viz/misc.py (new file)
@@ -0,0 +1,55 @@
+import pandas as pd
+import hvplot.pandas
+import panel as pn
+
+# Load the CSV file
+file_path = r'D:\Users\ritvik\projects\GEOGLAM\Output\fao\regional_cei_slope.csv'
+data = pd.read_csv(file_path)
+
+# Extract unique values for dropdowns
+countries = data['Country'].unique().tolist()
+
+# Create dropdown widgets
+country_dropdown = pn.widgets.Select(name='Country', options=countries)
+region_dropdown = pn.widgets.Select(name='Region', options=[])
+crop_dropdown = pn.widgets.Select(name='Crop', options=[])
+season_dropdown = pn.widgets.Select(name='Season', options=data['Season'].unique().tolist())
+
+
+# Function to update region and crop options based on selected country
+@pn.depends(country_dropdown.param.value, watch=True)
+def update_region_and_crop_options(country):
+    filtered_data = data[data['Country'] == country]
+    regions = filtered_data['Region'].unique().tolist()
+    crops = filtered_data['Crop'].unique().tolist()
+
+    region_dropdown.options = regions
+    crop_dropdown.options = crops
+
+
+# Function to filter data based on dropdown selections
+@pn.depends(country_dropdown.param.value, region_dropdown.param.value, crop_dropdown.param.value,
+            season_dropdown.param.value)
+def update_plot(country, region, crop, season):
+    filtered_data = data[(data['Country'] == country) &
+                         (data['Region'] == region) &
+                         (data['Crop'] == crop) &
+                         (data['Season'] == season)]
+
+    if not filtered_data.empty:
+        plot = filtered_data.hvplot.scatter(x='Slope', y='Intercept',
+                                            hover_cols=['Growth Stage', 'p-value', 'Index', 'Description'])
+        return plot
+    else:
+        return pn.pane.Markdown("No data available for the selected combination.")
+
+
+# Create the dashboard
+dashboard = pn.Column(
+    pn.Row(country_dropdown, region_dropdown, crop_dropdown, season_dropdown),
+    update_plot
+)
+
+# Save as html page
+dashboard.save('dashboard.html', embed=True)
+
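Usage note on the new viz/misc.py script: dashboard.save('dashboard.html', embed=True) pre-renders a sampling of widget states into a static HTML file, so the saved page works without a running server; serving the same layout interactively would typically be done with `panel serve` on the script instead.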
geocif.egg-info/SOURCES.txt
@@ -32,6 +32,7 @@ geocif/cei/__init__.py
 geocif/cei/definitions.py
 geocif/cei/indices.py
 geocif/ml/__init__.py
+geocif/ml/aa.py
 geocif/ml/correlations.py
 geocif/ml/embedding.py
 geocif/ml/feature_engineering.py
@@ -50,5 +51,6 @@ geocif/playground/__init__.py
 geocif/playground/automl.py
 geocif/playground/misc.py
 geocif/viz/__init__.py
+geocif/viz/misc.py
 geocif/viz/plot.py
 tests/test_geocif.py
|