PyPI - geocif - Versions diffs - 0.1.52__tar.gz → 0.1.54__tar.gz - Mend

geocif-0.1.52.tar.gz → geocif-0.1.54.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

{geocif-0.1.52/geocif.egg-info → geocif-0.1.54}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.52
+Version: 0.1.54
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.52 → geocif-0.1.54}/geocif/analysis.py RENAMED Viewed

@@ -93,6 +93,9 @@ class Geoanalysis:
                 & (self.df_analysis["Crop"] == self.crop)
                 & (self.df_analysis["Model"] == self.model)
             ]
+            # Drop columns that are empty
+            # self.df_analysis = self.df_analysis.dropna(axis=1, how="all")
         except Exception as e:
             pass
@@ -384,7 +387,7 @@ class Geoanalysis:
         from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
         # Ensure 'Harvest Year' is numeric
-        df["Harvest Year"] = pd.to_numeric(df["Harvest Year"], errors="coerce")
+        df.loc[:, "Harvest Year"] = pd.to_numeric(df["Harvest Year"], errors="coerce")
         # Extract data
         y_observed = df["Observed Yield (tn per ha)"]
@@ -414,8 +417,15 @@ class Geoanalysis:
             rmse = np.sqrt(mean_squared_error(y_observed, y_predicted))
             mape = mean_absolute_percentage_error(y_observed, y_predicted)
             r2 = r2_score(y_observed, y_predicted)
+            n_points = len(y_observed)  # Number of data points
+            textstr = (
+                f"RMSE: {rmse:.2f} tn/ha\n"
+                f"MAPE: {mape:.2%}\n"
+                f"$r^2$: {r2:.2f}\n"
+                f"N: {n_points}"
+            )
-            textstr = f"RMSE: {rmse:.2f} tn/ha\nMAPE: {mape:.2%}\n$r^2$: {r2:.2f}"
             ax.annotate(
                 textstr,
                 xy=(0.05, 0.95),
@@ -485,7 +495,15 @@ class Geoanalysis:
             mape = mean_absolute_percentage_error(y_observed, y_predicted)
             r2 = r2_score(y_observed, y_predicted)
-            textstr = f"RMSE: {rmse:.2f} tn/ha\nMAPE: {mape:.2%}\nr²: {r2:.2f}"
+            n_points = len(y_observed)  # Number of data points
+            textstr = (
+                f"RMSE: {rmse:.2f} tn/ha\n"
+                f"MAPE: {mape:.2%}\n"
+                f"$r^2$: {r2:.2f}\n"
+                f"N: {n_points}"
+            )
             ax.annotate(
                 textstr,
                 xy=(0.05, 0.95),
@@ -636,7 +654,7 @@ class Geoanalysis:
                     #
                     #                 """ % of total area """
                     if idx == 0:
-                        fname = f"{self.country}_{self.crop}_perc_area.png"
+                        fname = f"map_{self.country}_{self.crop}_perc_area.png"
                         col = "% of total Area (ha)"
                         plot.plot_df_shpfile(
                             self.dg,  # dataframe containing adm1 name and polygon
@@ -652,13 +670,13 @@ class Geoanalysis:
                             cmap=pal.scientific.sequential.Bamako_20_r,
                             series="sequential",
                             show_bg=False,
-                            annotate_regions=True,
+                            annotate_regions=self.annotate_regions,
                             annotate_region_column=annotate_region_column,
                             loc_legend="lower left",
                         )
                     #
                     """ Unique regions """
-                    fname = f"{self.country}_{self.crop}_region_ID.png"
+                    fname = f"map_{self.country}_{self.crop}_region_ID.png"
                     col = "Region_ID"
                     df_model[col] = df_model[col].astype(int) + 1
                     if len(df_model["Region_ID"].unique() > 1):
@@ -668,27 +686,27 @@ class Geoanalysis:
                             for key in df_time_period["Region_ID"].unique()
                         }
-                        # plot.plot_df_shpfile(
-                        #     self.dg,  # dataframe containing adm1 name and polygon
-                        #     df_model,  # dataframe containing information that will be mapped
-                        #     dict_lup=dict_region,
-                        #     merge_col="Country Region",  # Column on which to merge
-                        #     name_country=countries,  # Plot global map
-                        #     name_col=col,  # Which column to plot
-                        #     dir_out=self.dir_plot / str(year),  # Output directory
-                        #     fname=fname,  # Output file name
-                        #     label=f"Region Cluster\n{self.crop.title()}",
-                        #     vmin=df_model[col].min(),
-                        #     vmax=df_model[col].max(),
-                        #     cmap=pal.tableau.Tableau_20.mpl_colors,
-                        #     series="qualitative",
-                        #     show_bg=False,
-                        #     alpha_feature=1,
-                        #     use_key=True,
-                        #     annotate_regions=True,
-                        #     annotate_region_column=annotate_region_column,
-                        #     loc_legend="lower left",
-                        # )
+                        plot.plot_df_shpfile(
+                            self.dg,  # dataframe containing adm1 name and polygon
+                            df_model,  # dataframe containing information that will be mapped
+                            dict_lup=dict_region,
+                            merge_col="Country Region",  # Column on which to merge
+                            name_country=countries,  # Plot global map
+                            name_col=col,  # Which column to plot
+                            dir_out=self.dir_plot / str(year),  # Output directory
+                            fname=fname,  # Output file name
+                            label=f"Region Cluster\n{self.crop.title()}",
+                            vmin=df_model[col].min(),
+                            vmax=df_model[col].max(),
+                            cmap=pal.tableau.Tableau_20.mpl_colors,
+                            series="qualitative",
+                            show_bg=False,
+                            alpha_feature=1,
+                            use_key=True,
+                            annotate_regions=self.annotate_regions,
+                            annotate_region_column=annotate_region_column,
+                            loc_legend="lower left",
+                        )
                     #                     breakpoint()
                     # """ Anomaly """
@@ -715,7 +733,7 @@ class Geoanalysis:
                     # )
                     """ Predicted Yield """
-                    fname = f"{fname_prefix}_{self.crop}_{time_period}_{year}_predicted_yield.png"
+                    fname = f"map_{fname_prefix}_{self.crop}_{time_period}_{year}_predicted_yield.png"
                     plot.plot_df_shpfile(
                         self.dg,  # dataframe containing adm1 name and polygon
                         df_harvest_year,  # dataframe containing information that will be mapped
@@ -730,7 +748,7 @@ class Geoanalysis:
                         cmap=pal.scientific.sequential.Bamako_20_r,
                         series="sequential",
                         show_bg=False,
-                        annotate_regions=True,
+                        annotate_regions=self.annotate_regions,
                         annotate_region_column=annotate_region_column,
                         loc_legend="lower left",
                     )
@@ -759,7 +777,7 @@ class Geoanalysis:
                     # Area
                     # breakpoint()
                     if df_time_period["Area (ha)"].notna().all():
-                        fname = f"{self.country}_{self.crop}_{year}_area.png"
+                        fname = f"map_{self.country}_{self.crop}_{year}_area.png"
                         plot.plot_df_shpfile(
                             self.dg,  # dataframe containing adm1 name and polygon
                             df_time_period,  # dataframe containing information that will be mapped
@@ -774,7 +792,7 @@ class Geoanalysis:
                             cmap=pal.scientific.sequential.Bamako_20_r,
                             series="sequential",
                             show_bg=False,
-                            annotate_regions=True,
+                            annotate_regions=self.annotate_regions,
                             loc_legend="lower left",
                         )
@@ -884,6 +902,7 @@ class Geoanalysis:
             engine="pyogrio",
         )
         self.admin_col_name = self.parser.get(country, "admin_col_name")
+        self.annotate_regions = self.parser.getboolean(country, "annotate_regions")
         # If ADMIN0 or ADM0_NAME is not in the shapefile, then add ADM0_NAME
         if "ADMIN0" or "ADM0_NAME" not in self.dg.columns:
@@ -950,18 +969,28 @@ class RegionalMapper(Geoanalysis):
         con = sqlite3.connect(self.db_path)
         query = "SELECT * FROM regional_metrics"
-        self.df_regional = pd.read_sql_query(query, con)
+        try:
+            self.df_regional = pd.read_sql_query(query, con)
+        except:
+            self.logger.error("Failed to read data from regional_metrics")
+            self.df_regional = pd.DataFrame()
         query = "SELECT * FROM regional_metrics_by_year"
-        self.df_regional_by_year = pd.read_sql_query(query, con)
+        try:
+            self.df_regional_by_year = pd.read_sql_query(query, con)
+        except:
+            self.logger.error("Failed to read data from regional_metrics_by_year")
+            self.df_regional_by_year = pd.DataFrame()
         con.close()
     def clean_data(self):
         """Clean and format the data."""
-        self.df_regional["Country"] = (
-            self.df_regional["Country"].str.replace("_", " ").str.title()
-        )
-        self.df_regional["Model"] = self.df_regional["Model"].str.title()
+        if not self.df_regional.empty:
+            self.df_regional["Country"] = (
+                self.df_regional["Country"].str.replace("_", " ").str.title()
+            )
+            self.df_regional["Model"] = self.df_regional["Model"].str.title()
     def plot_heatmap(self):
         """Generate heatmaps of MAPE bins vs. % total area bins."""
@@ -1070,7 +1099,7 @@ class RegionalMapper(Geoanalysis):
                 )
                 plt.tight_layout()
-                plt.savefig(self.dir_analysis / f"mape_histogram_{model}.png", dpi=250)
+                plt.savefig(self.dir_analysis / f"histogram_region_{model}_mape.png", dpi=250)
                 plt.close()
     def plot_mape_map(self):
@@ -1101,7 +1130,7 @@ class RegionalMapper(Geoanalysis):
             df = df_model[df_model["Country"].isin(countries)]
             self.dg = self.dg[self.dg["ADM0_NAME"].isin(countries)]
-            fname = f"mape_{crop}_{df_model['Model'].iloc[0]}.png"
+            fname = f"map_{crop}_{df_model['Model'].iloc[0]}_mape.png"
             plot.plot_df_shpfile(
                 self.dg,
                 df,
@@ -1116,7 +1145,7 @@ class RegionalMapper(Geoanalysis):
                 cmap=pal.scientific.sequential.Bamako_20_r,
                 series="sequential",
                 show_bg=False,
-                annotate_regions=True,
+                annotate_regions=self.annotate_regions,
                 loc_legend="lower left",
             )
@@ -1145,7 +1174,7 @@ class RegionalMapper(Geoanalysis):
             plt.xticks(rotation=0)
             plt.tight_layout()
-            plt.savefig(self.dir_analysis / "mape_by_year.png", dpi=250)
+            plt.savefig(self.dir_analysis / "bar_mape_by_year.png", dpi=250)
             plt.close()

{geocif-0.1.52 → geocif-0.1.54}/geocif/geocif.py RENAMED Viewed

@@ -582,11 +582,10 @@ class Geocif:
             }
         )
-        if self.median_yield_as_feature:
-            # Add median yield to dataframe
-            df.loc[:, f"Median {self.target}"] = np.around(
-                df_region[f"Median {self.target}"].values, 3
-            )
+        # Add median yield to dataframe
+        df.loc[:, f"Median {self.target}"] = np.around(
+            df_region[f"Median {self.target}"].values, 3
+        )
         if self.estimate_ci:
             if self.estimate_ci_for_all or self.forecast_season == self.today_year:
@@ -1157,6 +1156,8 @@ class Geocif:
                         group.index, "Detrended Model Type"
                     ] = detrended_data.model_type
+            if group.empty:
+                breakpoint()
             # Create categorical classes for target column
             group, new_target_column, bins = fe.classify_target(
                 group, self.target, self.number_classes

{geocif-0.1.52 → geocif-0.1.54}/geocif/indices_runner_v2.py RENAMED Viewed

@@ -47,8 +47,8 @@ class cei_runner(base.BaseGeo):
         self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
         self.base_dir = Path(
-            #r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\nepal"
-            r"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/nepal"
+            r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\nepal"
+            #r"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/nepal"
         )  # Path(self.parser.get("PATHS", "dir_crop_inputs"))
         self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")

{geocif-0.1.52 → geocif-0.1.54}/geocif/ml/feature_engineering.py RENAMED Viewed

@@ -361,7 +361,11 @@ def classify_target(df, target_col, number_classes):
     new_target_col = f"{target_col}_class"
     # Change the target column to categorical with the specified number of classes
-    df[new_target_col], bins = pd.qcut(df[target_col], q=number_classes, labels=False, retbins=True)
+    df[new_target_col], bins = pd.qcut(df[target_col],
+                                       q=number_classes,
+                                       labels=False,
+                                       retbins=True,
+                                       duplicates='drop')
     return df, new_target_col, bins

{geocif-0.1.52 → geocif-0.1.54/geocif.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.52
+Version: 0.1.54
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.52 → geocif-0.1.54}/setup.py RENAMED Viewed

@@ -50,6 +50,6 @@ setup(
     test_suite="tests",
     tests_require=test_requirements,
     url="https://ritviksahajpal.github.io/yield_forecasting/",
-    version="0.1.52",
+    version="0.1.54",
     zip_safe=False,
 )