PyPI - geocif - Versions diffs - 0.1.93__tar.gz → 0.1.95__tar.gz - Mend

geocif 0.1.93.tar.gz → 0.1.95.tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (83)

{geocif-0.1.93/geocif.egg-info → geocif-0.1.95}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.93
+Version: 0.1.95
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.93 → geocif-0.1.95}/geocif/analysis.py RENAMED Viewed

@@ -627,7 +627,7 @@ class Geoanalysis:
         self.df_analysis["Anomaly"] = (
             self.df_analysis[self.predicted]
             * 100.0
-            / self.df_analysis["Median Yield (tn per ha)"]
+            / self.df_analysis["Median Yield (tn per ha) (2018-2022)_y"]
         )
         # Compute the yield from the last year
@@ -782,6 +782,73 @@ class Geoanalysis:
                         loc_legend="lower left",
                     )
+                    # Make map of predicted yield by country
+                    for country in countries:
+                        df_country = df_model[df_model["Country"] == country.lower().replace(" ", "_")]
+                        fname = f"map_perc_area_{self.country}_{self.crop}.png"
+                        col = "% of total Area (ha)"
+                        plot.plot_df_shpfile(
+                            self.dg,  # dataframe containing adm1 name and polygon
+                            df_country,  # dataframe containing information that will be mapped
+                            merge_col="Country Region",  # Column on which to merge
+                            name_country=[country],  # Plot global map
+                            name_col=col,  # Which column to plot
+                            dir_out=self.dir_plot / str(year),  # Output directory
+                            fname=fname,  # Output file name
+                            label=f"% of Total Area (ha)\n{self.crop.title()}",
+                            vmin=df_country[col].min(),
+                            vmax=df_country[col].max(),
+                            cmap=pal.scientific.sequential.Bamako_20_r,
+                            series="sequential",
+                            show_bg=False,
+                            annotate_regions=self.annotate_regions,
+                            annotate_region_column=annotate_region_column,
+                            loc_legend="lower left",
+                        )
+                        df_country = df_harvest_year[df_harvest_year["Country"] == country.lower().replace(" ", "_")]
+                        fname = f"map_predicted_yield_{country}_{self.crop}_{time_period}_{year}.png"
+                        plot.plot_df_shpfile(
+                            self.dg,  # dataframe containing adm1 name and polygon
+                            df_country,  # dataframe containing information that will be mapped
+                            merge_col="Country Region",  # Column on which to merge
+                            name_country=[country],  # Plot global map
+                            name_col="Predicted Yield (tn per ha)",  # Which column to plot
+                            dir_out=self.dir_plot / str(year),  # Output directory
+                            fname=fname,  # Output file name
+                            label=f"Predicted Yield (Mg/ha)\n{self.crop.title()}, {year}",
+                            vmin=df_country[self.predicted].min(),
+                            vmax=df_country[self.predicted].max(),
+                            cmap=pal.scientific.sequential.Bamako_20_r,
+                            series="sequential",
+                            show_bg=False,
+                            annotate_regions=self.annotate_regions,
+                            annotate_region_column=annotate_region_column,
+                            loc_legend="lower left",
+                        )
+                        fname = (
+                            f"map_anomaly_{country}_{self.crop}_{time_period}_{year}.png"
+                        )
+                        plot.plot_df_shpfile(
+                            self.dg,  # dataframe containing adm1 name and polygon
+                            df_country,  # dataframe containing information that will be mapped
+                            merge_col="Country Region",  # Column on which to merge
+                            name_country=[country],  # Plot global map
+                            name_col="Anomaly",  # Which column to plot
+                            dir_out=self.dir_plot / str(year),  # Output directory
+                            fname=fname,  # Output file name
+                            label=f"% of {self.number_lag_years}-year Median Yield\n{self.crop.title()}, {year}",
+                            vmin=df_country["Anomaly"].min(),
+                            vmax=110,  # df_harvest_year["Anomaly"].max(),
+                            cmap=pal.cartocolors.diverging.Geyser_5_r,
+                            series="sequential",
+                            show_bg=False,
+                            annotate_regions=self.annotate_regions,
+                            annotate_region_column=annotate_region_column,
+                            loc_legend="lower left",
+                        )
                     """ Ratio of Predicted to last Year Yield """
                     # fname = f"{self.country}_{self.crop}_{time_period}_{year}_ratio_last_year_yield.png"
                     # plot.plot_df_shpfile(
@@ -934,18 +1001,14 @@ class Geoanalysis:
         self.annotate_regions = self.parser.getboolean(country, "annotate_regions")
         # If ADMIN0 or ADM0_NAME is not in the shapefile, then add ADM0_NAME
-        if "ADMIN0" or "ADM0_NAME" not in self.dg.columns:
+        if "ADMIN0" not in self.dg.columns and "ADM0_NAME" not in self.dg.columns:
             self.dg.loc[:, "ADMIN0"] = country.title().replace("_", " ")
-        # if ADMIN1 or ADM1_NAME is not in the shapefile, then rename admin_col_name to ADM1_NAME
-        if "ADMIN1" or "ADM1_NAME" not in self.dg.columns:
+        # If ADMIN1 or ADM1_NAME is not in the shapefile, then rename admin_col_name to ADMIN1
+        if "ADMIN1" not in self.dg.columns and "ADM1_NAME" not in self.dg.columns:
             if admin_zone == "admin_1":
                 self.dg.rename(columns={self.admin_col_name: "ADMIN1"}, inplace=True)
-        # Hack rename Tanzania to United Republic of Tanzania
-        self.dg["ADMIN0"] = self.dg["ADMIN0"].replace(
-            "Tanzania", "United Republic of Tanzania"
-        )
         # Rename ADMIN0 to ADM0_NAME and ADMIN1 to ADM1_NAME and ADMIN2 to ADM2_NAME
         self.dg = self.dg.rename(
             columns={

{geocif-0.1.93 → geocif-0.1.95}/geocif/geocif.py RENAMED Viewed

@@ -1005,6 +1005,7 @@ class Geocif:
         # and will confuse the model
         if self.forecast_season == self.today_year:
             current_month = ar.utcnow().month
+            current_day = ar.utcnow().day
             # Identify columns where the second chunk equals the current month index
             cols_to_drop = []
@@ -1014,51 +1015,30 @@ class Geocif:
                         "Starting Stage"
                     ]
-                    if mon == current_month:
+                    if mon == current_month and current_day > 25:
                         cols_to_drop.append(col)
             # Drop those columns
             df = df.drop(columns=cols_to_drop)
-        from collections import Counter
-        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
-        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
-        print("<0>", dupes)
-        # Hack: If
         # Change column name
         # e.g. 'vDTR_7_6_5_4_3_2_1_37_36_35_34_33_32_31' to 'vDTR Mar 1-Oct 27'
         df = stages.update_feature_names(df, self.method)
-        from collections import Counter
-        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
-        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
-        print("<111>", dupes)
         all_cei_columns = self.get_cei_column_names(df)
         # Fill in any missing values with 0
         df.loc[:, all_cei_columns].fillna(0, inplace=True)
-        from collections import Counter
-        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
-        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
-        print("<1>", dupes)
         df = fe.compute_last_year_yield(df, self.target)
-        from collections import Counter
-        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
-        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
-        print("<2>", dupes)
         df = fe.compute_median_statistics(
             df, self.all_seasons_with_yield, self.number_median_years, self.target
         )
-        from collections import Counter
-        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
-        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
-        print("<3>", dupes)
         df = fe.compute_user_median_statistics(df, range(2018, 2023))
         df = fe.compute_user_median_statistics(df, range(2013, 2018))
-        from collections import Counter
-        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
-        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
-        print("<4>", dupes)
         if self.median_area_as_feature:
             df = fe.compute_median_statistics(
                 df, self.all_seasons_with_yield, self.number_median_years, "Area (ha)"
@@ -1073,10 +1053,7 @@ class Geocif:
             df = fe.compute_analogous_yield(
                 df, self.all_seasons_with_yield, self.number_median_years, self.target
             )
-        from collections import Counter
-        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
-        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
-        print("5", dupes)
         # Create Region_ID column based on Region column category code
         df["Region"] = df["Region"].astype("category")
         if self.cluster_strategy == "single":
@@ -1090,7 +1067,7 @@ class Geocif:
             # Region_ID should be type category
             df["Region_ID"] = df["Region_ID"].astype("category")
-        breakpoint()
         return df
     def execute(self):
@@ -1147,7 +1124,14 @@ class Geocif:
         # Join with dg based on Country Region column, only keeping rows that are in df
         # Only use geometry column from self.dg
-        self.dg_country = self.dg_country[["Country Region", "geometry"]].merge(
+        if self.admin_zone == "admin_1":
+            cols = ["Country Region", "geometry", "ADM1_NAME"]
+        elif self.admin_zone == "admin_2":
+            cols = ["Country Region", "geometry", "ADM2_NAME"]
+        else:
+            raise ValueError(f"Unsopported {self.admin_zone}")
+        self.dg_country = self.dg_country[cols].merge(
             df[["Country Region", self.correlation_plot_groupby]],
             on="Country Region",
             how="outer",

{geocif-0.1.93 → geocif-0.1.95}/geocif/ml/embedding.py RENAMED Viewed

@@ -47,7 +47,7 @@ def _compute_correlations(X, y):
                 r = pearsonr(y_filtered, f_series_filtered)[0]
                 feature_correlations[feature] = round(r, 3)
             except Exception as e:
-                print(f"Error computing correlation for {feature}: {e}")
+                # print(f"Error computing correlation for {feature}: {e}")
                 feature_correlations[feature] = np.nan
     return feature_correlations

{geocif-0.1.93 → geocif-0.1.95}/geocif/ml/stages.py RENAMED Viewed

@@ -268,11 +268,6 @@ def update_feature_names(df, method):
         # Saving the result in the dictionary
         stages_info[element] = (cei, start_stage, end_stage, new_column_name)
-        # Check if any duplicates exist in the dictionary
-        if len(stages_info) != len(set(stages_info.values())):
-            breakpoint()
-            raise ValueError(f"Duplicate stage information found for {element}")
-    breakpoint()
     # For each column in df, check if it exists in stages_info, and
     # replace it with the new column name
     # Precompute the rename mapping outside the loop

{geocif-0.1.93 → geocif-0.1.95}/geocif/viz/plot.py RENAMED Viewed

@@ -375,6 +375,11 @@ def plot_df_shpfile(
             )
             df_country = gpd.read_file(shpfilename, engine="pyogrio")
+            # Hack
+            # Rename Russia to Russian Federation, in the ADMIN column
+            df_country.loc[
+                df_country["ADMIN"].str.lower() == "russia", "ADMIN"
+            ] = "Russian Federation"
             # read the country borders
             _name_country = []
             for cntr in name_country:
@@ -390,14 +395,23 @@ def plot_df_shpfile(
                 )
                 _name_country.append(cntr.replace(" ", "_").lower())
-            extent = rgeo.get_country_lat_lon_extent(
-                _name_country, buffer=1.0
-            )  # left, right, bottom, top
-            # Hack: Add space to the top for adding title
-            extent[3] = extent[3] + 2
-            # Add some space to the bottom for adding legend and colorbar
-            extent[2] = extent[2] - 3
-            ax.set_extent(extent)
+            # Hack
+            if _name_country[0] == "russian_federation":
+                extent = [20, 80, 40, 80]
+            else:
+                extent = rgeo.get_country_lat_lon_extent(
+                    _name_country, buffer=1.0
+                )  # left, right, bottom, top
+                # Hack: Add space to the top for adding title
+                extent[3] = extent[3] + 2
+                # Add some space to the bottom for adding legend and colorbar
+                extent[2] = extent[2] - 3
+            try:
+                ax.set_extent(extent)
+            except:
+                breakpoint()
         elif name_country == "world":
             ax.add_feature(
                 cartopy.feature.LAND.with_scale("50m"), color="white"

{geocif-0.1.93 → geocif-0.1.95/geocif.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.93
+Version: 0.1.95
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.93 → geocif-0.1.95}/setup.py RENAMED Viewed

@@ -50,6 +50,6 @@ setup(
     test_suite="tests",
     tests_require=test_requirements,
     url="https://ritviksahajpal.github.io/yield_forecasting/",
-    version="0.1.93",
+    version="0.1.95",
     zip_safe=False,
 )