PyPI - geocif - Versions diffs - 0.1.30__tar.gz → 0.1.31__tar.gz - Mend

geocif-0.1.30.tar.gz → geocif-0.1.31.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

{geocif-0.1.30/geocif.egg-info → geocif-0.1.31}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.30
+Version: 0.1.31
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.30 → geocif-0.1.31}/geocif/geocif.py RENAMED Viewed

@@ -355,6 +355,8 @@ class Geocif:
         experiment_id = f"{self.country}_{self.crop}"
         now = ar.utcnow().to("America/New_York").format("MMMM-DD-YYYY HH:mm:ss")
         selected_features = self.selected_features + self.cat_features
+        # Compute percentage difference between y_pred and y_test
+        ape = np.abs((y_pred - y_test) / y_test) * 100
         df = pd.DataFrame(
             {
                 "Experiment_ID": np.full(shp, experiment_id),
@@ -378,6 +380,7 @@ class Geocif:
                 "Area (ha)": df_region["Area (ha)"].values,
                 f"Observed {self.target}": np.around(y_test, 3).ravel(),
                 f"Predicted {self.target}": np.around(y_pred, 3).ravel(),
+                f"APE": np.around(ape, 3).ravel(),
             }
         )
@@ -720,7 +723,7 @@ class Geocif:
         """ Convert this dataframe into an ML ready format and save to disk """
         df = self.create_ml_dataframe(df)
         dir_output = (
-            self.dir_analysis / self.country / self.crop / str(self.forecast_season)
+            self.dir_analysis / self.country / self.crop / self.model_name / str(self.forecast_season)
         )
         os.makedirs(dir_output, exist_ok=True)
         df.to_csv(
@@ -768,6 +771,9 @@ class Geocif:
         dict_kwargs["dg_country"] = self.dg_country
         dict_kwargs["combined_dict"] = self.combined_dict
+        if self.spatial_autocorrelation:
+           sa.compute_spatial_autocorrelation(self.df_results, **dict_kwargs)
         if self.correlation_plots:
             self.logger.info(f"Correlation plot for {self.country} {self.crop}")
             (
@@ -775,9 +781,6 @@ class Geocif:
                 dict_best_cei,
             ) = correlations.all_correlated_feature_by_time(df, **dict_kwargs)
-        if self.spatial_autocorrelation:
-           sa.compute_spatial_autocorrelation(self.df_results, **dict_kwargs)
         """ Separate into train and test datasets based on forecast_season """
         mask = df["Harvest Year"] == self.forecast_season
         self.df_train = df[~mask]

{geocif-0.1.30 → geocif-0.1.31}/geocif/ml/spatial_autocorrelation.py RENAMED Viewed

@@ -1,7 +1,11 @@
+import warnings
+from tqdm import tqdm
+import matplotlib.pyplot as plt
 import pandas as pd
 from pysal.lib import weights
-from pysal.explore import esda
-import matplotlib.pyplot as plt
+warnings.filterwarnings("ignore")
 def validate_inputs(df_results, required_columns):
@@ -40,24 +44,26 @@ def preprocess_data(df_results, dg_country):
     dg_country["Country Region"] = dg_country["Country Region"].str.lower()
     dg_country = dg_country[dg_country["Country Region"].isin(df["Country Region"])]
+    dg_country.reset_index(drop=True, inplace=True)
     merged_df = dg_country.merge(df, on="Country Region", how="inner")
-    return merged_df, dg_country
+    return merged_df
-def create_base_weights(dg_country):
+def create_base_weights(merged_df):
     """
     Args:
-        dg_country:
+        merged_df:
     Returns:
     """
-    dg_subset = dg_country[["Country Region", "geometry"]].drop_duplicates()
+    dg = merged_df[["Country Region", "geometry"]].drop_duplicates()
     try:
-        w_base = weights.Queen.from_dataframe(dg_subset)
+        w_base = weights.Queen.from_dataframe(dg)
     except Exception as e:
         raise RuntimeError(f"Failed to create spatial weights: {e}")
@@ -65,13 +71,10 @@ def create_base_weights(dg_country):
         index for index, neighbors in w_base.neighbors.items() if len(neighbors) == 0
     ]
     if no_neighbors:
-        print(f"Removing {len(no_neighbors)} polygons with 0 neighbors")
-        dg_country = dg_country.drop(index=no_neighbors).reset_index(drop=True)
-        w_base = weights.Queen.from_dataframe(
-            dg_country[["Country Region", "geometry"]]
-        )
+        dg = dg.drop(index=no_neighbors[0]).reset_index(drop=True)
+        w_base = weights.Queen.from_dataframe(dg[["Country Region", "geometry"]])
-    return w_base, dg_country
+    return w_base, dg
 def create_weights_for_year(dg_country, regions_with_data):
@@ -84,14 +87,22 @@ def create_weights_for_year(dg_country, regions_with_data):
     Returns:
     """
-    dg_subset = dg_country[dg_country["Country Region"].isin(regions_with_data)]
+    dg = dg_country[dg_country["Country Region"].isin(regions_with_data)]
+    dg = dg.reset_index(drop=True)
+    wt = weights.Queen.from_dataframe(dg)
-    wt = weights.Queen.from_dataframe(dg_subset)
+    no_neighbors = [
+        index for index, neighbors in wt.neighbors.items() if len(neighbors) == 0
+    ]
+    if no_neighbors:
+        dg = dg.drop(index=no_neighbors[0]).reset_index(drop=True)
+        wt = weights.Queen.from_dataframe(dg[["Country Region", "geometry"]])
-    return wt
+    return wt, dg
-def compute_morans_i(merged_df, dg_country):
+def compute_morans_i(merged_df):
     """
     Args:
@@ -101,25 +112,35 @@ def compute_morans_i(merged_df, dg_country):
     Returns:
     """
+    from pysal.explore import esda
+    # Drop any regions with missing data
+    merged_df = merged_df.dropna(subset=["Yield (tn per ha)"])
     years = merged_df["Harvest Year"].unique()
     results = {"Harvest Year": [], "Moran's I": [], "p-value": [], "Significant": []}
-    for year in years:
+    for year in tqdm(years, desc="Compute Moran's I"):
         year_data = merged_df[merged_df["Harvest Year"] == year]
         regions_with_data = year_data["Country Region"].unique()
         year_data = year_data[year_data["Country Region"].isin(regions_with_data)]
-        y = year_data[["Region", "Yield (tn per ha)"]].drop_duplicates()
+        y = year_data[["Country Region", "Region", "Yield (tn per ha)"]].drop_duplicates()
+        dg_country = year_data[["Country Region", "geometry"]].drop_duplicates()
         if len(y) > 1:
-            w = create_weights_for_year(dg_country, regions_with_data)
+            w, x = create_weights_for_year(dg_country, regions_with_data)
+            y = y[y["Country Region"].isin(x["Country Region"])]
             try:
                 mi = esda.Moran(y["Yield (tn per ha)"].values, w, permutations=999)
             except:
                 breakpoint()
             results["Harvest Year"].append(year)
-            results["Moran's I"].append(mi.I)
+            try:
+                results["Moran's I"].append(mi.I)
+            except:
+                breakpoint()
             results["p-value"].append(mi.p_sim)
             results["Significant"].append(mi.p_sim < 0.1)
         else:
@@ -131,7 +152,7 @@ def compute_morans_i(merged_df, dg_country):
     return pd.DataFrame(results)
-def plot_moransi_time_series(results_df, country, crop, dir_output):
+def plot_morans_i_time_series(results_df, country, crop, dir_output):
     """
     Args:
@@ -194,12 +215,10 @@ def compute_spatial_autocorrelation(df_results, **kwargs):
     ]
     validate_inputs(df_results, required_columns)
-    merged_df, dg_country = preprocess_data(df_results, dg_country)
+    merged_df = preprocess_data(df_results, dg_country)
     if merged_df.empty:
         raise ValueError("No valid data available after preprocessing")
-    w_base, dg_country = create_base_weights(dg_country)
-    results_df = compute_morans_i(merged_df, dg_country)
+    results_df = compute_morans_i(merged_df)
-    plot_moransi_time_series(results_df, country, crop, dir_output)
+    plot_morans_i_time_series(results_df, country, crop, dir_output)

{geocif-0.1.30 → geocif-0.1.31/geocif.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.30
+Version: 0.1.31
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.30 → geocif-0.1.31}/setup.py RENAMED Viewed

@@ -50,6 +50,6 @@ setup(
     test_suite="tests",
     tests_require=test_requirements,
     url="https://ritviksahajpal.github.io/yield_forecasting/",
-    version="0.1.30",
+    version="0.1.31",
     zip_safe=False,
 )