PyPI - geocif - Versions diffs - 0.1.59__tar.gz → 0.1.61__tar.gz - Mend

geocif 0.1.59.tar.gz → 0.1.61.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

{geocif-0.1.59/geocif.egg-info → geocif-0.1.61}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.59
+Version: 0.1.61
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.59 → geocif-0.1.61}/geocif/analysis.py RENAMED Viewed

@@ -186,11 +186,6 @@ class Geoanalysis:
         return df_metrics, df_regional_metrics, df_national_yield
     def _clean_data(self):
-        # Hack exclude 2012 if country == "illinois"
-        if self.country == "illinois":
-            self.df_analysis = self.df_analysis[
-                self.df_analysis["Harvest Year"] != 2012
-            ]
         # Remove rows with missing values in Observed Yield (tn per ha)
         return self.df_analysis.dropna(subset=["Observed Yield (tn per ha)"])

{geocif-0.1.59 → geocif-0.1.61}/geocif/geocif.py RENAMED Viewed

@@ -587,6 +587,16 @@ class Geocif:
             df_region[f"Median {self.target}"].values, 3
         )
+        if f"Median {self.target} (2014-2018)" in df_region.columns:
+            df.loc[:, f"Median {self.target} (2014-2018)"] = np.around(
+                df_region[f"Median {self.target} (2014-2018)"].values, 3
+            )
+        if f"Median {self.target} (2013-2017)" in df_region.columns:
+            df.loc[:, f"Median {self.target} (2013-2017)"] = np.around(
+                df_region[f"Median {self.target} (2013-2017)"].values, 3
+            )
         if self.estimate_ci:
             if self.estimate_ci_for_all or self.forecast_season == self.today_year:
                 # Iterate over each element in y_pred_ci
@@ -730,6 +740,8 @@ class Geocif:
         if self.median_yield_as_feature:
             self.feature_names.append(f"Median {self.target}")
+            self.feature_names.append(f"Median {self.target} (2014-2018)")
+            self.feature_names.append(f"Median {self.target} (2013-2017)")
         if self.lag_yield_as_feature:
             # For the number of years specified in self.number_lag_years
@@ -988,6 +1000,14 @@ class Geocif:
             df, self.all_seasons_with_yield, self.number_median_years, self.target
         )
+        df = fe.compute_user_median_statistics(
+            df, [2014, 2015, 2016, 2017, 2018]
+        )
+        df = fe.compute_user_median_statistics(
+            df, [2013, 2014, 2015, 2016, 2017]
+        )
         if self.median_area_as_feature:
             df = fe.compute_median_statistics(
                 df, self.all_seasons_with_yield, self.number_median_years, "Area (ha)"

{geocif-0.1.59 → geocif-0.1.61}/geocif/ml/feature_engineering.py RENAMED Viewed

@@ -105,6 +105,49 @@ def compute_median_statistics(
     return df
+def compute_user_median_statistics(df, user_years, target_col="Yield (tn per ha)"):
+    """
+    Enhances the DataFrame with a new column that contains the median yield computed
+    using only the yields from the user-specified list of years.
+    Args:
+        df (DataFrame): The original DataFrame containing yield data.
+        user_years (array-like): List of years to consider for computing the median yield.
+        target_col (str): The column name from which to compute the median yield.
+    Returns:
+        DataFrame: The original DataFrame enhanced with a new column for median yield.
+    """
+    # Ensure 'Harvest Year' is treated as integer for accurate comparisons.
+    df["Harvest Year"] = df["Harvest Year"].astype(int)
+    # Sort the user_years list to reliably extract the earliest and latest years.
+    user_years_sorted = sorted(user_years)
+    first_year = user_years_sorted[0]
+    last_year = user_years_sorted[-1]
+    # Define the new column name to include the range of years.
+    new_col_name = f"Median {target_col} ({first_year}-{last_year})"
+    # Initialize the new column with NaN values.
+    df[new_col_name] = np.nan
+    # Group by region and compute the median yield for the specified years.
+    for region, group in tqdm(df.groupby("Region"), desc="Median yield", leave=False):
+        # Skip if the target column is completely null for this region.
+        if group[target_col].isnull().all():
+            continue
+        # Filter the rows to only include harvest years that are in the user provided list.
+        mask = group["Harvest Year"].isin(user_years)
+        median_yield = group.loc[mask, target_col].median()
+        # Assign the computed median yield to all rows in the current region.
+        df.loc[df["Region"] == region, new_col_name] = median_yield
+    return df
 def compute_lag_yield(
     df, all_seasons_with_yield, number_lag_years, target_col="Yield (tn per ha)"
 ):

{geocif-0.1.59 → geocif-0.1.61}/geocif/ml/feature_selection.py RENAMED Viewed

@@ -192,7 +192,6 @@ def select_features(X, y, method="RFE", min_features_to_select=3, threshold_nan=
         from boruta import BorutaPy
         selector = BorutaPy(forest, n_estimators="auto", random_state=42)
-        breakpoint()
         selector.fit(X.values, y.values)
         selected_features_mask = selector.support_
         selected_features = X.columns[selected_features_mask].tolist()

{geocif-0.1.59 → geocif-0.1.61/geocif.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.59
+Version: 0.1.61
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.59 → geocif-0.1.61}/setup.py RENAMED Viewed

@@ -50,6 +50,6 @@ setup(
     test_suite="tests",
     tests_require=test_requirements,
     url="https://ritviksahajpal.github.io/yield_forecasting/",
-    version="0.1.59",
+    version="0.1.61",
     zip_safe=False,
 )