PyPI - geocif - Versions diffs - 0.1.67__tar.gz → 0.1.68__tar.gz - Mend

geocif 0.1.67.tar.gz → 0.1.68.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81)

{geocif-0.1.67/geocif.egg-info → geocif-0.1.68}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.67
+Version: 0.1.68
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.67 → geocif-0.1.68}/geocif/cei/definitions.py RENAMED Viewed

@@ -1,11 +1,11 @@
 PHENOLOGICAL_STAGES = [1, 2, 3]
 dict_indices = {
     "GD4": ["Cold", "Growing degree days (sum of Tmean > 4 C)"],
-    #"CFD": ["Cold", "Maximum number of consecutive frost days (Tmin < 0 C)"],
-    #"FD": ["Cold", "Number of Frost Days (Tmin < 0C)"],
+    "CFD": ["Cold", "Maximum number of consecutive frost days (Tmin < 0 C)"],
+    "FD": ["Cold", "Number of Frost Days (Tmin < 0C)"],
     "HD17": ["Cold", "Heating degree days (sum of Tmean < 17 C)"],
-    #"ID": ["Cold", "Number of sharp Ice Days (Tmax < 0C)"],
-    #"CSDI": ["Cold", "Cold-spell duration index"],
+    "ID": ["Cold", "Number of sharp Ice Days (Tmax < 0C)"],
+    "CSDI": ["Cold", "Cold-spell duration index"],
     "TG10p": ["Cold", "Percentage of days when Tmean < 10th percentile"],
     "TN10p": ["Cold", "Percentage of days when Tmin < 10th percentile"],
     "TXn": ["Cold", "Minimum daily maximum temperature"],
@@ -70,10 +70,10 @@ dict_indices = {
         "Compound",
         "Days with TG > 75th percentile of daily mean temperature and RR >75th percentile of daily precipitation sum",
     ],
-    # "SD": ["Snow", "Mean of daily snow depth"],
-    # "SD1": ["Snow", "Number of days with snow depth >= 1 cm"],
-    # "SD5cm": ["Snow", "Number of days with snow depth >= 5 cm"],
-    # "SD50cm": ["Snow", "Number of days with snow depth >= 50 cm"],
+    "SD": ["Snow", "Mean of daily snow depth"],
+    "SD1": ["Snow", "Number of days with snow depth >= 1 cm"],
+    "SD5cm": ["Snow", "Number of days with snow depth >= 5 cm"],
+    "SD50cm": ["Snow", "Number of days with snow depth >= 50 cm"],
 }
 dict_ndvi = {

{geocif-0.1.67 → geocif-0.1.68}/geocif/geocif.py RENAMED Viewed

@@ -598,15 +598,15 @@ class Geocif:
             df_region[f"Median {self.target}"].values, 3
         )
-        if f"Median {self.target} (2014-2018)" in df_region.columns:
-            df.loc[:, f"Median {self.target} (2014-2018)"] = np.around(
-                df_region[f"Median {self.target} (2014-2018)"].values, 3
-            )
-        if f"Median {self.target} (2013-2017)" in df_region.columns:
-            df.loc[:, f"Median {self.target} (2013-2017)"] = np.around(
-                df_region[f"Median {self.target} (2013-2017)"].values, 3
-            )
+        # if f"Median {self.target} (2014-2018)" in df_region.columns:
+        #     df.loc[:, f"Median {self.target} (2014-2018)"] = np.around(
+        #         df_region[f"Median {self.target} (2014-2018)"].values, 3
+        #     )
+        #
+        # if f"Median {self.target} (2013-2017)" in df_region.columns:
+        #     df.loc[:, f"Median {self.target} (2013-2017)"] = np.around(
+        #         df_region[f"Median {self.target} (2013-2017)"].values, 3
+        #     )
         if self.estimate_ci:
             if self.estimate_ci_for_all or self.forecast_season == self.today_year:
@@ -820,8 +820,8 @@ class Geocif:
                 + self.statistics_columns
                 + self.feature_names
                 + [f"Median {self.target}"]
-                + [f"Median {self.target} (2014-2018)"]
-                + [f"Median {self.target} (2013-2017)"]
+                #+ [f"Median {self.target} (2014-2018)"]
+                #+ [f"Median {self.target} (2013-2017)"]
                 + ["Region_ID"]
             )
             if self.check_yield_trend:
@@ -1011,13 +1011,13 @@ class Geocif:
             df, self.all_seasons_with_yield, self.number_median_years, self.target
         )
-        df = fe.compute_user_median_statistics(
-            df, [2014, 2015, 2016, 2017, 2018]
-        )
-        df = fe.compute_user_median_statistics(
-            df, [2013, 2014, 2015, 2016, 2017]
-        )
+        # df = fe.compute_user_median_statistics(
+        #     df, [2014, 2015, 2016, 2017, 2018]
+        # )
+        #
+        # df = fe.compute_user_median_statistics(
+        #     df, [2013, 2014, 2015, 2016, 2017]
+        # )
         if self.median_area_as_feature:
             df = fe.compute_median_statistics(
@@ -1393,6 +1393,9 @@ class Geocif:
             self.dg["ADM0_NAME"].str.lower().str.replace(" ", "_") == self.country
         ]
+        # Drop any duplicates based on Country Region column
+        self.dg_country = self.dg_country.drop_duplicates(subset=["Country Region"])
     def read_data(self, country, crop, season):
         """

{geocif-0.1.67 → geocif-0.1.68}/geocif/geocif_runner.py RENAMED Viewed

@@ -26,41 +26,40 @@ def loop_execute(inputs):
     Returns:
     """
-    # from pycallgraph2 import Config, PyCallGraph, GlobbingFilter
-    # from pycallgraph2.output import GraphvizOutput
-    #
-    # graphviz = GraphvizOutput()
-    # graphviz.output_file = "geocif_visualization.png"
-    # plt.rcParams["figure.dpi"] = 600
-    # config = Config(max_depth=5)
-    # config.trace_filter = GlobbingFilter(
-    #     exclude=[
-    #         "pycallgraph.*",
-    #         "torch*",
-    #     ]
-    # )
-    #
-    # with PyCallGraph(output=graphviz, config=config):
-    project_name, country, crop, season, model, logger, parser, index = inputs
-    logger.info("=====================================================")
-    logger.info(f"\tStarting GEOCIF: {country} {crop} {season} {model}")
-    logger.info("=====================================================")
-    obj = geocif.Geocif(logger=logger,
-                        parser=parser,
-                        project_name=project_name)
-    obj.read_data(country, crop, season)
-    # Store config file in database, only execute this for
-    # the first iteration of the loop
-    if index == 0:
-        output.config_to_db(obj.db_path, obj.parser, obj.today)
-    # Setup metadata and run ML code
-    obj.setup(season, model)
-    if obj.simulation_stages:
-        obj.execute()
+    from pycallgraph2 import Config, PyCallGraph, GlobbingFilter
+    from pycallgraph2.output import GraphvizOutput
+    graphviz = GraphvizOutput()
+    graphviz.output_file = "geocif_visualization.png"
+    plt.rcParams["figure.dpi"] = 600
+    config = Config(max_depth=5)
+    config.trace_filter = GlobbingFilter(
+        exclude=[
+            "pycallgraph.*",
+        ]
+    )
+    with PyCallGraph(output=graphviz, config=config):
+        project_name, country, crop, season, model, logger, parser, index = inputs
+        logger.info("=====================================================")
+        logger.info(f"\tStarting GEOCIF: {country} {crop} {season} {model}")
+        logger.info("=====================================================")
+        obj = geocif.Geocif(logger=logger,
+                            parser=parser,
+                            project_name=project_name)
+        obj.read_data(country, crop, season)
+        # Store config file in database, only execute this for
+        # the first iteration of the loop
+        if index == 0:
+            output.config_to_db(obj.db_path, obj.parser, obj.today)
+        # Setup metadata and run ML code
+        obj.setup(season, model)
+        if obj.simulation_stages:
+            obj.execute()
 def gather_inputs(parser):

{geocif-0.1.67 → geocif-0.1.68}/geocif/ml/feature_selection.py RENAMED Viewed

@@ -188,10 +188,24 @@ def select_features(X, y, method="RFE", min_features_to_select=3, threshold_nan=
         # Get the selected feature names
         selected_features = X.columns[selected_features].tolist()
+    elif method == "lasso":
+        from sklearn.linear_model import LassoLarsCV
+        from sklearn.feature_selection import SelectFromModel
+        # Fit Lasso model (L1 regularization) to perform feature selection
+        lasso = LassoLarsCV(cv=5)
+        lasso.fit(X, y)
+        # Use SelectFromModel to remove features with zero coefficients
+        selector = SelectFromModel(lasso, prefit=True)
+        # Get the selected features
+        selected_features = X.columns[selector.get_support()].tolist()
+        print(selected_features)
     elif method == "BorutaPy":
         from boruta import BorutaPy
-        selector = BorutaPy(forest, n_estimators="auto", random_state=42)
+        selector = BorutaPy(forest, n_estimators="auto", random_state=42, verbose=0)
         selector.fit(X.values, y.values)
         selected_features_mask = selector.support_
         selected_features = X.columns[selected_features_mask].tolist()

{geocif-0.1.67 → geocif-0.1.68}/geocif/ml/stats.py RENAMED Viewed

@@ -203,7 +203,7 @@ def add_statistics(
         fn = "illinois.csv"
     elif country == "Ethiopia":
         # HACK
-        fn = "ethiopia_wheat_summary_formatted.csv"
+        fn = "adm_crop_production.csv"
     else:
         fn = "adm_crop_production.csv"
     df_fewsnet = pd.read_csv(dir_stats / fn, low_memory=False)

{geocif-0.1.67 → geocif-0.1.68}/geocif/viz/tmp.py RENAMED Viewed

@@ -1,6 +1,4 @@
 import geopandas as gpd
-import pandas as pd
-import matplotlib.pyplot as plt
 import palettable as pal
 import matplotlib.colors as mcolors
@@ -9,7 +7,7 @@ import glob
 import os
 # 1. Specify the directory containing your .dta files:
-data_dir = r"C:\Users\ritvik\Downloads\maize_yield\maize_yield"
+data_dir = r"."
 # 2. Use glob to find all .dta files in that directory:
 dta_files = glob.glob(os.path.join(data_dir, "*.dta"))
@@ -20,6 +18,13 @@ dataframes = [pd.read_stata(f) for f in dta_files]
 # 4. Concatenate them all into one DataFrame (row-wise):
 merged_df = pd.concat(dataframes, ignore_index=True)
+# Replace null values in PROD98CQ with those in PROD columns
+merged_df['PROD98CQ'] = merged_df['PROD98CQ'].fillna(merged_df['PROD'])
+merged_df['YEAR'] = merged_df['YEAR'].fillna(merged_df['year'])
+# Drop rows where AREAH is 0
+merged_df = merged_df[merged_df['AREAH'] != 0]
 merged_df['ZONE'] = merged_df['ZONE'].astype(int)
 merged_df['DIST'] = merged_df['DIST'].astype(int)
@@ -36,7 +41,7 @@ merged_df['W_CODE'] = '7' + merged_df['W_CODE']
 merged_df['W_CODE'] = merged_df['W_CODE'].str.replace('.0', '')
 merged_df['W_CODE'] = merged_df['W_CODE'].astype(int)
-dg = gpd.read_file(r"D:\Users\ritvik\projects\GEOGLAM\Input\countries\wolayita\wolayita_dissolved.shp")
+dg = gpd.read_file(r"wolayita_dissolved.shp")
 dg = dg[['W_CODE', 'W_NAME']]
 # Merge the two dataframes on W_CODE
@@ -48,8 +53,8 @@ merged_df = merged_df.dropna(subset=['PROD98CQ', 'AREAH'])
 # Compute yield column
 merged_df['yield'] = merged_df['PROD98CQ'] / merged_df['AREAH']
-# create a new dataframe which computes average yield by W_NAME for each year
-df_avg_yield = merged_df.groupby(['W_NAME', 'YEAR'])['yield'].mean().reset_index()
+# create a new dataframe which computes average yield by W_NAME for each year, do a weighted average using FWEIGHT column
+df_avg_yield = merged_df.groupby(['W_NAME', 'YEAR']).apply(lambda x: np.average(x['yield'], weights=x['FWEIGHT'])).reset_index(name='yield')
 # Change W_NAME column to title case
 df_avg_yield['W_NAME'] = df_avg_yield['W_NAME'].str.title()
@@ -64,7 +69,15 @@ df_avg_yield = df_avg_yield.pivot(index='W_NAME', columns='YEAR', values='yield'
 df_avg_yield.index.name = None
 df_avg_yield.columns.name = None
-df_avg_yield.to_csv('wolayita_yields.csv')
+df_avg_yield.to_csv('wolayita_yields_v8.csv')
+breakpoint()
+# Compare wolayita_yields_v2.csv with wolayita_yields.csv
+# 1. Load the two CSV files
+df_v1 = pd.read_csv('wolayita_yields.csv')
+df_v2 = pd.read_csv('wolayita_yields_v2.csv')
+# 2. Check if the two DataFrames are equal
+print(df_v1.equals(df_v2))
 breakpoint()
 # 5. (Optional) Inspect the merged DataFrame

{geocif-0.1.67 → geocif-0.1.68/geocif.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.67
+Version: 0.1.68
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.67 → geocif-0.1.68}/setup.py RENAMED Viewed

@@ -50,6 +50,6 @@ setup(
     test_suite="tests",
     tests_require=test_requirements,
     url="https://ritviksahajpal.github.io/yield_forecasting/",
-    version="0.1.67",
+    version="0.1.68",
     zip_safe=False,
 )