PyPI - geocif - Versions diffs - 0.1.34__tar.gz → 0.1.35__tar.gz - Mend

geocif 0.1.34.tar.gz → 0.1.35.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

{geocif-0.1.34/geocif.egg-info → geocif-0.1.35}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.34
+Version: 0.1.35
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.34 → geocif-0.1.35}/geocif/cei/indices.py RENAMED Viewed

@@ -393,6 +393,7 @@ class CEIs:
             / self.admin_zone
             / self.country
         )
         os.makedirs(self.dir_output, exist_ok=True)
         os.makedirs(self.dir_intermediate, exist_ok=True)

{geocif-0.1.34 → geocif-0.1.35}/geocif/geocif.py RENAMED Viewed

@@ -487,10 +487,9 @@ class Geocif:
             "Harvest Year",
             "Stage Name",
         ]
-        try:
-            df.index = df.apply(lambda row: "_".join([str(row[col]) for col in index_columns]), axis=1)
-        except Exception as e:
-            breakpoint()
+        df.index = df.apply(
+            lambda row: "_".join([str(row[col]) for col in index_columns]), axis=1
+        )
         # name the index level
         df.index.set_names(["Index"], inplace=True)

{geocif-0.1.34 → geocif-0.1.35}/geocif/indices_runner_v2.py RENAMED Viewed

@@ -155,22 +155,21 @@ class cei_runner(base.BaseGeo):
                 "ndvi",
                 False,  # redo
             )
-            for year in range(2001, ar.utcnow().year + 1)
+            for year in range(2024, ar.utcnow().year + 1)
             for status, path, filename, admin_zone, category in combinations
         ]
         # Only keep those entries in combinations where the third elemt is
         # mozambique, south_africa, angola or dem_people's_rep_of_korea
         # This is done to test the code for these countries
-        #combinations = [
-        #    i
-        #    for i in combinations
-        #    if "ethiopia_maize_s1" in i[3]
-        #]
-        #                 "malawi" in i[2]]
-        if True:
-            num_cpu = int(cpu_count() * 0.8)
+        combinations = [
+           i
+           for i in combinations
+           if "malawi_maize_s1" in i[3]
+        ]
+        if False:
+            num_cpu = int(cpu_count() * 0.3)
             with Pool(num_cpu) as p:
                 for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
                     pass
@@ -198,7 +197,7 @@ def run(path_config_files=[]):
     indices.validate_index_definitions()
     for method in [
-        "phenological_stages",  # "dekad_r"  # "dekad_r"
+        "biweekly_r",  # "dekad_r"  # "dekad_r"
     ]:  # , "full_season", "phenological_stages", "fraction_season"]:
         obj = cei_runner(path_config_files)
         obj.main(method)

{geocif-0.1.34 → geocif-0.1.35}/geocif/ml/correlations.py RENAMED Viewed

@@ -246,6 +246,7 @@ def all_correlated_feature_by_time(df, **kwargs):
     Returns:
     """
+    THRESHOLD = 0.1
     national_correlation = kwargs.get("national_correlation")
     group_by = kwargs.get("groupby")
     combined_dict = kwargs.get("combined_dict")
@@ -264,8 +265,16 @@ def all_correlated_feature_by_time(df, **kwargs):
             df_corr = df_corr.dropna(thresh=len(df_corr) / 2, axis=1)
             if not df_corr.empty:
-                df_tmp = df_corr[df_corr.columns[(df_corr.mean() > 0.1)]]
-                dict_selected_features[region_id] = df_tmp.columns
+                df_tmp = df_corr[df_corr.columns[(abs(df_corr.mean()) > THRESHOLD)]]
+                # Add the columns to dict_selected_features along with the absolute mean value
+                absolute_medians = df_tmp.abs().median()
+                # Create a DataFrame to display the column names and their absolute median values
+                absolute_median_df = absolute_medians.reset_index()
+                absolute_median_df.columns = ['CEI', 'Median']
+                # Add the CEI and Median value to dict_selected_features
+                dict_selected_features[region_id] = absolute_median_df
                 df_tmp2 = (
                     df_tmp.median(axis=0)
@@ -293,24 +302,31 @@ def all_correlated_feature_by_time(df, **kwargs):
             else:
                 # HACK
                 df_corr = _all_correlated_feature_by_time(df, **kwargs)
-                dict_selected_features[region_id] = df_corr.columns
-                dict_best_cei[region_id] = {}
-                # dict_selected_features[region_id] = dict_selected_features[0]
-                # dict_best_cei[region_id] = dict_best_cei[0]
-                # Combine all unique values from the existing dictionary elements
-                # combined_metrics = set()
-                # for key in dict_selected_features:
-                #     breakpoint()
-                #     combined_metrics.update(dict_selected_features[key])
-                #
-                # # Add the combined set as a new element with key 3
-                # dict_selected_features[region_id] = sorted(list(combined_metrics))
+                df_tmp = df_corr[df_corr.columns[(abs(df_corr.mean()) > THRESHOLD)]]
+                # Add the columns to dict_selected_features along with the absolute mean value
+                absolute_medians = df_tmp.abs().median()
+                # Create a DataFrame to display the column names and their absolute median values
+                absolute_median_df = absolute_medians.reset_index()
+                absolute_median_df.columns = ['CEI', 'Median']
+                # Add the CEI and Median value to dict_selected_features
+                dict_selected_features[region_id] = absolute_median_df
+                dict_best_cei[region_id] = {}
     else:
         df_corr = _all_correlated_feature_by_time(df, **kwargs)
-        dict_selected_features[0] = df_corr.columns
+        df_tmp = df_corr[df_corr.columns[(abs(df_corr.mean()) > THRESHOLD)]]
+        # Add the columns to dict_selected_features along with the absolute mean value
+        absolute_medians = df_tmp.abs().median()
+        # Create a DataFrame to display the column names and their absolute median values
+        absolute_median_df = absolute_medians.reset_index()
+        absolute_median_df.columns = ['CEI', 'Median']
+        # Add the CEI and Median value to dict_selected_features
+        dict_selected_features[0] = absolute_median_df
-        df_corr = df_corr[df_corr.columns[(df_corr.mean() > 0.1)]]
         plot_feature_corr_by_time(df_corr, **kwargs)
     return dict_selected_features, dict_best_cei

{geocif-0.1.34 → geocif-0.1.35/geocif.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.34
+Version: 0.1.35
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal

{geocif-0.1.34 → geocif-0.1.35}/setup.py RENAMED Viewed

@@ -50,6 +50,6 @@ setup(
     test_suite="tests",
     tests_require=test_requirements,
     url="https://ritviksahajpal.github.io/yield_forecasting/",
-    version="0.1.34",
+    version="0.1.35",
     zip_safe=False,
 )