geocif 0.1.81__tar.gz → 0.1.83__tar.gz
This diff compares the contents of two package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as published in those registries.
- {geocif-0.1.81/geocif.egg-info → geocif-0.1.83}/PKG-INFO +1 -1
- {geocif-0.1.81 → geocif-0.1.83}/geocif/cei/indices.py +1 -1
- {geocif-0.1.81 → geocif-0.1.83}/geocif/geocif.py +37 -11
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/embedding.py +4 -1
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/stages.py +5 -0
- {geocif-0.1.81 → geocif-0.1.83/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.81 → geocif-0.1.83}/setup.py +1 -1
- {geocif-0.1.81 → geocif-0.1.83}/LICENSE +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/MANIFEST.in +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/README.md +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/analysis.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/constants.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/features.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/geo.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/models.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/experiments.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/geocif_runner.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_angola.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_madagascar.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_malawi.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_mozambique.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_south_africa.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_zambia.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_zimbabwe.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/logger.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/feature_engineering.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/feature_selection.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/output.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/stats.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/trend.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/xai.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/mm.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/aa.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/area.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/automl.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/download_esi.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/enso.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/eval.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/gamtest.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/gee_access.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/misc.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/play_xagg.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/reg.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/sustain.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/test_catboost.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp2.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp3.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp4.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp5.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/wolayita_maize_mask.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/risk/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/risk/impact_assessment.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/utils.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/viz/gt.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/viz/plot.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/viz/tmp.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif.egg-info/SOURCES.txt +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/requirements.txt +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/setup.cfg +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/tests/test_geocif.py +0 -0
geocif/geocif.py

```diff
@@ -945,11 +945,12 @@ class Geocif:
         parts = all_cei_columns[-1].split("_")
         cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
 
-        # For each region, find the column with the longest string in cei_column
-        group_by = ["Region"]
-        groups = df.groupby(group_by)
         if self.use_cumulative_features:
             frames = []
+            # For each region, find the column with the longest string in cei_column
+            group_by = ["Region"]
+            groups = df.groupby(group_by)
+
             for name, group in groups:
                 # Drop columns with all NaNs
                 group.dropna(axis=1, how="all", inplace=True)
```
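For context, the block being rearranged follows the usual pandas per-region pattern: group on Region, clean each group, and collect the pieces. A minimal standalone sketch of that pattern with synthetic data (the column names and the cleaning step are illustrative, not geocif's full logic):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(
    {
        "Region": ["North", "North", "South", "South"],
        "NDVI_1": [0.2, 0.3, np.nan, np.nan],  # entirely NaN within the South group
        "NDVI_2": [0.4, 0.5, 0.6, 0.7],
    }
)

frames = []
group_by = ["Region"]
groups = df.groupby(group_by)
for name, group in groups:
    # Drop columns that are all-NaN within this region only
    group = group.dropna(axis=1, how="all")
    frames.append(group)

# Regions with different surviving columns are re-aligned here (missing cells become NaN)
df_clean = pd.concat(frames)
print(df_clean)
```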
geocif/geocif.py (continued)

```diff
@@ -1019,26 +1020,45 @@ class Geocif:
         # Drop those columns
 
         df = df.drop(columns=cols_to_drop)
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<0>", dupes)
         # Hack: If
         # Change column name
         # e.g. 'vDTR_7_6_5_4_3_2_1_37_36_35_34_33_32_31' to 'vDTR Mar 1-Oct 27'
         df = stages.update_feature_names(df, self.method)
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<111>", dupes)
         all_cei_columns = self.get_cei_column_names(df)
         # Fill in any missing values with 0
         df.loc[:, all_cei_columns].fillna(0, inplace=True)
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<1>", dupes)
 
         df = fe.compute_last_year_yield(df, self.target)
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<2>", dupes)
         df = fe.compute_median_statistics(
             df, self.all_seasons_with_yield, self.number_median_years, self.target
         )
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<3>", dupes)
         df = fe.compute_user_median_statistics(df, range(2018, 2023))
 
         df = fe.compute_user_median_statistics(df, range(2013, 2018))
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<4>", dupes)
         if self.median_area_as_feature:
             df = fe.compute_median_statistics(
                 df, self.all_seasons_with_yield, self.number_median_years, "Area (ha)"
@@ -1053,7 +1073,10 @@ class Geocif:
         df = fe.compute_analogous_yield(
             df, self.all_seasons_with_yield, self.number_median_years, self.target
         )
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("5", dupes)
         # Create Region_ID column based on Region column category code
         df["Region"] = df["Region"].astype("category")
         if self.cluster_strategy == "single":
```
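The same four-line diagnostic recurs at each checkpoint above: collect the column labels matching AUC_ESI4WK, count them with collections.Counter, and print any label that appears more than once. A standalone sketch of that check as a helper; the function name and the demo frame are hypothetical, not part of geocif:

```python
from collections import Counter

import pandas as pd


def report_duplicate_columns(df: pd.DataFrame, like: str = "AUC_ESI4WK", tag: str = "") -> dict:
    """Return {label: count} for matching column labels that occur more than once."""
    cols = df.filter(like=like).columns.tolist()
    dupes = {name: n for name, n in Counter(cols).items() if n > 1}
    print(tag, dupes)
    return dupes


# A frame with a duplicated label produces a non-empty report
df = pd.DataFrame([[1, 2, 3]], columns=["AUC_ESI4WK_1", "AUC_ESI4WK_1", "Yield"])
report_duplicate_columns(df, tag="<demo>")  # prints: <demo> {'AUC_ESI4WK_1': 2}
```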
geocif/geocif.py (continued)

```diff
@@ -1067,7 +1090,7 @@ class Geocif:
 
         # Region_ID should be type category
         df["Region_ID"] = df["Region_ID"].astype("category")
-
+        breakpoint()
         return df
 
     def execute(self):
@@ -1474,7 +1497,8 @@ class Geocif:
         assert all_files, f"No files found in {_dir_country} with {file_name}"
 
         self.df_inputs = pd.concat(
-            (pd.read_csv(f) for f in all_files)
+            (pd.read_csv(f, engine="pyarrow") for f in tqdm(all_files, desc="Reading CSVs", leave=False)),
+            ignore_index=True
         )
 
         self.df_inputs = stats.add_statistics(
```
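The rewritten concat call switches to pandas' pyarrow CSV parser and wraps the file list in a tqdm progress bar. A minimal standalone version of that loading pattern; the directory path is a placeholder, and engine="pyarrow" requires the pyarrow package to be installed:

```python
from pathlib import Path

import pandas as pd
from tqdm import tqdm

# Placeholder: any folder of CSV files sharing a common schema
all_files = sorted(Path("data/csvs").glob("*.csv"))

# engine="pyarrow" hands parsing to Apache Arrow, typically faster on large files;
# ignore_index=True renumbers the concatenated rows 0..N-1 instead of keeping each file's index
df_inputs = pd.concat(
    (pd.read_csv(f, engine="pyarrow") for f in tqdm(all_files, desc="Reading CSVs", leave=False)),
    ignore_index=True,
)
print(df_inputs.shape)
```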
geocif/geocif.py (continued)

```diff
@@ -1486,7 +1510,9 @@ class Geocif:
             [self.target] + self.statistics_columns,
             self.method,
         )
+
         """ Add information on starting and ending time period for each stage"""
+        self.logger.info("Adding starting and ending time period for each stage")
         self.df_inputs = stages.add_stage_information(self.df_inputs, self.method)
 
         self.df_inputs.to_csv(file, index=False)
```
geocif/ml/embedding.py

```diff
@@ -32,7 +32,10 @@ def _compute_correlations(X, y):
         f_series = X[feature]
 
         # Ignore NaN values in either y or f_series
-        mask = ~(np.isnan(y) | np.isnan(f_series))
+        try:
+            mask = ~(np.isnan(y) | np.isnan(f_series))
+        except:
+            breakpoint()
         y_filtered = y[mask]
         f_series_filtered = f_series[mask]
 
```
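The new try/except presumably guards against np.isnan raising a TypeError when y or the feature column is not a float dtype (object or string columns, for example). A small illustration of that failure mode, with pd.isna shown as a dtype-agnostic alternative; this is an observation about numpy/pandas behaviour, not geocif's code:

```python
import numpy as np
import pandas as pd

y = pd.Series([1.0, np.nan, 3.0])
f_numeric = pd.Series([0.5, 2.0, np.nan])
f_object = pd.Series(["a", None, "c"], dtype=object)

# Float dtypes: np.isnan works elementwise
mask = ~(np.isnan(y) | np.isnan(f_numeric))
print(mask.tolist())  # [True, False, False]

# Object/string dtype: np.isnan raises TypeError
try:
    np.isnan(f_object)
except TypeError as exc:
    print("np.isnan failed:", exc)

# pd.isna accepts any dtype, so this mask never raises
mask_safe = ~(pd.isna(y) | pd.isna(f_object))
print(mask_safe.tolist())  # [True, False, True]
```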
geocif/ml/stages.py

```diff
@@ -268,6 +268,11 @@ def update_feature_names(df, method):
         # Saving the result in the dictionary
         stages_info[element] = (cei, start_stage, end_stage, new_column_name)
 
+        # Check if any duplicates exist in the dictionary
+        if len(stages_info) != len(set(stages_info.values())):
+            breakpoint()
+            raise ValueError(f"Duplicate stage information found for {element}")
+        breakpoint()
     # For each column in df, check if it exists in stages_info, and
     # replace it with the new column name
     # Precompute the rename mapping outside the loop
```
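The added check leans on the fact that the dictionary's values are hashable tuples: building a set collapses equal tuples, so a set shorter than the dict means some value repeats. A minimal illustration of that pattern with synthetic entries (the keys and tuples below are made up for the example):

```python
# Detecting repeated values in a dict whose values are hashable tuples
stages_info = {
    "vDTR_7_6_5": ("vDTR", 7, 5, "vDTR Mar 1-Apr 14"),
    "vDTR_4_3_2": ("vDTR", 4, 2, "vDTR Apr 15-May 26"),
}


def has_duplicate_values(d: dict) -> bool:
    # set() collapses equal tuples, so a shorter set means at least one value repeats
    return len(d) != len(set(d.values()))


print(has_duplicate_values(stages_info))  # False

# Map a new key to a value that already exists under another key
stages_info["vDTR_9_8"] = ("vDTR", 7, 5, "vDTR Mar 1-Apr 14")
print(has_duplicate_values(stages_info))  # True
```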