PyPI - pyreclaim - Versions diffs - 0.3.0__tar.gz → 0.4.0__tar.gz - Mend

pyreclaim 0.3.0 (tar.gz) → 0.4.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyreclaim
-Version: 0.3.0
+Version: 0.4.0
 Summary: Reservoir Estimation of Capacity Loss using AI based Methods
 Author-email: Sanchit Minocha <msanchit@uw.edu>
 License:                     GNU GENERAL PUBLIC LICENSE
@@ -698,6 +698,10 @@ Requires-Dist: joblib
 Requires-Dist: xgboost
 Requires-Dist: lightgbm
 Requires-Dist: catboost
+Requires-Dist: openpyxl
+Requires-Dist: netcdf4
+Requires-Dist: dask
+Requires-Dist: rioxarray
 Dynamic: license-file
 <div align="center">
@@ -737,7 +741,7 @@ pip install pyreclaim
 To generate features for reservoirs using the **RECLAIM** framework and the [`pyreclaim`](https://pypi.org/project/pyreclaim/) Python package, you will need the global datasets.
-You can download all required global datasets from the Open Science Framework (OSF):
+You can download all required global datasets from the Zenodo Repository:
 [Download Global Datasets](https://doi.org/10.5281/zenodo.17230533)
@@ -763,7 +767,7 @@ from reclaim.reclaim import Reclaim
 reservoir_static = {
     "obc": 150.0,
     "hgt": 45.0,
-    "mrb": "Ganges",
+    "mrb": 4030033640,
     "lat": 25.6,
     "lon": 81.9,
     "reservoir_polygon": reservoir_polygon,

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/README.md RENAMED Viewed

@@ -35,7 +35,7 @@ pip install pyreclaim
 To generate features for reservoirs using the **RECLAIM** framework and the [`pyreclaim`](https://pypi.org/project/pyreclaim/) Python package, you will need the global datasets.
-You can download all required global datasets from the Open Science Framework (OSF):
+You can download all required global datasets from the Zenodo Repository:
 [Download Global Datasets](https://doi.org/10.5281/zenodo.17230533)
@@ -61,7 +61,7 @@ from reclaim.reclaim import Reclaim
 reservoir_static = {
     "obc": 150.0,
     "hgt": 45.0,
-    "mrb": "Ganges",
+    "mrb": 4030033640,
     "lat": 25.6,
     "lon": 81.9,
     "reservoir_polygon": reservoir_polygon,

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "pyreclaim"
-version = "v0.3.0"
+version = "v0.4.0"
 authors = [
   { name="Sanchit Minocha", email="msanchit@uw.edu" },
 ]
@@ -30,6 +30,10 @@ dependencies = [
     "xgboost",
     "lightgbm",
     "catboost",
+    "openpyxl",
+    "netcdf4",
+    "dask",
+    "rioxarray"
 ]
 [project.urls]

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/setup.py RENAMED Viewed

@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
 setup(
     name = "pyreclaim",
-    version = "v0.3.0",
+    version = "v0.4.0",
     license = "GPL-3.0",
     package_dir = {"": "src"}
 )

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/pyreclaim.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyreclaim
-Version: 0.3.0
+Version: 0.4.0
 Summary: Reservoir Estimation of Capacity Loss using AI based Methods
 Author-email: Sanchit Minocha <msanchit@uw.edu>
 License:                     GNU GENERAL PUBLIC LICENSE
@@ -698,6 +698,10 @@ Requires-Dist: joblib
 Requires-Dist: xgboost
 Requires-Dist: lightgbm
 Requires-Dist: catboost
+Requires-Dist: openpyxl
+Requires-Dist: netcdf4
+Requires-Dist: dask
+Requires-Dist: rioxarray
 Dynamic: license-file
 <div align="center">
@@ -737,7 +741,7 @@ pip install pyreclaim
 To generate features for reservoirs using the **RECLAIM** framework and the [`pyreclaim`](https://pypi.org/project/pyreclaim/) Python package, you will need the global datasets.
-You can download all required global datasets from the Open Science Framework (OSF):
+You can download all required global datasets from the Zenodo Repository:
 [Download Global Datasets](https://doi.org/10.5281/zenodo.17230533)
@@ -763,7 +767,7 @@ from reclaim.reclaim import Reclaim
 reservoir_static = {
     "obc": 150.0,
     "hgt": 45.0,
-    "mrb": "Ganges",
+    "mrb": 4030033640,
     "lat": 25.6,
     "lon": 81.9,
     "reservoir_polygon": reservoir_polygon,

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/pyreclaim.egg-info/requires.txt RENAMED Viewed

@@ -11,3 +11,7 @@ joblib
 xgboost
 lightgbm
 catboost
+openpyxl
+netcdf4
+dask
+rioxarray

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/derived_features/feature_engineering_and_transformation.py RENAMED Viewed

@@ -54,22 +54,28 @@ def engineer_and_transform_features(df: pd.DataFrame) -> pd.DataFrame:
         "SOUT": df["MAO"] * df["NSSC2_mean"],
     }
+    df = pd.concat([df, pd.DataFrame(feature_dict)], axis=1)
     # Land cover log-area features
     lc_cols = ['LCAS','LCC','LCG','LCT','LCS','LCHV','LCM','LCSV','LCBS','LCSG','LCWB']
     for col in lc_cols:
-        feature_dict[col] = df["CA"] * df[col] / 100
-    df = pd.concat([df, pd.DataFrame(feature_dict)], axis=1)
+        df[col] = df["CA"] * df[col] / 100
     # -------------------------
     # APPLY LOG TRANSFORMATIONS
     # -------------------------
     log_candidates = ['CA','DCA','OBC','HGT','RA','RP','FL',
                       'SA_mean','SA_mean_clip','SA_std','SA_kurt','PAI','MAI','MAO','I_std','O_std','MAR',
-                      'rain_per_area','GC','TE','ECLR','SIN','SOUT'] + lc_cols
+                      'ROBC','rain_per_area','GC','TE','RT','ECLR','ESR','SIN','SOUT'] + lc_cols
     for col in log_candidates:
         log_col = f'log_{col}'  # add prefix to avoid double log
-        df[log_col] = np.log(df[col].clip(lower=1e-15))
+        try:
+            df[log_col] = np.log(df[col].clip(lower=1e-15))
+        except Exception as e:
+            raise ValueError(f"Error applying log transform to column '{col}': {e}")
+    # Process DLC as categorical column
+    df['DLC'] = df['DLC'].astype(int).fillna(0)
     return df

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/dynamic_features/reservoir_dynamic.py RENAMED Viewed

@@ -142,7 +142,8 @@ def reservoir_based_dynamic_features(
                     path, time_col, data_col, func, feat, obs_period
                 )
                 results[feat] = df_feat.iloc[0, 0]  # single value
-            except Exception:
+            except Exception as e:
+                print(f"Failed to compute {feat} due to error: {e}. Setting as NaN.")
                 results[feat] = np.nan
     return pd.DataFrame([results])

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/dynamic_features/utils/ts_aggregate.py RENAMED Viewed

@@ -37,6 +37,12 @@ def compute_ts_aggregate(
     df = pd.read_csv(ts_csv_path)
     if df.empty:
         raise ValueError(f"CSV at {ts_csv_path} is empty.")
+    # Ensure columns exist
+    if time_column not in df.columns:
+        raise ValueError(f"Time column '{time_column}' not found in CSV.")
+    if value_column not in df.columns:
+        raise ValueError(f"Value column '{value_column}' not found in CSV.")
     # Ensure time column is datetime
     df[time_column] = pd.to_datetime(df[time_column], errors='coerce')

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/generate_features.py RENAMED Viewed

@@ -12,17 +12,24 @@ from reclaim.derived_features.feature_engineering_and_transformation import engi
 def create_features_per_row(
+    idx: int,
+    observation_period: List[int],
     reservoir_static_params: dict,
     catchment_static_params: dict,
     reservoir_dynamic_info: dict = None,
     catchment_dynamic_info: dict = None,
-    observation_period: List[int] = None
 ) -> pd.DataFrame:
     """
     Compute all static, dynamic, and derived features for a single reservoir observation.
     Parameters
     ----------
+    idx : int
+        Index of the reservoir sedimentation observation (for tracking/logging purposes).
+    observation_period : list of int
+        Two-element list [OSY, OEY] for observation start year and end year.
     reservoir_static_params : dict
         Parameters for reservoir_based_static_features(). Expected keys:
             - obc : float, Original Built Capacity (MCM)
@@ -30,6 +37,7 @@ def create_features_per_row(
             - mrb : str, Major River Basin, optional
             - lat : float, Latitude (deg)
             - lon : float, Longitude (deg)
+            - by : int, Build Year
             - reservoir_polygon : shapely.geometry.Polygon
             - inlet_point : shapely.geometry.Point, optional
             - resolution : float, optional
@@ -61,9 +69,6 @@ def create_features_per_row(
             - "tmax":   {"path": str, "time_column": str, "data_column": str}
             - "wind":   {"path": str, "time_column": str, "data_column": str}
-    observation_period : list of int, optional
-        Two-element list [OSY, OEY] for observation start year and end year.
     Returns
     -------
     pd.DataFrame
@@ -74,6 +79,14 @@ def create_features_per_row(
         - Catchment dynamic
         - Derived/log-transformed
     """
+    # --- Observation period features ---
+    osy, oey = observation_period
+    df_obs_period = pd.DataFrame({
+        "idx": [idx],
+        "OSY": [osy],
+        "OEY": [oey]
+    })
     # --- Static features ---
     df_res_static = reservoir_based_static_features(**reservoir_static_params)
@@ -88,9 +101,9 @@ def create_features_per_row(
     if catchment_dynamic_info is not None and observation_period is not None:
         df_catch_dyn = catchment_based_dynamic_features(catchment_dynamic_info, observation_period)
     # --- Combine all static + dynamic ---
-    df_combined = pd.concat([df_res_static, df_catch_static, df_res_dyn, df_catch_dyn], axis=1)
+    df_combined = pd.concat([df_obs_period, df_res_static, df_catch_static, df_res_dyn, df_catch_dyn], axis=1)
     # --- Engineer + log-transform features ---
     df_final = engineer_and_transform_features(df_combined)
@@ -109,6 +122,10 @@ def create_features_multi(
     reservoirs_input : list of dict
         Each element should be a dictionary with the following keys:
+        - `idx` : int
+            Index of the reservoir sedimentation observation.
+        - `observation_period` : list of int
+            Two-element list `[OSY, OEY]` specifying the observation period.
         - `reservoir_static_params` : dict
             Parameters for `reservoir_based_static_features()`.
         - `catchment_static_params` : dict
@@ -117,8 +134,7 @@ def create_features_multi(
             Parameters for `reservoir_based_dynamic_features()`.
         - `catchment_dynamic_info` : dict
             Parameters for `catchment_based_dynamic_features()`.
-        - `observation_period` : list of int
-            Two-element list `[OSY, OEY]` specifying the observation period.
     Returns
     -------
@@ -129,11 +145,12 @@ def create_features_multi(
     all_rows = []
     for idx, reservoir_info in enumerate(reservoirs_input):
         df_row = create_features_per_row(
+            idx=reservoir_info.get("idx"),
+            observation_period=reservoir_info.get("observation_period"),
             reservoir_static_params=reservoir_info.get("reservoir_static_params", {}),
             catchment_static_params=reservoir_info.get("catchment_static_params", {}),
             reservoir_dynamic_info=reservoir_info.get("reservoir_dynamic_info", None),
             catchment_dynamic_info=reservoir_info.get("catchment_dynamic_info", None),
-            observation_period=reservoir_info.get("observation_period", None),
         )
         all_rows.append(df_row)

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/reclaim.py RENAMED Viewed

@@ -189,6 +189,8 @@ class Reclaim:
             if self.feature_order_list is not None:
                 # Reorder columns automatically
                 X = X[self.feature_order_list]
+                # for col in self.cat_features:
+                #     X[col] = X[col].astype("category")
         elif isinstance(X, np.ndarray):
             warnings.warn(
                     "Predicting with NumPy array: assumes column order matches training order. "
@@ -476,19 +478,21 @@ class Reclaim:
             load_dir = os.path.join(package_dir, "pretrained_model")
         # Load XGBoost
-        xgb_path = os.path.join(load_dir, f"{prefix}_xgb.json")
+        xgb_path = os.path.join(load_dir, f"{prefix}_xgb.pkl")
         if os.path.exists(xgb_path):
-            self.xgb_model = xgb.XGBRegressor()
-            self.xgb_model.load_model(xgb_path)
+            import xgboost as xgb
+            self.xgb_model = joblib.load(xgb_path)
         # Load LightGBM
-        lgb_path = os.path.join(load_dir, f"{prefix}_lgb.txt")
+        lgb_path = os.path.join(load_dir, f"{prefix}_lgb.pkl")
         if os.path.exists(lgb_path):
-            self.lgb_model = lgb.Booster(model_file=lgb_path)
+            import lightgbm as lgb
+            self.lgb_model = joblib.load(lgb_path)
         # Load CatBoost
         cat_path = os.path.join(load_dir, f"{prefix}_cat.cbm")
         if os.path.exists(cat_path):
+            from catboost import CatBoostRegressor
             self.cat_model = CatBoostRegressor()
             self.cat_model.load_model(cat_path)

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/static_features/reservoir_static.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import pandas as pd
 from shapely.geometry import Point, Polygon
+import numpy as np
 # Import utils
 from reclaim.static_features.utils.flow_length import find_actual_flow_path
@@ -13,6 +14,7 @@ def reservoir_based_static_features(
     mrb: str = None,
     lat: float = None,
     lon: float = None,
+    by: int = None,
     reservoir_polygon: Polygon = None,
     inlet_point: Point = None,
     resolution: float = None,
@@ -33,6 +35,8 @@ def reservoir_based_static_features(
         Latitude of dam location (degrees).
     lon : float, optional
         Longitude of dam location (degrees).
+    by : int, optional
+        Build year of the reservoir.
     reservoir_polygon : shapely.geometry.Polygon, optional
         Reservoir polygon geometry used to compute area and perimeter.
     dam_point : shapely.geometry.Point, optional
@@ -53,6 +57,7 @@ def reservoir_based_static_features(
         - MRB: Major River Basin
         - LAT: Latitude (deg)
         - LON: Longitude (deg)
+        - BY: Build Year
         - RA: Reservoir Area (sq km)
         - RP: Reservoir Perimeter (km)
         - FL: Flow Length (km)
@@ -67,6 +72,7 @@ def reservoir_based_static_features(
         "MRB": mrb,
         "LAT": lat,
         "LON": lon,
+        "BY": by,
         "RA": None,
         "RP": None,
         "FL": None,
@@ -85,8 +91,12 @@ def reservoir_based_static_features(
     dam_point = Point(lon, lat)
     if dam_point is not None and reservoir_polygon is not None:
         _, _, features["FL"], _ = (
-            find_actual_flow_path(dam_point, reservoir_polygon, inlet_point, resolution) / 1e3
-        )  # m → km
+            find_actual_flow_path(dam_point, reservoir_polygon, inlet_point, resolution)
+        )
+        if features["FL"]:
+            features["FL"] = calculate_length_area_meters(features["FL"], area=False) / 1e3  # m → km
+        else:
+            features["FL"] = np.nan
     # AEC metrics
     if aec_df is not None:

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/static_features/utils/aec_shape.py RENAMED Viewed

@@ -94,8 +94,8 @@ def concavity_index(df: pd.DataFrame) -> float:
     line = np.linspace(0, 1, len(area_norm))
     # Area under actual curve vs line
-    auc_curve = np.trapz(elev_norm, area_norm)
-    auc_line = np.trapz(line, area_norm)
+    auc_curve = np.trapezoid(elev_norm, area_norm)
+    auc_line = np.trapezoid(line, area_norm)
     concavity = auc_curve / auc_line if auc_line > 0 else np.nan
     return concavity

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/static_features/utils/area_perimeter.py RENAMED Viewed

@@ -33,4 +33,4 @@ def calculate_length_area_meters(geometry, area= True):
         area_square_meters = scaled_geometry.area
         return length_meters, area_square_meters
     else:
-        return length_meters[0]
+        return length_meters

{pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/static_features/utils/catchment_agreggate.py RENAMED Viewed

@@ -1,6 +1,7 @@
 import pandas as pd
 import numpy as np
 import xarray as xr
+import rioxarray
 import geopandas as gpd
 import regionmask
 from collections import Counter