pyreclaim 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/PKG-INFO +7 -3
  2. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/README.md +2 -2
  3. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/pyproject.toml +5 -1
  4. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/setup.py +1 -1
  5. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/pyreclaim.egg-info/PKG-INFO +7 -3
  6. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/pyreclaim.egg-info/requires.txt +4 -0
  7. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/derived_features/feature_engineering_and_transformation.py +12 -6
  8. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/dynamic_features/reservoir_dynamic.py +2 -1
  9. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/dynamic_features/utils/ts_aggregate.py +6 -0
  10. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/generate_features.py +26 -9
  11. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/reclaim.py +9 -5
  12. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/static_features/reservoir_static.py +12 -2
  13. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/static_features/utils/aec_shape.py +2 -2
  14. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/static_features/utils/area_perimeter.py +1 -1
  15. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/static_features/utils/catchment_agreggate.py +1 -0
  16. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/LICENSE +0 -0
  17. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/setup.cfg +0 -0
  18. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/pyreclaim.egg-info/SOURCES.txt +0 -0
  19. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/pyreclaim.egg-info/dependency_links.txt +0 -0
  20. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/pyreclaim.egg-info/top_level.txt +0 -0
  21. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/__init__.py +0 -0
  22. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/derived_features/__init__.py +0 -0
  23. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/dynamic_features/__init__.py +0 -0
  24. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/dynamic_features/catchment_dynamic.py +0 -0
  25. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/dynamic_features/utils/__init__.py +0 -0
  26. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/dynamic_features/utils/catchment_meteorology.py +0 -0
  27. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/dynamic_features/utils/inflow_outflow.py +0 -0
  28. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/dynamic_features/utils/rainfall.py +0 -0
  29. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/dynamic_features/utils/statistical_metrics.py +0 -0
  30. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/static_features/__init__.py +0 -0
  31. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/static_features/catchment_static.py +0 -0
  32. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/static_features/utils/__init__.py +0 -0
  33. {pyreclaim-0.3.0 → pyreclaim-0.4.0}/src/reclaim/static_features/utils/flow_length.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyreclaim
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Reservoir Estimation of Capacity Loss using AI based Methods
5
5
  Author-email: Sanchit Minocha <msanchit@uw.edu>
6
6
  License: GNU GENERAL PUBLIC LICENSE
@@ -698,6 +698,10 @@ Requires-Dist: joblib
698
698
  Requires-Dist: xgboost
699
699
  Requires-Dist: lightgbm
700
700
  Requires-Dist: catboost
701
+ Requires-Dist: openpyxl
702
+ Requires-Dist: netcdf4
703
+ Requires-Dist: dask
704
+ Requires-Dist: rioxarray
701
705
  Dynamic: license-file
702
706
 
703
707
  <div align="center">
@@ -737,7 +741,7 @@ pip install pyreclaim
737
741
 
738
742
  To generate features for reservoirs using the **RECLAIM** framework and the [`pyreclaim`](https://pypi.org/project/pyreclaim/) Python package, you will need the global datasets.
739
743
 
740
- You can download all required global datasets from the Open Science Framework (OSF):
744
+ You can download all required global datasets from the Zenodo Repository:
741
745
 
742
746
  [Download Global Datasets](https://doi.org/10.5281/zenodo.17230533)
743
747
 
@@ -763,7 +767,7 @@ from reclaim.reclaim import Reclaim
763
767
  reservoir_static = {
764
768
  "obc": 150.0,
765
769
  "hgt": 45.0,
766
- "mrb": "Ganges",
770
+ "mrb": 4030033640,
767
771
  "lat": 25.6,
768
772
  "lon": 81.9,
769
773
  "reservoir_polygon": reservoir_polygon,
@@ -35,7 +35,7 @@ pip install pyreclaim
35
35
 
36
36
  To generate features for reservoirs using the **RECLAIM** framework and the [`pyreclaim`](https://pypi.org/project/pyreclaim/) Python package, you will need the global datasets.
37
37
 
38
- You can download all required global datasets from the Open Science Framework (OSF):
38
+ You can download all required global datasets from the Zenodo Repository:
39
39
 
40
40
  [Download Global Datasets](https://doi.org/10.5281/zenodo.17230533)
41
41
 
@@ -61,7 +61,7 @@ from reclaim.reclaim import Reclaim
61
61
  reservoir_static = {
62
62
  "obc": 150.0,
63
63
  "hgt": 45.0,
64
- "mrb": "Ganges",
64
+ "mrb": 4030033640,
65
65
  "lat": 25.6,
66
66
  "lon": 81.9,
67
67
  "reservoir_polygon": reservoir_polygon,
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "pyreclaim"
7
- version = "v0.3.0"
7
+ version = "v0.4.0"
8
8
  authors = [
9
9
  { name="Sanchit Minocha", email="msanchit@uw.edu" },
10
10
  ]
@@ -30,6 +30,10 @@ dependencies = [
30
30
  "xgboost",
31
31
  "lightgbm",
32
32
  "catboost",
33
+ "openpyxl",
34
+ "netcdf4",
35
+ "dask",
36
+ "rioxarray"
33
37
  ]
34
38
 
35
39
  [project.urls]
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
3
3
 
4
4
  setup(
5
5
  name = "pyreclaim",
6
- version = "v0.3.0",
6
+ version = "v0.4.0",
7
7
  license = "GPL-3.0",
8
8
  package_dir = {"": "src"}
9
9
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyreclaim
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Reservoir Estimation of Capacity Loss using AI based Methods
5
5
  Author-email: Sanchit Minocha <msanchit@uw.edu>
6
6
  License: GNU GENERAL PUBLIC LICENSE
@@ -698,6 +698,10 @@ Requires-Dist: joblib
698
698
  Requires-Dist: xgboost
699
699
  Requires-Dist: lightgbm
700
700
  Requires-Dist: catboost
701
+ Requires-Dist: openpyxl
702
+ Requires-Dist: netcdf4
703
+ Requires-Dist: dask
704
+ Requires-Dist: rioxarray
701
705
  Dynamic: license-file
702
706
 
703
707
  <div align="center">
@@ -737,7 +741,7 @@ pip install pyreclaim
737
741
 
738
742
  To generate features for reservoirs using the **RECLAIM** framework and the [`pyreclaim`](https://pypi.org/project/pyreclaim/) Python package, you will need the global datasets.
739
743
 
740
- You can download all required global datasets from the Open Science Framework (OSF):
744
+ You can download all required global datasets from the Zenodo Repository:
741
745
 
742
746
  [Download Global Datasets](https://doi.org/10.5281/zenodo.17230533)
743
747
 
@@ -763,7 +767,7 @@ from reclaim.reclaim import Reclaim
763
767
  reservoir_static = {
764
768
  "obc": 150.0,
765
769
  "hgt": 45.0,
766
- "mrb": "Ganges",
770
+ "mrb": 4030033640,
767
771
  "lat": 25.6,
768
772
  "lon": 81.9,
769
773
  "reservoir_polygon": reservoir_polygon,
@@ -11,3 +11,7 @@ joblib
11
11
  xgboost
12
12
  lightgbm
13
13
  catboost
14
+ openpyxl
15
+ netcdf4
16
+ dask
17
+ rioxarray
@@ -54,22 +54,28 @@ def engineer_and_transform_features(df: pd.DataFrame) -> pd.DataFrame:
54
54
  "SOUT": df["MAO"] * df["NSSC2_mean"],
55
55
  }
56
56
 
57
+ df = pd.concat([df, pd.DataFrame(feature_dict)], axis=1)
58
+
57
59
  # Land cover log-area features
58
60
  lc_cols = ['LCAS','LCC','LCG','LCT','LCS','LCHV','LCM','LCSV','LCBS','LCSG','LCWB']
59
61
  for col in lc_cols:
60
- feature_dict[col] = df["CA"] * df[col] / 100
61
-
62
- df = pd.concat([df, pd.DataFrame(feature_dict)], axis=1)
63
-
62
+ df[col] = df["CA"] * df[col] / 100
63
+
64
64
  # -------------------------
65
65
  # APPLY LOG TRANSFORMATIONS
66
66
  # -------------------------
67
67
  log_candidates = ['CA','DCA','OBC','HGT','RA','RP','FL',
68
68
  'SA_mean','SA_mean_clip','SA_std','SA_kurt','PAI','MAI','MAO','I_std','O_std','MAR',
69
- 'rain_per_area','GC','TE','ECLR','SIN','SOUT'] + lc_cols
69
+ 'ROBC','rain_per_area','GC','TE','RT','ECLR','ESR','SIN','SOUT'] + lc_cols
70
70
 
71
71
  for col in log_candidates:
72
72
  log_col = f'log_{col}' # add prefix to avoid double log
73
- df[log_col] = np.log(df[col].clip(lower=1e-15))
73
+ try:
74
+ df[log_col] = np.log(df[col].clip(lower=1e-15))
75
+ except Exception as e:
76
+ raise ValueError(f"Error applying log transform to column '{col}': {e}")
77
+
78
+ # Process DLC as a categorical column
79
+ df['DLC'] = df['DLC'].astype(int).fillna(0)
74
80
 
75
81
  return df
@@ -142,7 +142,8 @@ def reservoir_based_dynamic_features(
142
142
  path, time_col, data_col, func, feat, obs_period
143
143
  )
144
144
  results[feat] = df_feat.iloc[0, 0] # single value
145
- except Exception:
145
+ except Exception as e:
146
+ print(f"Failed to compute {feat} due to error: {e}. Setting as NaN.")
146
147
  results[feat] = np.nan
147
148
 
148
149
  return pd.DataFrame([results])
@@ -37,6 +37,12 @@ def compute_ts_aggregate(
37
37
  df = pd.read_csv(ts_csv_path)
38
38
  if df.empty:
39
39
  raise ValueError(f"CSV at {ts_csv_path} is empty.")
40
+
41
+ # Ensure columns exist
42
+ if time_column not in df.columns:
43
+ raise ValueError(f"Time column '{time_column}' not found in CSV.")
44
+ if value_column not in df.columns:
45
+ raise ValueError(f"Value column '{value_column}' not found in CSV.")
40
46
 
41
47
  # Ensure time column is datetime
42
48
  df[time_column] = pd.to_datetime(df[time_column], errors='coerce')
@@ -12,17 +12,24 @@ from reclaim.derived_features.feature_engineering_and_transformation import engi
12
12
 
13
13
 
14
14
  def create_features_per_row(
15
+ idx: int,
16
+ observation_period: List[int],
15
17
  reservoir_static_params: dict,
16
18
  catchment_static_params: dict,
17
19
  reservoir_dynamic_info: dict = None,
18
20
  catchment_dynamic_info: dict = None,
19
- observation_period: List[int] = None
20
21
  ) -> pd.DataFrame:
21
22
  """
22
23
  Compute all static, dynamic, and derived features for a single reservoir observation.
23
24
 
24
25
  Parameters
25
26
  ----------
27
+ idx : int
28
+ Index of the reservoir sedimentation observation (for tracking/logging purposes).
29
+
30
+ observation_period : list of int
31
+ Two-element list [OSY, OEY] for observation start year and end year.
32
+
26
33
  reservoir_static_params : dict
27
34
  Parameters for reservoir_based_static_features(). Expected keys:
28
35
  - obc : float, Original Built Capacity (MCM)
@@ -30,6 +37,7 @@ def create_features_per_row(
30
37
  - mrb : str, Major River Basin, optional
31
38
  - lat : float, Latitude (deg)
32
39
  - lon : float, Longitude (deg)
40
+ - by : int, Build Year
33
41
  - reservoir_polygon : shapely.geometry.Polygon
34
42
  - inlet_point : shapely.geometry.Point, optional
35
43
  - resolution : float, optional
@@ -61,9 +69,6 @@ def create_features_per_row(
61
69
  - "tmax": {"path": str, "time_column": str, "data_column": str}
62
70
  - "wind": {"path": str, "time_column": str, "data_column": str}
63
71
 
64
- observation_period : list of int, optional
65
- Two-element list [OSY, OEY] for observation start year and end year.
66
-
67
72
  Returns
68
73
  -------
69
74
  pd.DataFrame
@@ -74,6 +79,14 @@ def create_features_per_row(
74
79
  - Catchment dynamic
75
80
  - Derived/log-transformed
76
81
  """
82
+
83
+ # --- Observation period features ---
84
+ osy, oey = observation_period
85
+ df_obs_period = pd.DataFrame({
86
+ "idx": [idx],
87
+ "OSY": [osy],
88
+ "OEY": [oey]
89
+ })
77
90
 
78
91
  # --- Static features ---
79
92
  df_res_static = reservoir_based_static_features(**reservoir_static_params)
@@ -88,9 +101,9 @@ def create_features_per_row(
88
101
 
89
102
  if catchment_dynamic_info is not None and observation_period is not None:
90
103
  df_catch_dyn = catchment_based_dynamic_features(catchment_dynamic_info, observation_period)
91
-
104
+
92
105
  # --- Combine all static + dynamic ---
93
- df_combined = pd.concat([df_res_static, df_catch_static, df_res_dyn, df_catch_dyn], axis=1)
106
+ df_combined = pd.concat([df_obs_period, df_res_static, df_catch_static, df_res_dyn, df_catch_dyn], axis=1)
94
107
 
95
108
  # --- Engineer + log-transform features ---
96
109
  df_final = engineer_and_transform_features(df_combined)
@@ -109,6 +122,10 @@ def create_features_multi(
109
122
  reservoirs_input : list of dict
110
123
  Each element should be a dictionary with the following keys:
111
124
 
125
+ - `idx` : int
126
+ Index of the reservoir sedimentation observation.
127
+ - `observation_period` : list of int
128
+ Two-element list `[OSY, OEY]` specifying the observation period.
112
129
  - `reservoir_static_params` : dict
113
130
  Parameters for `reservoir_based_static_features()`.
114
131
  - `catchment_static_params` : dict
@@ -117,8 +134,7 @@ def create_features_multi(
117
134
  Parameters for `reservoir_based_dynamic_features()`.
118
135
  - `catchment_dynamic_info` : dict
119
136
  Parameters for `catchment_based_dynamic_features()`.
120
- - `observation_period` : list of int
121
- Two-element list `[OSY, OEY]` specifying the observation period.
137
+
122
138
 
123
139
  Returns
124
140
  -------
@@ -129,11 +145,12 @@ def create_features_multi(
129
145
  all_rows = []
130
146
  for idx, reservoir_info in enumerate(reservoirs_input):
131
147
  df_row = create_features_per_row(
148
+ idx=reservoir_info.get("idx"),
149
+ observation_period=reservoir_info.get("observation_period"),
132
150
  reservoir_static_params=reservoir_info.get("reservoir_static_params", {}),
133
151
  catchment_static_params=reservoir_info.get("catchment_static_params", {}),
134
152
  reservoir_dynamic_info=reservoir_info.get("reservoir_dynamic_info", None),
135
153
  catchment_dynamic_info=reservoir_info.get("catchment_dynamic_info", None),
136
- observation_period=reservoir_info.get("observation_period", None),
137
154
  )
138
155
  all_rows.append(df_row)
139
156
 
@@ -189,6 +189,8 @@ class Reclaim:
189
189
  if self.feature_order_list is not None:
190
190
  # Reorder columns automatically
191
191
  X = X[self.feature_order_list]
192
+ # for col in self.cat_features:
193
+ # X[col] = X[col].astype("category")
192
194
  elif isinstance(X, np.ndarray):
193
195
  warnings.warn(
194
196
  "Predicting with NumPy array: assumes column order matches training order. "
@@ -476,19 +478,21 @@ class Reclaim:
476
478
  load_dir = os.path.join(package_dir, "pretrained_model")
477
479
 
478
480
  # Load XGBoost
479
- xgb_path = os.path.join(load_dir, f"{prefix}_xgb.json")
481
+ xgb_path = os.path.join(load_dir, f"{prefix}_xgb.pkl")
480
482
  if os.path.exists(xgb_path):
481
- self.xgb_model = xgb.XGBRegressor()
482
- self.xgb_model.load_model(xgb_path)
483
+ import xgboost as xgb
484
+ self.xgb_model = joblib.load(xgb_path)
483
485
 
484
486
  # Load LightGBM
485
- lgb_path = os.path.join(load_dir, f"{prefix}_lgb.txt")
487
+ lgb_path = os.path.join(load_dir, f"{prefix}_lgb.pkl")
486
488
  if os.path.exists(lgb_path):
487
- self.lgb_model = lgb.Booster(model_file=lgb_path)
489
+ import lightgbm as lgb
490
+ self.lgb_model = joblib.load(lgb_path)
488
491
 
489
492
  # Load CatBoost
490
493
  cat_path = os.path.join(load_dir, f"{prefix}_cat.cbm")
491
494
  if os.path.exists(cat_path):
495
+ from catboost import CatBoostRegressor
492
496
  self.cat_model = CatBoostRegressor()
493
497
  self.cat_model.load_model(cat_path)
494
498
 
@@ -1,5 +1,6 @@
1
1
  import pandas as pd
2
2
  from shapely.geometry import Point, Polygon
3
+ import numpy as np
3
4
 
4
5
  # Import utils
5
6
  from reclaim.static_features.utils.flow_length import find_actual_flow_path
@@ -13,6 +14,7 @@ def reservoir_based_static_features(
13
14
  mrb: str = None,
14
15
  lat: float = None,
15
16
  lon: float = None,
17
+ by: int = None,
16
18
  reservoir_polygon: Polygon = None,
17
19
  inlet_point: Point = None,
18
20
  resolution: float = None,
@@ -33,6 +35,8 @@ def reservoir_based_static_features(
33
35
  Latitude of dam location (degrees).
34
36
  lon : float, optional
35
37
  Longitude of dam location (degrees).
38
+ by : int, optional
39
+ Build year of the reservoir.
36
40
  reservoir_polygon : shapely.geometry.Polygon, optional
37
41
  Reservoir polygon geometry used to compute area and perimeter.
38
42
  dam_point : shapely.geometry.Point, optional
@@ -53,6 +57,7 @@ def reservoir_based_static_features(
53
57
  - MRB: Major River Basin
54
58
  - LAT: Latitude (deg)
55
59
  - LON: Longitude (deg)
60
+ - BY: Build Year
56
61
  - RA: Reservoir Area (sq km)
57
62
  - RP: Reservoir Perimeter (km)
58
63
  - FL: Flow Length (km)
@@ -67,6 +72,7 @@ def reservoir_based_static_features(
67
72
  "MRB": mrb,
68
73
  "LAT": lat,
69
74
  "LON": lon,
75
+ "BY": by,
70
76
  "RA": None,
71
77
  "RP": None,
72
78
  "FL": None,
@@ -85,8 +91,12 @@ def reservoir_based_static_features(
85
91
  dam_point = Point(lon, lat)
86
92
  if dam_point is not None and reservoir_polygon is not None:
87
93
  _, _, features["FL"], _ = (
88
- find_actual_flow_path(dam_point, reservoir_polygon, inlet_point, resolution) / 1e3
89
- ) # m → km
94
+ find_actual_flow_path(dam_point, reservoir_polygon, inlet_point, resolution)
95
+ )
96
+ if features["FL"]:
97
+ features["FL"] = calculate_length_area_meters(features["FL"], area=False) / 1e3 # m → km
98
+ else:
99
+ features["FL"] = np.nan
90
100
 
91
101
  # AEC metrics
92
102
  if aec_df is not None:
@@ -94,8 +94,8 @@ def concavity_index(df: pd.DataFrame) -> float:
94
94
  line = np.linspace(0, 1, len(area_norm))
95
95
 
96
96
  # Area under actual curve vs line
97
- auc_curve = np.trapz(elev_norm, area_norm)
98
- auc_line = np.trapz(line, area_norm)
97
+ auc_curve = np.trapezoid(elev_norm, area_norm)
98
+ auc_line = np.trapezoid(line, area_norm)
99
99
 
100
100
  concavity = auc_curve / auc_line if auc_line > 0 else np.nan
101
101
  return concavity
@@ -33,4 +33,4 @@ def calculate_length_area_meters(geometry, area= True):
33
33
  area_square_meters = scaled_geometry.area
34
34
  return length_meters, area_square_meters
35
35
  else:
36
- return length_meters[0]
36
+ return length_meters
@@ -1,6 +1,7 @@
1
1
  import pandas as pd
2
2
  import numpy as np
3
3
  import xarray as xr
4
+ import rioxarray
4
5
  import geopandas as gpd
5
6
  import regionmask
6
7
  from collections import Counter
File without changes
File without changes