pyreclaim 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,40 +2,56 @@
2
2
 
3
3
  from typing import Dict, List
4
4
  import pandas as pd
5
+ import geopandas as gpd
6
+ from tqdm import tqdm
7
+ import traceback
8
+ from dask import delayed, compute
9
+ from dask.diagnostics import ProgressBar
5
10
 
6
11
  # Import from your package structure
7
12
  from reclaim.static_features.reservoir_static import reservoir_based_static_features
8
- from reclaim.static_features.catchment_static import catchment_based_static_features
13
+ from reclaim.static_features.catchment_static import catchment_based_static_features, catchment_based_static_features_multi_reservoir
9
14
  from reclaim.dynamic_features.reservoir_dynamic import reservoir_based_dynamic_features
10
15
  from reclaim.dynamic_features.catchment_dynamic import catchment_based_dynamic_features
16
+ from reclaim.dynamic_features.utils.ts_aggregate import build_intervals
11
17
  from reclaim.derived_features.feature_engineering_and_transformation import engineer_and_transform_features
12
18
 
13
19
 
14
- def create_features_per_row(
20
+ def create_features_per_reservoir(
21
+ idx: int,
22
+ observation_period: List[int],
15
23
  reservoir_static_params: dict,
16
- catchment_static_params: dict,
24
+ catchment_static_params: dict = None,
17
25
  reservoir_dynamic_info: dict = None,
18
26
  catchment_dynamic_info: dict = None,
19
- observation_period: List[int] = None
27
+ time_interval: int = None,
28
+ feature_engineering: bool = True,
20
29
  ) -> pd.DataFrame:
21
30
  """
22
31
  Compute all static, dynamic, and derived features for a single reservoir observation.
23
32
 
24
33
  Parameters
25
34
  ----------
35
+ idx : int
36
+ Index of the reservoir sedimentation observation (for tracking/logging purposes).
37
+
38
+ observation_period : list of int
39
+ Two-element list [OSY, OEY] for observation start year and end year.
40
+
26
41
  reservoir_static_params : dict
27
42
  Parameters for reservoir_based_static_features(). Expected keys:
28
43
  - obc : float, Original Built Capacity (MCM)
29
44
  - hgt : float, Dam Height (m)
30
- - mrb : str, Major River Basin, optional
45
+ - mrb : int, Major River Basin, optional
31
46
  - lat : float, Latitude (deg)
32
47
  - lon : float, Longitude (deg)
48
+ - by : int, Build Year
33
49
  - reservoir_polygon : shapely.geometry.Polygon
34
50
  - inlet_point : shapely.geometry.Point, optional
35
51
  - resolution : float, optional
36
52
  - aec_df : pd.DataFrame with columns ['area', 'elevation']
37
53
 
38
- catchment_static_params : dict
54
+ catchment_static_params : dict, optional
39
55
  Parameters for catchment_based_static_features(). Expected keys:
40
56
  - ca : float, Catchment Area (sq km)
41
57
  - dca : float, Differential Catchment Area (sq km)
@@ -61,9 +77,9 @@ def create_features_per_row(
61
77
  - "tmax": {"path": str, "time_column": str, "data_column": str}
62
78
  - "wind": {"path": str, "time_column": str, "data_column": str}
63
79
 
64
- observation_period : list of int, optional
65
- Two-element list [OSY, OEY] for observation start year and end year.
66
-
80
+ time_interval: int, optional
81
+ Time interval in years between reservoir observations for dynamic feature calculations. The number of rows in the dynamic features will depend on this interval.
82
+
67
83
  Returns
68
84
  -------
69
85
  pd.DataFrame
@@ -72,34 +88,88 @@ def create_features_per_row(
72
88
  - Catchment static
73
89
  - Reservoir dynamic
74
90
  - Catchment dynamic
75
- - Derived/log-transformed
91
+ - Derived/log-transformed (if requested)
76
92
  """
93
+
94
+ # --- Observation period features ---
95
+ osy, oey = observation_period
96
+ if time_interval is not None:
97
+ intervals = build_intervals(osy, oey, time_interval)
98
+ else:
99
+ intervals = [(osy, oey)]
100
+ # Create observation period dataframe with rows for each interval with same idx
101
+ df_obs = pd.DataFrame({
102
+ "idx": idx,
103
+ "OSY": [i[0] for i in intervals],
104
+ "OEY": [i[1] for i in intervals],
105
+ })
77
106
 
78
- # --- Static features ---
107
+ # --- Static features (computed ONCE) ---
79
108
  df_res_static = reservoir_based_static_features(**reservoir_static_params)
80
- df_catch_static = catchment_based_static_features(**catchment_static_params)
81
-
82
- # --- Dynamic features ---
83
- df_res_dyn = pd.DataFrame()
84
- df_catch_dyn = pd.DataFrame()
85
-
86
- if reservoir_dynamic_info is not None and observation_period is not None:
87
- df_res_dyn = reservoir_based_dynamic_features(reservoir_dynamic_info, observation_period)
109
+ if catchment_static_params is not None:
110
+ df_catch_static = catchment_based_static_features(**catchment_static_params)
111
+ else:
112
+ df_catch_static = pd.DataFrame()
113
+
114
+ static_block = pd.concat([df_res_static, df_catch_static], axis=1)
115
+ # Repeat static rows to match number of intervals
116
+ static_block = pd.concat(
117
+ [static_block] * len(df_obs),
118
+ ignore_index=True
119
+ )
88
120
 
89
- if catchment_dynamic_info is not None and observation_period is not None:
90
- df_catch_dyn = catchment_based_dynamic_features(catchment_dynamic_info, observation_period)
121
+ # --- Dynamic features (computed ONCE - internally handles intervals) ---
122
+ # Combine dynamic features for all intervals
123
+ df_res_dyn = (
124
+ reservoir_based_dynamic_features(
125
+ reservoir_dynamic_info,
126
+ intervals,
127
+ )
128
+ if reservoir_dynamic_info is not None
129
+ else pd.DataFrame()
130
+ )
91
131
 
92
- # --- Combine all static + dynamic ---
93
- df_combined = pd.concat([df_res_static, df_catch_static, df_res_dyn, df_catch_dyn], axis=1)
132
+ df_catch_dyn = (
133
+ catchment_based_dynamic_features(
134
+ catchment_dynamic_info,
135
+ intervals,
136
+ )
137
+ if catchment_dynamic_info is not None
138
+ else pd.DataFrame()
139
+ )
140
+
141
+ # --- Combine all features for all intervals in single dataframe ---
142
+ df_out = pd.concat(
143
+ [df_obs, static_block, df_res_dyn, df_catch_dyn],
144
+ axis=1
145
+ ).reset_index(drop=True)
94
146
 
95
- # --- Engineer + log-transform features ---
96
- df_final = engineer_and_transform_features(df_combined)
147
+ # --- Engineer ONLY if requested ---
148
+ if feature_engineering:
149
+ df_out = engineer_and_transform_features(df_out)
97
150
 
98
- return df_final
151
+ return df_out
99
152
 
153
+ @delayed
154
+ def process_one_reservoir(r):
155
+ try:
156
+ df = create_features_per_reservoir(
157
+ idx=r["idx"],
158
+ observation_period=r["observation_period"],
159
+ reservoir_static_params=r["reservoir_static_params"],
160
+ catchment_static_params=None,
161
+ reservoir_dynamic_info=r.get("reservoir_dynamic_info"),
162
+ catchment_dynamic_info=r.get("catchment_dynamic_info"),
163
+ time_interval=r.get("time_interval"),
164
+ feature_engineering=False,
165
+ )
166
+ return r["idx"], df, None
167
+ except Exception as e:
168
+ return r["idx"], pd.DataFrame({"idx": [r["idx"]]}), {str(e):traceback.format_exc()}
100
169
 
101
- def create_features_multi(
102
- reservoirs_input: List[Dict]
170
+ def create_features_multi_reservoirs(
171
+ reservoirs_input: List[Dict],
172
+ error_log: bool = False,
103
173
  ) -> pd.DataFrame:
104
174
  """
105
175
  Compute features for multiple reservoirs using structured input.
@@ -109,6 +179,10 @@ def create_features_multi(
109
179
  reservoirs_input : list of dict
110
180
  Each element should be a dictionary with the following keys:
111
181
 
182
+ - `idx` : int
183
+ Index of the reservoir sedimentation observation.
184
+ - `observation_period` : list of int
185
+ Two-element list `[OSY, OEY]` specifying the observation period.
112
186
  - `reservoir_static_params` : dict
113
187
  Parameters for `reservoir_based_static_features()`.
114
188
  - `catchment_static_params` : dict
@@ -117,25 +191,107 @@ def create_features_multi(
117
191
  Parameters for `reservoir_based_dynamic_features()`.
118
192
  - `catchment_dynamic_info` : dict
119
193
  Parameters for `catchment_based_dynamic_features()`.
120
- - `observation_period` : list of int
121
- Two-element list `[OSY, OEY]` specifying the observation period.
194
+ - `time_interval` : int, optional
195
+ Time interval in years between reservoir observations for dynamic feature calculations.
122
196
 
123
197
  Returns
124
198
  -------
125
199
  pd.DataFrame
126
- Combined DataFrame with one row per reservoir observation.
200
+ Combined DataFrame with one row per reservoir and time interval
201
+ in the observation period. When `error_log` is True, a tuple `(DataFrame, errors)` is returned instead.
127
202
  """
128
203
 
129
- all_rows = []
130
- for idx, reservoir_info in enumerate(reservoirs_input):
131
- df_row = create_features_per_row(
132
- reservoir_static_params=reservoir_info.get("reservoir_static_params", {}),
133
- catchment_static_params=reservoir_info.get("catchment_static_params", {}),
134
- reservoir_dynamic_info=reservoir_info.get("reservoir_dynamic_info", None),
135
- catchment_dynamic_info=reservoir_info.get("catchment_dynamic_info", None),
136
- observation_period=reservoir_info.get("observation_period", None),
137
- )
138
- all_rows.append(df_row)
204
+ # -------- Collect catchments first (cheap, no tqdm needed)
205
+ catchment_rows = []
206
+
207
+ for r in reservoirs_input:
208
+ c = r["catchment_static_params"]
209
+ catchment_rows.append({
210
+ "idx": r["idx"],
211
+ "CA": c["ca"],
212
+ "DCA": c["dca"],
213
+ "geometry": c["catchment_geometry"],
214
+ })
215
+
216
+ catchments_gdf = gpd.GeoDataFrame(
217
+ catchment_rows, geometry="geometry", crs="EPSG:4326"
218
+ )
219
+
220
+ # -------- Compute catchment static ONCE
221
+ first = reservoirs_input[0]["catchment_static_params"]
222
+
223
+ df_catch_static_all = catchment_based_static_features_multi_reservoir(
224
+ catchments_gdf,
225
+ glc_share_path=first["glc_share_path"],
226
+ hwsd2_path=first["hwsd2_path"],
227
+ hilda_veg_freq_path=first["hilda_veg_freq_path"],
228
+ terrain_path=first["terrain_path"],
229
+ )
230
+
231
+ catch_static_lookup = df_catch_static_all.set_index("idx")
232
+ # catch_static_lookup = pd.DataFrame() # Placeholder if not computing
233
+
234
+ # -------- Per-reservoir features: dask delayed tasks run in 4 worker processes with a ProgressBar
235
+ tasks = [process_one_reservoir(r) for r in reservoirs_input]
236
+ with ProgressBar():
237
+ results = compute(*tasks, scheduler="processes", num_workers=4)
238
+
239
+ all_reservoirs_static_info = []
240
+ errors = {}
241
+
242
+ for idx, df, err in results:
243
+ all_reservoirs_static_info.append(df)
244
+ if err is not None:
245
+ errors[idx] = err
246
+
247
+ # for r in tqdm(
248
+ # reservoirs_input,
249
+ # total=len(reservoirs_input),
250
+ # desc="Generating per-reservoir features",
251
+ # unit="reservoir",
252
+ # ):
253
+ # try:
254
+ # df = create_features_per_reservoir(
255
+ # idx=r["idx"],
256
+ # observation_period=r["observation_period"],
257
+ # reservoir_static_params=r["reservoir_static_params"],
258
+ # catchment_static_params=None, # already handled
259
+ # reservoir_dynamic_info=r.get("reservoir_dynamic_info"),
260
+ # catchment_dynamic_info=r.get("catchment_dynamic_info"),
261
+ # time_interval=r.get("time_interval"),
262
+ # feature_engineering=False,
263
+ # )
264
+ # all_reservoirs_static_info.append(df)
265
+ # except Exception as e:
266
+ # errors[r["idx"]] = e
267
+ # errors['traceback'] = traceback.print_exc()
268
+ # all_reservoirs_static_info.append(
269
+ # pd.DataFrame({"idx": r["idx"]}) # Append empty DataFrame for failed reservoir
270
+ # )
271
+
272
+
273
+
274
+ # -------- Concatenate per-reservoir feature frames (static + dynamic)
275
+ df_all = pd.concat(all_reservoirs_static_info, ignore_index=True)
276
+ # CRITICAL: restore logical ordering
277
+ df_all = df_all.sort_values(
278
+ by=["idx", "OSY"], #
279
+ ascending=[True, True],
280
+ ).reset_index(drop=True)
281
+
282
+
283
+ # -------- Merge catchment static features onto the per-reservoir rows ONCE (left join on idx)
284
+ df_all = df_all.merge(
285
+ catch_static_lookup,
286
+ left_on="idx",
287
+ right_index=True,
288
+ how="left",
289
+ )
139
290
 
140
- df_all = pd.concat(all_rows, axis=0).reset_index(drop=True)
141
- return df_all
291
+ # -------- Engineer ONCE
292
+ df_all = engineer_and_transform_features(df_all)
293
+
294
+ if error_log:
295
+ return df_all, errors
296
+ else:
297
+ return df_all
reclaim/reclaim.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  import pandas as pd
3
+ from pandas.api.types import is_integer_dtype, is_string_dtype
3
4
  import numpy as np
4
5
  from sklearn.preprocessing import LabelEncoder
5
6
  from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error
@@ -189,12 +190,22 @@ class Reclaim:
189
190
  if self.feature_order_list is not None:
190
191
  # Reorder columns automatically
191
192
  X = X[self.feature_order_list]
193
+ # for col in self.cat_features:
194
+ # X[col] = X[col].astype("category")
192
195
  elif isinstance(X, np.ndarray):
193
196
  warnings.warn(
194
197
  "Predicting with NumPy array: assumes column order matches training order. "
195
198
  "Safer to use DataFrame with feature names."
196
199
  )
197
200
 
201
+ if self.cat_features is not None:
202
+ for col in self.cat_features:
203
+ if not (is_integer_dtype(X[col]) or is_string_dtype(X[col])):
204
+ raise ValueError(
205
+ f"Column {col} must be integer or string type, "
206
+ f"found {X[col].dtype}"
207
+ )
208
+
198
209
 
199
210
  # Base model predictions
200
211
  pred_xgb = self.xgb_model.predict(X)
@@ -476,19 +487,21 @@ class Reclaim:
476
487
  load_dir = os.path.join(package_dir, "pretrained_model")
477
488
 
478
489
  # Load XGBoost
479
- xgb_path = os.path.join(load_dir, f"{prefix}_xgb.json")
490
+ xgb_path = os.path.join(load_dir, f"{prefix}_xgb.pkl")
480
491
  if os.path.exists(xgb_path):
481
- self.xgb_model = xgb.XGBRegressor()
482
- self.xgb_model.load_model(xgb_path)
492
+ import xgboost as xgb
493
+ self.xgb_model = joblib.load(xgb_path)
483
494
 
484
495
  # Load LightGBM
485
- lgb_path = os.path.join(load_dir, f"{prefix}_lgb.txt")
496
+ lgb_path = os.path.join(load_dir, f"{prefix}_lgb.pkl")
486
497
  if os.path.exists(lgb_path):
487
- self.lgb_model = lgb.Booster(model_file=lgb_path)
498
+ import lightgbm as lgb
499
+ self.lgb_model = joblib.load(lgb_path)
488
500
 
489
501
  # Load CatBoost
490
502
  cat_path = os.path.join(load_dir, f"{prefix}_cat.cbm")
491
503
  if os.path.exists(cat_path):
504
+ from catboost import CatBoostRegressor
492
505
  self.cat_model = CatBoostRegressor()
493
506
  self.cat_model.load_model(cat_path)
494
507
 
@@ -1,6 +1,39 @@
1
+ import geopandas as gpd
1
2
  import pandas as pd
2
3
 
3
- from reclaim.static_features.utils.catchment_agreggate import compute_catchment_aggregate
4
+ from reclaim.static_features.utils.catchment_agreggate import compute_catchment_aggregate, compute_catchment_aggregate_multi_reservoir
5
+
6
+ # ---- Rename columns to abbreviations
7
+ RENAME_VARIABLE_DICT = {
8
+ # Land cover
9
+ "artificial_surfaces_mean": "LCAS",
10
+ "cropland_mean": "LCC",
11
+ "grassland_mean": "LCG",
12
+ "tree_covered_mean": "LCT",
13
+ "shrubs_covered_mean": "LCS",
14
+ "aquatic_herbaceous_mean": "LCHV",
15
+ "mangroves_mean": "LCM",
16
+ "sparse_vegetation_mean": "LCSV",
17
+ "bare_soil_mean": "LCBS",
18
+ "snow_glaciers_mean": "LCSG",
19
+ "waterbodies_mean": "LCWB",
20
+ "dominant_class_mode": "DLC",
21
+ # Soil
22
+ "COARSE_mean": "COAR",
23
+ "SAND_mean": "SAND",
24
+ "SILT_mean": "SILT",
25
+ "CLAY_mean": "CLAY",
26
+ "BULK_mean": "BULK",
27
+ # Terrain
28
+ "elevation_mean": "ELEV",
29
+ "slope_mean": "SLOP",
30
+ "curvature_mean": "CURV",
31
+ "aspect_mean": "ASP",
32
+ "hillshade_mean": "HILL",
33
+ # HILDA vegetation gain/loss frequency
34
+ "vegetation_gain_frequency_mean": "VGF",
35
+ "vegetation_loss_frequency_mean": "VLF",
36
+ }
4
37
 
5
38
  def catchment_based_static_features(
6
39
  ca: float,
@@ -89,39 +122,81 @@ def catchment_based_static_features(
89
122
  merged = pd.concat([glc_df, hwsd_df, hilda_df, terrain_df], axis=1)
90
123
  features.update(merged.to_dict(orient="records")[0])
91
124
 
92
- # ---- Rename columns to abbreviations
93
- rename_dict = {
94
- # Land cover
95
- "artificial_surfaces_mean": "LCAS",
96
- "cropland_mean": "LCC",
97
- "grassland_mean": "LCG",
98
- "tree_covered_mean": "LCT",
99
- "shrubs_covered_mean": "LCS",
100
- "aquatic_herbaceous_mean": "LCHV",
101
- "mangroves_mean": "LCM",
102
- "sparse_vegetation_mean": "LCSV",
103
- "bare_soil_mean": "LCBS",
104
- "snow_glaciers_mean": "LCSG",
105
- "waterbodies_mean": "LCWB",
106
- "dominant_class_mode": "DLC",
107
- # Soil
108
- "COARSE_mean": "COAR",
109
- "SAND_mean": "SAND",
110
- "SILT_mean": "SILT",
111
- "CLAY_mean": "CLAY",
112
- "BULK_mean": "BULK",
113
- # Terrain
114
- "elevation_mean": "ELEV",
115
- "slope_mean": "SLOP",
116
- "curvature_mean": "CURV",
117
- "aspect_mean": "ASP",
118
- "hillshade_mean": "HILL",
119
- # HILDA (optional, not mapped to abbreviations yet)
120
- "vegetation_gain_frequency_mean": "VGF",
121
- "vegetation_loss_frequency_mean": "VLF",
125
+ # Apply renaming
126
+ features_df = pd.DataFrame([features]).rename(columns=RENAME_VARIABLE_DICT)
127
+
128
+ return features_df
129
+
130
+ def catchment_based_static_features_multi_reservoir(
131
+ catchments_gdf: gpd.GeoDataFrame,
132
+ glc_share_path: str,
133
+ hwsd2_path: str,
134
+ hilda_veg_freq_path: str,
135
+ terrain_path: str,
136
+ ) -> pd.DataFrame:
137
+ """
138
+ Compute catchment-based static features for MULTIPLE reservoirs efficiently.
139
+
140
+ Parameters
141
+ ----------
142
+ catchments_gdf : geopandas.GeoDataFrame
143
+ Must contain columns:
144
+ - idx
145
+ - CA
146
+ - DCA
147
+ - geometry
148
+ glc_share_path : str
149
+ Path to the GLC-Share NetCDF file (land cover fractions).
150
+ hwsd2_path : str
151
+ Path to the HWSD2 NetCDF file (soil composition).
152
+ hilda_veg_freq_path : str
153
+ Path to the HILDA vegetation frequency NetCDF file.
154
+ terrain_path : str
155
+ Path to the terrain NetCDF file (DEM derivatives).
156
+
157
+ Returns
158
+ -------
159
+ pd.DataFrame
160
+ A DataFrame with one row per catchment `idx` (returned as a column) and abbreviations as columns:
161
+ - CA, DCA, LCAS, LCC, LCG, LCT, LCS, LCHV, LCM, LCSV,
162
+ LCBS, LCSG, LCWB, DLC, COAR, SAND, SILT, CLAY, BULK,
163
+ ELEV, SLOP, CURV, ASP, HILL, VGF, VLF
164
+ """
165
+
166
+ base = catchments_gdf[["idx", "CA", "DCA"]].set_index("idx")
167
+
168
+ # --- Land cover
169
+ glc_dict = {
170
+ "artificial_surfaces": "mean",
171
+ "cropland": "mean",
172
+ "grassland": "mean",
173
+ "tree_covered": "mean",
174
+ "shrubs_covered": "mean",
175
+ "aquatic_herbaceous": "mean",
176
+ "mangroves": "mean",
177
+ "sparse_vegetation": "mean",
178
+ "bare_soil": "mean",
179
+ "snow_glaciers": "mean",
180
+ "waterbodies": "mean",
181
+ "dominant_class": "mode",
122
182
  }
123
183
 
124
- # Apply renaming
125
- features_df = pd.DataFrame([features]).rename(columns=rename_dict)
184
+ glc = compute_catchment_aggregate_multi_reservoir(
185
+ glc_share_path, catchments_gdf, glc_dict
186
+ )
187
+
188
+ hwsd = compute_catchment_aggregate_multi_reservoir(
189
+ hwsd2_path, catchments_gdf, "mean"
190
+ )
191
+
192
+ hilda = compute_catchment_aggregate_multi_reservoir(
193
+ hilda_veg_freq_path, catchments_gdf, "mean"
194
+ )
195
+
196
+ terrain = compute_catchment_aggregate_multi_reservoir(
197
+ terrain_path, catchments_gdf, "mean"
198
+ )
199
+
200
+ df = pd.concat([base, glc, hwsd, hilda, terrain], axis=1)
126
201
 
127
- return features_df
202
+ return df.rename(columns=RENAME_VARIABLE_DICT).reset_index()
@@ -1,8 +1,10 @@
1
1
  import pandas as pd
2
+ import geopandas as gpd
2
3
  from shapely.geometry import Point, Polygon
4
+ import numpy as np
3
5
 
4
6
  # Import utils
5
- from reclaim.static_features.utils.flow_length import find_actual_flow_path
7
+ from reclaim.static_features.utils.flow_length import find_actual_flow_path, plot_flow_length_with_reservoir
6
8
  from reclaim.static_features.utils.area_perimeter import calculate_length_area_meters
7
9
  from reclaim.static_features.utils.aec_shape import concavity_index, mean_curvature, mean_slope
8
10
 
@@ -13,10 +15,12 @@ def reservoir_based_static_features(
13
15
  mrb: str = None,
14
16
  lat: float = None,
15
17
  lon: float = None,
18
+ by: int = None,
16
19
  reservoir_polygon: Polygon = None,
17
20
  inlet_point: Point = None,
18
21
  resolution: float = None,
19
- aec_df: pd.DataFrame = None
22
+ aec_df: pd.DataFrame = None,
23
+ savepath_flowpath_fig: str = None,
20
24
  ) -> pd.DataFrame:
21
25
  """
22
26
  Compute reservoir-based features for RECLAIM input dataset.
@@ -33,6 +37,8 @@ def reservoir_based_static_features(
33
37
  Latitude of dam location (degrees).
34
38
  lon : float, optional
35
39
  Longitude of dam location (degrees).
40
+ by : int, optional
41
+ Build year of the reservoir.
36
42
  reservoir_polygon : shapely.geometry.Polygon, optional
37
43
  Reservoir polygon geometry used to compute area and perimeter.
38
44
  dam_point : shapely.geometry.Point, optional
@@ -43,6 +49,8 @@ def reservoir_based_static_features(
43
49
  Spatial resolution used in flow length calculations.
44
50
  aec_df : pd.DataFrame, optional
45
51
  Area-Elevation Curve dataframe with columns ['area', 'elevation'].
52
+ savepath_flowpath_fig : str, optional
53
+ Path where the flow path figure is saved; no figure is plotted when None.
46
54
 
47
55
  Returns
48
56
  -------
@@ -53,6 +61,7 @@ def reservoir_based_static_features(
53
61
  - MRB: Major River Basin
54
62
  - LAT: Latitude (deg)
55
63
  - LON: Longitude (deg)
64
+ - BY: Build Year
56
65
  - RA: Reservoir Area (sq km)
57
66
  - RP: Reservoir Perimeter (km)
58
67
  - FL: Flow Length (km)
@@ -67,6 +76,7 @@ def reservoir_based_static_features(
67
76
  "MRB": mrb,
68
77
  "LAT": lat,
69
78
  "LON": lon,
79
+ "BY": by,
70
80
  "RA": None,
71
81
  "RP": None,
72
82
  "FL": None,
@@ -76,22 +86,51 @@ def reservoir_based_static_features(
76
86
  }
77
87
 
78
88
  # Area and Perimeter
79
- if reservoir_polygon is not None:
89
+ if reservoir_polygon is not None and not reservoir_polygon.is_empty:
80
90
  features["RP"], features["RA"] = calculate_length_area_meters(reservoir_polygon, area=True)
81
91
  features["RA"] = features["RA"] / 1e6 # m2 → km2
82
92
  features["RP"] = features["RP"] / 1e3 # m → km
93
+ else:
94
+ features["RP"] = np.nan
95
+ features["RA"] = np.nan
83
96
 
84
97
  # Flow Length
85
98
  dam_point = Point(lon, lat)
86
- if dam_point is not None and reservoir_polygon is not None:
87
- _, _, features["FL"], _ = (
88
- find_actual_flow_path(dam_point, reservoir_polygon, inlet_point, resolution) / 1e3
89
- ) # m km
99
+ if dam_point is not None and reservoir_polygon is not None and not reservoir_polygon.is_empty:
100
+ try:
101
+ simplified_reservoir, far_end_point, flow_path, _ = (
102
+ find_actual_flow_path(dam_point, reservoir_polygon, inlet_point, resolution)
103
+ )
104
+ if savepath_flowpath_fig is not None:
105
+ plot_flow_length_with_reservoir(
106
+ dam_point,
107
+ reservoir_polygon,
108
+ far_end_point,
109
+ flow_path,
110
+ simplified_reservoir,
111
+ savepath_flowpath_fig
112
+ )
113
+ if flow_path is not None:
114
+ gseries = gpd.GeoSeries([flow_path], crs="EPSG:4326")
115
+ gseries = gseries.to_crs(epsg=3395)
90
116
 
117
+ features["FL"] = gseries.length.iloc[0] / 1e3 # m → km
118
+ else:
119
+ features["FL"] = np.nan
120
+ except Exception as e:
121
+ print(f"Flow length calculation failed: {e}")
122
+ features["FL"] = np.nan
123
+ else:
124
+ features["FL"] = np.nan
125
+
91
126
  # AEC metrics
92
- if aec_df is not None:
127
+ if isinstance(aec_df, pd.DataFrame) and not aec_df.empty:
93
128
  features["AECS"] = mean_slope(aec_df)
94
129
  features["AECC"] = mean_curvature(aec_df)
95
130
  features["AECI"] = concavity_index(aec_df)
131
+ else:
132
+ features["AECS"] = np.nan
133
+ features["AECC"] = np.nan
134
+ features["AECI"] = np.nan
96
135
 
97
136
  return pd.DataFrame([features])
@@ -94,8 +94,8 @@ def concavity_index(df: pd.DataFrame) -> float:
94
94
  line = np.linspace(0, 1, len(area_norm))
95
95
 
96
96
  # Area under actual curve vs line
97
- auc_curve = np.trapz(elev_norm, area_norm)
98
- auc_line = np.trapz(line, area_norm)
97
+ auc_curve = np.trapezoid(elev_norm, area_norm)
98
+ auc_line = np.trapezoid(line, area_norm)
99
99
 
100
100
  concavity = auc_curve / auc_line if auc_line > 0 else np.nan
101
101
  return concavity
@@ -33,4 +33,4 @@ def calculate_length_area_meters(geometry, area= True):
33
33
  area_square_meters = scaled_geometry.area
34
34
  return length_meters, area_square_meters
35
35
  else:
36
- return length_meters[0]
36
+ return length_meters