pyreclaim 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyreclaim-0.3.0.dist-info → pyreclaim-0.5.0.dist-info}/METADATA +9 -3
- pyreclaim-0.5.0.dist-info/RECORD +28 -0
- {pyreclaim-0.3.0.dist-info → pyreclaim-0.5.0.dist-info}/WHEEL +1 -1
- reclaim/derived_features/feature_engineering_and_transformation.py +68 -7
- reclaim/dynamic_features/catchment_dynamic.py +50 -44
- reclaim/dynamic_features/reservoir_dynamic.py +68 -70
- reclaim/dynamic_features/utils/ts_aggregate.py +68 -27
- reclaim/generate_features.py +199 -43
- reclaim/reclaim.py +18 -5
- reclaim/static_features/catchment_static.py +109 -34
- reclaim/static_features/reservoir_static.py +47 -8
- reclaim/static_features/utils/aec_shape.py +2 -2
- reclaim/static_features/utils/area_perimeter.py +1 -1
- reclaim/static_features/utils/basin_names.py +78 -0
- reclaim/static_features/utils/catchment_agreggate.py +209 -1
- reclaim/static_features/utils/flow_length.py +65 -1
- pyreclaim-0.3.0.dist-info/RECORD +0 -27
- {pyreclaim-0.3.0.dist-info → pyreclaim-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {pyreclaim-0.3.0.dist-info → pyreclaim-0.5.0.dist-info}/top_level.txt +0 -0
reclaim/generate_features.py
CHANGED
@@ -2,40 +2,56 @@
 
 from typing import Dict, List
 import pandas as pd
+import geopandas as gpd
+from tqdm import tqdm
+import traceback
+from dask import delayed, compute
+from dask.diagnostics import ProgressBar
 
 # Import from your package structure
 from reclaim.static_features.reservoir_static import reservoir_based_static_features
-from reclaim.static_features.catchment_static import catchment_based_static_features
+from reclaim.static_features.catchment_static import catchment_based_static_features, catchment_based_static_features_multi_reservoir
 from reclaim.dynamic_features.reservoir_dynamic import reservoir_based_dynamic_features
 from reclaim.dynamic_features.catchment_dynamic import catchment_based_dynamic_features
+from reclaim.dynamic_features.utils.ts_aggregate import build_intervals
 from reclaim.derived_features.feature_engineering_and_transformation import engineer_and_transform_features
 
 
-def create_features_per_row(
+def create_features_per_reservoir(
+    idx: int,
+    observation_period: List[int],
     reservoir_static_params: dict,
-    catchment_static_params: dict,
+    catchment_static_params: dict = None,
     reservoir_dynamic_info: dict = None,
     catchment_dynamic_info: dict = None,
-
+    time_interval: int = None,
+    feature_engineering: bool = True,
 ) -> pd.DataFrame:
     """
     Compute all static, dynamic, and derived features for a single reservoir observation.
 
     Parameters
     ----------
+    idx : int
+        Index of the reservoir sedimentation observation (for tracking/logging purposes).
+
+    observation_period : list of int
+        Two-element list [OSY, OEY] for observation start year and end year.
+
     reservoir_static_params : dict
         Parameters for reservoir_based_static_features(). Expected keys:
         - obc : float, Original Built Capacity (MCM)
         - hgt : float, Dam Height (m)
-        - mrb :
+        - mrb : int, Major River Basin, optional
         - lat : float, Latitude (deg)
         - lon : float, Longitude (deg)
+        - by : int, Build Year
         - reservoir_polygon : shapely.geometry.Polygon
         - inlet_point : shapely.geometry.Point, optional
         - resolution : float, optional
         - aec_df : pd.DataFrame with columns ['area', 'elevation']
 
-    catchment_static_params : dict
+    catchment_static_params : dict, optional
         Parameters for catchment_based_static_features(). Expected keys:
         - ca : float, Catchment Area (sq km)
         - dca : float, Differential Catchment Area (sq km)
@@ -61,9 +77,9 @@ def create_features_per_row(
         - "tmax": {"path": str, "time_column": str, "data_column": str}
         - "wind": {"path": str, "time_column": str, "data_column": str}
 
-
-
-
+    time_interval: int, optional
+        Time interval in years between reservoir observations for dynamic feature calculations. The number of rows in the dynamic features will depend on this interval.
+
     Returns
     -------
     pd.DataFrame
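(Aside: putting the new signature together, a minimal call might look like the sketch below. This is not an excerpt from the package; the values and geometry are toy placeholders, and `reservoir_static_params` is abbreviated to the keys documented above.)

```python
import pandas as pd
from shapely.geometry import Polygon

# Hypothetical call illustrating the new arguments of create_features_per_reservoir.
df = create_features_per_reservoir(
    idx=0,
    observation_period=[1995, 2015],      # [OSY, OEY]
    reservoir_static_params={
        "obc": 120.0, "hgt": 45.0, "lat": 23.05, "lon": 77.05, "by": 1980,
        "reservoir_polygon": Polygon([(77.0, 23.0), (77.1, 23.0), (77.1, 23.1)]),
        "aec_df": pd.DataFrame({"area": [0.0, 5.0], "elevation": [300.0, 320.0]}),
    },
    catchment_static_params=None,          # now optional
    time_interval=5,                       # one output row per 5-year interval
    feature_engineering=False,             # defer derived/log features to the caller
)
```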
@@ -72,34 +88,88 @@ def create_features_per_row(
         - Catchment static
         - Reservoir dynamic
         - Catchment dynamic
-        - Derived/log-transformed
+        - Derived/log-transformed (if requested)
     """
+
+    # --- Observevation period features ---
+    osy, oey = observation_period
+    if time_interval is not None:
+        intervals = build_intervals(osy, oey, time_interval)
+    else:
+        intervals = [(osy, oey)]
+    # Create observation period dataframe with rows for each interval with same idx
+    df_obs = pd.DataFrame({
+        "idx": idx,
+        "OSY": [i[0] for i in intervals],
+        "OEY": [i[1] for i in intervals],
+    })
 
-    # --- Static features ---
+    # --- Static features (computed ONCE) ---
     df_res_static = reservoir_based_static_features(**reservoir_static_params)
-
-
-
-
-
-
-
-
+    if catchment_static_params is not None:
+        df_catch_static = catchment_based_static_features(**catchment_static_params)
+    else:
+        df_catch_static = pd.DataFrame()
+
+    static_block = pd.concat([df_res_static, df_catch_static], axis=1)
+    # Repeat static rows to match number of intervals
+    static_block = pd.concat(
+        [static_block] * len(df_obs),
+        ignore_index=True
+    )
 
-
-
+    # --- Dynamic features (computed ONCE - internally handles intervals) ---
+    # Combine dynamic features for all intervals
+    df_res_dyn = (
+        reservoir_based_dynamic_features(
+            reservoir_dynamic_info,
+            intervals,
+        )
+        if reservoir_dynamic_info is not None
+        else pd.DataFrame()
+    )
 
-
-
+    df_catch_dyn = (
+        catchment_based_dynamic_features(
+            catchment_dynamic_info,
+            intervals,
+        )
+        if catchment_dynamic_info is not None
+        else pd.DataFrame()
+    )
+
+    # --- Combine all features for all intervals in single dataframe ---
+    df_out = pd.concat(
+        [df_obs, static_block, df_res_dyn, df_catch_dyn],
+        axis=1
+    ).reset_index(drop=True)
 
-    # --- Engineer
-
+    # --- Engineer ONLY if requested ---
+    if feature_engineering:
+        df_out = engineer_and_transform_features(df_out)
 
-    return
+    return df_out
 
+@delayed
+def process_one_reservoir(r):
+    try:
+        df = create_features_per_reservoir(
+            idx=r["idx"],
+            observation_period=r["observation_period"],
+            reservoir_static_params=r["reservoir_static_params"],
+            catchment_static_params=None,
+            reservoir_dynamic_info=r.get("reservoir_dynamic_info"),
+            catchment_dynamic_info=r.get("catchment_dynamic_info"),
+            time_interval=r.get("time_interval"),
+            feature_engineering=False,
+        )
+        return r["idx"], df, None
+    except Exception as e:
+        return r["idx"], pd.DataFrame({"idx": [r["idx"]]}), {str(e):traceback.format_exc()}
 
-def create_features_multi(
-    reservoirs_input: List[Dict]
+def create_features_multi_reservoirs(
+    reservoirs_input: List[Dict],
+    error_log: bool = False,
 ) -> pd.DataFrame:
     """
     Compute features for multiple reservoirs using structured input.
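(Aside: the interval handling above relies on `build_intervals` from the new `ts_aggregate` utility, whose body is not shown in this diff (the file changed by +68 -27, see the summary list). Purely as an assumption for orientation, a helper consistent with its use might look like the sketch below; the real implementation may differ, e.g. in how it treats a final partial interval.)

```python
# Hypothetical sketch of build_intervals(osy, oey, step); NOT the package's code.
def build_intervals(osy: int, oey: int, step: int) -> list:
    intervals = []
    start = osy
    while start < oey:
        end = min(start + step, oey)   # clamp the last interval to the end year
        intervals.append((start, end))
        start = end
    return intervals

# build_intervals(2000, 2010, 4) -> [(2000, 2004), (2004, 2008), (2008, 2010)]
```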
@@ -109,6 +179,10 @@ def create_features_multi(
     reservoirs_input : list of dict
         Each element should be a dictionary with the following keys:
 
+        - `idx` : int
+            Index of the reservoir sedimentation observation.
+        - `observation_period` : list of int
+            Two-element list `[OSY, OEY]` specifying the observation period.
         - `reservoir_static_params` : dict
             Parameters for `reservoir_based_static_features()`.
         - `catchment_static_params` : dict
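(Aside: the remaining keys, `reservoir_dynamic_info`, `catchment_dynamic_info` and the new `time_interval`, are documented in the next hunk. For orientation, a single `reservoirs_input` entry assembled from those keys might look like the sketch below; all values are hypothetical placeholders, the geometries are toy squares, and the NetCDF paths are deliberately left elided.)

```python
import pandas as pd
from shapely.geometry import Polygon

toy_polygon = Polygon([(77.0, 23.0), (77.1, 23.0), (77.1, 23.1), (77.0, 23.1)])
toy_aec = pd.DataFrame({"area": [0.0, 2.5, 5.0], "elevation": [300.0, 310.0, 320.0]})

reservoirs_input = [
    {
        "idx": 0,
        "observation_period": [1995, 2015],
        "reservoir_static_params": {
            "obc": 120.0,            # Original Built Capacity (MCM)
            "hgt": 45.0,             # Dam Height (m)
            "lat": 23.05, "lon": 77.05, "by": 1980,
            "reservoir_polygon": toy_polygon,
            "aec_df": toy_aec,
        },
        "catchment_static_params": {
            "ca": 1500.0, "dca": 900.0,
            "catchment_geometry": toy_polygon,
            "glc_share_path": "...",          # NetCDF paths left elided
            "hwsd2_path": "...",
            "hilda_veg_freq_path": "...",
            "terrain_path": "...",
        },
        "reservoir_dynamic_info": None,
        "catchment_dynamic_info": None,
        "time_interval": 5,
    },
]

df = create_features_multi_reservoirs(reservoirs_input)
# or, to also collect per-reservoir error tracebacks:
# df, errors = create_features_multi_reservoirs(reservoirs_input, error_log=True)
```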
@@ -117,25 +191,107 @@ def create_features_multi(
             Parameters for `reservoir_based_dynamic_features()`.
         - `catchment_dynamic_info` : dict
             Parameters for `catchment_based_dynamic_features()`.
-        - `
-
+        - `time_interval` : int, optional
+            Time interval in years between reservoir observations for dynamic feature calculations.
 
     Returns
     -------
     pd.DataFrame
-        Combined DataFrame with one row per reservoir
+        Combined DataFrame with one row per reservoir and time intervals
+        in the observation period.
     """
 
-
-
-
-
-
-
-
-
-
-
+    # -------- Collect catchments first (cheap, no tqdm needed)
+    catchment_rows = []
+
+    for r in reservoirs_input:
+        c = r["catchment_static_params"]
+        catchment_rows.append({
+            "idx": r["idx"],
+            "CA": c["ca"],
+            "DCA": c["dca"],
+            "geometry": c["catchment_geometry"],
+        })
+
+    catchments_gdf = gpd.GeoDataFrame(
+        catchment_rows, geometry="geometry", crs="EPSG:4326"
+    )
+
+    # -------- Compute catchment static ONCE
+    first = reservoirs_input[0]["catchment_static_params"]
+
+    df_catch_static_all = catchment_based_static_features_multi_reservoir(
+        catchments_gdf,
+        glc_share_path=first["glc_share_path"],
+        hwsd2_path=first["hwsd2_path"],
+        hilda_veg_freq_path=first["hilda_veg_freq_path"],
+        terrain_path=first["terrain_path"],
+    )
+
+    catch_static_lookup = df_catch_static_all.set_index("idx")
+    # catch_static_lookup = pd.DataFrame()  # Placeholder if not computing
+
+    # -------- Per-reservoir loop (tqdm HERE)
+    tasks = [process_one_reservoir(r) for r in reservoirs_input]
+    with ProgressBar():
+        results = compute(*tasks, scheduler="processes", num_workers=4)
+
+    all_reservoirs_static_info = []
+    errors = {}
+
+    for idx, df, err in results:
+        all_reservoirs_static_info.append(df)
+        if err is not None:
+            errors[idx] = err
+
+    # for r in tqdm(
+    #     reservoirs_input,
+    #     total=len(reservoirs_input),
+    #     desc="Generating per-reservoir features",
+    #     unit="reservoir",
+    # ):
+    #     try:
+    #         df = create_features_per_reservoir(
+    #             idx=r["idx"],
+    #             observation_period=r["observation_period"],
+    #             reservoir_static_params=r["reservoir_static_params"],
+    #             catchment_static_params=None,  # already handled
+    #             reservoir_dynamic_info=r.get("reservoir_dynamic_info"),
+    #             catchment_dynamic_info=r.get("catchment_dynamic_info"),
+    #             time_interval=r.get("time_interval"),
+    #             feature_engineering=False,
+    #         )
+    #         all_reservoirs_static_info.append(df)
+    #     except Exception as e:
+    #         errors[r["idx"]] = e
+    #         errors['traceback'] = traceback.print_exc()
+    #         all_reservoirs_static_info.append(
+    #             pd.DataFrame({"idx": r["idx"]})  # Append empty DataFrame for failed reservoir
+    #         )
+
+
+
+    # -------- Concatenate static info
+    df_all = pd.concat(all_reservoirs_static_info, ignore_index=True)
+    # CRITICAL: restore logical ordering
+    df_all = df_all.sort_values(
+        by=["idx", "OSY"],  #
+        ascending=[True, True],
+    ).reset_index(drop=True)
+
+
+    # -------- Merge static catchment features with dynamic ONCE
+    df_all = df_all.merge(
+        catch_static_lookup,
+        left_on="idx",
+        right_index=True,
+        how="left",
+    )
 
-
-
+    # -------- Engineer ONCE
+    df_all = engineer_and_transform_features(df_all)
+
+    if error_log:
+        return df_all, errors
+    else:
+        return df_all
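(Aside: the multi-reservoir path above parallelizes per-reservoir work with dask: each reservoir becomes a `delayed` task and `compute(..., scheduler="processes")` runs the tasks in a local process pool behind a `ProgressBar`. A standalone sketch of that pattern, with a stand-in worker function that is not part of the package:)

```python
from dask import delayed, compute
from dask.diagnostics import ProgressBar

@delayed
def _work(item):
    # stand-in for process_one_reservoir: returns (key, result, error)
    return item["idx"], item["idx"] * 2, None

if __name__ == "__main__":  # required on spawn-based platforms for the processes scheduler
    items = [{"idx": i} for i in range(8)]
    tasks = [_work(it) for it in items]

    with ProgressBar():
        # "processes" sidesteps the GIL for CPU-bound feature extraction;
        # num_workers caps the size of the process pool
        results = compute(*tasks, scheduler="processes", num_workers=4)

    for key, value, err in results:
        print(key, value, err)
```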
reclaim/reclaim.py
CHANGED
@@ -1,5 +1,6 @@
 import os
 import pandas as pd
+from pandas.api.types import is_integer_dtype, is_string_dtype
 import numpy as np
 from sklearn.preprocessing import LabelEncoder
 from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error
@@ -189,12 +190,22 @@ class Reclaim:
         if self.feature_order_list is not None:
             # Reorder columns automatically
             X = X[self.feature_order_list]
+            # for col in self.cat_features:
+            #     X[col] = X[col].astype("category")
         elif isinstance(X, np.ndarray):
             warnings.warn(
                 "Predicting with NumPy array: assumes column order matches training order. "
                 "Safer to use DataFrame with feature names."
             )
 
+        if self.cat_features is not None:
+            for col in self.cat_features:
+                if not (is_integer_dtype(X[col]) or is_string_dtype(X[col])):
+                    raise ValueError(
+                        f"Column {col} must be integer or string type, "
+                        f"found {X[col].dtype}"
+                    )
+
 
         # Base model predictions
         pred_xgb = self.xgb_model.predict(X)
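(Aside: with this check, prediction now rejects categorical columns that are neither integer nor string dtype. A hedged sketch of how a caller might coerce such columns beforehand; the column name is hypothetical and only illustrates the dtype requirement:)

```python
import pandas as pd

# "MRB" stands in for a categorical feature that might arrive as float;
# cast it to int (or str) so the new dtype check in predict() passes.
X = pd.DataFrame({"MRB": [12.0, 7.0], "CA": [1500.0, 320.0]})
for col in ["MRB"]:
    X[col] = X[col].astype(int)
```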
@@ -476,19 +487,21 @@ class Reclaim:
         load_dir = os.path.join(package_dir, "pretrained_model")
 
         # Load XGBoost
-        xgb_path = os.path.join(load_dir, f"{prefix}_xgb.
+        xgb_path = os.path.join(load_dir, f"{prefix}_xgb.pkl")
         if os.path.exists(xgb_path):
-
-            self.xgb_model.
+            import xgboost as xgb
+            self.xgb_model = joblib.load(xgb_path)
 
         # Load LightGBM
-        lgb_path = os.path.join(load_dir, f"{prefix}_lgb.
+        lgb_path = os.path.join(load_dir, f"{prefix}_lgb.pkl")
         if os.path.exists(lgb_path):
-
+            import lightgbm as lgb
+            self.lgb_model = joblib.load(lgb_path)
 
         # Load CatBoost
         cat_path = os.path.join(load_dir, f"{prefix}_cat.cbm")
         if os.path.exists(cat_path):
+            from catboost import CatBoostRegressor
             self.cat_model = CatBoostRegressor()
             self.cat_model.load_model(cat_path)
 
reclaim/static_features/catchment_static.py
CHANGED
@@ -1,6 +1,39 @@
+import geopandas as gpd
 import pandas as pd
 
-from reclaim.static_features.utils.catchment_agreggate import compute_catchment_aggregate
+from reclaim.static_features.utils.catchment_agreggate import compute_catchment_aggregate, compute_catchment_aggregate_multi_reservoir
+
+# ---- Rename columns to abbreviations
+RENAME_VARIABLE_DICT = {
+    # Land cover
+    "artificial_surfaces_mean": "LCAS",
+    "cropland_mean": "LCC",
+    "grassland_mean": "LCG",
+    "tree_covered_mean": "LCT",
+    "shrubs_covered_mean": "LCS",
+    "aquatic_herbaceous_mean": "LCHV",
+    "mangroves_mean": "LCM",
+    "sparse_vegetation_mean": "LCSV",
+    "bare_soil_mean": "LCBS",
+    "snow_glaciers_mean": "LCSG",
+    "waterbodies_mean": "LCWB",
+    "dominant_class_mode": "DLC",
+    # Soil
+    "COARSE_mean": "COAR",
+    "SAND_mean": "SAND",
+    "SILT_mean": "SILT",
+    "CLAY_mean": "CLAY",
+    "BULK_mean": "BULK",
+    # Terrain
+    "elevation_mean": "ELEV",
+    "slope_mean": "SLOP",
+    "curvature_mean": "CURV",
+    "aspect_mean": "ASP",
+    "hillshade_mean": "HILL",
+    # HILDA (optional, not mapped to abbreviations yet)
+    "vegetation_gain_frequency_mean": "VGF",
+    "vegetation_loss_frequency_mean": "VLF",
+}
 
 def catchment_based_static_features(
     ca: float,
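(Aside: the mapping above is applied with `DataFrame.rename`; a toy illustration, not taken from the package:)

```python
import pandas as pd

# Toy example of how RENAME_VARIABLE_DICT shortens the aggregate column names.
raw = pd.DataFrame([{"cropland_mean": 0.42, "elevation_mean": 512.0, "SAND_mean": 38.0}])
print(raw.rename(columns=RENAME_VARIABLE_DICT).columns.tolist())
# ['LCC', 'ELEV', 'SAND']
```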
@@ -89,39 +122,81 @@ def catchment_based_static_features(
     merged = pd.concat([glc_df, hwsd_df, hilda_df, terrain_df], axis=1)
     features.update(merged.to_dict(orient="records")[0])
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Apply renaming
+    features_df = pd.DataFrame([features]).rename(columns=RENAME_VARIABLE_DICT)
+
+    return features_df
+
+def catchment_based_static_features_multi_reservoir(
+    catchments_gdf: gpd.GeoDataFrame,
+    glc_share_path: str,
+    hwsd2_path: str,
+    hilda_veg_freq_path: str,
+    terrain_path: str,
+) -> pd.DataFrame:
+    """
+    Compute catchment-based static features for MULTIPLE reservoirs efficiently.
+
+    Parameters
+    ----------
+    catchments_gdf : geopandas.GeoDataFrame
+        Must contain columns:
+        - idx
+        - CA
+        - DCA
+        - geometry
+    glc_share_path : str
+        Path to the GLC-Share NetCDF file (land cover fractions).
+    hwsd2_path : str
+        Path to the HWSD2 NetCDF file (soil composition).
+    hilda_veg_freq_path : str
+        Path to the HILDA vegetation frequency NetCDF file.
+    terrain_path : str
+        Path to the terrain NetCDF file (DEM derivatives).
+
+    Returns
+    -------
+    pd.DataFrame
+        A single-row DataFrame with abbreviations as columns:
+        - CA, DCA, LCAS, LCC, LCG, LCT, LCS, LCHV, LCM, LCSV,
+          LCBS, LCSG, LCWB, DLC, COAR, SAND, SILT, CLAY, BULK,
+          ELEV, SLOP, CURV, ASP, HILL, VGF, VLF
+    """
+
+    base = catchments_gdf[["idx", "CA", "DCA"]].set_index("idx")
+
+    # --- Land cover
+    glc_dict = {
+        "artificial_surfaces": "mean",
+        "cropland": "mean",
+        "grassland": "mean",
+        "tree_covered": "mean",
+        "shrubs_covered": "mean",
+        "aquatic_herbaceous": "mean",
+        "mangroves": "mean",
+        "sparse_vegetation": "mean",
+        "bare_soil": "mean",
+        "snow_glaciers": "mean",
+        "waterbodies": "mean",
+        "dominant_class": "mode",
     }
 
-
-
+    glc = compute_catchment_aggregate_multi_reservoir(
+        glc_share_path, catchments_gdf, glc_dict
+    )
+
+    hwsd = compute_catchment_aggregate_multi_reservoir(
+        hwsd2_path, catchments_gdf, "mean"
+    )
+
+    hilda = compute_catchment_aggregate_multi_reservoir(
+        hilda_veg_freq_path, catchments_gdf, "mean"
+    )
+
+    terrain = compute_catchment_aggregate_multi_reservoir(
+        terrain_path, catchments_gdf, "mean"
+    )
+
+    df = pd.concat([base, glc, hwsd, hilda, terrain], axis=1)
 
-    return
+    return df.rename(columns=RENAME_VARIABLE_DICT).reset_index()
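(Aside: a hedged sketch of how the multi-reservoir entry point might be called, following its documented input columns. The geometries are toy squares and the NetCDF paths are deliberately left elided; nothing here is real data.)

```python
import geopandas as gpd
from shapely.geometry import Polygon

catchments_gdf = gpd.GeoDataFrame(
    {
        "idx": [0, 1],
        "CA": [1500.0, 320.0],   # Catchment Area (sq km)
        "DCA": [900.0, 320.0],   # Differential Catchment Area (sq km)
        "geometry": [
            Polygon([(77.0, 23.0), (77.3, 23.0), (77.3, 23.3), (77.0, 23.3)]),
            Polygon([(78.0, 24.0), (78.2, 24.0), (78.2, 24.2), (78.0, 24.2)]),
        ],
    },
    geometry="geometry",
    crs="EPSG:4326",
)

df_catch_static = catchment_based_static_features_multi_reservoir(
    catchments_gdf,
    glc_share_path="...",
    hwsd2_path="...",
    hilda_veg_freq_path="...",
    terrain_path="...",
)
```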
reclaim/static_features/reservoir_static.py
CHANGED
@@ -1,8 +1,10 @@
 import pandas as pd
+import geopandas as gpd
 from shapely.geometry import Point, Polygon
+import numpy as np
 
 # Import utils
-from reclaim.static_features.utils.flow_length import find_actual_flow_path
+from reclaim.static_features.utils.flow_length import find_actual_flow_path, plot_flow_length_with_reservoir
 from reclaim.static_features.utils.area_perimeter import calculate_length_area_meters
 from reclaim.static_features.utils.aec_shape import concavity_index, mean_curvature, mean_slope
 
@@ -13,10 +15,12 @@ def reservoir_based_static_features(
     mrb: str = None,
     lat: float = None,
     lon: float = None,
+    by: int = None,
     reservoir_polygon: Polygon = None,
     inlet_point: Point = None,
     resolution: float = None,
-    aec_df: pd.DataFrame = None
+    aec_df: pd.DataFrame = None,
+    savepath_flowpath_fig: str = None,
 ) -> pd.DataFrame:
     """
     Compute reservoir-based features for RECLAIM input dataset.
@@ -33,6 +37,8 @@ def reservoir_based_static_features(
         Latitude of dam location (degrees).
     lon : float, optional
         Longitude of dam location (degrees).
+    by : int, optional
+        Build year of the reservoir.
     reservoir_polygon : shapely.geometry.Polygon, optional
         Reservoir polygon geometry used to compute area and perimeter.
     dam_point : shapely.geometry.Point, optional
@@ -43,6 +49,8 @@ def reservoir_based_static_features(
         Spatial resolution used in flow length calculations.
     aec_df : pd.DataFrame, optional
         Area-Elevation Curve dataframe with columns ['area', 'elevation'].
+    savepath_flowpath_fig : str, optional
+        Path to save the flow path figure, optional.
 
     Returns
     -------
@@ -53,6 +61,7 @@ def reservoir_based_static_features(
     - MRB: Major River Basin
     - LAT: Latitude (deg)
     - LON: Longitude (deg)
+    - BY: Build Year
     - RA: Reservoir Area (sq km)
     - RP: Reservoir Perimeter (km)
     - FL: Flow Length (km)
@@ -67,6 +76,7 @@ def reservoir_based_static_features(
         "MRB": mrb,
         "LAT": lat,
         "LON": lon,
+        "BY": by,
         "RA": None,
         "RP": None,
         "FL": None,
@@ -76,22 +86,51 @@ def reservoir_based_static_features(
     }
 
     # Area and Perimeter
-    if reservoir_polygon is not None:
+    if reservoir_polygon is not None and not reservoir_polygon.is_empty:
         features["RP"], features["RA"] = calculate_length_area_meters(reservoir_polygon, area=True)
         features["RA"] = features["RA"] / 1e6  # m2 → km2
         features["RP"] = features["RP"] / 1e3  # m → km
+    else:
+        features["RP"] = np.nan
+        features["RA"] = np.nan
 
     # Flow Length
     dam_point = Point(lon, lat)
-    if dam_point is not None and reservoir_polygon is not None:
-
-
-
+    if dam_point is not None and reservoir_polygon is not None and not reservoir_polygon.is_empty:
+        try:
+            simplified_reservoir, far_end_point, flow_path, _ = (
+                find_actual_flow_path(dam_point, reservoir_polygon, inlet_point, resolution)
+            )
+            if savepath_flowpath_fig is not None:
+                plot_flow_length_with_reservoir(
+                    dam_point,
+                    reservoir_polygon,
+                    far_end_point,
+                    flow_path,
+                    simplified_reservoir,
+                    savepath_flowpath_fig
+                )
+            if flow_path is not None:
+                gseries = gpd.GeoSeries([flow_path], crs="EPSG:4326")
+                gseries = gseries.to_crs(epsg=3395)
 
+                features["FL"] = gseries.length.iloc[0] / 1e3  # m → km
+            else:
+                features["FL"] = np.nan
+        except Exception as e:
+            print(f"Flow length calculation failed: {e}")
+            features["FL"] = np.nan
+    else:
+        features["FL"] = np.nan
+
     # AEC metrics
-    if aec_df
+    if isinstance(aec_df, pd.DataFrame) and not aec_df.empty:
         features["AECS"] = mean_slope(aec_df)
         features["AECC"] = mean_curvature(aec_df)
         features["AECI"] = concavity_index(aec_df)
+    else:
+        features["AECS"] = np.nan
+        features["AECC"] = np.nan
+        features["AECI"] = np.nan
 
     return pd.DataFrame([features])
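(Aside: the new FL computation measures the flow path by reprojecting a WGS84 LineString to EPSG:3395 before taking its length. A self-contained sketch of that step with a toy path, mirroring the conversion above:)

```python
import geopandas as gpd
from shapely.geometry import LineString

# Toy flow path in lon/lat; reproject to a metric CRS (EPSG:3395, metres)
# before measuring length, then convert to km as in the diff above.
flow_path = LineString([(77.00, 23.00), (77.05, 23.02), (77.10, 23.05)])
gseries = gpd.GeoSeries([flow_path], crs="EPSG:4326").to_crs(epsg=3395)
flow_length_km = gseries.length.iloc[0] / 1e3
print(round(flow_length_km, 2))
```

Worth noting as a design point: Mercator-projected lengths are exact only near the equator and stretch with latitude, so this gives an approximate, not geodesic, flow length.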
reclaim/static_features/utils/aec_shape.py
CHANGED
@@ -94,8 +94,8 @@ def concavity_index(df: pd.DataFrame) -> float:
     line = np.linspace(0, 1, len(area_norm))
 
     # Area under actual curve vs line
-    auc_curve = np.
-    auc_line = np.
+    auc_curve = np.trapezoid(elev_norm, area_norm)
+    auc_line = np.trapezoid(line, area_norm)
 
     concavity = auc_curve / auc_line if auc_line > 0 else np.nan
     return concavity