pyreclaim-0.4.0-py3-none-any.whl → pyreclaim-0.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyreclaim-0.4.0.dist-info → pyreclaim-0.5.0.dist-info}/METADATA +3 -1
- pyreclaim-0.5.0.dist-info/RECORD +28 -0
- {pyreclaim-0.4.0.dist-info → pyreclaim-0.5.0.dist-info}/WHEEL +1 -1
- reclaim/derived_features/feature_engineering_and_transformation.py +59 -4
- reclaim/dynamic_features/catchment_dynamic.py +50 -44
- reclaim/dynamic_features/reservoir_dynamic.py +68 -71
- reclaim/dynamic_features/utils/ts_aggregate.py +62 -27
- reclaim/generate_features.py +181 -42
- reclaim/reclaim.py +9 -0
- reclaim/static_features/catchment_static.py +109 -34
- reclaim/static_features/reservoir_static.py +41 -12
- reclaim/static_features/utils/basin_names.py +78 -0
- reclaim/static_features/utils/catchment_agreggate.py +208 -1
- reclaim/static_features/utils/flow_length.py +65 -1
- pyreclaim-0.4.0.dist-info/RECORD +0 -27
- {pyreclaim-0.4.0.dist-info → pyreclaim-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {pyreclaim-0.4.0.dist-info → pyreclaim-0.5.0.dist-info}/top_level.txt +0 -0
reclaim/generate_features.py
CHANGED
@@ -2,22 +2,30 @@
 
 from typing import Dict, List
 import pandas as pd
+import geopandas as gpd
+from tqdm import tqdm
+import traceback
+from dask import delayed, compute
+from dask.diagnostics import ProgressBar
 
 # Import from your package structure
 from reclaim.static_features.reservoir_static import reservoir_based_static_features
-from reclaim.static_features.catchment_static import catchment_based_static_features
+from reclaim.static_features.catchment_static import catchment_based_static_features, catchment_based_static_features_multi_reservoir
 from reclaim.dynamic_features.reservoir_dynamic import reservoir_based_dynamic_features
 from reclaim.dynamic_features.catchment_dynamic import catchment_based_dynamic_features
+from reclaim.dynamic_features.utils.ts_aggregate import build_intervals
 from reclaim.derived_features.feature_engineering_and_transformation import engineer_and_transform_features
 
 
-def create_features_per_row(
+def create_features_per_reservoir(
     idx: int,
     observation_period: List[int],
     reservoir_static_params: dict,
-    catchment_static_params: dict,
+    catchment_static_params: dict = None,
     reservoir_dynamic_info: dict = None,
     catchment_dynamic_info: dict = None,
+    time_interval: int = None,
+    feature_engineering: bool = True,
 ) -> pd.DataFrame:
     """
     Compute all static, dynamic, and derived features for a single reservoir observation.
@@ -34,7 +42,7 @@ def create_features_per_row(
         Parameters for reservoir_based_static_features(). Expected keys:
         - obc : float, Original Built Capacity (MCM)
         - hgt : float, Dam Height (m)
-        - mrb :
+        - mrb : int, Major River Basin, optional
         - lat : float, Latitude (deg)
         - lon : float, Longitude (deg)
         - by : int, Build Year
@@ -43,7 +51,7 @@ def create_features_per_row(
         - resolution : float, optional
         - aec_df : pd.DataFrame with columns ['area', 'elevation']
 
-    catchment_static_params : dict
+    catchment_static_params : dict, optional
         Parameters for catchment_based_static_features(). Expected keys:
         - ca : float, Catchment Area (sq km)
        - dca : float, Differential Catchment Area (sq km)
@@ -69,6 +77,9 @@ def create_features_per_row(
         - "tmax": {"path": str, "time_column": str, "data_column": str}
         - "wind": {"path": str, "time_column": str, "data_column": str}
 
+    time_interval: int, optional
+        Time interval in years between reservoir observations for dynamic feature calculations. The number of rows in the dynamic features will depend on this interval.
+
     Returns
     -------
     pd.DataFrame
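The `time_interval` path relies on `reclaim.dynamic_features.utils.ts_aggregate.build_intervals`, whose own diff is not shown in this section. As a rough mental model only, it is expected to split `[osy, oey]` into consecutive `(start, end)` windows of `time_interval` years; the helper below is an illustrative stand-in, not the package implementation, and its boundary handling (clamping the last partial window) is an assumption.

```python
from typing import List, Tuple

def build_intervals_sketch(osy: int, oey: int, step: int) -> List[Tuple[int, int]]:
    """Illustrative only: split [osy, oey] into consecutive windows of `step` years."""
    intervals = []
    start = osy
    while start < oey:
        end = min(start + step, oey)   # clamp the last window to the observation end year
        intervals.append((start, end))
        start = end
    return intervals or [(osy, oey)]

# e.g. build_intervals_sketch(2000, 2012, 5) -> [(2000, 2005), (2005, 2010), (2010, 2012)]
```

With `time_interval=None`, the diff below shows the function falling back to a single `(osy, oey)` interval.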
@@ -77,42 +88,88 @@ def create_features_per_row(
         - Catchment static
         - Reservoir dynamic
         - Catchment dynamic
-        - Derived/log-transformed
+        - Derived/log-transformed (if requested)
     """
 
     # --- Observevation period features ---
     osy, oey = observation_period
-
-
-
-
+    if time_interval is not None:
+        intervals = build_intervals(osy, oey, time_interval)
+    else:
+        intervals = [(osy, oey)]
+    # Create observation period dataframe with rows for each interval with same idx
+    df_obs = pd.DataFrame({
+        "idx": idx,
+        "OSY": [i[0] for i in intervals],
+        "OEY": [i[1] for i in intervals],
     })
 
-    # --- Static features ---
+    # --- Static features (computed ONCE) ---
     df_res_static = reservoir_based_static_features(**reservoir_static_params)
-
-
-
-
-
+    if catchment_static_params is not None:
+        df_catch_static = catchment_based_static_features(**catchment_static_params)
+    else:
+        df_catch_static = pd.DataFrame()
+
+    static_block = pd.concat([df_res_static, df_catch_static], axis=1)
+    # Repeat static rows to match number of intervals
+    static_block = pd.concat(
+        [static_block] * len(df_obs),
+        ignore_index=True
+    )
 
-
-
+    # --- Dynamic features (computed ONCE - internally handles intervals) ---
+    # Combine dynamic features for all intervals
+    df_res_dyn = (
+        reservoir_based_dynamic_features(
+            reservoir_dynamic_info,
+            intervals,
+        )
+        if reservoir_dynamic_info is not None
+        else pd.DataFrame()
+    )
 
-
-
+    df_catch_dyn = (
+        catchment_based_dynamic_features(
+            catchment_dynamic_info,
+            intervals,
+        )
+        if catchment_dynamic_info is not None
+        else pd.DataFrame()
+    )
 
-    # --- Combine all
-
+    # --- Combine all features for all intervals in single dataframe ---
+    df_out = pd.concat(
+        [df_obs, static_block, df_res_dyn, df_catch_dyn],
+        axis=1
+    ).reset_index(drop=True)
 
-    # --- Engineer
-
+    # --- Engineer ONLY if requested ---
+    if feature_engineering:
+        df_out = engineer_and_transform_features(df_out)
 
-    return
+    return df_out
 
+@delayed
+def process_one_reservoir(r):
+    try:
+        df = create_features_per_reservoir(
+            idx=r["idx"],
+            observation_period=r["observation_period"],
+            reservoir_static_params=r["reservoir_static_params"],
+            catchment_static_params=None,
+            reservoir_dynamic_info=r.get("reservoir_dynamic_info"),
+            catchment_dynamic_info=r.get("catchment_dynamic_info"),
+            time_interval=r.get("time_interval"),
+            feature_engineering=False,
+        )
+        return r["idx"], df, None
+    except Exception as e:
+        return r["idx"], pd.DataFrame({"idx": [r["idx"]]}), {str(e):traceback.format_exc()}
 
-def create_features_multi(
-    reservoirs_input: List[Dict]
+def create_features_multi_reservoirs(
+    reservoirs_input: List[Dict],
+    error_log: bool = False,
 ) -> pd.DataFrame:
     """
     Compute features for multiple reservoirs using structured input.
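`process_one_reservoir` is wrapped in `dask.delayed`, so building the task list is lazy and the whole batch is materialised by a single `compute(...)` call under a `ProgressBar`; each task returns an `(idx, df, error)` triple so one failed reservoir can be logged without aborting the rest. Below is a minimal, generic sketch of that pattern (the `work` function and its inputs are placeholders, not pyreclaim code):

```python
from dask import delayed, compute
from dask.diagnostics import ProgressBar

@delayed
def work(item):
    # Return (key, result, error) so one failure does not abort the whole batch.
    try:
        return item["idx"], item["value"] * 2, None
    except Exception as e:
        return item.get("idx"), None, str(e)

tasks = [work(it) for it in [{"idx": 1, "value": 10}, {"idx": 2, "value": 20}]]
with ProgressBar():
    results = compute(*tasks, scheduler="processes", num_workers=4)
```

With the `processes` scheduler, the `compute` call generally needs to run under an `if __name__ == "__main__":` guard on platforms that spawn worker processes.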
@@ -134,25 +191,107 @@ def create_features_multi(
        Parameters for `reservoir_based_dynamic_features()`.
    - `catchment_dynamic_info` : dict
        Parameters for `catchment_based_dynamic_features()`.
-
+    - `time_interval` : int, optional
+        Time interval in years between reservoir observations for dynamic feature calculations.
 
     Returns
     -------
     pd.DataFrame
-        Combined DataFrame with one row per reservoir
+        Combined DataFrame with one row per reservoir and time intervals
+        in the observation period.
     """
 
-
-
-
-
-
-
-
-
-
-
-
+    # -------- Collect catchments first (cheap, no tqdm needed)
+    catchment_rows = []
+
+    for r in reservoirs_input:
+        c = r["catchment_static_params"]
+        catchment_rows.append({
+            "idx": r["idx"],
+            "CA": c["ca"],
+            "DCA": c["dca"],
+            "geometry": c["catchment_geometry"],
+        })
+
+    catchments_gdf = gpd.GeoDataFrame(
+        catchment_rows, geometry="geometry", crs="EPSG:4326"
+    )
+
+    # -------- Compute catchment static ONCE
+    first = reservoirs_input[0]["catchment_static_params"]
 
-
-
+    df_catch_static_all = catchment_based_static_features_multi_reservoir(
+        catchments_gdf,
+        glc_share_path=first["glc_share_path"],
+        hwsd2_path=first["hwsd2_path"],
+        hilda_veg_freq_path=first["hilda_veg_freq_path"],
+        terrain_path=first["terrain_path"],
+    )
+
+    catch_static_lookup = df_catch_static_all.set_index("idx")
+    # catch_static_lookup = pd.DataFrame() # Placeholder if not computing
+
+    # -------- Per-reservoir loop (tqdm HERE)
+    tasks = [process_one_reservoir(r) for r in reservoirs_input]
+    with ProgressBar():
+        results = compute(*tasks, scheduler="processes", num_workers=4)
+
+    all_reservoirs_static_info = []
+    errors = {}
+
+    for idx, df, err in results:
+        all_reservoirs_static_info.append(df)
+        if err is not None:
+            errors[idx] = err
+
+    # for r in tqdm(
+    #     reservoirs_input,
+    #     total=len(reservoirs_input),
+    #     desc="Generating per-reservoir features",
+    #     unit="reservoir",
+    # ):
+    #     try:
+    #         df = create_features_per_reservoir(
+    #             idx=r["idx"],
+    #             observation_period=r["observation_period"],
+    #             reservoir_static_params=r["reservoir_static_params"],
+    #             catchment_static_params=None, # already handled
+    #             reservoir_dynamic_info=r.get("reservoir_dynamic_info"),
+    #             catchment_dynamic_info=r.get("catchment_dynamic_info"),
+    #             time_interval=r.get("time_interval"),
+    #             feature_engineering=False,
+    #         )
+    #         all_reservoirs_static_info.append(df)
+    #     except Exception as e:
+    #         errors[r["idx"]] = e
+    #         errors['traceback'] = traceback.print_exc()
+    #         all_reservoirs_static_info.append(
+    #             pd.DataFrame({"idx": r["idx"]}) # Append empty DataFrame for failed reservoir
+    #         )
+
+
+
+    # -------- Concatenate static info
+    df_all = pd.concat(all_reservoirs_static_info, ignore_index=True)
+    # CRITICAL: restore logical ordering
+    df_all = df_all.sort_values(
+        by=["idx", "OSY"], #
+        ascending=[True, True],
+    ).reset_index(drop=True)
+
+
+    # -------- Merge static catchment features with dynamic ONCE
+    df_all = df_all.merge(
+        catch_static_lookup,
+        left_on="idx",
+        right_index=True,
+        how="left",
+    )
+
+    # -------- Engineer ONCE
+    df_all = engineer_and_transform_features(df_all)
+
+    if error_log:
+        return df_all, errors
+    else:
+        return df_all
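Pieced together from the hunks above, one entry of `reservoirs_input` looks roughly like the sketch below: the key names come from the docstrings and dictionary accesses in this diff, while every concrete value, geometry, and file path is a placeholder.

```python
from shapely.geometry import Point, Polygon
from reclaim.generate_features import create_features_multi_reservoirs

reservoirs_input = [
    {
        "idx": 0,
        "observation_period": [2000, 2020],
        "time_interval": 5,   # optional: split 2000-2020 into 5-year windows
        "reservoir_static_params": {
            "obc": 120.0,     # Original Built Capacity (MCM)
            "hgt": 45.0,      # Dam Height (m)
            "lat": 10.5,
            "lon": 78.2,
            "by": 1985,       # Build Year
            "reservoir_polygon": Polygon([(78.1, 10.4), (78.3, 10.4), (78.2, 10.6)]),
            "inlet_point": Point(78.25, 10.55),
        },
        "catchment_static_params": {
            "ca": 1500.0,     # Catchment Area (sq km)
            "dca": 300.0,     # Differential Catchment Area (sq km)
            "catchment_geometry": Polygon([(77.9, 10.2), (78.5, 10.2), (78.2, 10.9)]),
            "glc_share_path": "data/glc_share.nc",          # placeholder paths
            "hwsd2_path": "data/hwsd2.nc",
            "hilda_veg_freq_path": "data/hilda_veg_freq.nc",
            "terrain_path": "data/terrain.nc",
        },
        # "reservoir_dynamic_info" / "catchment_dynamic_info" omitted in this sketch
    },
]

df_features, errors = create_features_multi_reservoirs(reservoirs_input, error_log=True)
```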
reclaim/reclaim.py
CHANGED
@@ -1,5 +1,6 @@
 import os
 import pandas as pd
+from pandas.api.types import is_integer_dtype, is_string_dtype
 import numpy as np
 from sklearn.preprocessing import LabelEncoder
 from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error
@@ -197,6 +198,14 @@ class Reclaim:
                 "Safer to use DataFrame with feature names."
             )
 
+        if self.cat_features is not None:
+            for col in self.cat_features:
+                if not (is_integer_dtype(X[col]) or is_string_dtype(X[col])):
+                    raise ValueError(
+                        f"Column {col} must be integer or string type, "
+                        f"found {X[col].dtype}"
+                    )
+
 
         # Base model predictions
         pred_xgb = self.xgb_model.predict(X)
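The new dtype guard in `Reclaim` rejects categorical feature columns that are neither integer nor string typed, which commonly happens when integer codes pick up NaNs and become floats. A small self-contained illustration of the same check; the column names and the `Int64` fix are examples, not part of the package API:

```python
import pandas as pd
from pandas.api.types import is_integer_dtype, is_string_dtype

cat_features = ["MRB", "DLC"]                              # hypothetical categorical columns
X = pd.DataFrame({"MRB": [1.0, 2.0], "DLC": ["a", "b"]})   # MRB became float, e.g. via NaNs

for col in cat_features:
    if not (is_integer_dtype(X[col]) or is_string_dtype(X[col])):
        print(f"Column {col} would be rejected: found {X[col].dtype}")
        X[col] = X[col].astype("Int64")                    # one way to restore an integer dtype

print(X.dtypes)   # MRB is now Int64, DLC is object (string); both pass the guard
```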

reclaim/static_features/catchment_static.py
CHANGED

@@ -1,6 +1,39 @@
+import geopandas as gpd
 import pandas as pd
 
-from reclaim.static_features.utils.catchment_agreggate import compute_catchment_aggregate
+from reclaim.static_features.utils.catchment_agreggate import compute_catchment_aggregate, compute_catchment_aggregate_multi_reservoir
+
+# ---- Rename columns to abbreviations
+RENAME_VARIABLE_DICT = {
+    # Land cover
+    "artificial_surfaces_mean": "LCAS",
+    "cropland_mean": "LCC",
+    "grassland_mean": "LCG",
+    "tree_covered_mean": "LCT",
+    "shrubs_covered_mean": "LCS",
+    "aquatic_herbaceous_mean": "LCHV",
+    "mangroves_mean": "LCM",
+    "sparse_vegetation_mean": "LCSV",
+    "bare_soil_mean": "LCBS",
+    "snow_glaciers_mean": "LCSG",
+    "waterbodies_mean": "LCWB",
+    "dominant_class_mode": "DLC",
+    # Soil
+    "COARSE_mean": "COAR",
+    "SAND_mean": "SAND",
+    "SILT_mean": "SILT",
+    "CLAY_mean": "CLAY",
+    "BULK_mean": "BULK",
+    # Terrain
+    "elevation_mean": "ELEV",
+    "slope_mean": "SLOP",
+    "curvature_mean": "CURV",
+    "aspect_mean": "ASP",
+    "hillshade_mean": "HILL",
+    # HILDA (optional, not mapped to abbreviations yet)
+    "vegetation_gain_frequency_mean": "VGF",
+    "vegetation_loss_frequency_mean": "VLF",
+}
 
 def catchment_based_static_features(
     ca: float,
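`RENAME_VARIABLE_DICT` is applied with a plain `DataFrame.rename`, so aggregate columns such as `elevation_mean` come out under the short feature codes used elsewhere in RECLAIM. A minimal illustration (the input values are invented):

```python
import pandas as pd
from reclaim.static_features.catchment_static import RENAME_VARIABLE_DICT

# Hypothetical aggregate output before renaming
raw = pd.DataFrame([{"elevation_mean": 412.3, "slope_mean": 4.7, "dominant_class_mode": 2}])

renamed = raw.rename(columns=RENAME_VARIABLE_DICT)
print(renamed.columns.tolist())   # ['ELEV', 'SLOP', 'DLC']
```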
@@ -89,39 +122,81 @@ def catchment_based_static_features(
     merged = pd.concat([glc_df, hwsd_df, hilda_df, terrain_df], axis=1)
     features.update(merged.to_dict(orient="records")[0])
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Apply renaming
+    features_df = pd.DataFrame([features]).rename(columns=RENAME_VARIABLE_DICT)
+
+    return features_df
+
+def catchment_based_static_features_multi_reservoir(
+    catchments_gdf: gpd.GeoDataFrame,
+    glc_share_path: str,
+    hwsd2_path: str,
+    hilda_veg_freq_path: str,
+    terrain_path: str,
+) -> pd.DataFrame:
+    """
+    Compute catchment-based static features for MULTIPLE reservoirs efficiently.
+
+    Parameters
+    ----------
+    catchments_gdf : geopandas.GeoDataFrame
+        Must contain columns:
+        - idx
+        - CA
+        - DCA
+        - geometry
+    glc_share_path : str
+        Path to the GLC-Share NetCDF file (land cover fractions).
+    hwsd2_path : str
+        Path to the HWSD2 NetCDF file (soil composition).
+    hilda_veg_freq_path : str
+        Path to the HILDA vegetation frequency NetCDF file.
+    terrain_path : str
+        Path to the terrain NetCDF file (DEM derivatives).
+
+    Returns
+    -------
+    pd.DataFrame
+        A single-row DataFrame with abbreviations as columns:
+        - CA, DCA, LCAS, LCC, LCG, LCT, LCS, LCHV, LCM, LCSV,
+          LCBS, LCSG, LCWB, DLC, COAR, SAND, SILT, CLAY, BULK,
+          ELEV, SLOP, CURV, ASP, HILL, VGF, VLF
+    """
+
+    base = catchments_gdf[["idx", "CA", "DCA"]].set_index("idx")
+
+    # --- Land cover
+    glc_dict = {
+        "artificial_surfaces": "mean",
+        "cropland": "mean",
+        "grassland": "mean",
+        "tree_covered": "mean",
+        "shrubs_covered": "mean",
+        "aquatic_herbaceous": "mean",
+        "mangroves": "mean",
+        "sparse_vegetation": "mean",
+        "bare_soil": "mean",
+        "snow_glaciers": "mean",
+        "waterbodies": "mean",
+        "dominant_class": "mode",
     }
 
-
-
+    glc = compute_catchment_aggregate_multi_reservoir(
+        glc_share_path, catchments_gdf, glc_dict
+    )
+
+    hwsd = compute_catchment_aggregate_multi_reservoir(
+        hwsd2_path, catchments_gdf, "mean"
+    )
+
+    hilda = compute_catchment_aggregate_multi_reservoir(
+        hilda_veg_freq_path, catchments_gdf, "mean"
+    )
+
+    terrain = compute_catchment_aggregate_multi_reservoir(
+        terrain_path, catchments_gdf, "mean"
+    )
+
+    df = pd.concat([base, glc, hwsd, hilda, terrain], axis=1)
 
-    return
+    return df.rename(columns=RENAME_VARIABLE_DICT).reset_index()
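Based on the signature and docstring above, the multi-reservoir variant takes one GeoDataFrame covering all catchments plus the four NetCDF paths. A usage sketch with toy geometries and placeholder paths (the data files themselves are not part of this diff):

```python
import geopandas as gpd
from shapely.geometry import Polygon
from reclaim.static_features.catchment_static import catchment_based_static_features_multi_reservoir

# Two toy catchments; geometries and NetCDF paths are placeholders
catchments_gdf = gpd.GeoDataFrame(
    {
        "idx": [0, 1],
        "CA": [1500.0, 820.0],
        "DCA": [300.0, 120.0],
        "geometry": [
            Polygon([(77.9, 10.2), (78.5, 10.2), (78.2, 10.9)]),
            Polygon([(75.0, 12.0), (75.6, 12.0), (75.3, 12.5)]),
        ],
    },
    geometry="geometry",
    crs="EPSG:4326",
)

df_catch = catchment_based_static_features_multi_reservoir(
    catchments_gdf,
    glc_share_path="data/glc_share.nc",
    hwsd2_path="data/hwsd2.nc",
    hilda_veg_freq_path="data/hilda_veg_freq.nc",
    terrain_path="data/terrain.nc",
)
# df_catch holds one row per idx with the abbreviated feature columns (LCAS, ..., ELEV, ...)
```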

reclaim/static_features/reservoir_static.py
CHANGED

@@ -1,9 +1,10 @@
 import pandas as pd
+import geopandas as gpd
 from shapely.geometry import Point, Polygon
 import numpy as np
 
 # Import utils
-from reclaim.static_features.utils.flow_length import find_actual_flow_path
+from reclaim.static_features.utils.flow_length import find_actual_flow_path, plot_flow_length_with_reservoir
 from reclaim.static_features.utils.area_perimeter import calculate_length_area_meters
 from reclaim.static_features.utils.aec_shape import concavity_index, mean_curvature, mean_slope
 
@@ -18,7 +19,8 @@ def reservoir_based_static_features(
     reservoir_polygon: Polygon = None,
     inlet_point: Point = None,
     resolution: float = None,
-    aec_df: pd.DataFrame = None
+    aec_df: pd.DataFrame = None,
+    savepath_flowpath_fig: str = None,
 ) -> pd.DataFrame:
     """
     Compute reservoir-based features for RECLAIM input dataset.
@@ -47,6 +49,8 @@ def reservoir_based_static_features(
         Spatial resolution used in flow length calculations.
     aec_df : pd.DataFrame, optional
         Area-Elevation Curve dataframe with columns ['area', 'elevation'].
+    savepath_flowpath_fig : str, optional
+        Path to save the flow path figure, optional.
 
     Returns
     -------
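Putting the signature changes together, a call that also writes the flow-path figure could look like the sketch below. Parameter names follow the docstrings quoted in generate_features.py and the signature fragments above; all values, the toy AEC table, and the output path are invented.

```python
import pandas as pd
from shapely.geometry import Point, Polygon
from reclaim.static_features.reservoir_static import reservoir_based_static_features

aec_df = pd.DataFrame({"area": [0.5, 2.0, 4.5], "elevation": [100.0, 110.0, 120.0]})  # toy AEC

df_static = reservoir_based_static_features(
    obc=120.0,                     # Original Built Capacity (MCM)
    hgt=45.0,                      # Dam Height (m)
    lat=10.5,
    lon=78.2,
    by=1985,                       # Build Year
    reservoir_polygon=Polygon([(78.1, 10.4), (78.3, 10.4), (78.2, 10.6)]),
    inlet_point=Point(78.25, 10.55),
    resolution=0.001,
    aec_df=aec_df,
    savepath_flowpath_fig="figures/flow_path_0.png",   # new in 0.5.0; path is a placeholder
)
```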
@@ -82,26 +86,51 @@ def reservoir_based_static_features(
     }
 
     # Area and Perimeter
-    if reservoir_polygon is not None:
+    if reservoir_polygon is not None and not reservoir_polygon.is_empty:
         features["RP"], features["RA"] = calculate_length_area_meters(reservoir_polygon, area=True)
         features["RA"] = features["RA"] / 1e6 # m2 → km2
         features["RP"] = features["RP"] / 1e3 # m → km
+    else:
+        features["RP"] = np.nan
+        features["RA"] = np.nan
 
     # Flow Length
     dam_point = Point(lon, lat)
-    if dam_point is not None and reservoir_polygon is not None:
-
-
-
-
-
-
-
+    if dam_point is not None and reservoir_polygon is not None and not reservoir_polygon.is_empty:
+        try:
+            simplified_reservoir, far_end_point, flow_path, _ = (
+                find_actual_flow_path(dam_point, reservoir_polygon, inlet_point, resolution)
+            )
+            if savepath_flowpath_fig is not None:
+                plot_flow_length_with_reservoir(
+                    dam_point,
+                    reservoir_polygon,
+                    far_end_point,
+                    flow_path,
+                    simplified_reservoir,
+                    savepath_flowpath_fig
+                )
+            if flow_path is not None:
+                gseries = gpd.GeoSeries([flow_path], crs="EPSG:4326")
+                gseries = gseries.to_crs(epsg=3395)
 
+                features["FL"] = gseries.length.iloc[0] / 1e3 # m → km
+            else:
+                features["FL"] = np.nan
+        except Exception as e:
+            print(f"Flow length calculation failed: {e}")
+            features["FL"] = np.nan
+    else:
+        features["FL"] = np.nan
+
     # AEC metrics
-    if aec_df
+    if isinstance(aec_df, pd.DataFrame) and not aec_df.empty:
         features["AECS"] = mean_slope(aec_df)
         features["AECC"] = mean_curvature(aec_df)
         features["AECI"] = concavity_index(aec_df)
+    else:
+        features["AECS"] = np.nan
+        features["AECC"] = np.nan
+        features["AECI"] = np.nan
 
     return pd.DataFrame([features])
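The flow-length block measures the path in metres by wrapping the WGS84 `flow_path` in a `GeoSeries` and reprojecting it to World Mercator (EPSG:3395) before taking `.length`. A standalone illustration of that idea with a made-up two-point line:

```python
import geopandas as gpd
from shapely.geometry import LineString

# Hypothetical flow path in geographic coordinates (lon, lat)
flow_path = LineString([(78.20, 10.50), (78.25, 10.55)])

gseries = gpd.GeoSeries([flow_path], crs="EPSG:4326")   # degrees
gseries = gseries.to_crs(epsg=3395)                     # World Mercator, metres
print(gseries.length.iloc[0] / 1e3)                     # length in km (≈ 8 km here)
```

EPSG:3395 lengths carry Mercator scale distortion that grows with latitude (about +1.7 % at 10.5° latitude), which is worth keeping in mind when interpreting the FL feature.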

reclaim/static_features/utils/basin_names.py
ADDED

@@ -0,0 +1,78 @@
+ID_to_BASIN = {1030008110: 'RUFIJI',
+               1030011660: 'ZAMBEZI',
+               1030012590: 'LIMPOPO',
+               1030012600: 'GOURITZ',
+               1030015850: 'ORANGE',
+               1030022420: 'NIGER',
+               1030022430: 'OUEME',
+               1030023300: 'VOLTA',
+               1030023310: 'BANDAMA',
+               1030027430: 'DRAA',
+               1030029810: 'MOULOUYA',
+               1030034260: 'NILE',
+               1030040260: 'LAKE TURKANA',
+               1030040300: 'MELRHIR',
+               4030003020: 'LIAO HE',
+               4030006940: 'HAI HE',
+               4030007850: 'YELLOW RIVER',
+               4030009880: 'YANGTZE',
+               4030017020: 'MEKONG',
+               4030017030: 'CHAO PHRAYA',
+               4030018350: 'MAE KLONG',
+               4030022790: 'IRRAWADDY',
+               4030024190: 'KARNAPHULI',
+               4030025450: 'GANGES',
+               4030025460: 'GODAVARI',
+               4030029530: 'NARMADA',
+               4030031750: 'KUTCH',
+               4030033640: 'INDUS',
+               4030039450: 'TONE',
+               4030046370: 'TAIWAN',
+               4030048900: 'SRI LANKA',
+               4030050220: 'UPPER ARAL SEA',
+               4030050240: 'LOWER ARAL SEA',
+               5030007690: 'BENGAWAN SOLO',
+               5030015660: 'KAPUAS',
+               5030031240: 'INDONESIA',
+               5030055010: 'MARIANA_ISLANDS',
+               5030067860: 'FITZROY',
+               2030000010: 'ORONTES',
+               2030003440: 'KIZILIRMAK',
+               2030008490: 'DANUBE',
+               2030009230: 'MARITSA RIVER',
+               2030012730: 'PO',
+               2030014550: 'TEVERE',
+               2030016230: 'RHONE',
+               2030020320: 'RHINE',
+               2030026030: 'VISTULA',
+               2030046500: 'SICILY',
+               2030047500: 'SARDINIA_CORSICA',
+               2030068690: 'KURA',
+               2030073570: 'SHATT AL ARAB',
+               2030085720: 'DEAD SEA',
+               7030000010: 'SANTIAGO',
+               7030008710: 'COLORADO (also COLORADO RIVER)',
+               7030008720: 'SAN JOAQUIN',
+               7030014250: 'ROGUE',
+               7030014930: 'COLUMBIA',
+               7030022240: 'NELSON',
+               7030034520: 'SAINT LAWRENCE (also SAINT-LAURENT)',
+               7030038340: 'SUSQUEHANNA',
+               7030042040: 'MOBILE RIVER',
+               7030047060: 'MISSISSIPPI',
+               7030047840: 'BRAZOS',
+               7030049270: 'RIO GRANDE (also BRAVO)',
+               7030049280: 'USUMACINTA',
+               7030073620: 'GREAT SALT LAKE',
+               6030000010: 'ATRATO',
+               6030004470: 'ORINOCO',
+               6030007000: 'AMAZON (also AMAZONAS)',
+               6030009770: 'JAGUARIBE',
+               6030011780: 'SAO FRANCISCO',
+               6030011790: 'DOCE',
+               6030016970: 'PARANA',
+               6030029280: 'CHIRA',
+               6030032290: 'GUAYAS'}
+
+def get_basin_name(basin_id):
+    return ID_to_BASIN[basin_id]
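`get_basin_name` is a direct dictionary lookup, so an ID missing from `ID_to_BASIN` raises `KeyError`. A quick usage example, with a `.get`-based tolerant variant:

```python
from reclaim.static_features.utils.basin_names import get_basin_name, ID_to_BASIN

print(get_basin_name(2030008490))            # 'DANUBE'

# Tolerant lookup for IDs outside the table:
print(ID_to_BASIN.get(1234567890, "UNKNOWN"))
```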