pyreclaim 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyreclaim-0.4.0.dist-info → pyreclaim-0.5.0.dist-info}/METADATA +3 -1
- pyreclaim-0.5.0.dist-info/RECORD +28 -0
- {pyreclaim-0.4.0.dist-info → pyreclaim-0.5.0.dist-info}/WHEEL +1 -1
- reclaim/derived_features/feature_engineering_and_transformation.py +59 -4
- reclaim/dynamic_features/catchment_dynamic.py +50 -44
- reclaim/dynamic_features/reservoir_dynamic.py +68 -71
- reclaim/dynamic_features/utils/ts_aggregate.py +62 -27
- reclaim/generate_features.py +181 -42
- reclaim/reclaim.py +9 -0
- reclaim/static_features/catchment_static.py +109 -34
- reclaim/static_features/reservoir_static.py +41 -12
- reclaim/static_features/utils/basin_names.py +78 -0
- reclaim/static_features/utils/catchment_agreggate.py +208 -1
- reclaim/static_features/utils/flow_length.py +65 -1
- pyreclaim-0.4.0.dist-info/RECORD +0 -27
- {pyreclaim-0.4.0.dist-info → pyreclaim-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {pyreclaim-0.4.0.dist-info → pyreclaim-0.5.0.dist-info}/top_level.txt +0 -0
|
@@ -1,11 +1,14 @@
|
|
|
1
|
+
import os
|
|
1
2
|
import pandas as pd
|
|
2
3
|
import numpy as np
|
|
3
4
|
import xarray as xr
|
|
4
5
|
import rioxarray
|
|
5
6
|
import geopandas as gpd
|
|
6
7
|
import regionmask
|
|
8
|
+
import rasterio.features
|
|
7
9
|
from collections import Counter
|
|
8
10
|
from shapely.geometry import Polygon
|
|
11
|
+
from tqdm import tqdm
|
|
9
12
|
|
|
10
13
|
|
|
11
14
|
def compute_catchment_aggregate(
|
|
@@ -145,4 +148,208 @@ def compute_catchment_aggregate(
|
|
|
145
148
|
else:
|
|
146
149
|
raise ValueError(f"Unknown function type '{func}' for variable '{var}'")
|
|
147
150
|
|
|
148
|
-
return pd.DataFrame([results])
|
|
151
|
+
return pd.DataFrame([results])
|
|
152
|
+
|
|
153
|
+
def compute_catchment_aggregate_multi_reservoir(
    netcdf_path,
    catchments_gdf: gpd.GeoDataFrame,
    function_type="mean",
    idx_col: str = "idx",
) -> pd.DataFrame:
    """
    Compute catchment-based aggregated features for MULTIPLE catchments
    from a single NetCDF file (opened only once).

    Parameters
    ----------
    netcdf_path : str or Path
        Path to the NetCDF file containing raster variables.

    catchments_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing multiple catchment geometries.
        Must include:
            - geometry column
            - idx_col identifying each reservoir

    function_type : str or dict, default="mean"
        Aggregation rule(s). A string applies that single rule to every data
        variable in the file. A dict maps variable name -> rule, where a rule
        is a string, a dict (``{"type": ..., "threshold": ..., "direction": ...}``)
        or a list of those. Supported rule types are ``"mean"``, ``"mode"``,
        ``"std"``, ``"percent"`` and ``"threshold_percent"``.

    idx_col : str, default="idx"
        Column identifying reservoir index.

    Returns
    -------
    pd.DataFrame
        DataFrame indexed by idx with aggregated features. Catchments with a
        missing/empty geometry, or whose bounding box selects no grid cells,
        are kept as rows without feature values.

    Raises
    ------
    ValueError
        If ``function_type`` is neither a string nor a dict, if a rule has an
        unsupported format or type, or if ``"threshold_percent"`` is requested
        without a threshold.

    Notes
    -----
    The dataset is tagged as EPSG:4326 and the catchment geometries are used
    as-is, so they are assumed to already be in that CRS.
    Cells that are NaN (outside the catchment mask, or missing in the raster)
    are excluded from every statistic, including ``"mode"`` and ``"percent"``.
    """
    # Fail fast on a malformed spec, before any file I/O happens.
    if not isinstance(function_type, (str, dict)):
        raise ValueError("function_type must be a string or a dictionary.")

    results = []

    # The context manager guarantees the underlying file handle is released,
    # even if aggregation raises part-way through the loop.
    with xr.open_dataset(netcdf_path, chunks={"x": 200, "y": 200}) as source_ds:
        ds = source_ds.rio.write_crs("EPSG:4326")

        if "lon" in ds.dims and "lat" in ds.dims:
            ds = ds.rename({"lon": "x", "lat": "y"})

        if isinstance(function_type, str):
            apply_func = {v: function_type for v in ds.data_vars}
        else:
            apply_func = function_type

        # Label-based slicing must follow the direction of each coordinate axis.
        y_descending = bool(ds.y[0] > ds.y[-1])
        x_descending = bool(ds.x[0] > ds.x[-1])

        # Grid spacing is loop-invariant; it pads every bounding box by one cell.
        dx = float(abs(ds.x[1] - ds.x[0]))
        dy = float(abs(ds.y[1] - ds.y[0]))

        for _, row_gdf in tqdm(
            catchments_gdf.iterrows(),
            total=catchments_gdf.shape[0],
            desc=f"Aggregating static features of catchments related to {os.path.basename(netcdf_path)}",
            unit="catchment",
        ):
            idx = row_gdf[idx_col]
            geom = row_gdf.geometry

            # Registered up front and filled in place, so every catchment
            # yields a row even when it is skipped below.
            row = {idx_col: idx}
            results.append(row)

            if geom is None or geom.is_empty:
                continue

            # ---- Spatial subset FIRST (bounding box padded by one cell)
            minx, miny, maxx, maxy = geom.bounds
            minx -= dx
            maxx += dx
            miny -= dy
            maxy += dy

            y_slice = slice(maxy, miny) if y_descending else slice(miny, maxy)
            x_slice = slice(maxx, minx) if x_descending else slice(minx, maxx)
            subset_ds = ds.sel(x=x_slice, y=y_slice)

            if subset_ds.x.size == 0 or subset_ds.y.size == 0:
                print(f"Warning: Subset for catchment idx={idx} is empty. Skipping.")
                continue

            # ---- Create mask on SUBSET
            catchment_mask = _catchment_pixel_mask(subset_ds, geom)

            # ---- Aggregate variables
            for var, func_list in apply_func.items():
                masked = subset_ds[var].where(catchment_mask).compute()
                # drop=True only trims rows/columns that are entirely NaN;
                # NaN cells inside the remaining window are filtered later.
                arr = masked.where(~masked.isnull(), drop=True)
                row.update(_aggregate_variable(var, arr, func_list))

    if not results:
        # set_index() would raise KeyError on a column-less frame.
        return pd.DataFrame(index=pd.Index([], name=idx_col))

    return pd.DataFrame(results).set_index(idx_col)


def _catchment_pixel_mask(subset_ds, geom):
    """Rasterize ``geom`` onto the grid of ``subset_ds`` as a boolean (y, x) mask."""
    grid_shape = (subset_ds.sizes["y"], subset_ds.sizes["x"])

    mask = rasterio.features.rasterize(
        [(geom, 1)],
        out_shape=grid_shape,
        transform=subset_ds.rio.transform(),
        all_touched=True,
        fill=0,
        dtype="uint8",
    )

    # Fallback: if rasterization selected no cell, use the single grid cell
    # nearest to the catchment centroid.
    if mask.sum() == 0:
        centroid = geom.centroid
        x_idx = np.argmin(np.abs(subset_ds.x.values - centroid.x))
        y_idx = np.argmin(np.abs(subset_ds.y.values - centroid.y))
        mask = np.zeros(grid_shape, dtype=np.uint8)
        mask[y_idx, x_idx] = 1

    return xr.DataArray(
        mask.astype(bool),
        coords={"y": subset_ds.y, "x": subset_ds.x},
        dims=("y", "x"),
    )


def _aggregate_variable(var, arr, func_list):
    """Apply the aggregation rule(s) in ``func_list`` to ``arr``; return {feature name: value}."""
    features = {}

    values = arr.values.ravel()
    # Counting statistics must not see NaN: int(nan) raises, and NaN cells
    # would otherwise be counted as classes and inflate the denominator.
    valid = values[~np.isnan(values)]
    if valid.size == 0:
        return features

    if not isinstance(func_list, list):
        func_list = [func_list]

    for func_info in func_list:
        if isinstance(func_info, str):
            func = func_info
            threshold = None
            threshold_direction = None
        elif isinstance(func_info, dict):
            func = func_info.get("type")
            threshold = func_info.get("threshold", None)
            threshold_direction = func_info.get("direction", "greater")
        else:
            raise ValueError(f"Invalid function format for variable {var}")

        if func == "mean":
            features[f"{var}_mean"] = float(arr.mean().item())

        elif func == "mode":
            features[f"{var}_mode"] = Counter(valid).most_common(1)[0][0]

        elif func == "std":
            features[f"{var}_std"] = float(arr.std().item())

        elif func == "percent":
            total = valid.size
            for cls, count in Counter(valid).items():
                features[f"{var}_percent_{int(cls)}"] = (count / total) * 100

        elif func == "threshold_percent":
            if threshold is None:
                raise ValueError(f"Threshold not provided for variable '{var}'")
            if threshold_direction == "greater":
                percent = (valid > threshold).sum() / len(valid) * 100
                features[f"{var}_percent_above_{threshold}"] = percent
            else:
                percent = (valid < threshold).sum() / len(valid) * 100
                features[f"{var}_percent_below_{threshold}"] = percent

        else:
            raise ValueError(f"Unknown function type '{func}' for variable '{var}'")

    return features
|
|
@@ -2,6 +2,9 @@ import numpy as np
|
|
|
2
2
|
import pandas as pd
|
|
3
3
|
import geopandas as gpd
|
|
4
4
|
import networkx as nx # NetworkX library import
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
from matplotlib.lines import Line2D
|
|
7
|
+
from matplotlib.patches import Patch
|
|
5
8
|
|
|
6
9
|
from shapely.geometry import shape, Point, LineString, Polygon, GeometryCollection, MultiLineString, MultiPolygon
|
|
7
10
|
from shapely.ops import transform, split, linemerge, unary_union
|
|
@@ -452,4 +455,65 @@ def find_actual_flow_path(dam_point, reservoir_polygon, inlet_point=None, resolu
|
|
|
452
455
|
# Step4: Find the shortest path within geometry between dam_point and far_end_point
|
|
453
456
|
path, graph = create_continuous_linestring(dam_point,far_end_point, simplified_reservoir, optimal_resolution)
|
|
454
457
|
|
|
455
|
-
return simplified_reservoir,far_end_point, path, graph
|
|
458
|
+
return simplified_reservoir,far_end_point, path, graph
|
|
459
|
+
|
|
460
|
+
def plot_flow_length_with_reservoir(dam, reservoir, farthest_point, actual_flow_path, simplified_reservoir, save_path):
    """
    Save a two-panel figure of the flow path drawn over the reservoir.

    The left panel shows the original reservoir geometry and the right panel
    the simplified one; both panels also show the dam point, the farthest
    point and the flow path. A single shared legend is placed below the panels.

    Parameters
    ----------
    dam : shapely geometry
        Dam location, drawn as a green triangle when it is a Point.
    reservoir : shapely geometry
        Original reservoir geometry (left panel).
    farthest_point : shapely geometry
        Farthest point from the dam, drawn as a red cross when it is a Point
        (labelled "Reservoir inlet" in the legend).
    actual_flow_path : shapely geometry
        Flow path between the dam and the farthest point, drawn in blue.
    simplified_reservoir : shapely geometry
        Simplified reservoir geometry (right panel).
    save_path : str or Path
        Destination passed to ``Figure.savefig`` (300 dpi, tight bounding box).

    Notes
    -----
    Axes are labelled Longitude/Latitude; no CRS is set or checked here, so
    inputs are presumably in geographic coordinates.
    Geometries that are ``None`` or empty are skipped instead of raising.
    """
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))

    # try/finally so the figure is released even if plotting or saving fails;
    # all calls go through `fig` rather than pyplot's implicit current figure.
    try:
        panels = (
            (ax[0], reservoir, 'Original Geometry'),
            (ax[1], simplified_reservoir, 'Simplified Geometry'),
        )

        for axis, reservoir_geom, title in panels:
            layers = (
                {"geometry": dam, "color": "green", "marker": "^"},
                {"geometry": reservoir_geom, "color": "aqua", "edgecolor": "#00008B"},
                {"geometry": farthest_point, "color": "red", "marker": "x"},
                {"geometry": actual_flow_path, "color": "blue"},
            )
            _plot_geometry_layers(axis, layers)

            axis.set_xlabel("Longitude")
            axis.set_ylabel("Latitude")
            axis.set_title(title)

        # Hand-built handles: the GeoDataFrame plots above carry no labels.
        custom_handles = [
            Line2D([0], [0], marker='^', color='w', label='Dam', markerfacecolor='green', markersize=12),
            Patch(facecolor='aqua', edgecolor='#00008B', label='Reservoir'),
            Line2D([0], [0], marker='x', color='red', label='Reservoir inlet', markersize=8),
            Line2D([0], [0], color='blue', lw=2, label='Shortest Flow Path'),
        ]

        # Add one shared legend
        fig.legend(handles=custom_handles, loc='lower center', ncol=4, bbox_to_anchor=(0.5, -0.05))

        fig.tight_layout()
        fig.savefig(save_path, bbox_inches='tight', dpi=300)
    finally:
        plt.close(fig)


def _plot_geometry_layers(axis, layers):
    """Draw each layer's geometry on ``axis``: points on top, lines in between, polygons at the bottom."""
    for layer in layers:
        geometry = layer["geometry"]
        if geometry is None or geometry.is_empty:
            continue

        gdf = gpd.GeoDataFrame(geometry=[geometry])

        if geometry.geom_type == "Point":
            gdf.plot(ax=axis, color=layer["color"], marker=layer.get("marker", "o"), zorder=3, markersize=90)
        elif geometry.geom_type in ("Polygon", "MultiPolygon"):
            gdf.plot(ax=axis, color=layer["color"], edgecolor=layer.get("edgecolor"), zorder=1)
        else:
            gdf.plot(ax=axis, color=layer["color"], zorder=2)
|
|
519
|
+
|
pyreclaim-0.4.0.dist-info/RECORD
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
pyreclaim-0.4.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
2
|
-
reclaim/__init__.py,sha256=PnNGOrv5as3H8_WWLE3QqHR2LnqMgutXD8lDA-8PnCE,33
|
|
3
|
-
reclaim/generate_features.py,sha256=yKR-yIySM4wV_vJHTg7L61geh7p-KTKjfQs0hVyF4Ok,6653
|
|
4
|
-
reclaim/reclaim.py,sha256=SqgnChFoOKlhRm6tNnd1MDOOy5QFGQOZixaiJjN4tIA,19983
|
|
5
|
-
reclaim/derived_features/__init__.py,sha256=boN0ilez7nbWdn4RKD4n7whlGrg_X-zxrqMv5RA6cmg,67
|
|
6
|
-
reclaim/derived_features/feature_engineering_and_transformation.py,sha256=01j7XP8ciDQLlPXdpRaUswlpNQDnqfHB-4b7_P4ZrXs,3472
|
|
7
|
-
reclaim/dynamic_features/__init__.py,sha256=gjZmRYl9zCccSenz6f38EKntrV6ueT0_9MHRrjtw2Og,45
|
|
8
|
-
reclaim/dynamic_features/catchment_dynamic.py,sha256=BHRci-Hh4xZ-UviAXKS3oPzGonzw81GQ4DUUOamUF7I,3273
|
|
9
|
-
reclaim/dynamic_features/reservoir_dynamic.py,sha256=4y3agRw0Qwq9k6Qe06upSyCu_TC9kBmy9g-2S8rHLjE,5409
|
|
10
|
-
reclaim/dynamic_features/utils/__init__.py,sha256=_mTSUeDu1jG5oBHvDKkMPx2A3xtIuSCpGpcMIURzmkU,89
|
|
11
|
-
reclaim/dynamic_features/utils/catchment_meteorology.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
reclaim/dynamic_features/utils/inflow_outflow.py,sha256=vD5CTAR12gCd56y8zDpzKfzJvG_qU_uJFD5OAajyb-g,2384
|
|
13
|
-
reclaim/dynamic_features/utils/rainfall.py,sha256=oWn1RbeHHmhZ--N7bx9UiJptRrPynYGXe1HTfPjHjWg,1387
|
|
14
|
-
reclaim/dynamic_features/utils/statistical_metrics.py,sha256=qtiLtFCqtOa6sDkpCmZmGsS_kub4rkYqUKgSEODZea4,4881
|
|
15
|
-
reclaim/dynamic_features/utils/ts_aggregate.py,sha256=lv4tIBwCHlmEdaexNEeb2zSvtWo4x4sGTtFZ1KZmZ-4,2469
|
|
16
|
-
reclaim/static_features/__init__.py,sha256=QRDAk3sgrGX-Oq8oRY5RESkWniVhcAPoQUa6jDJ59Zo,44
|
|
17
|
-
reclaim/static_features/catchment_static.py,sha256=vr3ZupzvaEkCn5f7g9AJp5-VfBGv5yPaMnJ33IHP9qU,3790
|
|
18
|
-
reclaim/static_features/reservoir_static.py,sha256=BXdpFO1PAcaEGAgLmorDcPDpuf5Rkr0Xv1UYgyyrV10,3497
|
|
19
|
-
reclaim/static_features/utils/__init__.py,sha256=y-4GVIqARI0g8s2FVlTfx_4XprcPQ9HP_2nbhSw1NVA,88
|
|
20
|
-
reclaim/static_features/utils/aec_shape.py,sha256=Tyew9tvPvNFpMOtPBf2GhdPBXoEjXr-VA2vyfY6ugYk,3270
|
|
21
|
-
reclaim/static_features/utils/area_perimeter.py,sha256=yDLpxjyfGDQdL121w65X5oWLxCSwtSuPvJa7n_mtBl0,1338
|
|
22
|
-
reclaim/static_features/utils/catchment_agreggate.py,sha256=_Pk30g5p7MVg_39R0_zElbkQketNy937WjGa9ugIoOY,5068
|
|
23
|
-
reclaim/static_features/utils/flow_length.py,sha256=VC4rUrmAxrutj4Cj5bTjlH3KXLRmwNjp04FCT74_LUg,16939
|
|
24
|
-
pyreclaim-0.4.0.dist-info/METADATA,sha256=KP_xlJZJpjMtx2-993R4hZXwJpFa5Xy8ixCjYRJ2z2Q,45464
|
|
25
|
-
pyreclaim-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
26
|
-
pyreclaim-0.4.0.dist-info/top_level.txt,sha256=uO95g-XnD3UQLLqi3q30muhaC9VqO04IqCbwmfsGmW4,8
|
|
27
|
-
pyreclaim-0.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|