pyreclaim 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
# Lookup table from numeric basin ID to basin name.
# NOTE(review): the IDs look like HydroBASINS level-3 identifiers — confirm
# against the dataset the IDs are taken from.
# Entries are grouped by ID prefix and kept in their original order.
ID_to_BASIN = {
    # --- IDs starting with 103 ---
    1030008110: "RUFIJI",
    1030011660: "ZAMBEZI",
    1030012590: "LIMPOPO",
    1030012600: "GOURITZ",
    1030015850: "ORANGE",
    1030022420: "NIGER",
    1030022430: "OUEME",
    1030023300: "VOLTA",
    1030023310: "BANDAMA",
    1030027430: "DRAA",
    1030029810: "MOULOUYA",
    1030034260: "NILE",
    1030040260: "LAKE TURKANA",
    1030040300: "MELRHIR",
    # --- IDs starting with 403 ---
    4030003020: "LIAO HE",
    4030006940: "HAI HE",
    4030007850: "YELLOW RIVER",
    4030009880: "YANGTZE",
    4030017020: "MEKONG",
    4030017030: "CHAO PHRAYA",
    4030018350: "MAE KLONG",
    4030022790: "IRRAWADDY",
    4030024190: "KARNAPHULI",
    4030025450: "GANGES",
    4030025460: "GODAVARI",
    4030029530: "NARMADA",
    4030031750: "KUTCH",
    4030033640: "INDUS",
    4030039450: "TONE",
    4030046370: "TAIWAN",
    4030048900: "SRI LANKA",
    4030050220: "UPPER ARAL SEA",
    4030050240: "LOWER ARAL SEA",
    # --- IDs starting with 503 ---
    5030007690: "BENGAWAN SOLO",
    5030015660: "KAPUAS",
    5030031240: "INDONESIA",
    5030055010: "MARIANA_ISLANDS",
    5030067860: "FITZROY",
    # --- IDs starting with 203 ---
    2030000010: "ORONTES",
    2030003440: "KIZILIRMAK",
    2030008490: "DANUBE",
    2030009230: "MARITSA RIVER",
    2030012730: "PO",
    2030014550: "TEVERE",
    2030016230: "RHONE",
    2030020320: "RHINE",
    2030026030: "VISTULA",
    2030046500: "SICILY",
    2030047500: "SARDINIA_CORSICA",
    2030068690: "KURA",
    2030073570: "SHATT AL ARAB",
    2030085720: "DEAD SEA",
    # --- IDs starting with 703 ---
    7030000010: "SANTIAGO",
    7030008710: "COLORADO (also COLORADO RIVER)",
    7030008720: "SAN JOAQUIN",
    7030014250: "ROGUE",
    7030014930: "COLUMBIA",
    7030022240: "NELSON",
    7030034520: "SAINT LAWRENCE (also SAINT-LAURENT)",
    7030038340: "SUSQUEHANNA",
    7030042040: "MOBILE RIVER",
    7030047060: "MISSISSIPPI",
    7030047840: "BRAZOS",
    7030049270: "RIO GRANDE (also BRAVO)",
    7030049280: "USUMACINTA",
    7030073620: "GREAT SALT LAKE",
    # --- IDs starting with 603 ---
    6030000010: "ATRATO",
    6030004470: "ORINOCO",
    6030007000: "AMAZON (also AMAZONAS)",
    6030009770: "JAGUARIBE",
    6030011780: "SAO FRANCISCO",
    6030011790: "DOCE",
    6030016970: "PARANA",
    6030029280: "CHIRA",
    6030032290: "GUAYAS",
}
76
+
77
def get_basin_name(basin_id):
    """Return the basin name registered for ``basin_id`` in ``ID_to_BASIN``.

    Parameters
    ----------
    basin_id : int
        Numeric basin identifier used as a key of ``ID_to_BASIN``.

    Returns
    -------
    str
        The name stored for that identifier.

    Raises
    ------
    KeyError
        If ``basin_id`` is not a key of ``ID_to_BASIN``.
    """
    basin_name = ID_to_BASIN[basin_id]
    return basin_name
@@ -1,10 +1,14 @@
1
+ import os
1
2
  import pandas as pd
2
3
  import numpy as np
3
4
  import xarray as xr
5
+ import rioxarray
4
6
  import geopandas as gpd
5
7
  import regionmask
8
+ import rasterio.features
6
9
  from collections import Counter
7
10
  from shapely.geometry import Polygon
11
+ from tqdm import tqdm
8
12
 
9
13
 
10
14
  def compute_catchment_aggregate(
@@ -144,4 +148,208 @@ def compute_catchment_aggregate(
144
148
  else:
145
149
  raise ValueError(f"Unknown function type '{func}' for variable '{var}'")
146
150
 
147
- return pd.DataFrame([results])
151
+ return pd.DataFrame([results])
152
+
153
def compute_catchment_aggregate_multi_reservoir(
    netcdf_path,
    catchments_gdf: gpd.GeoDataFrame,
    function_type="mean",
    idx_col: str = "idx",
) -> pd.DataFrame:
    """
    Compute catchment-based aggregated features for MULTIPLE catchments
    from a single NetCDF file (opened only once and closed on exit).

    Parameters
    ----------
    netcdf_path : str or Path
        Path to the NetCDF file containing raster variables. The dataset is
        tagged with CRS EPSG:4326; ``lon``/``lat`` dimensions are renamed to
        ``x``/``y``.

    catchments_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing multiple catchment geometries.
        Must include:
        - geometry column
        - idx_col identifying each reservoir

    function_type : str or dict, default="mean"
        Aggregation rule(s).
        - str: the same rule is applied to every data variable of the file.
        - dict: maps a variable name to a rule or to a list of rules.
        A rule is either a string (``"mean"``, ``"mode"``, ``"std"``,
        ``"percent"``, ``"threshold_percent"``) or a dict with the keys
        ``"type"``, ``"threshold"`` and ``"direction"`` (default
        ``"greater"``; any other value is treated as "below").

    idx_col : str, default="idx"
        Column identifying reservoir index.

    Returns
    -------
    pd.DataFrame
        DataFrame indexed by ``idx_col`` with one row per catchment and
        columns such as ``<var>_mean``, ``<var>_mode``, ``<var>_std``,
        ``<var>_percent_<class>``, ``<var>_percent_above_<threshold>`` or
        ``<var>_percent_below_<threshold>``. Catchments with a missing/empty
        geometry, an empty spatial subset, or no valid pixel for a variable
        get no value for the affected columns.

    Raises
    ------
    ValueError
        If ``function_type`` is neither a string nor a dict, if a rule has an
        invalid format or an unknown type, or if ``threshold_percent`` is
        requested without a threshold.
    """
    results = []

    # The context manager guarantees the file handle is released even when an
    # aggregation rule raises; every value is computed before the block exits.
    with xr.open_dataset(netcdf_path, chunks={"x": 200, "y": 200}) as source_ds:
        ds = source_ds.rio.write_crs("EPSG:4326")

        if "lon" in ds.dims and "lat" in ds.dims:
            ds = ds.rename({"lon": "x", "lat": "y"})

        if isinstance(function_type, str):
            apply_func = {v: function_type for v in ds.data_vars}
        elif isinstance(function_type, dict):
            apply_func = function_type
        else:
            raise ValueError("function_type must be a string or a dictionary.")

        # Grid properties are identical for every catchment: compute them once.
        x_coords = ds.x.values
        y_coords = ds.y.values
        x_descending = bool(x_coords[0] > x_coords[-1])
        y_descending = bool(y_coords[0] > y_coords[-1])
        # One-cell padding of the bounding box; 0 when the axis has a single cell.
        dx = float(abs(x_coords[1] - x_coords[0])) if x_coords.size > 1 else 0.0
        dy = float(abs(y_coords[1] - y_coords[0])) if y_coords.size > 1 else 0.0

        # ---- Loop over catchments (cheap loop now)
        for _, row_gdf in tqdm(
            catchments_gdf.iterrows(),
            total=catchments_gdf.shape[0],
            desc=f"Aggregating static features of catchments related to {os.path.basename(netcdf_path)}",
            unit="catchment",
        ):
            idx = row_gdf[idx_col]
            geom = row_gdf.geometry

            # Registered up front so skipped catchments still get a row.
            row = {idx_col: idx}
            results.append(row)

            if geom is None or geom.is_empty:
                continue

            # ---- Spatial subset FIRST (label-based slices follow axis order)
            minx, miny, maxx, maxy = geom.bounds
            minx -= dx
            maxx += dx
            miny -= dy
            maxy += dy

            x_slice = slice(maxx, minx) if x_descending else slice(minx, maxx)
            y_slice = slice(maxy, miny) if y_descending else slice(miny, maxy)
            subset_ds = ds.sel(x=x_slice, y=y_slice)

            # -- Skip if empty subset
            if subset_ds.x.size == 0 or subset_ds.y.size == 0:
                print(f"Warning: Subset for catchment idx={idx} is empty. Skipping.")
                continue

            # ---- Create mask on SUBSET
            catchment_mask = xr.DataArray(
                _rasterize_catchment_mask(geom, subset_ds),
                coords={"y": subset_ds.y, "x": subset_ds.x},
                dims=("y", "x"),
            )

            # ---- Aggregate variables
            for var, func_list in apply_func.items():
                masked = subset_ds[var].where(catchment_mask).compute()

                # Keep only pixels inside the catchment that hold data. Pixels
                # of the bounding box outside the mask are NaN and must not
                # enter the class counts, the mode or the percentages.
                values = masked.values.ravel()
                valid = values[~pd.isnull(values)]
                if valid.size == 0:
                    continue

                _apply_aggregations(row, var, valid, func_list)

    if not results:
        # No catchment at all: return an empty frame that is still indexed by idx_col.
        return pd.DataFrame(columns=[idx_col]).set_index(idx_col)

    return pd.DataFrame(results).set_index(idx_col)


def _rasterize_catchment_mask(geom, subset_ds):
    """Return a boolean (y, x) array marking the pixels of ``subset_ds`` covered by ``geom``."""
    shape = (subset_ds.sizes["y"], subset_ds.sizes["x"])

    mask = rasterio.features.rasterize(
        [(geom, 1)],
        out_shape=shape,
        transform=subset_ds.rio.transform(),
        all_touched=True,
        fill=0,
        dtype="uint8",
    )

    # --- fallback if rasterized mask is empty: use the pixel nearest to the centroid
    if mask.sum() == 0:
        centroid = geom.centroid
        x_idx = np.argmin(np.abs(subset_ds.x.values - centroid.x))
        y_idx = np.argmin(np.abs(subset_ds.y.values - centroid.y))

        mask = np.zeros(shape, dtype=np.uint8)
        mask[y_idx, x_idx] = 1

    return mask.astype(bool)


def _apply_aggregations(row, var, valid, func_list):
    """Store in ``row`` every statistic requested by ``func_list`` for the NaN-free 1-D array ``valid``."""
    rules = func_list if isinstance(func_list, list) else [func_list]

    for func_info in rules:
        if isinstance(func_info, str):
            func = func_info
            threshold = None
            threshold_direction = None
        elif isinstance(func_info, dict):
            func = func_info.get("type")
            threshold = func_info.get("threshold", None)
            threshold_direction = func_info.get("direction", "greater")
        else:
            raise ValueError(f"Invalid function format for variable {var}")

        if func == "mean":
            row[f"{var}_mean"] = float(valid.mean())

        elif func == "mode":
            # Counter keeps first-seen order, so ties resolve to the first value met.
            row[f"{var}_mode"] = Counter(valid).most_common(1)[0][0]

        elif func == "std":
            row[f"{var}_std"] = float(valid.std())

        elif func == "percent":
            total = len(valid)
            for cls, count in Counter(valid).items():
                row[f"{var}_percent_{int(cls)}"] = (count / total) * 100

        elif func == "threshold_percent":
            if threshold is None:
                raise ValueError(f"Threshold not provided for variable '{var}'")
            if threshold_direction == "greater":
                percent = (valid > threshold).sum() / len(valid) * 100
                row[f"{var}_percent_above_{threshold}"] = percent
            else:
                percent = (valid < threshold).sum() / len(valid) * 100
                row[f"{var}_percent_below_{threshold}"] = percent

        else:
            raise ValueError(f"Unknown function type '{func}' for variable '{var}'")
@@ -2,6 +2,9 @@ import numpy as np
2
2
  import pandas as pd
3
3
  import geopandas as gpd
4
4
  import networkx as nx # NetworkX library import
5
+ import matplotlib.pyplot as plt
6
+ from matplotlib.lines import Line2D
7
+ from matplotlib.patches import Patch
5
8
 
6
9
  from shapely.geometry import shape, Point, LineString, Polygon, GeometryCollection, MultiLineString, MultiPolygon
7
10
  from shapely.ops import transform, split, linemerge, unary_union
@@ -452,4 +455,65 @@ def find_actual_flow_path(dam_point, reservoir_polygon, inlet_point=None, resolu
452
455
  # Step4: Find the shortest path within geometry between dam_point and far_end_point
453
456
  path, graph = create_continuous_linestring(dam_point,far_end_point, simplified_reservoir, optimal_resolution)
454
457
 
455
- return simplified_reservoir,far_end_point, path, graph
458
+ return simplified_reservoir,far_end_point, path, graph
459
+
460
def plot_flow_length_with_reservoir(dam, reservoir, farthest_point, actual_flow_path, simplified_reservoir, save_path):
    """Save a two-panel figure of the dam, reservoir, farthest point and flow path.

    The left panel draws the original ``reservoir`` geometry and the right
    panel draws ``simplified_reservoir``; the dam, farthest point and flow
    path are drawn identically on both panels.

    Parameters
    ----------
    dam : shapely geometry
        Dam location, drawn as a green triangle when it is a Point.
    reservoir : shapely geometry
        Original reservoir geometry (left panel).
    farthest_point : shapely geometry
        Point drawn as a red cross (labelled "Reservoir inlet" in the legend).
    actual_flow_path : shapely geometry
        Flow path drawn in blue.
    simplified_reservoir : shapely geometry
        Simplified reservoir geometry (right panel).
    save_path : str or Path
        Destination of the figure, written at 300 dpi.

    Returns
    -------
    None
    """
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))

    # try/finally so the figure is released even if plotting or saving fails.
    try:
        panels = (
            (ax[0], reservoir, 'Original Geometry'),
            (ax[1], simplified_reservoir, 'Simplified Geometry'),
        )

        for axis, reservoir_geom, title in panels:
            # Same layers on both panels; only the reservoir geometry differs.
            layers = (
                {"geometry": dam, "color": "green", "marker": "^"},
                {"geometry": reservoir_geom, "color": "aqua", "edgecolor": "#00008B"},
                {"geometry": farthest_point, "color": "red", "marker": "x"},
                {"geometry": actual_flow_path, "color": "blue"},
            )

            for layer in layers:
                geometry = layer["geometry"]
                gdf = gpd.GeoDataFrame(geometry=[geometry])

                # zorder stacks polygons (1) below lines (2) below points (3).
                if geometry.geom_type == "Point":
                    gdf.plot(ax=axis, color=layer["color"], marker=layer.get("marker", "o"), zorder=3, markersize=90)
                elif geometry.geom_type in ("Polygon", "MultiPolygon"):
                    # .get: layers without an explicit edge colour fall back to the default.
                    gdf.plot(ax=axis, color=layer["color"], edgecolor=layer.get("edgecolor"), zorder=1)
                else:
                    gdf.plot(ax=axis, color=layer["color"], zorder=2)

            # Set axis labels and titles
            axis.set_xlabel("Longitude")
            axis.set_ylabel("Latitude")
            axis.set_title(title)

        # Custom handles: geopandas plots do not register legend entries here.
        custom_handles = [
            Line2D([0], [0], marker='^', color='w', label='Dam', markerfacecolor='green', markersize=12),
            Patch(facecolor='aqua', edgecolor='#00008B', label='Reservoir'),
            Line2D([0], [0], marker='x', color='red', label='Reservoir inlet', markersize=8),
            Line2D([0], [0], color='blue', lw=2, label='Shortest Flow Path'),
        ]

        # Add one shared legend below both panels
        fig.legend(handles=custom_handles, loc='lower center', ncol=4, bbox_to_anchor=(0.5, -0.05))

        fig.tight_layout()
        fig.savefig(save_path, bbox_inches='tight', dpi=300)
    finally:
        # Close this specific figure rather than whatever is "current".
        plt.close(fig)
519
+
@@ -1,27 +0,0 @@
1
- pyreclaim-0.3.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
2
- reclaim/__init__.py,sha256=PnNGOrv5as3H8_WWLE3QqHR2LnqMgutXD8lDA-8PnCE,33
3
- reclaim/generate_features.py,sha256=MsHC9POhji9zMAr-OIbaDrIkYiIlO2D5z3hQmRN2BcY,6164
4
- reclaim/reclaim.py,sha256=RVaEmnojfJm6iS8kfxXfxD4SefSDcbC1QlarbSWCtuo,19815
5
- reclaim/derived_features/__init__.py,sha256=boN0ilez7nbWdn4RKD4n7whlGrg_X-zxrqMv5RA6cmg,67
6
- reclaim/derived_features/feature_engineering_and_transformation.py,sha256=E7hzrrWgYYtzCv6S1EWi-bBQdxF52sB26v13XVnq8qU,3234
7
- reclaim/dynamic_features/__init__.py,sha256=gjZmRYl9zCccSenz6f38EKntrV6ueT0_9MHRrjtw2Og,45
8
- reclaim/dynamic_features/catchment_dynamic.py,sha256=BHRci-Hh4xZ-UviAXKS3oPzGonzw81GQ4DUUOamUF7I,3273
9
- reclaim/dynamic_features/reservoir_dynamic.py,sha256=YMd2aiSZEhAk5-qPzLK4Ondpm5Hi6tcAeM_nc4dW-fo,5318
10
- reclaim/dynamic_features/utils/__init__.py,sha256=_mTSUeDu1jG5oBHvDKkMPx2A3xtIuSCpGpcMIURzmkU,89
11
- reclaim/dynamic_features/utils/catchment_meteorology.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- reclaim/dynamic_features/utils/inflow_outflow.py,sha256=vD5CTAR12gCd56y8zDpzKfzJvG_qU_uJFD5OAajyb-g,2384
13
- reclaim/dynamic_features/utils/rainfall.py,sha256=oWn1RbeHHmhZ--N7bx9UiJptRrPynYGXe1HTfPjHjWg,1387
14
- reclaim/dynamic_features/utils/statistical_metrics.py,sha256=qtiLtFCqtOa6sDkpCmZmGsS_kub4rkYqUKgSEODZea4,4881
15
- reclaim/dynamic_features/utils/ts_aggregate.py,sha256=4qDLgViE51ppyrxzXIh5GQ_z94liHZcfBSNADiUGnGk,2208
16
- reclaim/static_features/__init__.py,sha256=QRDAk3sgrGX-Oq8oRY5RESkWniVhcAPoQUa6jDJ59Zo,44
17
- reclaim/static_features/catchment_static.py,sha256=vr3ZupzvaEkCn5f7g9AJp5-VfBGv5yPaMnJ33IHP9qU,3790
18
- reclaim/static_features/reservoir_static.py,sha256=BNtZOeLHdT324K0VsduATnTLvcYkdTcV1TZjizkkQ9g,3188
19
- reclaim/static_features/utils/__init__.py,sha256=y-4GVIqARI0g8s2FVlTfx_4XprcPQ9HP_2nbhSw1NVA,88
20
- reclaim/static_features/utils/aec_shape.py,sha256=6N0MbbEpVJLQ6AD_LE0sbxuaUTm7Ul9bK2-Ml_krA7Q,3262
21
- reclaim/static_features/utils/area_perimeter.py,sha256=AJlTQ2NZbIfTOSYTG01UdwksutkT9IykA-4uvn4w8Qg,1341
22
- reclaim/static_features/utils/catchment_agreggate.py,sha256=tooDg_-vA6eamUR3iR4g-NcQGnsQYfSE4kH8iyjK9Xo,5051
23
- reclaim/static_features/utils/flow_length.py,sha256=VC4rUrmAxrutj4Cj5bTjlH3KXLRmwNjp04FCT74_LUg,16939
24
- pyreclaim-0.3.0.dist-info/METADATA,sha256=aHqNiLOVfkRWCYxBYvunXr0MNpX4vunewa4-Rl4yt_0,45381
25
- pyreclaim-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
26
- pyreclaim-0.3.0.dist-info/top_level.txt,sha256=uO95g-XnD3UQLLqi3q30muhaC9VqO04IqCbwmfsGmW4,8
27
- pyreclaim-0.3.0.dist-info/RECORD,,