pyreclaim 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
1
+ import pandas as pd
2
+ from shapely.geometry import Point, Polygon
3
+
4
+ # Import utils
5
+ from reclaim.static_features.utils.flow_length import find_actual_flow_path
6
+ from reclaim.static_features.utils.area_perimeter import calculate_length_area_meters
7
+ from reclaim.static_features.utils.aec_shape import concavity_index, mean_curvature, mean_slope
8
+
9
+
10
def reservoir_based_static_features(
    obc: float = None,
    hgt: float = None,
    mrb: str = None,
    lat: float = None,
    lon: float = None,
    reservoir_polygon: Polygon = None,
    inlet_point: Point = None,
    resolution: float = None,
    aec_df: pd.DataFrame = None
) -> pd.DataFrame:
    """
    Compute reservoir-based features for RECLAIM input dataset.

    Parameters
    ----------
    obc : float, optional
        Original Built Capacity (MCM), original design capacity of the reservoir.
    hgt : float, optional
        Dam height (meters).
    mrb : str, optional
        Major river basin name.
    lat : float, optional
        Latitude of dam location (degrees). Together with ``lon`` it defines
        the dam point used in the flow-length calculation.
    lon : float, optional
        Longitude of dam location (degrees).
    reservoir_polygon : shapely.geometry.Polygon, optional
        Reservoir polygon geometry used to compute area and perimeter.
    inlet_point : shapely.geometry.Point, optional
        Reservoir inlet location (if not provided, estimated internally).
    resolution : float, optional
        Spatial resolution used in flow length calculations.
    aec_df : pd.DataFrame, optional
        Area-Elevation Curve dataframe with columns ['area', 'elevation'].

    Returns
    -------
    pd.DataFrame
        A single-row DataFrame with the following columns:
        - OBC: Original Built Capacity (MCM)
        - HGT: Dam Height (m)
        - MRB: Major River Basin
        - LAT: Latitude (deg)
        - LON: Longitude (deg)
        - RA: Reservoir Area (sq km)
        - RP: Reservoir Perimeter (km)
        - FL: Flow Length (km)
        - AECS: AEC Mean Slope (km2/m)
        - AECC: AEC Mean Curvature (km2/m2)
        - AECI: AEC Concavity Index (DL)

        Features that cannot be computed from the supplied inputs are left
        as None.
    """

    features = {
        "OBC": obc,
        "HGT": hgt,
        "MRB": mrb,
        "LAT": lat,
        "LON": lon,
        "RA": None,
        "RP": None,
        "FL": None,
        "AECS": None,
        "AECC": None,
        "AECI": None,
    }

    # Area and Perimeter (helper returns metres / square metres).
    if reservoir_polygon is not None:
        features["RP"], features["RA"] = calculate_length_area_meters(reservoir_polygon, area=True)
        features["RA"] = features["RA"] / 1e6  # m2 -> km2
        features["RP"] = features["RP"] / 1e3  # m -> km

    # Flow Length. The dam point is derived from (lon, lat), so both
    # coordinates must be present before building the Point (the original
    # built it unconditionally, crashing on missing coordinates, and then
    # divided the whole 4-tuple returned by find_actual_flow_path by 1e3
    # before unpacking, which raises TypeError). Fix: guard the inputs,
    # unpack first, then convert only the flow-length element.
    if lat is not None and lon is not None and reservoir_polygon is not None:
        dam_point = Point(lon, lat)
        _, _, flow_length_m, _ = find_actual_flow_path(
            dam_point, reservoir_polygon, inlet_point, resolution
        )
        if flow_length_m is not None:
            features["FL"] = flow_length_m / 1e3  # m -> km

    # AEC metrics
    if aec_df is not None:
        features["AECS"] = mean_slope(aec_df)
        features["AECC"] = mean_curvature(aec_df)
        features["AECI"] = concavity_index(aec_df)

    return pd.DataFrame([features])
@@ -0,0 +1 @@
1
+ """Utility functions to generate static features for RECLAIM input dataset :no-index:"""
@@ -0,0 +1,101 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
def mean_slope(df: pd.DataFrame) -> float:
    """
    Return the mean first derivative dA/dz of a reservoir's
    area-elevation curve.

    Raises ValueError if the required columns are missing; returns NaN
    for an empty frame.
    """
    if not {'elevation', 'area'}.issubset(df.columns):
        raise ValueError("DataFrame must have columns: 'elevation', 'area'")
    if df.empty:
        return float('nan')

    # Collapse duplicate elevations to their mean area, then order by elevation.
    curve = (
        df.groupby("elevation", as_index=False)["area"]
        .mean()
        .sort_values("elevation")
        .reset_index(drop=True)
    )

    # Finite differences of area with respect to elevation.
    elev_steps = np.diff(curve['elevation'])
    area_steps = np.diff(curve['area'])

    # Mark zero elevation steps as NaN instead of dividing by zero.
    with np.errstate(divide='ignore', invalid='ignore'):
        gradients = np.where(elev_steps != 0, area_steps / elev_steps, np.nan)

    return float(np.nanmean(gradients))
27
+
28
+
29
def mean_curvature(df: pd.DataFrame) -> float:
    """
    Return the mean second derivative d2A/dz2 of a reservoir's
    area-elevation curve.

    Raises ValueError if the required columns are missing; returns NaN
    for an empty frame.
    """
    if not {'elevation', 'area'}.issubset(df.columns):
        raise ValueError("DataFrame must have columns: 'elevation', 'area'")
    if df.empty:
        return float('nan')

    # Collapse duplicate elevations to their mean area, then order by elevation.
    curve = (
        df.groupby("elevation", as_index=False)["area"]
        .mean()
        .sort_values("elevation")
        .reset_index(drop=True)
    )

    elev = curve['elevation']
    dz = np.diff(elev)
    da = np.diff(curve['area'])

    # First derivative; zero steps become NaN rather than dividing by zero.
    with np.errstate(divide='ignore', invalid='ignore'):
        first_deriv = np.where(dz != 0, da / dz, np.nan)

    # Second derivative: difference of consecutive slopes over the first
    # n-2 elevation steps.
    # NOTE(review): using np.diff(elevation[:-1]) as the spacing matches the
    # original formulation; a midpoint-based spacing (z[i+2]-z[i])/2 would be
    # the standard non-uniform-grid estimate — confirm intent.
    step2 = np.diff(elev[:-1])
    slope_change = np.diff(first_deriv)
    with np.errstate(divide='ignore', invalid='ignore'):
        second_deriv = np.where(step2 != 0, slope_change / step2, np.nan)

    return float(np.nanmean(second_deriv))
57
+
58
def concavity_index(df: pd.DataFrame) -> float:
    """
    Computes the concavity index from a reservoir's area-elevation curve.

    Both axes are normalized to [0, 1]; the index is the ratio of the area
    under the normalized curve to the area under the straight chord joining
    its first and last points. In normalized coordinates that chord is
    simply ``elev = area``.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain columns 'area' (km²) and 'elevation' (m).

    Returns
    -------
    float
        Concavity index (ratio of actual curve to straight line).
        Returns np.nan if invalid (fewer than 2 finite points, or a
        degenerate area/elevation span).
    """
    if not {'area', 'elevation'}.issubset(df.columns):
        raise ValueError("DataFrame must have columns: 'area' and 'elevation'")
    if df.empty:
        return np.nan

    # Convert to numpy and keep only finite (non-NaN, non-inf) pairs.
    area = df['area'].to_numpy(dtype=float)
    elevation = df['elevation'].to_numpy(dtype=float)

    mask = np.isfinite(area) & np.isfinite(elevation)
    area = area[mask]
    elevation = elevation[mask]

    if len(area) < 2:
        return np.nan

    # Order by elevation so the curve is traversed consistently
    # (mean_slope / mean_curvature sort the curve the same way).
    order = np.argsort(elevation)
    area = area[order]
    elevation = elevation[order]

    # Degenerate spans cannot be normalized (would divide by zero).
    area_span = area.max() - area.min()
    elev_span = elevation.max() - elevation.min()
    if area_span == 0 or elev_span == 0:
        return np.nan

    # Normalize to 0–1
    area_norm = (area - area.min()) / area_span
    elev_norm = (elevation - elevation.min()) / elev_span

    # np.trapz was removed in NumPy 2.0 in favour of np.trapezoid.
    trapezoid = getattr(np, "trapezoid", None)
    if trapezoid is None:
        trapezoid = np.trapz

    # Area under the actual curve vs the chord.
    # BUG FIX: the original built the baseline as np.linspace(0, 1, n) but
    # integrated it against the non-uniform area_norm x-positions, which does
    # not represent the straight line between the endpoints. The chord
    # evaluated at area_norm is area_norm itself.
    auc_curve = trapezoid(elev_norm, area_norm)
    auc_line = trapezoid(area_norm, area_norm)

    concavity = auc_curve / auc_line if auc_line > 0 else np.nan
    return concavity
@@ -0,0 +1,36 @@
1
+ import numpy as np
2
+ from shapely.ops import transform
3
+
4
def calculate_length_area_meters(geometry, area= True):
    """
    Calculate the length and area of a geometry in meters and square meters using
    approximate conversion factors based on the geometry's centroid latitude.

    Parameters:
        geometry (shapely.geometry): Geometry in WGS84 (EPSG:4326) to calculate length and area.
        area (bool): When True (default), return both length and area;
            when False, return only the length.

    Returns:
        tuple: (length in meters, area in square meters) when ``area`` is True,
        otherwise a single float (length in meters).

    Notes:
        The degree-to-meter factors form a flat-earth (equirectangular)
        approximation anchored at the centroid latitude; accuracy degrades
        for very large or high-latitude geometries.
    """
    # Get centroid for latitude reference
    centroid = geometry.centroid
    reference_latitude = centroid.y  # latitude of the centroid

    # Conversion factors
    lat_factor = 111_000  # meters per degree latitude
    lon_factor = 111_320 * np.cos(np.radians(reference_latitude))  # meters per degree longitude at given latitude

    # Transform function to scale degrees to meters
    def scale_degrees_to_meters(x, y):
        return (x * lon_factor, y * lat_factor)

    # Scale geometry from degrees to meters using the conversion factors
    scaled_geometry = transform(scale_degrees_to_meters, geometry)

    # Calculate length and area in meters and square meters
    length_meters = scaled_geometry.length
    if area:
        area_square_meters = scaled_geometry.area
        return length_meters, area_square_meters
    else:
        # BUG FIX: shapely's ``length`` is a plain float; the original
        # returned ``length_meters[0]``, which raises TypeError whenever
        # area=False.
        return length_meters
@@ -0,0 +1,147 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import xarray as xr
4
+ import geopandas as gpd
5
+ import regionmask
6
+ from collections import Counter
7
+ from shapely.geometry import Polygon
8
+
9
+
10
def compute_catchment_aggregate(
    netcdf_path,
    catchment_geometry,
    function_type="mean"  # Can be 'mean', 'mode', 'std', 'percent' or dict
) -> pd.DataFrame:
    """
    Compute catchment-based features by aggregating raster variables in a NetCDF file
    for a single catchment geometry.

    Parameters
    ----------
    netcdf_path : str or Path
        Path to the NetCDF file containing raster variables.

    catchment_geometry : shapely.geometry.Polygon or GeoSeries
        Catchment geometry (single polygon).

    function_type : str or dict, default="mean"
        Either a string ('mean', 'mode', 'std', 'percent') to apply to all variables,
        or a dictionary specifying function(s) per variable. Example::

            {
                "precip": "mean",
                "slope": ["mean", "std"],
                "landcover": {"type": "percent"}
            }

        Dict entries may also be ``{"type": "threshold_percent",
        "threshold": ..., "direction": "greater"|"less"}``.

    Returns
    -------
    pd.DataFrame
        A single-row DataFrame with catchment-level features.

    Raises
    ------
    ValueError
        If ``function_type`` has an unsupported form, the catchment does not
        overlap the raster, a threshold is missing, or a function name is
        unknown.
    """

    # Open dataset lazily (dask chunks) and declare geographic CRS.
    # NOTE(review): the ``.rio`` accessor requires ``rioxarray`` to be
    # imported; it is not imported in this module — confirm it is imported
    # elsewhere in the package, otherwise this line raises AttributeError.
    ds = xr.open_dataset(netcdf_path, chunks={'x': 200, 'y': 200})
    ds = ds.rio.write_crs("EPSG:4326")

    # Rename coords if needed so the rest of the function can assume x/y.
    if 'lon' in ds.dims and 'lat' in ds.dims:
        ds = ds.rename({'lon': 'x', 'lat': 'y'})

    variables = list(ds.data_vars)

    # Build function dict: a plain string is expanded to every data variable;
    # a dict is taken as-is (only its keys are processed below).
    if isinstance(function_type, str):
        apply_func = {var: function_type for var in variables}
    elif isinstance(function_type, dict):
        apply_func = function_type
    else:
        raise ValueError("function_type must be a string or a dictionary.")

    # Axis orientation check so the bounding-box slices below are ordered the
    # same way as the coordinate arrays (xarray .sel(slice) needs that).
    y_order = "descending" if ds.y[0] > ds.y[-1] else "ascending"
    x_order = "descending" if ds.x[0] > ds.x[-1] else "ascending"

    # Get catchment bounds
    minx, miny, maxx, maxy = catchment_geometry.bounds
    if y_order == "descending":
        y_slice = slice(maxy, miny)
    else:
        y_slice = slice(miny, maxy)
    if x_order == "descending":
        x_slice = slice(maxx, minx)
    else:
        x_slice = slice(minx, maxx)

    # Subset dataset to the catchment's bounding box before masking.
    subset_ds = ds.sel(x=x_slice, y=y_slice)

    # Create mask: mask_geopandas labels cells inside the first (and only)
    # region with index 0, NaN elsewhere, so ``== 0`` selects catchment cells.
    catchment_gdf = gpd.GeoDataFrame({"geometry": [catchment_geometry]}, crs="EPSG:4326")
    mask_from_geopandas = regionmask.mask_geopandas(catchment_gdf, subset_ds.x, subset_ds.y)
    catchment_mask = mask_from_geopandas == 0

    # No non-NaN mask cell means the geometry missed the raster entirely.
    if mask_from_geopandas.notnull().sum().sum().item() == 0:
        raise ValueError("Catchment mask is empty — geometry may not overlap the raster.")

    results = {}

    # Loop over variables requested in apply_func.
    # NOTE(review): a dict function_type whose key is not a dataset variable
    # will raise KeyError at ``subset_ds[var]`` — confirm callers validate keys.
    for var in apply_func.keys():
        data = subset_ds[var]
        # Keep only in-catchment cells, materialize, then drop all-NaN edges.
        masked = data.where(catchment_mask).compute()
        arr = masked.where(~masked.isnull(), drop=True)

        # Skip if empty (no valid cells for this variable)
        if arr.size == 0:
            continue

        func_list = apply_func[var]
        if not isinstance(func_list, list):
            func_list = [func_list]  # wrap into list so one spec and many are handled alike

        for func_info in func_list:
            # A bare string means "apply this statistic"; a dict carries
            # extra options (threshold / direction).
            if isinstance(func_info, str):
                func = func_info
                threshold = None
                threshold_direction = None
            elif isinstance(func_info, dict):
                func = func_info.get("type")
                threshold = func_info.get("threshold", None)
                threshold_direction = func_info.get("direction", "greater")
            else:
                raise ValueError(f"Invalid function format for variable {var}")

            if func == "mean":
                results[f"{var}_mean"] = float(arr.mean().item())

            elif func == "mode":
                # Most frequent cell value (intended for categorical rasters).
                # NOTE(review): NaN cells may survive into the Counter and the
                # result is a numpy scalar, not a Python number — confirm.
                vals = arr.values.flatten()
                results[f"{var}_mode"] = Counter(vals).most_common(1)[0][0]

            elif func == "std":
                results[f"{var}_std"] = float(arr.std().item())

            elif func == "percent":
                # Per-class percentage of cells (categorical rasters).
                vals = arr.values.flatten()
                total = len(vals)
                class_counts = Counter(vals)
                for cls, count in class_counts.items():
                    results[f"{var}_percent_{int(cls)}"] = (count / total) * 100

            elif func == "threshold_percent":
                if threshold is None:
                    raise ValueError(f"Threshold not provided for variable '{var}'")
                vals = arr.values.flatten()
                valid = vals[~np.isnan(vals)]
                # NOTE(review): if every cell is NaN, len(valid) == 0 and the
                # division below raises ZeroDivisionError — confirm intended.
                if threshold_direction == "greater":
                    percent = (valid > threshold).sum() / len(valid) * 100
                    results[f"{var}_percent_above_{threshold}"] = percent
                else:
                    # Any direction other than "greater" is treated as "below".
                    percent = (valid < threshold).sum() / len(valid) * 100
                    results[f"{var}_percent_below_{threshold}"] = percent

            else:
                raise ValueError(f"Unknown function type '{func}' for variable '{var}'")

    return pd.DataFrame([results])