pyreclaim 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyreclaim-0.1.0.dist-info/METADATA +803 -0
- pyreclaim-0.1.0.dist-info/RECORD +27 -0
- pyreclaim-0.1.0.dist-info/WHEEL +5 -0
- pyreclaim-0.1.0.dist-info/licenses/LICENSE +674 -0
- pyreclaim-0.1.0.dist-info/top_level.txt +1 -0
- reclaim/__init__.py +1 -0
- reclaim/derived_features/__init__.py +1 -0
- reclaim/derived_features/feature_engineering_and_transformation.py +75 -0
- reclaim/dynamic_features/__init__.py +1 -0
- reclaim/dynamic_features/catchment_dynamic.py +103 -0
- reclaim/dynamic_features/reservoir_dynamic.py +148 -0
- reclaim/dynamic_features/utils/__init__.py +1 -0
- reclaim/dynamic_features/utils/catchment_meteorology.py +0 -0
- reclaim/dynamic_features/utils/inflow_outflow.py +95 -0
- reclaim/dynamic_features/utils/rainfall.py +49 -0
- reclaim/dynamic_features/utils/statistical_metrics.py +190 -0
- reclaim/dynamic_features/utils/ts_aggregate.py +63 -0
- reclaim/generate_features.py +141 -0
- reclaim/reclaim.py +503 -0
- reclaim/static_features/__init__.py +1 -0
- reclaim/static_features/catchment_static.py +127 -0
- reclaim/static_features/reservoir_static.py +97 -0
- reclaim/static_features/utils/__init__.py +1 -0
- reclaim/static_features/utils/aec_shape.py +101 -0
- reclaim/static_features/utils/area_perimeter.py +36 -0
- reclaim/static_features/utils/catchment_agreggate.py +147 -0
- reclaim/static_features/utils/flow_length.py +455 -0
@@ -0,0 +1,97 @@
import pandas as pd
from shapely.geometry import Point, Polygon

# Import utils
from reclaim.static_features.utils.flow_length import find_actual_flow_path
from reclaim.static_features.utils.area_perimeter import calculate_length_area_meters
from reclaim.static_features.utils.aec_shape import concavity_index, mean_curvature, mean_slope


def reservoir_based_static_features(
    obc: float = None,
    hgt: float = None,
    mrb: str = None,
    lat: float = None,
    lon: float = None,
    reservoir_polygon: Polygon = None,
    inlet_point: Point = None,
    resolution: float = None,
    aec_df: pd.DataFrame = None,
) -> pd.DataFrame:
    """
    Compute reservoir-based features for the RECLAIM input dataset.

    Parameters
    ----------
    obc : float, optional
        Original Built Capacity (MCM), the reservoir's original design capacity.
    hgt : float, optional
        Dam height (meters).
    mrb : str, optional
        Major river basin name.
    lat : float, optional
        Latitude of the dam location (degrees); together with ``lon`` it defines
        the dam point used in the flow-length calculation.
    lon : float, optional
        Longitude of the dam location (degrees).
    reservoir_polygon : shapely.geometry.Polygon, optional
        Reservoir polygon geometry used to compute area and perimeter.
    inlet_point : shapely.geometry.Point, optional
        Reservoir inlet location (if not provided, estimated internally).
    resolution : float, optional
        Spatial resolution used in flow length calculations.
    aec_df : pd.DataFrame, optional
        Area-Elevation Curve dataframe with columns ['area', 'elevation'].

    Returns
    -------
    pd.DataFrame
        A single-row DataFrame with the following columns:
        - OBC: Original Built Capacity (MCM)
        - HGT: Dam Height (m)
        - MRB: Major River Basin
        - LAT: Latitude (deg)
        - LON: Longitude (deg)
        - RA: Reservoir Area (sq km)
        - RP: Reservoir Perimeter (km)
        - FL: Flow Length (km)
        - AECS: AEC Mean Slope (km2/m)
        - AECC: AEC Mean Curvature (km2/m2)
        - AECI: AEC Concavity Index (DL)
    """

    features = {
        "OBC": obc,
        "HGT": hgt,
        "MRB": mrb,
        "LAT": lat,
        "LON": lon,
        "RA": None,
        "RP": None,
        "FL": None,
        "AECS": None,
        "AECC": None,
        "AECI": None,
    }

    # Area and Perimeter
    if reservoir_polygon is not None:
        features["RP"], features["RA"] = calculate_length_area_meters(reservoir_polygon, area=True)
        features["RA"] = features["RA"] / 1e6  # m2 → km2
        features["RP"] = features["RP"] / 1e3  # m → km

    # Flow Length (dam point is derived from lon/lat)
    dam_point = Point(lon, lat) if lon is not None and lat is not None else None
    if dam_point is not None and reservoir_polygon is not None:
        # find_actual_flow_path returns a tuple; the third element is the flow length in metres
        _, _, flow_length_m, _ = find_actual_flow_path(dam_point, reservoir_polygon, inlet_point, resolution)
        features["FL"] = flow_length_m / 1e3  # m → km

    # AEC metrics
    if aec_df is not None:
        features["AECS"] = mean_slope(aec_df)
        features["AECC"] = mean_curvature(aec_df)
        features["AECI"] = concavity_index(aec_df)

    return pd.DataFrame([features])
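Judging by the added line count and contents, the 97-line hunk above corresponds to reclaim/static_features/reservoir_static.py from the file list. A minimal usage sketch, not part of the published diff: it assumes the wheel is installed so the reclaim imports resolve, and every input value below is invented for illustration. Without a reservoir polygon, RA, RP and FL simply remain None.

import pandas as pd

from reclaim.static_features.reservoir_static import reservoir_based_static_features

# Made-up area-elevation curve: elevation in m, area in km2.
aec_df = pd.DataFrame({
    "elevation": [100.0, 105.0, 110.0, 115.0],
    "area": [0.5, 2.0, 4.5, 8.0],
})

row = reservoir_based_static_features(
    obc=120.0,            # MCM (illustrative)
    hgt=45.0,             # m (illustrative)
    mrb="Example Basin",  # illustrative basin name
    lat=16.5,
    lon=78.2,
    aec_df=aec_df,        # no polygon supplied, so RA/RP/FL stay None
)
print(row.T)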
@@ -0,0 +1 @@
"""Utility functions to generate static features for RECLAIM input dataset :no-index:"""
@@ -0,0 +1,101 @@
import numpy as np
import pandas as pd

def mean_slope(df: pd.DataFrame) -> float:
    """
    Computes the mean slope (dA/dz) of reservoir bathymetry
    from area–elevation data.
    """
    if not {'elevation', 'area'}.issubset(df.columns):
        raise ValueError("DataFrame must have columns: 'elevation', 'area'")
    if df.empty:
        return float('nan')

    # Sort and drop duplicate elevations (keep mean area if duplicates exist)
    df = df.groupby("elevation", as_index=False)["area"].mean()
    df = df.sort_values("elevation").reset_index(drop=True)

    # Compute slope (Δarea/Δelevation)
    dz = np.diff(df['elevation'])
    da = np.diff(df['area'])

    # Avoid division by zero
    with np.errstate(divide='ignore', invalid='ignore'):
        slopes = np.where(dz != 0, da / dz, np.nan)

    return float(np.nanmean(slopes))


def mean_curvature(df: pd.DataFrame) -> float:
    """
    Computes the mean curvature (d²A/dz²) of reservoir bathymetry
    from area–elevation data.
    """
    if not {'elevation', 'area'}.issubset(df.columns):
        raise ValueError("DataFrame must have columns: 'elevation', 'area'")
    if df.empty:
        return float('nan')

    # Sort and drop duplicate elevations (keep mean area if duplicates exist)
    df = df.groupby("elevation", as_index=False)["area"].mean()
    df = df.sort_values("elevation").reset_index(drop=True)

    dz = np.diff(df['elevation'])
    da = np.diff(df['area'])

    # First derivative
    with np.errstate(divide='ignore', invalid='ignore'):
        slopes = np.where(dz != 0, da / dz, np.nan)

    # Second derivative (curvature)
    dz2 = np.diff(df['elevation'][:-1])
    dslopes = np.diff(slopes)
    with np.errstate(divide='ignore', invalid='ignore'):
        curvature = np.where(dz2 != 0, dslopes / dz2, np.nan)

    return float(np.nanmean(curvature))

def concavity_index(df: pd.DataFrame) -> float:
    """
    Computes the concavity index from a reservoir's area-elevation curve.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain columns 'area' (km²) and 'elevation' (m).

    Returns
    -------
    float
        Concavity index (ratio of actual curve to straight line).
        Returns np.nan if invalid.
    """
    if not {'area', 'elevation'}.issubset(df.columns):
        raise ValueError("DataFrame must have columns: 'area' and 'elevation'")
    if df.empty:
        return np.nan

    # Convert to numpy and filter invalid values
    area = df['area'].to_numpy(dtype=float)
    elevation = df['elevation'].to_numpy(dtype=float)

    mask = (~np.isnan(area)) & (~np.isnan(elevation)) & (~np.isinf(area)) & (~np.isinf(elevation))
    area = area[mask]
    elevation = elevation[mask]

    if len(area) < 2:
        return np.nan

    # Normalize to 0–1
    area_norm = (area - area.min()) / (area.max() - area.min())
    elev_norm = (elevation - elevation.min()) / (elevation.max() - elevation.min())

    # Straight line between first and last point
    line = np.linspace(0, 1, len(area_norm))

    # Area under actual curve vs line
    auc_curve = np.trapz(elev_norm, area_norm)
    auc_line = np.trapz(line, area_norm)

    concavity = auc_curve / auc_line if auc_line > 0 else np.nan
    return concavity
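The 101-line hunk above matches reclaim/static_features/utils/aec_shape.py. A small, self-contained check of the three AEC metrics on a synthetic area-elevation curve (not from the package); the numbers are illustrative only and the sketch assumes the package is importable as reclaim.

import pandas as pd

from reclaim.static_features.utils.aec_shape import (
    concavity_index,
    mean_curvature,
    mean_slope,
)

# Made-up area-elevation curve: elevation in m, area in km2.
aec = pd.DataFrame({
    "elevation": [100.0, 101.0, 103.0, 106.0, 110.0],
    "area": [0.0, 0.8, 2.0, 4.0, 7.0],
})

print(mean_slope(aec))       # average dA/dz (km2 per m)
print(mean_curvature(aec))   # average d2A/dz2 (km2 per m2)
print(concavity_index(aec))  # area under the normalized curve vs. a straight line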
@@ -0,0 +1,36 @@
import numpy as np
from shapely.ops import transform

def calculate_length_area_meters(geometry, area=True):
    """
    Calculate the length and area of a geometry in meters and square meters using
    approximate conversion factors based on the geometry's centroid latitude.

    Parameters:
        geometry (shapely.geometry): Geometry in WGS84 (EPSG:4326) to calculate length and area.
        area (bool): If True, also compute and return the area; if False, return only the length.

    Returns:
        tuple: (length in meters, area in square meters) when area=True,
        otherwise the length in meters alone.
    """
    # Get centroid for latitude reference
    centroid = geometry.centroid
    reference_latitude = centroid.y  # latitude of the centroid

    # Conversion factors
    lat_factor = 111_000  # meters per degree latitude
    lon_factor = 111_320 * np.cos(np.radians(reference_latitude))  # meters per degree longitude at given latitude

    # Transform function to scale degrees to meters
    def scale_degrees_to_meters(x, y):
        return (x * lon_factor, y * lat_factor)

    # Scale geometry from degrees to meters using the conversion factors
    scaled_geometry = transform(scale_degrees_to_meters, geometry)

    # Calculate length and area in meters and square meters
    length_meters = scaled_geometry.length
    if area:
        area_square_meters = scaled_geometry.area
        return length_meters, area_square_meters
    else:
        return length_meters
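The 36-line hunk above matches reclaim/static_features/utils/area_perimeter.py. A rough sanity-check sketch (not from the package): with roughly 111 km per degree, a 0.01° × 0.01° square near 10° N should come out at about 1.1 km per side, so about 4.4 km of perimeter and about 1.2 km² of area.

from shapely.geometry import box

from reclaim.static_features.utils.area_perimeter import calculate_length_area_meters

# A made-up 0.01° x 0.01° square near 10° N, 76° E (WGS84 degrees).
square = box(76.00, 10.00, 76.01, 10.01)

perimeter_m, area_m2 = calculate_length_area_meters(square, area=True)
print(perimeter_m / 1e3, "km")   # roughly 4.4 km
print(area_m2 / 1e6, "km2")      # roughly 1.2 km2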
@@ -0,0 +1,147 @@
import pandas as pd
import numpy as np
import xarray as xr
import geopandas as gpd
import regionmask
import rioxarray  # noqa: F401  # registers the .rio accessor used below
from collections import Counter
from shapely.geometry import Polygon


def compute_catchment_aggregate(
    netcdf_path,
    catchment_geometry,
    function_type="mean"  # Can be 'mean', 'mode', 'std', 'percent' or dict
) -> pd.DataFrame:
    """
    Compute catchment-based features by aggregating raster variables in a NetCDF file
    for a single catchment geometry.

    Parameters
    ----------
    netcdf_path : str or Path
        Path to the NetCDF file containing raster variables.

    catchment_geometry : shapely.geometry.Polygon or GeoSeries
        Catchment geometry (single polygon).

    function_type : str or dict, default="mean"
        Either a string ('mean', 'mode', 'std', 'percent') to apply to all variables,
        or a dictionary specifying function(s) per variable. Dictionary entries may
        also request 'threshold_percent' with a 'threshold' value and an optional
        'direction' (defaults to 'greater'; any other value counts cells below the
        threshold). Example::

            {
                "precip": "mean",
                "slope": ["mean", "std"],
                "landcover": {"type": "percent"},
                "elevation": {"type": "threshold_percent", "threshold": 2000}
            }

    Returns
    -------
    pd.DataFrame
        A single-row DataFrame with catchment-level features.
    """

    # Open dataset
    ds = xr.open_dataset(netcdf_path, chunks={'x': 200, 'y': 200})
    ds = ds.rio.write_crs("EPSG:4326")

    # Rename coords if needed
    if 'lon' in ds.dims and 'lat' in ds.dims:
        ds = ds.rename({'lon': 'x', 'lat': 'y'})

    variables = list(ds.data_vars)

    # Build function dict
    if isinstance(function_type, str):
        apply_func = {var: function_type for var in variables}
    elif isinstance(function_type, dict):
        apply_func = function_type
    else:
        raise ValueError("function_type must be a string or a dictionary.")

    # Order check
    y_order = "descending" if ds.y[0] > ds.y[-1] else "ascending"
    x_order = "descending" if ds.x[0] > ds.x[-1] else "ascending"

    # Get catchment bounds
    minx, miny, maxx, maxy = catchment_geometry.bounds
    if y_order == "descending":
        y_slice = slice(maxy, miny)
    else:
        y_slice = slice(miny, maxy)
    if x_order == "descending":
        x_slice = slice(maxx, minx)
    else:
        x_slice = slice(minx, maxx)

    # Subset dataset
    subset_ds = ds.sel(x=x_slice, y=y_slice)

    # Create mask
    catchment_gdf = gpd.GeoDataFrame({"geometry": [catchment_geometry]}, crs="EPSG:4326")
    mask_from_geopandas = regionmask.mask_geopandas(catchment_gdf, subset_ds.x, subset_ds.y)
    catchment_mask = mask_from_geopandas == 0

    if mask_from_geopandas.notnull().sum().sum().item() == 0:
        raise ValueError("Catchment mask is empty — geometry may not overlap the raster.")

    results = {}

    # Loop over variables
    for var in apply_func.keys():
        data = subset_ds[var]
        masked = data.where(catchment_mask).compute()
        arr = masked.where(~masked.isnull(), drop=True)

        # Skip if empty
        if arr.size == 0:
            continue

        func_list = apply_func[var]
        if not isinstance(func_list, list):
            func_list = [func_list]  # wrap into list

        for func_info in func_list:
            if isinstance(func_info, str):
                func = func_info
                threshold = None
                threshold_direction = None
            elif isinstance(func_info, dict):
                func = func_info.get("type")
                threshold = func_info.get("threshold", None)
                threshold_direction = func_info.get("direction", "greater")
            else:
                raise ValueError(f"Invalid function format for variable {var}")

            if func == "mean":
                results[f"{var}_mean"] = float(arr.mean().item())

            elif func == "mode":
                vals = arr.values.flatten()
                vals = vals[~np.isnan(vals)]  # ignore cells outside the catchment
                results[f"{var}_mode"] = Counter(vals).most_common(1)[0][0]

            elif func == "std":
                results[f"{var}_std"] = float(arr.std().item())

            elif func == "percent":
                vals = arr.values.flatten()
                vals = vals[~np.isnan(vals)]  # ignore cells outside the catchment
                total = len(vals)
                class_counts = Counter(vals)
                for cls, count in class_counts.items():
                    results[f"{var}_percent_{int(cls)}"] = (count / total) * 100

            elif func == "threshold_percent":
                if threshold is None:
                    raise ValueError(f"Threshold not provided for variable '{var}'")
                vals = arr.values.flatten()
                valid = vals[~np.isnan(vals)]
                if threshold_direction == "greater":
                    percent = (valid > threshold).sum() / len(valid) * 100
                    results[f"{var}_percent_above_{threshold}"] = percent
                else:
                    percent = (valid < threshold).sum() / len(valid) * 100
                    results[f"{var}_percent_below_{threshold}"] = percent

            else:
                raise ValueError(f"Unknown function type '{func}' for variable '{var}'")

    return pd.DataFrame([results])
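The 147-line hunk above matches reclaim/static_features/utils/catchment_agreggate.py. An end-to-end sketch on a tiny synthetic raster, not taken from the package: it assumes xarray, dask, rioxarray, regionmask, geopandas and a NetCDF backend (e.g. netCDF4) are installed, and the variable names and the aggregation spec below are invented for illustration.

import numpy as np
import xarray as xr
from shapely.geometry import box

from reclaim.static_features.utils.catchment_agreggate import compute_catchment_aggregate

# Build a small synthetic raster on a regular x/y (lon/lat) grid and save it as NetCDF.
rng = np.random.default_rng(0)
ds = xr.Dataset(
    {
        "elevation": (("y", "x"), rng.uniform(100, 500, size=(21, 21))),
        "landcover": (("y", "x"), rng.integers(1, 4, size=(21, 21)).astype(float)),
    },
    coords={"y": np.linspace(10.0, 11.0, 21), "x": np.linspace(76.0, 77.0, 21)},
)
ds.to_netcdf("toy_raster.nc")

# A made-up catchment polygon covering part of the grid.
catchment = box(76.2, 10.2, 76.8, 10.8)

# Mean and a threshold percentage for elevation, class percentages for landcover.
spec = {
    "elevation": ["mean", {"type": "threshold_percent", "threshold": 300}],
    "landcover": {"type": "percent"},
}
features = compute_catchment_aggregate("toy_raster.nc", catchment, function_type=spec)
print(features.T)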