pyreclaim 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyreclaim-0.1.0.dist-info/METADATA +803 -0
- pyreclaim-0.1.0.dist-info/RECORD +27 -0
- pyreclaim-0.1.0.dist-info/WHEEL +5 -0
- pyreclaim-0.1.0.dist-info/licenses/LICENSE +674 -0
- pyreclaim-0.1.0.dist-info/top_level.txt +1 -0
- reclaim/__init__.py +1 -0
- reclaim/derived_features/__init__.py +1 -0
- reclaim/derived_features/feature_engineering_and_transformation.py +75 -0
- reclaim/dynamic_features/__init__.py +1 -0
- reclaim/dynamic_features/catchment_dynamic.py +103 -0
- reclaim/dynamic_features/reservoir_dynamic.py +148 -0
- reclaim/dynamic_features/utils/__init__.py +1 -0
- reclaim/dynamic_features/utils/catchment_meteorology.py +0 -0
- reclaim/dynamic_features/utils/inflow_outflow.py +95 -0
- reclaim/dynamic_features/utils/rainfall.py +49 -0
- reclaim/dynamic_features/utils/statistical_metrics.py +190 -0
- reclaim/dynamic_features/utils/ts_aggregate.py +63 -0
- reclaim/generate_features.py +141 -0
- reclaim/reclaim.py +503 -0
- reclaim/static_features/__init__.py +1 -0
- reclaim/static_features/catchment_static.py +127 -0
- reclaim/static_features/reservoir_static.py +97 -0
- reclaim/static_features/utils/__init__.py +1 -0
- reclaim/static_features/utils/aec_shape.py +101 -0
- reclaim/static_features/utils/area_perimeter.py +36 -0
- reclaim/static_features/utils/catchment_agreggate.py +147 -0
- reclaim/static_features/utils/flow_length.py +455 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from typing import Callable, Union, Sequence
|
|
3
|
+
|
|
4
|
+
def compute_ts_aggregate(
    ts_csv_path: str,
    time_column: str,
    value_column: str,
    feature_function: Callable,
    feature_name: str,
    observation_period: Union[Sequence[int], None] = None
) -> pd.DataFrame:
    """
    Compute an aggregate feature from a user-provided time series CSV for a single reservoir.

    Parameters
    ----------
    ts_csv_path : str
        Path to the CSV file containing the time series.
    time_column : str
        Name of the column representing dates/timestamps.
    value_column : str
        Name of the column representing the variable values.
    feature_function : Callable
        Function that takes a pd.Series (the time series) and returns a single value.
    feature_name : str
        Name of the column to store the computed feature in the returned DataFrame.
    observation_period : list or tuple of two ints, optional
        [start_year, end_year] (both inclusive) to clip the time series.
        If None, no clipping is applied.

    Returns
    -------
    pd.DataFrame
        A single-row DataFrame containing the computed feature with the specified column name.

    Raises
    ------
    ValueError
        If the CSV has no rows, if no value of ``time_column`` can be parsed as a
        datetime, or if no valid data remains after clipping and removing NaNs.
    KeyError
        If ``time_column`` or ``value_column`` is not a column of the CSV.
    """

    # Load the CSV
    df = pd.read_csv(ts_csv_path)
    if df.empty:
        raise ValueError(f"CSV at {ts_csv_path} is empty.")

    # Fail early with a readable message instead of a bare pandas KeyError
    missing_columns = [col for col in (time_column, value_column) if col not in df.columns]
    if missing_columns:
        raise KeyError(f"Column(s) {missing_columns} not found in CSV at {ts_csv_path}.")

    # Ensure time column is datetime; unparseable entries become NaT
    df[time_column] = pd.to_datetime(df[time_column], errors='coerce')
    has_valid_time = df[time_column].notna()
    if not has_valid_time.any():
        raise ValueError(f"Time column '{time_column}' could not be converted to datetime.")

    # Drop rows whose timestamp failed to parse. Without this, such rows were
    # kept when observation_period is None but silently discarded by the year
    # comparison when a period is given, so the aggregate depended on whether
    # clipping was requested.
    ts = df.loc[has_valid_time].set_index(time_column)[value_column]

    # Clip to observation period if provided (inclusive on both ends)
    if observation_period is not None:
        start_year, end_year = observation_period
        years = ts.index.year
        ts = ts[(years >= start_year) & (years <= end_year)]

    # Remove NaNs
    ts_clean = ts.dropna()
    if ts_clean.empty:
        raise ValueError("Time series has no valid data after clipping/removing NaNs.")

    # Apply user-defined feature function
    feature_value = feature_function(ts_clean)

    # Return as single-row DataFrame with user-specified column name
    return pd.DataFrame({feature_name: [feature_value]})
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
### Wrappers to compute all static, dynamic, and derived features for RECLAIM input dataset.
|
|
2
|
+
|
|
3
|
+
from typing import Dict, List
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
# Import from your package structure
|
|
7
|
+
from reclaim.static_features.reservoir_static import reservoir_based_static_features
|
|
8
|
+
from reclaim.static_features.catchment_static import catchment_based_static_features
|
|
9
|
+
from reclaim.dynamic_features.reservoir_dynamic import reservoir_based_dynamic_features
|
|
10
|
+
from reclaim.dynamic_features.catchment_dynamic import catchment_based_dynamic_features
|
|
11
|
+
from reclaim.derived_features.feature_engineering_and_transformation import engineer_and_transform_features
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def create_features_per_row(
    reservoir_static_params: dict,
    catchment_static_params: dict,
    reservoir_dynamic_info: dict = None,
    catchment_dynamic_info: dict = None,
    observation_period: List[int] = None
) -> pd.DataFrame:
    """
    Build the full feature set (static, dynamic, derived) for one reservoir observation.

    Parameters
    ----------
    reservoir_static_params : dict
        Keyword arguments forwarded to reservoir_based_static_features(). Expected keys:
        - obc : float, Original Built Capacity (MCM)
        - hgt : float, Dam Height (m)
        - mrb : str, Major River Basin, optional
        - lat : float, Latitude (deg)
        - lon : float, Longitude (deg)
        - reservoir_polygon : shapely.geometry.Polygon
        - inlet_point : shapely.geometry.Point, optional
        - resolution : float, optional
        - aec_df : pd.DataFrame with columns ['area', 'elevation']

    catchment_static_params : dict
        Keyword arguments forwarded to catchment_based_static_features(). Expected keys:
        - ca : float, Catchment Area (sq km)
        - dca : float, Differential Catchment Area (sq km)
        - catchment_geometry : shapely.geometry.Polygon or GeoSeries
        - glc_share_path : str, path to GLC-Share NetCDF (land cover)
        - hwsd2_path : str, path to HWSD2 NetCDF (soils)
        - hilda_veg_freq_path : str, path to HILDA vegetation NetCDF
        - terrain_path : str, path to terrain/DEM derivatives NetCDF

    reservoir_dynamic_info : dict, optional
        variable_info dict for reservoir time series. Required keys (case-sensitive):
        - "inflow": {"path": str, "time_column": str, "data_column": str}
        - "outflow": {"path": str, "time_column": str, "data_column": str}
        - "evaporation": {"path": str, "time_column": str, "data_column": str}
        - "surface_area": {"path": str, "time_column": str, "data_column": str}
        - "nssc": {"path": str, "time_column": str, "data_column": str}
        - "nssc2": {"path": str, "time_column": str, "data_column": str}

    catchment_dynamic_info : dict, optional
        variable_info dict for catchment time series. Required keys (case-sensitive):
        - "precip": {"path": str, "time_column": str, "data_column": str}
        - "tmin": {"path": str, "time_column": str, "data_column": str}
        - "tmax": {"path": str, "time_column": str, "data_column": str}
        - "wind": {"path": str, "time_column": str, "data_column": str}

    observation_period : list of int, optional
        Two-element list [OSY, OEY] for observation start year and end year.
        Dynamic features are computed only when this is provided together with
        the corresponding ``*_dynamic_info`` dict.

    Returns
    -------
    pd.DataFrame
        Output of engineer_and_transform_features() applied to the column-wise
        concatenation of, in order:
        - Reservoir static
        - Catchment static
        - Reservoir dynamic
        - Catchment dynamic
    """

    # Static features are always computed.
    reservoir_static = reservoir_based_static_features(**reservoir_static_params)
    catchment_static = catchment_based_static_features(**catchment_static_params)

    # Each dynamic group needs both its info dict and an observation period;
    # otherwise an empty frame stands in so the concat layout stays the same.
    period_given = observation_period is not None

    if period_given and reservoir_dynamic_info is not None:
        reservoir_dynamic = reservoir_based_dynamic_features(
            reservoir_dynamic_info, observation_period
        )
    else:
        reservoir_dynamic = pd.DataFrame()

    if period_given and catchment_dynamic_info is not None:
        catchment_dynamic = catchment_based_dynamic_features(
            catchment_dynamic_info, observation_period
        )
    else:
        catchment_dynamic = pd.DataFrame()

    # Place all feature groups side by side, then derive/log-transform.
    feature_groups = [
        reservoir_static,
        catchment_static,
        reservoir_dynamic,
        catchment_dynamic,
    ]
    return engineer_and_transform_features(pd.concat(feature_groups, axis=1))
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def create_features_multi(
    reservoirs_input: List[Dict]
) -> pd.DataFrame:
    """
    Compute features for multiple reservoirs using structured input.

    Parameters
    ----------
    reservoirs_input : list of dict
        Each element should be a dictionary with the following keys:

        - `reservoir_static_params` : dict
            Parameters for `reservoir_based_static_features()`.
            Defaults to an empty dict when the key is absent.
        - `catchment_static_params` : dict
            Parameters for `catchment_based_static_features()`.
            Defaults to an empty dict when the key is absent.
        - `reservoir_dynamic_info` : dict
            Parameters for `reservoir_based_dynamic_features()`.
            Defaults to None when the key is absent.
        - `catchment_dynamic_info` : dict
            Parameters for `catchment_based_dynamic_features()`.
            Defaults to None when the key is absent.
        - `observation_period` : list of int
            Two-element list `[OSY, OEY]` specifying the observation period.
            Defaults to None when the key is absent.

    Returns
    -------
    pd.DataFrame
        Combined DataFrame with one row per reservoir observation, re-indexed
        from 0. An empty DataFrame is returned when `reservoirs_input` is empty.
    """

    feature_rows = [
        create_features_per_row(
            reservoir_static_params=reservoir_info.get("reservoir_static_params", {}),
            catchment_static_params=reservoir_info.get("catchment_static_params", {}),
            reservoir_dynamic_info=reservoir_info.get("reservoir_dynamic_info", None),
            catchment_dynamic_info=reservoir_info.get("catchment_dynamic_info", None),
            observation_period=reservoir_info.get("observation_period", None),
        )
        for reservoir_info in reservoirs_input
    ]

    # pd.concat raises "No objects to concatenate" on an empty list; zero
    # reservoirs should simply yield zero rows.
    if not feature_rows:
        return pd.DataFrame()

    return pd.concat(feature_rows, axis=0).reset_index(drop=True)
|