pyreclaim-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,63 @@
+ import pandas as pd
+ from typing import Callable, Union, Sequence
+
+ def compute_ts_aggregate(
+     ts_csv_path: str,
+     time_column: str,
+     value_column: str,
+     feature_function: Callable,
+     feature_name: str,
+     observation_period: Union[Sequence[int], None] = None
+ ) -> pd.DataFrame:
+     """
+     Compute an aggregate feature from a user-provided time series CSV for a single reservoir.
+
+     Parameters
+     ----------
+     ts_csv_path : str
+         Path to the CSV file containing the time series.
+     time_column : str
+         Name of the column representing dates/timestamps.
+     value_column : str
+         Name of the column representing the variable values.
+     feature_function : Callable
+         Function that takes a pd.Series (the time series) and returns a single value.
+     feature_name : str
+         Name of the column to store the computed feature in the returned DataFrame.
+     observation_period : list or tuple of two ints, optional
+         [start_year, end_year] used to clip the time series. If None, no clipping is applied.
+
+     Returns
+     -------
+     pd.DataFrame
+         A single-row DataFrame containing the computed feature with the specified column name.
+     """
+
+     # Load the CSV
+     df = pd.read_csv(ts_csv_path)
+     if df.empty:
+         raise ValueError(f"CSV at {ts_csv_path} is empty.")
+
+     # Ensure the time column is datetime; drop rows whose timestamps cannot be parsed
+     df[time_column] = pd.to_datetime(df[time_column], errors='coerce')
+     if df[time_column].isna().all():
+         raise ValueError(f"Time column '{time_column}' could not be converted to datetime.")
+     df = df.dropna(subset=[time_column])
+
+     # Index the values by time
+     ts = df.set_index(time_column)[value_column]
+
+     # Clip to the observation period if provided
+     if observation_period is not None:
+         start_year, end_year = observation_period
+         ts = ts[(ts.index.year >= start_year) & (ts.index.year <= end_year)]
+
+     # Remove NaNs
+     ts_clean = ts.dropna()
+     if ts_clean.empty:
+         raise ValueError("Time series has no valid data after clipping/removing NaNs.")
+
+     # Apply the user-defined feature function
+     feature_value = feature_function(ts_clean)
+
+     # Return as a single-row DataFrame with the user-specified column name
+     return pd.DataFrame({feature_name: [feature_value]})
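
As a quick illustration of the calling convention above, here is a minimal usage sketch. The CSV path, the column names, and the choice of the series mean as the aggregate are placeholder assumptions for the example, not values shipped with the package, and the module path of `compute_ts_aggregate` is not visible in this diff.

```python
import numpy as np

# Hypothetical inputs: a CSV with "date" and "inflow" columns (placeholder names).
mean_inflow = compute_ts_aggregate(
    ts_csv_path="inflow_timeseries.csv",  # placeholder path
    time_column="date",                   # placeholder column name
    value_column="inflow",                # placeholder column name
    feature_function=np.mean,             # any callable mapping a pd.Series to a scalar
    feature_name="mean_inflow",
    observation_period=[2000, 2020],      # clip to the years 2000-2020 inclusive
)
# Result: a single-row DataFrame with one column named "mean_inflow".
```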
@@ -0,0 +1,141 @@
+ ### Wrappers to compute all static, dynamic, and derived features for the RECLAIM input dataset.
+
+ from typing import Dict, List, Optional
+ import pandas as pd
+
+ # Feature-computation functions from the reclaim subpackages
+ from reclaim.static_features.reservoir_static import reservoir_based_static_features
+ from reclaim.static_features.catchment_static import catchment_based_static_features
+ from reclaim.dynamic_features.reservoir_dynamic import reservoir_based_dynamic_features
+ from reclaim.dynamic_features.catchment_dynamic import catchment_based_dynamic_features
+ from reclaim.derived_features.feature_engineering_and_transformation import engineer_and_transform_features
+
+
+ def create_features_per_row(
+     reservoir_static_params: dict,
+     catchment_static_params: dict,
+     reservoir_dynamic_info: Optional[Dict] = None,
+     catchment_dynamic_info: Optional[Dict] = None,
+     observation_period: Optional[List[int]] = None
+ ) -> pd.DataFrame:
+     """
+     Compute all static, dynamic, and derived features for a single reservoir observation.
+
+     Parameters
+     ----------
+     reservoir_static_params : dict
+         Parameters for reservoir_based_static_features(). Expected keys:
+         - obc : float, Original Built Capacity (MCM)
+         - hgt : float, Dam Height (m)
+         - mrb : str, Major River Basin, optional
+         - lat : float, Latitude (deg)
+         - lon : float, Longitude (deg)
+         - reservoir_polygon : shapely.geometry.Polygon
+         - inlet_point : shapely.geometry.Point, optional
+         - resolution : float, optional
+         - aec_df : pd.DataFrame with columns ['area', 'elevation']
+
+     catchment_static_params : dict
+         Parameters for catchment_based_static_features(). Expected keys:
+         - ca : float, Catchment Area (sq km)
+         - dca : float, Differential Catchment Area (sq km)
+         - catchment_geometry : shapely.geometry.Polygon or GeoSeries
+         - glc_share_path : str, path to GLC-Share NetCDF (land cover)
+         - hwsd2_path : str, path to HWSD2 NetCDF (soils)
+         - hilda_veg_freq_path : str, path to HILDA vegetation NetCDF
+         - terrain_path : str, path to terrain/DEM derivatives NetCDF
+
+     reservoir_dynamic_info : dict, optional
+         variable_info dict for reservoir time series. Required keys (case-sensitive):
+         - "inflow": {"path": str, "time_column": str, "data_column": str}
+         - "outflow": {"path": str, "time_column": str, "data_column": str}
+         - "evaporation": {"path": str, "time_column": str, "data_column": str}
+         - "surface_area": {"path": str, "time_column": str, "data_column": str}
+         - "nssc": {"path": str, "time_column": str, "data_column": str}
+         - "nssc2": {"path": str, "time_column": str, "data_column": str}
+
+     catchment_dynamic_info : dict, optional
+         variable_info dict for catchment time series. Required keys (case-sensitive):
+         - "precip": {"path": str, "time_column": str, "data_column": str}
+         - "tmin": {"path": str, "time_column": str, "data_column": str}
+         - "tmax": {"path": str, "time_column": str, "data_column": str}
+         - "wind": {"path": str, "time_column": str, "data_column": str}
+
+     observation_period : list of int, optional
+         Two-element list [OSY, OEY] giving the observation start year and end year.
+
+     Returns
+     -------
+     pd.DataFrame
+         Single-row DataFrame with all features:
+         - Reservoir static
+         - Catchment static
+         - Reservoir dynamic
+         - Catchment dynamic
+         - Derived/log-transformed
+     """
+
+     # --- Static features ---
+     df_res_static = reservoir_based_static_features(**reservoir_static_params)
+     df_catch_static = catchment_based_static_features(**catchment_static_params)
+
+     # --- Dynamic features (computed only when the info dict and observation period are given) ---
+     df_res_dyn = pd.DataFrame()
+     df_catch_dyn = pd.DataFrame()
+
+     if reservoir_dynamic_info is not None and observation_period is not None:
+         df_res_dyn = reservoir_based_dynamic_features(reservoir_dynamic_info, observation_period)
+
+     if catchment_dynamic_info is not None and observation_period is not None:
+         df_catch_dyn = catchment_based_dynamic_features(catchment_dynamic_info, observation_period)
+
+     # --- Combine all static + dynamic features ---
+     df_combined = pd.concat([df_res_static, df_catch_static, df_res_dyn, df_catch_dyn], axis=1)
+
+     # --- Engineer + log-transform features ---
+     df_final = engineer_and_transform_features(df_combined)
+
+     return df_final
+
+
+ def create_features_multi(
+     reservoirs_input: List[Dict]
+ ) -> pd.DataFrame:
+     """
+     Compute features for multiple reservoirs using structured input.
+
+     Parameters
+     ----------
+     reservoirs_input : list of dict
+         Each element should be a dictionary with the following keys:
+
+         - `reservoir_static_params` : dict
+             Parameters for `reservoir_based_static_features()`.
+         - `catchment_static_params` : dict
+             Parameters for `catchment_based_static_features()`.
+         - `reservoir_dynamic_info` : dict
+             Parameters for `reservoir_based_dynamic_features()`.
+         - `catchment_dynamic_info` : dict
+             Parameters for `catchment_based_dynamic_features()`.
+         - `observation_period` : list of int
+             Two-element list `[OSY, OEY]` specifying the observation period.
+
+     Returns
+     -------
+     pd.DataFrame
+         Combined DataFrame with one row per reservoir observation.
+     """
+
+     all_rows = []
+     for reservoir_info in reservoirs_input:
+         df_row = create_features_per_row(
+             reservoir_static_params=reservoir_info.get("reservoir_static_params", {}),
+             catchment_static_params=reservoir_info.get("catchment_static_params", {}),
+             reservoir_dynamic_info=reservoir_info.get("reservoir_dynamic_info", None),
+             catchment_dynamic_info=reservoir_info.get("catchment_dynamic_info", None),
+             observation_period=reservoir_info.get("observation_period", None),
+         )
+         all_rows.append(df_row)
+
+     df_all = pd.concat(all_rows, axis=0).reset_index(drop=True)
+     return df_all
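
For context, the structured input consumed by `create_features_multi()` can be sketched as follows. Every path, geometry, and numeric value is a placeholder, only a subset of the keys documented in the docstrings above is shown, and the call will only succeed once real CSV/NetCDF inputs exist at the referenced paths.

```python
from shapely.geometry import Polygon

# create_features_multi is assumed importable from the wrapper module above;
# its exact module path inside the package is not shown in this diff.
reservoirs_input = [
    {
        "reservoir_static_params": {
            "obc": 120.0,   # placeholder Original Built Capacity (MCM)
            "hgt": 45.0,    # placeholder dam height (m)
            "lat": 23.5,
            "lon": 77.1,
            "reservoir_polygon": Polygon([(77.0, 23.4), (77.2, 23.4), (77.2, 23.6), (77.0, 23.6)]),
            # ... remaining keys as documented for reservoir_based_static_features()
        },
        "catchment_static_params": {
            "ca": 1500.0,   # placeholder catchment area (sq km)
            "dca": 300.0,
            # ... catchment geometry and NetCDF paths as documented above
        },
        "reservoir_dynamic_info": {
            "inflow": {"path": "inflow.csv", "time_column": "date", "data_column": "inflow"},
            # ... outflow, evaporation, surface_area, nssc, nssc2 entries in the same shape
        },
        "catchment_dynamic_info": {
            "precip": {"path": "precip.csv", "time_column": "date", "data_column": "precip"},
            # ... tmin, tmax, wind entries in the same shape
        },
        "observation_period": [2000, 2020],
    },
]

features_df = create_features_multi(reservoirs_input)  # one row per reservoir observation
```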