pyreclaim 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,22 +2,30 @@
2
2
 
3
3
  from typing import Dict, List
4
4
  import pandas as pd
5
+ import geopandas as gpd
6
+ from tqdm import tqdm
7
+ import traceback
8
+ from dask import delayed, compute
9
+ from dask.diagnostics import ProgressBar
5
10
 
6
11
  # Import from your package structure
7
12
  from reclaim.static_features.reservoir_static import reservoir_based_static_features
8
- from reclaim.static_features.catchment_static import catchment_based_static_features
13
+ from reclaim.static_features.catchment_static import catchment_based_static_features, catchment_based_static_features_multi_reservoir
9
14
  from reclaim.dynamic_features.reservoir_dynamic import reservoir_based_dynamic_features
10
15
  from reclaim.dynamic_features.catchment_dynamic import catchment_based_dynamic_features
16
+ from reclaim.dynamic_features.utils.ts_aggregate import build_intervals
11
17
  from reclaim.derived_features.feature_engineering_and_transformation import engineer_and_transform_features
12
18
 
13
19
 
14
- def create_features_per_row(
20
+ def create_features_per_reservoir(
15
21
  idx: int,
16
22
  observation_period: List[int],
17
23
  reservoir_static_params: dict,
18
- catchment_static_params: dict,
24
+ catchment_static_params: dict = None,
19
25
  reservoir_dynamic_info: dict = None,
20
26
  catchment_dynamic_info: dict = None,
27
+ time_interval: int = None,
28
+ feature_engineering: bool = True,
21
29
  ) -> pd.DataFrame:
22
30
  """
23
31
  Compute all static, dynamic, and derived features for a single reservoir observation.
@@ -34,7 +42,7 @@ def create_features_per_row(
34
42
  Parameters for reservoir_based_static_features(). Expected keys:
35
43
  - obc : float, Original Built Capacity (MCM)
36
44
  - hgt : float, Dam Height (m)
37
- - mrb : str, Major River Basin, optional
45
+ - mrb : int, Major River Basin, optional
38
46
  - lat : float, Latitude (deg)
39
47
  - lon : float, Longitude (deg)
40
48
  - by : int, Build Year
@@ -43,7 +51,7 @@ def create_features_per_row(
43
51
  - resolution : float, optional
44
52
  - aec_df : pd.DataFrame with columns ['area', 'elevation']
45
53
 
46
- catchment_static_params : dict
54
+ catchment_static_params : dict, optional
47
55
  Parameters for catchment_based_static_features(). Expected keys:
48
56
  - ca : float, Catchment Area (sq km)
49
57
  - dca : float, Differential Catchment Area (sq km)
@@ -69,6 +77,9 @@ def create_features_per_row(
69
77
  - "tmax": {"path": str, "time_column": str, "data_column": str}
70
78
  - "wind": {"path": str, "time_column": str, "data_column": str}
71
79
 
80
+ time_interval: int, optional
81
+ Time interval in years between reservoir observations for dynamic feature calculations. The number of rows in the dynamic features will depend on this interval.
82
+
72
83
  Returns
73
84
  -------
74
85
  pd.DataFrame
@@ -77,42 +88,88 @@ def create_features_per_row(
77
88
  - Catchment static
78
89
  - Reservoir dynamic
79
90
  - Catchment dynamic
80
- - Derived/log-transformed
91
+ - Derived/log-transformed (if requested)
81
92
  """
82
93
 
83
94
  # --- Observation period features ---
84
95
  osy, oey = observation_period
85
- df_obs_period = pd.DataFrame({
86
- "idx": [idx],
87
- "OSY": [osy],
88
- "OEY": [oey]
96
+ if time_interval is not None:
97
+ intervals = build_intervals(osy, oey, time_interval)
98
+ else:
99
+ intervals = [(osy, oey)]
100
+ # Create observation period dataframe with rows for each interval with same idx
101
+ df_obs = pd.DataFrame({
102
+ "idx": idx,
103
+ "OSY": [i[0] for i in intervals],
104
+ "OEY": [i[1] for i in intervals],
89
105
  })
90
106
 
91
- # --- Static features ---
107
+ # --- Static features (computed ONCE) ---
92
108
  df_res_static = reservoir_based_static_features(**reservoir_static_params)
93
- df_catch_static = catchment_based_static_features(**catchment_static_params)
94
-
95
- # --- Dynamic features ---
96
- df_res_dyn = pd.DataFrame()
97
- df_catch_dyn = pd.DataFrame()
109
+ if catchment_static_params is not None:
110
+ df_catch_static = catchment_based_static_features(**catchment_static_params)
111
+ else:
112
+ df_catch_static = pd.DataFrame()
113
+
114
+ static_block = pd.concat([df_res_static, df_catch_static], axis=1)
115
+ # Repeat static rows to match number of intervals
116
+ static_block = pd.concat(
117
+ [static_block] * len(df_obs),
118
+ ignore_index=True
119
+ )
98
120
 
99
- if reservoir_dynamic_info is not None and observation_period is not None:
100
- df_res_dyn = reservoir_based_dynamic_features(reservoir_dynamic_info, observation_period)
121
+ # --- Dynamic features (computed ONCE - internally handles intervals) ---
122
+ # Combine dynamic features for all intervals
123
+ df_res_dyn = (
124
+ reservoir_based_dynamic_features(
125
+ reservoir_dynamic_info,
126
+ intervals,
127
+ )
128
+ if reservoir_dynamic_info is not None
129
+ else pd.DataFrame()
130
+ )
101
131
 
102
- if catchment_dynamic_info is not None and observation_period is not None:
103
- df_catch_dyn = catchment_based_dynamic_features(catchment_dynamic_info, observation_period)
132
+ df_catch_dyn = (
133
+ catchment_based_dynamic_features(
134
+ catchment_dynamic_info,
135
+ intervals,
136
+ )
137
+ if catchment_dynamic_info is not None
138
+ else pd.DataFrame()
139
+ )
104
140
 
105
- # --- Combine all static + dynamic ---
106
- df_combined = pd.concat([df_obs_period, df_res_static, df_catch_static, df_res_dyn, df_catch_dyn], axis=1)
141
+ # --- Combine all features for all intervals in single dataframe ---
142
+ df_out = pd.concat(
143
+ [df_obs, static_block, df_res_dyn, df_catch_dyn],
144
+ axis=1
145
+ ).reset_index(drop=True)
107
146
 
108
- # --- Engineer + log-transform features ---
109
- df_final = engineer_and_transform_features(df_combined)
147
+ # --- Engineer ONLY if requested ---
148
+ if feature_engineering:
149
+ df_out = engineer_and_transform_features(df_out)
110
150
 
111
- return df_final
151
+ return df_out
112
152
 
153
+ @delayed
154
+ def process_one_reservoir(r):
155
+ try:
156
+ df = create_features_per_reservoir(
157
+ idx=r["idx"],
158
+ observation_period=r["observation_period"],
159
+ reservoir_static_params=r["reservoir_static_params"],
160
+ catchment_static_params=None,
161
+ reservoir_dynamic_info=r.get("reservoir_dynamic_info"),
162
+ catchment_dynamic_info=r.get("catchment_dynamic_info"),
163
+ time_interval=r.get("time_interval"),
164
+ feature_engineering=False,
165
+ )
166
+ return r["idx"], df, None
167
+ except Exception as e:
168
+ return r["idx"], pd.DataFrame({"idx": [r["idx"]]}), {str(e):traceback.format_exc()}
113
169
 
114
- def create_features_multi(
115
- reservoirs_input: List[Dict]
170
+ def create_features_multi_reservoirs(
171
+ reservoirs_input: List[Dict],
172
+ error_log: bool = False,
116
173
  ) -> pd.DataFrame:
117
174
  """
118
175
  Compute features for multiple reservoirs using structured input.
@@ -134,25 +191,107 @@ def create_features_multi(
134
191
  Parameters for `reservoir_based_dynamic_features()`.
135
192
  - `catchment_dynamic_info` : dict
136
193
  Parameters for `catchment_based_dynamic_features()`.
137
-
194
+ - `time_interval` : int, optional
195
+ Time interval in years between reservoir observations for dynamic feature calculations.
138
196
 
139
197
  Returns
140
198
  -------
141
199
  pd.DataFrame
142
- Combined DataFrame with one row per reservoir observation.
200
+ Combined DataFrame with one row per reservoir and time interval
201
+ in the observation period.
143
202
  """
144
203
 
145
- all_rows = []
146
- for idx, reservoir_info in enumerate(reservoirs_input):
147
- df_row = create_features_per_row(
148
- idx=reservoir_info.get("idx"),
149
- observation_period=reservoir_info.get("observation_period"),
150
- reservoir_static_params=reservoir_info.get("reservoir_static_params", {}),
151
- catchment_static_params=reservoir_info.get("catchment_static_params", {}),
152
- reservoir_dynamic_info=reservoir_info.get("reservoir_dynamic_info", None),
153
- catchment_dynamic_info=reservoir_info.get("catchment_dynamic_info", None),
154
- )
155
- all_rows.append(df_row)
204
+ # -------- Collect catchments first (cheap, no tqdm needed)
205
+ catchment_rows = []
206
+
207
+ for r in reservoirs_input:
208
+ c = r["catchment_static_params"]
209
+ catchment_rows.append({
210
+ "idx": r["idx"],
211
+ "CA": c["ca"],
212
+ "DCA": c["dca"],
213
+ "geometry": c["catchment_geometry"],
214
+ })
215
+
216
+ catchments_gdf = gpd.GeoDataFrame(
217
+ catchment_rows, geometry="geometry", crs="EPSG:4326"
218
+ )
219
+
220
+ # -------- Compute catchment static ONCE
221
+ first = reservoirs_input[0]["catchment_static_params"]
156
222
 
157
- df_all = pd.concat(all_rows, axis=0).reset_index(drop=True)
158
- return df_all
223
+ df_catch_static_all = catchment_based_static_features_multi_reservoir(
224
+ catchments_gdf,
225
+ glc_share_path=first["glc_share_path"],
226
+ hwsd2_path=first["hwsd2_path"],
227
+ hilda_veg_freq_path=first["hilda_veg_freq_path"],
228
+ terrain_path=first["terrain_path"],
229
+ )
230
+
231
+ catch_static_lookup = df_catch_static_all.set_index("idx")
232
+ # catch_static_lookup = pd.DataFrame() # Placeholder if not computing
233
+
234
+ # -------- Per-reservoir loop (tqdm HERE)
235
+ tasks = [process_one_reservoir(r) for r in reservoirs_input]
236
+ with ProgressBar():
237
+ results = compute(*tasks, scheduler="processes", num_workers=4)
238
+
239
+ all_reservoirs_static_info = []
240
+ errors = {}
241
+
242
+ for idx, df, err in results:
243
+ all_reservoirs_static_info.append(df)
244
+ if err is not None:
245
+ errors[idx] = err
246
+
247
+ # for r in tqdm(
248
+ # reservoirs_input,
249
+ # total=len(reservoirs_input),
250
+ # desc="Generating per-reservoir features",
251
+ # unit="reservoir",
252
+ # ):
253
+ # try:
254
+ # df = create_features_per_reservoir(
255
+ # idx=r["idx"],
256
+ # observation_period=r["observation_period"],
257
+ # reservoir_static_params=r["reservoir_static_params"],
258
+ # catchment_static_params=None, # already handled
259
+ # reservoir_dynamic_info=r.get("reservoir_dynamic_info"),
260
+ # catchment_dynamic_info=r.get("catchment_dynamic_info"),
261
+ # time_interval=r.get("time_interval"),
262
+ # feature_engineering=False,
263
+ # )
264
+ # all_reservoirs_static_info.append(df)
265
+ # except Exception as e:
266
+ # errors[r["idx"]] = e
267
+ # errors['traceback'] = traceback.print_exc()
268
+ # all_reservoirs_static_info.append(
269
+ # pd.DataFrame({"idx": r["idx"]}) # Append empty DataFrame for failed reservoir
270
+ # )
271
+
272
+
273
+
274
+ # -------- Concatenate static info
275
+ df_all = pd.concat(all_reservoirs_static_info, ignore_index=True)
276
+ # CRITICAL: restore logical ordering
277
+ df_all = df_all.sort_values(
278
+ by=["idx", "OSY"], #
279
+ ascending=[True, True],
280
+ ).reset_index(drop=True)
281
+
282
+
283
+ # -------- Merge static catchment features with dynamic ONCE
284
+ df_all = df_all.merge(
285
+ catch_static_lookup,
286
+ left_on="idx",
287
+ right_index=True,
288
+ how="left",
289
+ )
290
+
291
+ # -------- Engineer ONCE
292
+ df_all = engineer_and_transform_features(df_all)
293
+
294
+ if error_log:
295
+ return df_all, errors
296
+ else:
297
+ return df_all
reclaim/reclaim.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  import pandas as pd
3
+ from pandas.api.types import is_integer_dtype, is_string_dtype
3
4
  import numpy as np
4
5
  from sklearn.preprocessing import LabelEncoder
5
6
  from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error
@@ -197,6 +198,14 @@ class Reclaim:
197
198
  "Safer to use DataFrame with feature names."
198
199
  )
199
200
 
201
+ if self.cat_features is not None:
202
+ for col in self.cat_features:
203
+ if not (is_integer_dtype(X[col]) or is_string_dtype(X[col])):
204
+ raise ValueError(
205
+ f"Column {col} must be integer or string type, "
206
+ f"found {X[col].dtype}"
207
+ )
208
+
200
209
 
201
210
  # Base model predictions
202
211
  pred_xgb = self.xgb_model.predict(X)
@@ -1,6 +1,39 @@
1
+ import geopandas as gpd
1
2
  import pandas as pd
2
3
 
3
- from reclaim.static_features.utils.catchment_agreggate import compute_catchment_aggregate
4
+ from reclaim.static_features.utils.catchment_agreggate import compute_catchment_aggregate, compute_catchment_aggregate_multi_reservoir
5
+
6
+ # ---- Rename columns to abbreviations
7
+ RENAME_VARIABLE_DICT = {
8
+ # Land cover
9
+ "artificial_surfaces_mean": "LCAS",
10
+ "cropland_mean": "LCC",
11
+ "grassland_mean": "LCG",
12
+ "tree_covered_mean": "LCT",
13
+ "shrubs_covered_mean": "LCS",
14
+ "aquatic_herbaceous_mean": "LCHV",
15
+ "mangroves_mean": "LCM",
16
+ "sparse_vegetation_mean": "LCSV",
17
+ "bare_soil_mean": "LCBS",
18
+ "snow_glaciers_mean": "LCSG",
19
+ "waterbodies_mean": "LCWB",
20
+ "dominant_class_mode": "DLC",
21
+ # Soil
22
+ "COARSE_mean": "COAR",
23
+ "SAND_mean": "SAND",
24
+ "SILT_mean": "SILT",
25
+ "CLAY_mean": "CLAY",
26
+ "BULK_mean": "BULK",
27
+ # Terrain
28
+ "elevation_mean": "ELEV",
29
+ "slope_mean": "SLOP",
30
+ "curvature_mean": "CURV",
31
+ "aspect_mean": "ASP",
32
+ "hillshade_mean": "HILL",
33
+ # HILDA vegetation gain/loss frequency
34
+ "vegetation_gain_frequency_mean": "VGF",
35
+ "vegetation_loss_frequency_mean": "VLF",
36
+ }
4
37
 
5
38
  def catchment_based_static_features(
6
39
  ca: float,
@@ -89,39 +122,81 @@ def catchment_based_static_features(
89
122
  merged = pd.concat([glc_df, hwsd_df, hilda_df, terrain_df], axis=1)
90
123
  features.update(merged.to_dict(orient="records")[0])
91
124
 
92
- # ---- Rename columns to abbreviations
93
- rename_dict = {
94
- # Land cover
95
- "artificial_surfaces_mean": "LCAS",
96
- "cropland_mean": "LCC",
97
- "grassland_mean": "LCG",
98
- "tree_covered_mean": "LCT",
99
- "shrubs_covered_mean": "LCS",
100
- "aquatic_herbaceous_mean": "LCHV",
101
- "mangroves_mean": "LCM",
102
- "sparse_vegetation_mean": "LCSV",
103
- "bare_soil_mean": "LCBS",
104
- "snow_glaciers_mean": "LCSG",
105
- "waterbodies_mean": "LCWB",
106
- "dominant_class_mode": "DLC",
107
- # Soil
108
- "COARSE_mean": "COAR",
109
- "SAND_mean": "SAND",
110
- "SILT_mean": "SILT",
111
- "CLAY_mean": "CLAY",
112
- "BULK_mean": "BULK",
113
- # Terrain
114
- "elevation_mean": "ELEV",
115
- "slope_mean": "SLOP",
116
- "curvature_mean": "CURV",
117
- "aspect_mean": "ASP",
118
- "hillshade_mean": "HILL",
119
- # HILDA (optional, not mapped to abbreviations yet)
120
- "vegetation_gain_frequency_mean": "VGF",
121
- "vegetation_loss_frequency_mean": "VLF",
125
+ # Apply renaming
126
+ features_df = pd.DataFrame([features]).rename(columns=RENAME_VARIABLE_DICT)
127
+
128
+ return features_df
129
+
130
+ def catchment_based_static_features_multi_reservoir(
131
+ catchments_gdf: gpd.GeoDataFrame,
132
+ glc_share_path: str,
133
+ hwsd2_path: str,
134
+ hilda_veg_freq_path: str,
135
+ terrain_path: str,
136
+ ) -> pd.DataFrame:
137
+ """
138
+ Compute catchment-based static features for MULTIPLE reservoirs efficiently.
139
+
140
+ Parameters
141
+ ----------
142
+ catchments_gdf : geopandas.GeoDataFrame
143
+ Must contain columns:
144
+ - idx
145
+ - CA
146
+ - DCA
147
+ - geometry
148
+ glc_share_path : str
149
+ Path to the GLC-Share NetCDF file (land cover fractions).
150
+ hwsd2_path : str
151
+ Path to the HWSD2 NetCDF file (soil composition).
152
+ hilda_veg_freq_path : str
153
+ Path to the HILDA vegetation frequency NetCDF file.
154
+ terrain_path : str
155
+ Path to the terrain NetCDF file (DEM derivatives).
156
+
157
+ Returns
158
+ -------
159
+ pd.DataFrame
160
+ A DataFrame with one row per catchment; besides `idx`, columns use abbreviations:
161
+ - CA, DCA, LCAS, LCC, LCG, LCT, LCS, LCHV, LCM, LCSV,
162
+ LCBS, LCSG, LCWB, DLC, COAR, SAND, SILT, CLAY, BULK,
163
+ ELEV, SLOP, CURV, ASP, HILL, VGF, VLF
164
+ """
165
+
166
+ base = catchments_gdf[["idx", "CA", "DCA"]].set_index("idx")
167
+
168
+ # --- Land cover
169
+ glc_dict = {
170
+ "artificial_surfaces": "mean",
171
+ "cropland": "mean",
172
+ "grassland": "mean",
173
+ "tree_covered": "mean",
174
+ "shrubs_covered": "mean",
175
+ "aquatic_herbaceous": "mean",
176
+ "mangroves": "mean",
177
+ "sparse_vegetation": "mean",
178
+ "bare_soil": "mean",
179
+ "snow_glaciers": "mean",
180
+ "waterbodies": "mean",
181
+ "dominant_class": "mode",
122
182
  }
123
183
 
124
- # Apply renaming
125
- features_df = pd.DataFrame([features]).rename(columns=rename_dict)
184
+ glc = compute_catchment_aggregate_multi_reservoir(
185
+ glc_share_path, catchments_gdf, glc_dict
186
+ )
187
+
188
+ hwsd = compute_catchment_aggregate_multi_reservoir(
189
+ hwsd2_path, catchments_gdf, "mean"
190
+ )
191
+
192
+ hilda = compute_catchment_aggregate_multi_reservoir(
193
+ hilda_veg_freq_path, catchments_gdf, "mean"
194
+ )
195
+
196
+ terrain = compute_catchment_aggregate_multi_reservoir(
197
+ terrain_path, catchments_gdf, "mean"
198
+ )
199
+
200
+ df = pd.concat([base, glc, hwsd, hilda, terrain], axis=1)
126
201
 
127
- return features_df
202
+ return df.rename(columns=RENAME_VARIABLE_DICT).reset_index()
@@ -1,9 +1,10 @@
1
1
  import pandas as pd
2
+ import geopandas as gpd
2
3
  from shapely.geometry import Point, Polygon
3
4
  import numpy as np
4
5
 
5
6
  # Import utils
6
- from reclaim.static_features.utils.flow_length import find_actual_flow_path
7
+ from reclaim.static_features.utils.flow_length import find_actual_flow_path, plot_flow_length_with_reservoir
7
8
  from reclaim.static_features.utils.area_perimeter import calculate_length_area_meters
8
9
  from reclaim.static_features.utils.aec_shape import concavity_index, mean_curvature, mean_slope
9
10
 
@@ -18,7 +19,8 @@ def reservoir_based_static_features(
18
19
  reservoir_polygon: Polygon = None,
19
20
  inlet_point: Point = None,
20
21
  resolution: float = None,
21
- aec_df: pd.DataFrame = None
22
+ aec_df: pd.DataFrame = None,
23
+ savepath_flowpath_fig: str = None,
22
24
  ) -> pd.DataFrame:
23
25
  """
24
26
  Compute reservoir-based features for RECLAIM input dataset.
@@ -47,6 +49,8 @@ def reservoir_based_static_features(
47
49
  Spatial resolution used in flow length calculations.
48
50
  aec_df : pd.DataFrame, optional
49
51
  Area-Elevation Curve dataframe with columns ['area', 'elevation'].
52
+ savepath_flowpath_fig : str, optional
53
+ Path where the flow path figure is saved. If None, the figure is not plotted.
50
54
 
51
55
  Returns
52
56
  -------
@@ -82,26 +86,51 @@ def reservoir_based_static_features(
82
86
  }
83
87
 
84
88
  # Area and Perimeter
85
- if reservoir_polygon is not None:
89
+ if reservoir_polygon is not None and not reservoir_polygon.is_empty:
86
90
  features["RP"], features["RA"] = calculate_length_area_meters(reservoir_polygon, area=True)
87
91
  features["RA"] = features["RA"] / 1e6 # m2 → km2
88
92
  features["RP"] = features["RP"] / 1e3 # m → km
93
+ else:
94
+ features["RP"] = np.nan
95
+ features["RA"] = np.nan
89
96
 
90
97
  # Flow Length
91
98
  dam_point = Point(lon, lat)
92
- if dam_point is not None and reservoir_polygon is not None:
93
- _, _, features["FL"], _ = (
94
- find_actual_flow_path(dam_point, reservoir_polygon, inlet_point, resolution)
95
- )
96
- if features["FL"]:
97
- features["FL"] = calculate_length_area_meters(features["FL"], area=False) / 1e3 # m → km
98
- else:
99
- features["FL"] = np.nan
99
+ if dam_point is not None and reservoir_polygon is not None and not reservoir_polygon.is_empty:
100
+ try:
101
+ simplified_reservoir, far_end_point, flow_path, _ = (
102
+ find_actual_flow_path(dam_point, reservoir_polygon, inlet_point, resolution)
103
+ )
104
+ if savepath_flowpath_fig is not None:
105
+ plot_flow_length_with_reservoir(
106
+ dam_point,
107
+ reservoir_polygon,
108
+ far_end_point,
109
+ flow_path,
110
+ simplified_reservoir,
111
+ savepath_flowpath_fig
112
+ )
113
+ if flow_path is not None:
114
+ gseries = gpd.GeoSeries([flow_path], crs="EPSG:4326")
115
+ gseries = gseries.to_crs(epsg=3395)
100
116
 
117
+ features["FL"] = gseries.length.iloc[0] / 1e3 # m → km
118
+ else:
119
+ features["FL"] = np.nan
120
+ except Exception as e:
121
+ print(f"Flow length calculation failed: {e}")
122
+ features["FL"] = np.nan
123
+ else:
124
+ features["FL"] = np.nan
125
+
101
126
  # AEC metrics
102
- if aec_df is not None:
127
+ if isinstance(aec_df, pd.DataFrame) and not aec_df.empty:
103
128
  features["AECS"] = mean_slope(aec_df)
104
129
  features["AECC"] = mean_curvature(aec_df)
105
130
  features["AECI"] = concavity_index(aec_df)
131
+ else:
132
+ features["AECS"] = np.nan
133
+ features["AECC"] = np.nan
134
+ features["AECI"] = np.nan
106
135
 
107
136
  return pd.DataFrame([features])
@@ -0,0 +1,78 @@
1
+ ID_to_BASIN = {1030008110: 'RUFIJI',
2
+ 1030011660: 'ZAMBEZI',
3
+ 1030012590: 'LIMPOPO',
4
+ 1030012600: 'GOURITZ',
5
+ 1030015850: 'ORANGE',
6
+ 1030022420: 'NIGER',
7
+ 1030022430: 'OUEME',
8
+ 1030023300: 'VOLTA',
9
+ 1030023310: 'BANDAMA',
10
+ 1030027430: 'DRAA',
11
+ 1030029810: 'MOULOUYA',
12
+ 1030034260: 'NILE',
13
+ 1030040260: 'LAKE TURKANA',
14
+ 1030040300: 'MELRHIR',
15
+ 4030003020: 'LIAO HE',
16
+ 4030006940: 'HAI HE',
17
+ 4030007850: 'YELLOW RIVER',
18
+ 4030009880: 'YANGTZE',
19
+ 4030017020: 'MEKONG',
20
+ 4030017030: 'CHAO PHRAYA',
21
+ 4030018350: 'MAE KLONG',
22
+ 4030022790: 'IRRAWADDY',
23
+ 4030024190: 'KARNAPHULI',
24
+ 4030025450: 'GANGES',
25
+ 4030025460: 'GODAVARI',
26
+ 4030029530: 'NARMADA',
27
+ 4030031750: 'KUTCH',
28
+ 4030033640: 'INDUS',
29
+ 4030039450: 'TONE',
30
+ 4030046370: 'TAIWAN',
31
+ 4030048900: 'SRI LANKA',
32
+ 4030050220: 'UPPER ARAL SEA',
33
+ 4030050240: 'LOWER ARAL SEA',
34
+ 5030007690: 'BENGAWAN SOLO',
35
+ 5030015660: 'KAPUAS',
36
+ 5030031240: 'INDONESIA',
37
+ 5030055010: 'MARIANA_ISLANDS',
38
+ 5030067860: 'FITZROY',
39
+ 2030000010: 'ORONTES',
40
+ 2030003440: 'KIZILIRMAK',
41
+ 2030008490: 'DANUBE',
42
+ 2030009230: 'MARITSA RIVER',
43
+ 2030012730: 'PO',
44
+ 2030014550: 'TEVERE',
45
+ 2030016230: 'RHONE',
46
+ 2030020320: 'RHINE',
47
+ 2030026030: 'VISTULA',
48
+ 2030046500: 'SICILY',
49
+ 2030047500: 'SARDINIA_CORSICA',
50
+ 2030068690: 'KURA',
51
+ 2030073570: 'SHATT AL ARAB',
52
+ 2030085720: 'DEAD SEA',
53
+ 7030000010: 'SANTIAGO',
54
+ 7030008710: 'COLORADO (also COLORADO RIVER)',
55
+ 7030008720: 'SAN JOAQUIN',
56
+ 7030014250: 'ROGUE',
57
+ 7030014930: 'COLUMBIA',
58
+ 7030022240: 'NELSON',
59
+ 7030034520: 'SAINT LAWRENCE (also SAINT-LAURENT)',
60
+ 7030038340: 'SUSQUEHANNA',
61
+ 7030042040: 'MOBILE RIVER',
62
+ 7030047060: 'MISSISSIPPI',
63
+ 7030047840: 'BRAZOS',
64
+ 7030049270: 'RIO GRANDE (also BRAVO)',
65
+ 7030049280: 'USUMACINTA',
66
+ 7030073620: 'GREAT SALT LAKE',
67
+ 6030000010: 'ATRATO',
68
+ 6030004470: 'ORINOCO',
69
+ 6030007000: 'AMAZON (also AMAZONAS)',
70
+ 6030009770: 'JAGUARIBE',
71
+ 6030011780: 'SAO FRANCISCO',
72
+ 6030011790: 'DOCE',
73
+ 6030016970: 'PARANA',
74
+ 6030029280: 'CHIRA',
75
+ 6030032290: 'GUAYAS'}
76
+
77
+ def get_basin_name(basin_id):
78
+ return ID_to_BASIN[basin_id]