pyreclaim 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reclaim/reclaim.py ADDED
@@ -0,0 +1,503 @@
+ import os
+ import warnings
+
+ import joblib
+ import numpy as np
+ import pandas as pd
+ from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error
+
+ # Model libraries
+ import xgboost as xgb
+ import lightgbm as lgb
+ from catboost import Pool, CatBoostRegressor
+
+
14
+ class Reclaim:
+     """
+     A stacked ensemble predictor for Sedimentation Rate (SR) that blends
+     XGBoost, LightGBM, and CatBoost base models with instance-wise weights.
+
+     Parameters
+     ----------
+     model : str, optional
+         Which model to use for predictions (default: "ensemble" of the base models).
+         One of "ensemble", "XGBoost", "LightGBM", or "CatBoost".
+     feature_order_list : list, optional
+         List of feature names in the order they should be used in the model.
+         Required when training data is passed as a NumPy array.
+     """
+     def __init__(self, model=None, feature_order_list=None):
+         self.feature_order_list = feature_order_list
+         self.xgb_model = None
+         self.lgb_model = None
+         self.cat_model = None
+         self.main_model = model if model in ['XGBoost', 'LightGBM', 'CatBoost'] else 'ensemble'
+         self.cat_features = None
+
36
+     def fit(self, X_train, y_train, weight_train=None, cat_features=None,
+             X_val=None, y_val=None, weight_val=None):
+         """
+         Train the stacked ensemble model.
+
+         Parameters
+         ----------
+         X_train : pd.DataFrame or np.array
+             Features for training the base models.
+         y_train : pd.Series or np.array
+             Target variable for training.
+         weight_train : pd.Series or np.array, optional
+             Sample weights for training base models.
+         cat_features : list, optional
+             List of categorical feature indices for CatBoost.
+         X_val : pd.DataFrame or np.array, optional
+             Validation features for early stopping.
+         y_val : pd.Series or np.array, optional
+             Validation target for early stopping.
+         weight_val : pd.Series or np.array, optional
+             Validation sample weights.
+         """
+         self.cat_features = cat_features
+
+         if isinstance(X_train, pd.DataFrame):
+             # Store the column order
+             self.feature_order_list = list(X_train.columns)
+         elif isinstance(X_train, np.ndarray):
+             if self.feature_order_list is None:
+                 raise ValueError(
+                     "X_train is a NumPy array without column names. "
+                     "Please provide 'feature_order_list' explicitly when creating the model instance."
+                 )
+
70
+         # ---- Train XGBoost ----
+         self.xgb_model = xgb.XGBRegressor(
+             n_estimators=800,
+             learning_rate=0.05,
+             tweedie_variance_power=1.5,
+             max_depth=5,
+             subsample=0.7,
+             colsample_bytree=0.7,
+             objective='reg:tweedie',
+             reg_alpha=70,
+             reg_lambda=30,
+             # objective='reg:squaredlogerror',  # robust for skewed targets
+             random_state=42,
+             n_jobs=-1
+         )
+         self.xgb_model.fit(
+             X_train, y_train,
+             sample_weight=weight_train,
+             eval_set=[(X_val, y_val)] if X_val is not None else None,
+             sample_weight_eval_set=[weight_val] if weight_val is not None else None,
+             verbose=False
+         )
+
93
+         # ---- Train LightGBM ----
+         train_data = lgb.Dataset(X_train, label=y_train, weight=weight_train)
+         val_data = lgb.Dataset(X_val, label=y_val, weight=weight_val, reference=train_data) if X_val is not None else None
+         self.lgb_model = lgb.train(
+             {
+                 'objective': 'tweedie',
+                 'tweedie_variance_power': 1.7,
+                 'metric': 'rmse',
+                 'learning_rate': 0.01,
+                 'num_leaves': 31,
+                 'feature_fraction': 0.7,
+                 'bagging_fraction': 0.7,
+                 'bagging_freq': 5,
+                 'seed': 42,
+                 'verbosity': -1,
+                 'lambda_l1': 70,
+                 'lambda_l2': 5,
+             },
+             train_data,
+             num_boost_round=2000,
+             valid_sets=[val_data] if val_data is not None else None
+         )
+
116
+         # ---- Train CatBoost ----
+         train_pool = Pool(
+             data=X_train,
+             label=y_train,
+             weight=weight_train,
+             cat_features=cat_features
+         )
+         val_pool = Pool(
+             data=X_val,
+             label=y_val,
+             weight=weight_val,
+             cat_features=cat_features
+         ) if X_val is not None else None
+
+         self.cat_model = CatBoostRegressor(
+             iterations=2000,
+             learning_rate=0.12,
+             depth=6,
+             objective='Huber:delta=12.0',
+             l2_leaf_reg=8,
+             random_seed=42,
+             eval_metric='MAE',
+             verbose=100
+         )
+         # Early stopping and best-model selection require a validation set
+         self.cat_model.fit(
+             train_pool,
+             eval_set=val_pool,
+             early_stopping_rounds=100 if val_pool is not None else None,
+             use_best_model=val_pool is not None
+         )
+
+
148
+     def predict(self, X, log_transform=True, dynamic_weight=True, threshold=30, sat_point=70, smooth_factor=0.2, return_weights=False):
+         """
+         Predict using a stacked ensemble with dynamic, instance-wise weights based on sigmoid scaling.
+
+         Weighting Rules
+         ---------------
+         1. Above threshold (CatBoost as reference):
+            - CatBoost weight fixed at 0.6
+            - XGBoost weight decays sigmoid-shaped from 0.15 → 0.05
+            - LightGBM weight grows sigmoid-shaped from 0.25 → 0.35
+            - Saturation occurs near `sat_point`
+         2. Below threshold:
+            - CatBoost weight grows sigmoid-shaped 0.30 → 0.55 near the threshold
+            - XGBoost weight decays sigmoid-shaped 0.45 → 0.25 farther below the threshold
+            - LightGBM weight grows sigmoid-shaped 0.25 → 0.30 near the threshold
+
+         Sigmoid scaling ensures smooth transitions instead of abrupt linear changes.
+
+         Parameters
+         ----------
+         X : pd.DataFrame or np.array
+             Features for prediction.
+         log_transform : bool
+             If True, apply log1p to stabilize high-value predictions.
+         dynamic_weight : bool
+             If True, use instance-wise weights based on the CatBoost prediction.
+         threshold : float
+             Threshold separating low/high predictions.
+         sat_point : float
+             Point where above-threshold weights saturate (~70).
+         smooth_factor : float
+             Controls the sharpness of the sigmoid transition.
+         return_weights : bool
+             If True (and dynamic_weight is True), also return the per-instance weights.
+
+         Returns
+         -------
+         np.array
+             Blended predictions.
+         pd.DataFrame, optional
+             Weights used for the three models (only when return_weights=True
+             and dynamic_weight=True).
+         """
188
+         if isinstance(X, pd.DataFrame):
+             if self.feature_order_list is not None:
+                 # Reorder columns automatically
+                 X = X[self.feature_order_list]
+         elif isinstance(X, np.ndarray):
+             warnings.warn(
+                 "Predicting with NumPy array: assumes column order matches training order. "
+                 "Safer to use DataFrame with feature names."
+             )
+
199
+         # Base model predictions
+         pred_xgb = self.xgb_model.predict(X)
+         pred_lgb = self.lgb_model.predict(X)
+         pred_cat = self.cat_model.predict(X)
+
+         if self.main_model == 'ensemble':
+
+             # Log-space transform if needed
+             if log_transform:
+                 pred_xgb = np.log1p(pred_xgb)
+                 pred_lgb = np.log1p(pred_lgb)
+                 pred_cat = np.log1p(pred_cat)
+                 threshold = np.log1p(threshold)
+                 sat_point = np.log1p(sat_point)
+
+             if dynamic_weight:
+                 blended_preds = []
+                 weights = []
+
+                 for px, pl, pc in zip(pred_xgb, pred_lgb, pred_cat):
+                     if pc >= threshold:
+                         # Above threshold: sigmoid scales XGB/LGB weights from threshold → sat_point
+                         sig = 1 / (1 + np.exp(-smooth_factor * (pc - threshold)))  # sigmoid at current pc
+                         sig_sat = 1 / (1 + np.exp(-smooth_factor * (sat_point - threshold)))  # sigmoid at saturation
+                         factor = (sig - 0.5) / (sig_sat - 0.5)  # scale so 0 → threshold, 1 → sat_point
+                         factor = np.clip(factor, 0, 1)
+
+                         w_cat = 0.6
+                         w_xgb = 0.15 - 0.10 * factor  # decays 0.15 → 0.05
+                         w_lgb = 0.25 + 0.10 * factor  # grows 0.25 → 0.35
+
+                     else:
+                         # Below threshold: sigmoid scales weights from 0 → threshold
+                         sig = 1 / (1 + np.exp(-smooth_factor * pc))  # raw sigmoid
+                         sig_min = 1 / (1 + np.exp(-smooth_factor * 0))  # sigmoid at 0
+                         sig_max = 1 / (1 + np.exp(-smooth_factor * threshold))  # sigmoid at threshold
+                         sig_scaled = (sig - sig_min) / (sig_max - sig_min)  # scale 0 → 1
+                         sig_scaled = np.clip(sig_scaled, 0, 1)
+
+                         w_cat = 0.30 + 0.25 * sig_scaled  # grows 0.30 → 0.55
+                         w_xgb = 0.45 - 0.20 * sig_scaled  # decays 0.45 → 0.25
+                         w_lgb = 0.25 + 0.05 * sig_scaled  # grows 0.25 → 0.30
+
+                     # Normalize weights
+                     total = w_cat + w_xgb + w_lgb
+                     w_cat, w_xgb, w_lgb = w_cat / total, w_xgb / total, w_lgb / total
+                     weights.append([w_xgb, w_lgb, w_cat])
+
+                     # Weighted prediction
+                     blended_preds.append(w_cat * pc + w_xgb * px + w_lgb * pl)
+
+                 weight_df = pd.DataFrame(weights, columns=['XGBoost', 'LightGBM', 'CatBoost'])
+                 pred_blend = np.array(blended_preds)
+
+             else:
+                 # Simple average
+                 pred_blend = (pred_xgb + pred_lgb + pred_cat) / 3
+
+             # Convert back from log-space if applied
+             if log_transform:
+                 pred_blend = np.expm1(pred_blend)
+
+             # Weights only exist on the dynamic path
+             if return_weights and dynamic_weight:
+                 return pred_blend, weight_df
+             else:
+                 return pred_blend
+
+         elif self.main_model == 'XGBoost':
+             return pred_xgb
+         elif self.main_model == 'LightGBM':
+             return pred_lgb
+         elif self.main_model == 'CatBoost':
+             return pred_cat
+         else:
+             return None
+
+
276
+     def evaluate(self, X, y_true):
+         """
+         Evaluate the ensemble model on a dataset.
+
+         Parameters
+         ----------
+         X : pd.DataFrame or np.array
+             Features for evaluation.
+         y_true : pd.Series or np.array
+             True target values.
+
+         Returns
+         -------
+         dict
+             Dictionary containing RMSE, MAE, and R2 metrics.
+         """
+         preds = self.predict(X)
+         rmse = root_mean_squared_error(y_true, preds)
+         mae = mean_absolute_error(y_true, preds)
+         r2 = r2_score(y_true, preds)
+         return {'RMSE': rmse, 'MAE': mae, 'R2': r2}
+
+     def eval_metrics(self, y_true, y_pred):
+         """Compute RMSE, MAE, and R2 for precomputed predictions."""
+         rmse = root_mean_squared_error(y_true, y_pred)
+         mae = mean_absolute_error(y_true, y_pred)
+         r2 = r2_score(y_true, y_pred)
+         return {'RMSE': rmse, 'MAE': mae, 'R2': r2}
+
304
+     def _extract_importance(self, model, model_name):
+         """Helper to get importance + feature names for any base model."""
+         if model is None:
+             raise ValueError(f"{model_name} model is not trained or assigned.")
+
+         if model_name == "xgb":
+             # XGBoost can be sklearn wrapper or Booster
+             try:
+                 imp = model.feature_importances_
+                 names = getattr(model, "feature_names_in_", np.arange(len(imp)))
+             except AttributeError:
+                 imp_dict = model.get_score(importance_type="weight")
+                 names, imp = zip(*imp_dict.items())
+                 imp = np.array(imp)
+         elif model_name == "lgb":
+             try:
+                 imp = model.feature_importance(importance_type="split")
+                 names = model.feature_name()
+             except AttributeError:
+                 imp = model.feature_importances_
+                 names = getattr(model, "feature_names_in_", np.arange(len(imp)))
+         elif model_name == "cat":
+             try:
+                 imp = model.get_feature_importance()
+                 names = model.feature_names_
+             except AttributeError:
+                 imp = model.feature_importances_
+                 names = getattr(model, "feature_names_in_", np.arange(len(imp)))
+         else:
+             raise ValueError(f"Unknown model name {model_name}")
+
+         return np.array(imp, dtype=float), np.array(names)
+
337
+     def get_feature_importance(self, model_name: str = "average", normalize: bool = True, percentage: bool = False, weights=None):
+         """
+         Get feature importance from base models or their weighted average.
+
+         Parameters
+         ----------
+         model_name : str, default="average"
+             - "average": return importance across all models (with a weighted-average column)
+             - "xgb", "lgb", "cat": return importance from that specific model
+         normalize : bool, default=True
+             Whether to normalize importances so they sum to 1 for each model
+             (before averaging in the "average" case).
+         percentage : bool, default=False
+             Whether to scale importances to percentages (0–100).
+         weights : list of float or None, default=None
+             Weights for ["xgb", "lgb", "cat"] when computing the average.
+             - If None, defaults to equal weights for the available models.
+             - Length must equal the number of models used.
+
+         Returns
+         -------
+         pd.DataFrame or pd.Series
+             - If model_name="average": DataFrame with each model plus a weighted-average column.
+             - If a specific model: Series with feature importances.
+         """
364
+         models = {
+             "xgb": self.xgb_model,
+             "lgb": self.lgb_model,
+             "cat": self.cat_model
+         }
+
+         if model_name == "average":
+             df_list = []
+             available_models = []
+             for name, model in models.items():
+                 if model is None:
+                     continue
+                 imp, names = self._extract_importance(model, name)
+                 imp_series = pd.Series(imp, index=names)
+
+                 if normalize or percentage:
+                     imp_series = imp_series / imp_series.sum()
+                     if percentage:
+                         imp_series = imp_series * 100
+
+                 df = pd.DataFrame({name: imp_series})
+                 df_list.append(df)
+                 available_models.append(name)
+
+             if not df_list:
+                 raise ValueError("No fitted models found with feature importance.")
+
+             # Merge on feature names
+             all_importances = pd.concat(df_list, axis=1).fillna(0)
+
+             # Handle weights
+             if weights is None:
+                 weights = [1.0] * len(available_models)
+             if len(weights) != len(available_models):
+                 raise ValueError(f"Length of weights ({len(weights)}) does not match number of available models ({len(available_models)}).")
+
+             weights = np.array(weights, dtype=float)
+             weights = weights / weights.sum()  # normalize weights to sum to 1
+
+             # Compute weighted average
+             all_importances["average"] = (all_importances[available_models] * weights).sum(axis=1)
+
+             # Sort by average importance
+             all_importances = all_importances.sort_values("average", ascending=False)
+
+             return all_importances
+
+         else:
+             if model_name not in models:
+                 raise ValueError(f"Invalid model_name '{model_name}'. Choose from 'xgb', 'lgb', 'cat', or 'average'.")
+             model = models[model_name]
+             if model is None:
+                 raise ValueError(f"{model_name} model is not trained or assigned.")
+             imp, names = self._extract_importance(model, model_name)
+             imp_series = pd.Series(imp, index=names, name=f"{model_name}_importance").sort_values(ascending=False)
+
+             if normalize or percentage:
+                 imp_series = imp_series / imp_series.sum()
+                 if percentage:
+                     imp_series = imp_series * 100
+
+             return imp_series
+
426
+     def save_model(self, save_dir: str = "models", prefix: str = "v1"):
+         """
+         Save trained models (XGBoost, LightGBM, CatBoost) and metadata.
+
+         Parameters
+         ----------
+         save_dir : str, default="models"
+             Directory to save the models.
+         prefix : str, default="v1"
+             Prefix for filenames.
+         """
+         os.makedirs(save_dir, exist_ok=True)
+
+         # Save XGBoost
+         if self.xgb_model is not None:
+             self.xgb_model.save_model(os.path.join(save_dir, f"{prefix}_xgb.json"))
+
+         # Save LightGBM
+         if self.lgb_model is not None:
+             self.lgb_model.save_model(os.path.join(save_dir, f"{prefix}_lgb.txt"))
+
+         # Save CatBoost
+         if self.cat_model is not None:
+             self.cat_model.save_model(os.path.join(save_dir, f"{prefix}_cat.cbm"))
+
+         # Save metadata (primary model, cat_features, feature order)
+         metadata = {
+             "main_model": self.main_model,
+             "cat_features": self.cat_features,
+             "feature_order_list": self.feature_order_list
+         }
+         joblib.dump(metadata, os.path.join(save_dir, f"{prefix}_meta.pkl"))
+
+         print(f"Models saved successfully in '{save_dir}'")
+
461
+     def load_model(self, load_dir: str = None, prefix: str = "reclaim"):
+         """
+         Load trained models (XGBoost, LightGBM, CatBoost) and metadata.
+
+         Parameters
+         ----------
+         load_dir : str, optional
+             Directory where models are stored.
+             If None, defaults to the installed package's `pretrained_model` folder.
+         prefix : str, default="reclaim"
+             Prefix for filenames.
+         """
+         if load_dir is None:
+             # Default: look inside the package directory
+             package_dir = os.path.dirname(__file__)  # folder of this file
+             load_dir = os.path.join(package_dir, "pretrained_model")
+
+         # Load XGBoost
+         xgb_path = os.path.join(load_dir, f"{prefix}_xgb.json")
+         if os.path.exists(xgb_path):
+             self.xgb_model = xgb.XGBRegressor()
+             self.xgb_model.load_model(xgb_path)
+
+         # Load LightGBM
+         lgb_path = os.path.join(load_dir, f"{prefix}_lgb.txt")
+         if os.path.exists(lgb_path):
+             self.lgb_model = lgb.Booster(model_file=lgb_path)
+
+         # Load CatBoost
+         cat_path = os.path.join(load_dir, f"{prefix}_cat.cbm")
+         if os.path.exists(cat_path):
+             self.cat_model = CatBoostRegressor()
+             self.cat_model.load_model(cat_path)
+
+         # Load metadata
+         meta_path = os.path.join(load_dir, f"{prefix}_meta.pkl")
+         if os.path.exists(meta_path):
+             metadata = joblib.load(meta_path)
+             self.main_model = metadata.get("main_model", "ensemble")
+             self.cat_features = metadata.get("cat_features", None)
+             self.feature_order_list = metadata.get("feature_order_list", None)
+
+         print(f"Models loaded successfully from '{load_dir}'")
@@ -0,0 +1 @@
+ """Static features for RECLAIM :no-index:"""
@@ -0,0 +1,127 @@
+ import pandas as pd
+
+ from reclaim.static_features.utils.catchment_agreggate import compute_catchment_aggregate
+
+ def catchment_based_static_features(
+     ca: float,
+     dca: float,
+     catchment_geometry,
+     glc_share_path: str,
+     hwsd2_path: str,
+     hilda_veg_freq_path: str,
+     terrain_path: str,
+ ) -> pd.DataFrame:
14
+ """
15
+ Compute catchment-based features for a reservoir.
16
+
17
+ Parameters
18
+ ----------
19
+ ca : float
20
+ Catchment Area (sq km).
21
+ dca : float
22
+ Differential Catchment Area (sq km).
23
+ catchment_geometry : shapely.geometry.Polygon or GeoSeries
24
+ Catchment polygon.
25
+ glc_share_path : str
26
+ Path to the GLC-Share NetCDF file (land cover fractions).
27
+ hwsd2_path : str
28
+ Path to the HWSD2 NetCDF file (soil composition).
29
+ hilda_veg_freq_path : str
30
+ Path to the HILDA vegetation frequency NetCDF file.
31
+ terrain_path : str
32
+ Path to the terrain NetCDF file (DEM derivatives).
33
+
34
+ Returns
35
+ -------
36
+ pd.DataFrame
37
+ A single-row DataFrame with abbreviations as columns:
38
+ - CA, DCA, LCAS, LCC, LCG, LCT, LCS, LCHV, LCM, LCSV,
39
+ LCBS, LCSG, LCWB, DLC, COAR, SAND, SILT, CLAY, BULK,
40
+ ELEV, SLOP, CURV, ASP, HILL, VGF, VLF
41
+
42
+ """
43
+
44
+     features = {"CA": ca, "DCA": dca}
+
+     # ---- Land cover (GLC-Share)
+     glc_dict = {
+         "artificial_surfaces": "mean",
+         "cropland": "mean",
+         "grassland": "mean",
+         "tree_covered": "mean",
+         "shrubs_covered": "mean",
+         "aquatic_herbaceous": "mean",
+         "mangroves": "mean",
+         "sparse_vegetation": "mean",
+         "bare_soil": "mean",
+         "snow_glaciers": "mean",
+         "waterbodies": "mean",
+         "dominant_class": "mode",
+     }
+     glc_df = compute_catchment_aggregate(
+         netcdf_path=glc_share_path,
+         catchment_geometry=catchment_geometry,
+         function_type=glc_dict,
+     )
+
+     # ---- Soil composition (HWSD2)
+     hwsd_df = compute_catchment_aggregate(
+         netcdf_path=hwsd2_path,
+         catchment_geometry=catchment_geometry,
+         function_type="mean",
+     )
+
+     # ---- HILDA vegetation frequency
+     hilda_df = compute_catchment_aggregate(
+         netcdf_path=hilda_veg_freq_path,
+         catchment_geometry=catchment_geometry,
+         function_type="mean",
+     )
+
+     # ---- Terrain
+     terrain_df = compute_catchment_aggregate(
+         netcdf_path=terrain_path,
+         catchment_geometry=catchment_geometry,
+         function_type="mean",
+     )
+
+     # Merge everything
+     merged = pd.concat([glc_df, hwsd_df, hilda_df, terrain_df], axis=1)
+     features.update(merged.to_dict(orient="records")[0])
+
92
+     # ---- Rename columns to abbreviations
+     rename_dict = {
+         # Land cover
+         "artificial_surfaces_mean": "LCAS",
+         "cropland_mean": "LCC",
+         "grassland_mean": "LCG",
+         "tree_covered_mean": "LCT",
+         "shrubs_covered_mean": "LCS",
+         "aquatic_herbaceous_mean": "LCHV",
+         "mangroves_mean": "LCM",
+         "sparse_vegetation_mean": "LCSV",
+         "bare_soil_mean": "LCBS",
+         "snow_glaciers_mean": "LCSG",
+         "waterbodies_mean": "LCWB",
+         "dominant_class_mode": "DLC",
+         # Soil
+         "COARSE_mean": "COAR",
+         "SAND_mean": "SAND",
+         "SILT_mean": "SILT",
+         "CLAY_mean": "CLAY",
+         "BULK_mean": "BULK",
+         # Terrain
+         "elevation_mean": "ELEV",
+         "slope_mean": "SLOP",
+         "curvature_mean": "CURV",
+         "aspect_mean": "ASP",
+         "hillshade_mean": "HILL",
+         # HILDA vegetation frequency
+         "vegetation_gain_frequency_mean": "VGF",
+         "vegetation_loss_frequency_mean": "VLF",
+     }
+
+     # Apply renaming
+     features_df = pd.DataFrame([features]).rename(columns=rename_dict)
+
+     return features_df
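Similarly, a minimal call sketch for the catchment feature builder (not part of the diff). The NetCDF paths and polygon coordinates are placeholders, and the import is an assumption, since this hunk's file name is not shown in the diff:

    from shapely.geometry import Polygon
    # Import path assumed; the hunk above does not name the file it belongs to.
    from reclaim.static_features import catchment_based_static_features

    catchment = Polygon([(77.0, 12.0), (77.5, 12.0), (77.5, 12.5), (77.0, 12.5)])
    features_df = catchment_based_static_features(
        ca=1250.0,   # Catchment Area, sq km
        dca=300.0,   # Differential Catchment Area, sq km
        catchment_geometry=catchment,
        glc_share_path="data/glc_share.nc",  # placeholder paths
        hwsd2_path="data/hwsd2.nc",
        hilda_veg_freq_path="data/hilda_veg_freq.nc",
        terrain_path="data/terrain.nc",
    )
    print(features_df.T)  # one row with CA, DCA, LCAS, ..., VGF, VLF

The resulting single-row DataFrame uses the abbreviated column names listed in the docstring, matching the feature names the Reclaim model expects.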