pyreclaim 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
+ """Feature engineering and transformation for RECLAIM :no-index:"""
@@ -0,0 +1,81 @@
+ import pandas as pd
+ import numpy as np
+
+ def engineer_and_transform_features(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Engineer and transform features in a reservoir/catchment dataset.
+
+     Features are first engineered in raw (linear) space, then log transformations
+     are applied in a single pass to avoid double-logging.
+
+     Log-transformed columns are prefixed with ``log_`` to clearly indicate their state.
+
+     Required input columns (abbreviations):
+     - CA, DCA, OBC, HGT, RA, RP, FL
+     - SA_mean, SA_mean_clip, SA_std, SA_kurt
+     - PAI, MAI, MAO, I_std, O_std, MAR
+     - OEY, BY, VGF, VLF
+     - Land cover: LCAS, LCC, LCG, LCT, LCS, LCHV, LCM, LCSV, LCBS, LCSG, LCWB
+     - COAR, SAND, NSSC2_mean
+     """
+
+     # Work on a copy so the caller's DataFrame is not mutated
+     df = df.copy()
+
+     # Ensure required columns exist
+     required_cols = ['CA', 'DCA', 'OBC', 'HGT', 'RA', 'RP', 'FL',
+                      'SA_mean', 'SA_mean_clip', 'SA_std', 'SA_kurt',
+                      'PAI', 'MAI', 'MAO', 'I_std', 'O_std', 'MAR',
+                      'OEY', 'BY', 'VGF', 'VLF',
+                      'LCAS', 'LCC', 'LCG', 'LCT', 'LCS', 'LCHV', 'LCM', 'LCSV', 'LCBS', 'LCSG', 'LCWB',
+                      'COAR', 'SAND', 'NSSC2_mean']
+     for col in required_cols:
+         if col not in df.columns:
+             df[col] = np.nan
+
+     # -------------------------
+     # ENGINEER RAW FEATURES
+     # -------------------------
+     # Mean annual inflow volume (million m³/year) relative to built capacity (million m³)
+     inflow_cap_ratio = (df['MAI'] * 3600 * 24 * 365.25 / 1e6) / df['OBC']
+
+     feature_dict = {
+         "AGE": df["OEY"] - df["BY"],
+         "ROBC": df["OBC"] / df["CA"],
+         "NVGF": df["VGF"] - df["VLF"],
+         "GC": df["RA"] / (df["RP"] ** 2),
+         "rain_per_area": np.where(df["CA"] != 0, df["MAR"] / df["CA"], df["MAR"]),
+         "R_tree_bare": np.where(df["LCBS"] != 0, df["LCT"] / df["LCBS"], df["LCT"]),
+         "R_shrub_bare": np.where(df["LCBS"] != 0, df["LCS"] / df["LCBS"], df["LCS"]),
+         "R_coarse_sand": df["COAR"] / df["SAND"],
+         "RT": df["OBC"] * 1e6 / (df["MAI"] * 3600 * 24 * 365.25),
+         "TE": np.exp(-0.0079 * inflow_cap_ratio) * 100,
+         "ECLR": np.exp(-0.0079 * inflow_cap_ratio) * 100 * df["NSSC2_mean"] * inflow_cap_ratio,
+         "ESR": np.exp(-0.0079 * inflow_cap_ratio) * 100 * df["NSSC2_mean"] * inflow_cap_ratio * df["OBC"] / 100,
+         "rel_SA_mean_clip": df["SA_mean_clip"] / df["RA"],
+         "R_SA_cap": df["SA_mean_clip"] / df["OBC"],
+         "SIN": df["MAI"] * df["NSSC2_mean"],
+         "SOUT": df["MAO"] * df["NSSC2_mean"],
+     }
+
+     # Land cover: convert percentage shares to absolute areas.
+     # Overwrite the columns in place (rather than adding them to feature_dict)
+     # so the concat below does not create duplicate column labels.
+     lc_cols = ['LCAS', 'LCC', 'LCG', 'LCT', 'LCS', 'LCHV', 'LCM', 'LCSV', 'LCBS', 'LCSG', 'LCWB']
+     for col in lc_cols:
+         df[col] = df["CA"] * df[col] / 100
+
+     df = pd.concat([df, pd.DataFrame(feature_dict, index=df.index)], axis=1)
+
+     # -------------------------
+     # APPLY LOG TRANSFORMATIONS
+     # -------------------------
+     log_candidates = ['CA', 'DCA', 'OBC', 'HGT', 'RA', 'RP', 'FL',
+                       'SA_mean', 'SA_mean_clip', 'SA_std', 'SA_kurt', 'PAI', 'MAI', 'MAO', 'I_std', 'O_std', 'MAR',
+                       'rain_per_area', 'GC', 'TE', 'ECLR', 'SIN', 'SOUT'] + lc_cols
+
+     for col in log_candidates:
+         log_col = f'log_{col}'  # 'log_' prefix marks transformed columns and avoids double-logging
+         df[log_col] = np.log(df[col].clip(lower=1e-15))
+
+     return df
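
For orientation, a minimal usage sketch (not itself part of the packaged files), assuming engineer_and_transform_features has been imported from the installed package; the input values are illustrative, and any required column left out is filled with NaN by the function itself:

    import pandas as pd

    # Hypothetical single-reservoir input; omitted required columns become NaN.
    # Units are assumed from the formulas above (OBC in million m³, MAI in m³/s).
    raw = pd.DataFrame([{
        "CA": 1200.0,             # catchment area
        "OBC": 450.0,             # original built capacity
        "MAI": 35.0,              # mean annual inflow
        "OEY": 2015, "BY": 1960,  # observation end year, build year
    }])

    features = engineer_and_transform_features(raw)
    print(features[["AGE", "RT", "log_CA"]])  # engineered and log-prefixed columns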
@@ -0,0 +1 @@
+ """Dynamic features for RECLAIM :no-index:"""
@@ -0,0 +1,106 @@
+ import pandas as pd
+ import numpy as np
+ from typing import Dict, Sequence
+
+ from reclaim.dynamic_features.utils.rainfall import (
+     mean_annual_rainfall_mm,
+     mean_annual_rainy_days,
+ )
+ from reclaim.dynamic_features.utils.statistical_metrics import (
+     annual_mean,
+     annual_std,
+     coefficient_of_variation,
+     skewness,
+     kurtosis_val,
+ )
+ from reclaim.dynamic_features.utils.ts_aggregate import compute_ts_aggregate
+
+
+ def catchment_based_dynamic_features(
+     variable_info: Dict[str, Dict[str, str]],
+     observation_period: Sequence[int],
+ ) -> pd.DataFrame:
+     """
+     Compute dynamic catchment-based features for a single reservoir's catchment,
+     using precipitation, temperature, and wind speed time series.
+
+     Required time series keys (case-sensitive):
+
+     - "precip": Daily precipitation in mm
+     - "tmin": Daily minimum temperature in °C
+     - "tmax": Daily maximum temperature in °C
+     - "wind": Daily wind speed in m/s
+
+     Parameters
+     ----------
+     variable_info : dict
+         Dictionary of input series metadata.
+         Each key corresponds to a variable (precip, tmin, tmax, wind).
+         Each value is a dict with the following structure::
+
+             {
+                 "path": str,
+                 "time_column": str,
+                 "data_column": str
+             }
+
+     observation_period : sequence[int]
+         Two-element sequence [OSY, OEY] specifying the observation period to clip the series to.
+
+     Returns
+     -------
+     pd.DataFrame
+         A one-row DataFrame containing the computed catchment-based features.
+
+     Notes
+     -----
+     - Precipitation features are reported as mm/year (MAR) and day counts (rainy days).
+     - Wind statistics include mean, std, CV, skewness, and kurtosis.
+     - Temperature features are simple annual means (°C).
+     """
+
+     variable_features = {
+         "precip": {
+             "MAR": mean_annual_rainfall_mm,
+             "#_rain_above_10": lambda ts: mean_annual_rainy_days(ts, threshold=10.0),
+             "#_rain_above_50": lambda ts: mean_annual_rainy_days(ts, threshold=50.0),
+             "#_rain_above_100": lambda ts: mean_annual_rainy_days(ts, threshold=100.0),
+         },
+         "tmin": {
+             "tmin_mean": annual_mean,
+         },
+         "tmax": {
+             "tmax_mean": annual_mean,
+         },
+         "wind": {
+             "wind_mean": annual_mean,
+             "wind_std": annual_std,
+             "wind_cv": coefficient_of_variation,
+             "wind_skew": skewness,
+             "wind_kurt": kurtosis_val,
+         },
+     }
+
+     results = {}
+
+     for var, feat_dict in variable_features.items():
+         if var not in variable_info:
+             # Fill with NaN if the variable was not provided
+             for feat in feat_dict.keys():
+                 results[feat] = np.nan
+             continue
+
+         path = variable_info[var]["path"]
+         time_col = variable_info[var]["time_column"]
+         data_col = variable_info[var]["data_column"]
+
+         for feat, func in feat_dict.items():
+             try:
+                 df_feat = compute_ts_aggregate(
+                     path, time_col, data_col, func, feat, observation_period
+                 )
+                 results[feat] = df_feat.iloc[0, 0]  # extract scalar
+             except Exception:
+                 results[feat] = np.nan
+
+     return pd.DataFrame([results])
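
A sketch of how catchment_based_dynamic_features might be called; the paths and column names are placeholders rather than files shipped with the package:

    variable_info = {
        "precip": {"path": "data/precip.csv", "time_column": "date", "data_column": "precip_mm"},
        "wind":   {"path": "data/wind.csv",   "time_column": "date", "data_column": "wind_ms"},
        # "tmin" and "tmax" omitted on purpose: their features come back as NaN
    }

    features = catchment_based_dynamic_features(variable_info, observation_period=[1990, 2020])
    print(features.T)  # one row: MAR, rainy-day counts, wind statistics; tmin/tmax NaN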
@@ -0,0 +1,147 @@
+ import pandas as pd
+ import numpy as np
+ from typing import Dict, Sequence
+
+ from reclaim.dynamic_features.utils.statistical_metrics import (
+     annual_mean,
+     annual_std,
+     skewness,
+     kurtosis_val,
+     coefficient_of_variation,
+     max_days_above_90th,
+     max_annual_persistence,
+ )
+ from reclaim.dynamic_features.utils.inflow_outflow import (
+     mean_annual_flow_m3_per_s,
+     mean_annual_flow_std_m3_per_s,
+     max_annual_flow_m3_per_s,
+     mean_annual_flow_variability,
+ )
+ from reclaim.dynamic_features.utils.ts_aggregate import compute_ts_aggregate
+
+
+ def reservoir_based_dynamic_features(
+     variable_info: Dict[str, Dict[str, str]],
+     observation_period: Sequence[int],
+ ) -> pd.DataFrame:
+     """
+     Compute dynamic reservoir features for a single reservoir using inflow, outflow,
+     surface area, evaporation, and sediment-related time series.
+
+     Required time series keys (case-sensitive):
+
+     - ``inflow``: Daily inflow in m³/day
+     - ``outflow``: Daily outflow in m³/day
+     - ``evaporation``: Daily evaporation in mm/day
+     - ``surface_area``: Reservoir surface area in km²
+     - ``nssc``: Normalized suspended sediment concentration variant 1 (red/green) (dimensionless)
+     - ``nssc2``: Normalized suspended sediment concentration variant 2 (near-infrared/red) (dimensionless)
+
+     Parameters
+     ----------
+     variable_info : dict
+         Dictionary of input series metadata.
+         Each key corresponds to a variable (``inflow``, ``outflow``, ``evaporation``, ``surface_area``, ``nssc``, ``nssc2``).
+         Each value is a dict with the following structure::
+
+             {
+                 "path": str,          # Path to the CSV file
+                 "time_column": str,   # Name of the datetime column
+                 "data_column": str    # Name of the variable column
+             }
+
+     Example::
+
+         {
+             "inflow": {"path": "data/inflow.csv", "time_column": "date", "data_column": "inflow (m3/d)"},
+             "outflow": {"path": "data/outflow.csv", "time_column": "date", "data_column": "outflow (m3/d)"}
+         }
+
+     observation_period : sequence[int]
+         Two-element sequence [OSY, OEY] specifying the observation period to clip the series to.
+
+     Returns
+     -------
+     pd.DataFrame
+         A one-row DataFrame containing the computed reservoir dynamic features.
+         Missing variables in ``variable_info`` will result in NaN values for their features.
+
+     Notes
+     -----
+     - All inflow/outflow metrics are converted to m³/s internally.
+     - Surface area statistics are reported both for the full record and the clipped period.
+     - NSSC statistics are dimensionless.
+     """
+
+     # Define which features depend on which variable
+     variable_features = {
+         "inflow": {
+             "MAI": mean_annual_flow_m3_per_s,
+             "PAI": max_annual_flow_m3_per_s,
+             "I_cv": mean_annual_flow_variability,
+             "I_std": mean_annual_flow_std_m3_per_s,
+             "I_above_90": max_days_above_90th,
+             "I_max_persis": max_annual_persistence,
+         },
+         "outflow": {
+             "MAO": mean_annual_flow_m3_per_s,
+             "O_std": mean_annual_flow_std_m3_per_s,
+             "O_cv": mean_annual_flow_variability,
+         },
+         "evaporation": {
+             "E_mean": annual_mean,
+             "E_std": annual_std,
+         },
+         "surface_area": {
+             "SA_mean": annual_mean,
+             "SA_std": annual_std,
+             "SA_cv": coefficient_of_variation,
+             "SA_skew": skewness,
+             "SA_kurt": kurtosis_val,
+             "SA_mean_clip": annual_mean,
+             "SA_above_90": max_days_above_90th,
+         },
+         "nssc": {
+             "NSSC1_mean": annual_mean,
+             "NSSC1_std": annual_std,
+             "NSSC1_cv": coefficient_of_variation,
+             "NSSC1_skew": skewness,
+             "NSSC1_kurt": kurtosis_val,
+         },
+         "nssc2": {
+             "NSSC2_mean": annual_mean,
+             "NSSC2_above_90": max_days_above_90th,
+             "NSSC2_max_persis": max_annual_persistence,
+         },
+     }
+
+     results = {}
+
+     # Loop through required variables
+     for var, feat_dict in variable_features.items():
+         if var not in variable_info:
+             # Fill with NaN if variable not provided
+             for feat in feat_dict.keys():
+                 results[feat] = np.nan
+             continue
+
+         path = variable_info[var]["path"]
+         time_col = variable_info[var]["time_column"]
+         data_col = variable_info[var]["data_column"]
+
+         # Some features are clipped to the observation period; others use the full record
+         for feat, func in feat_dict.items():
+             if var == "surface_area" and feat in ["SA_mean", "SA_std", "SA_cv", "SA_skew", "SA_kurt"]:
+                 obs_period = None  # full record
+             else:
+                 obs_period = observation_period
+
+             try:
+                 df_feat = compute_ts_aggregate(
+                     path, time_col, data_col, func, feat, obs_period
+                 )
+                 results[feat] = df_feat.iloc[0, 0]  # single value
+             except Exception:
+                 results[feat] = np.nan
+
+     return pd.DataFrame([results])
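
Because both builders return one-row DataFrames, a complete dynamic feature vector for a reservoir can be assembled by column-wise concatenation; res_info and cat_info below are hypothetical variable_info dicts of the documented shape:

    import pandas as pd

    res = reservoir_based_dynamic_features(res_info, observation_period=[1990, 2020])
    cat = catchment_based_dynamic_features(cat_info, observation_period=[1990, 2020])
    dynamic_features = pd.concat([res, cat], axis=1)  # one row, all dynamic features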
@@ -0,0 +1 @@
+ """Utility functions to generate dynamic features for RECLAIM input dataset :no-index:"""
@@ -0,0 +1,96 @@
+ import pandas as pd
+
+ SECONDS_PER_DAY = 24 * 3600
+ DAYS_PER_YEAR = 365.25
+ SECONDS_PER_YEAR = SECONDS_PER_DAY * DAYS_PER_YEAR
+
+ def mean_annual_flow_m3_per_s(ts: pd.Series) -> float:
+     """
+     Computes the mean annual flow in m³/s from a time series of daily flow in m³/day.
+
+     Parameters
+     ----------
+     ts : pd.Series
+         Time series of daily flow values in m³/day, indexed by datetime.
+
+     Returns
+     -------
+     float
+         Mean annual flow in m³/s.
+     """
+     if ts.empty:
+         return float('nan')
+
+     # Annual flow volumes (m³/year), averaged across years, then converted to m³/s
+     annual_totals = ts.groupby(ts.index.year).sum()
+     mean_annual = annual_totals.mean()
+     return mean_annual / SECONDS_PER_YEAR
+
+
+ def mean_annual_flow_std_m3_per_s(ts: pd.Series) -> float:
+     """
+     Computes the mean annual standard deviation of daily flow, expressed in m³/s.
+
+     Parameters
+     ----------
+     ts : pd.Series
+         Time series of daily flow values in m³/day, indexed by datetime.
+
+     Returns
+     -------
+     float
+         Mean annual standard deviation of flow in m³/s.
+     """
+     if ts.empty:
+         return float('nan')
+
+     annual_std = ts.groupby(ts.index.year).std()
+     annual_std_m3_per_s = annual_std / SECONDS_PER_DAY  # m³/day -> m³/s
+     return annual_std_m3_per_s.mean()
+
+
+ def max_annual_flow_m3_per_s(ts: pd.Series) -> float:
+     """
+     Computes the maximum annual mean flow in m³/s from a daily flow series,
+     i.e. the largest annual flow volume expressed as a mean rate.
+
+     Parameters
+     ----------
+     ts : pd.Series
+         Time series of daily flow values in m³/day, indexed by datetime.
+
+     Returns
+     -------
+     float
+         Maximum annual mean flow in m³/s.
+     """
+     if ts.empty:
+         return float('nan')
+
+     annual_totals = ts.groupby(ts.index.year).sum()
+     max_annual = annual_totals.max()
+     return max_annual / SECONDS_PER_YEAR
+
+
+ def mean_annual_flow_variability(ts: pd.Series) -> float:
+     """
+     Computes the mean annual variability (coefficient of variation) of daily flow.
+
+     CV = std / mean within each year.
+
+     Parameters
+     ----------
+     ts : pd.Series
+         Time series of daily flow values in m³/day, indexed by datetime.
+
+     Returns
+     -------
+     float
+         Mean coefficient of variation across all years (unitless).
+     """
+     if ts.empty:
+         return float('nan')
+
+     annual_stats = ts.groupby(ts.index.year).agg(['mean', 'std'])
+     annual_stats['cv'] = annual_stats['std'] / annual_stats['mean']
+     return annual_stats['cv'].mean()
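
As a quick sanity check on the unit handling, a constant flow of 86,400 m³/day should read as 1 m³/s; the small residual comes from dividing calendar-year totals by the fixed 365.25-day year:

    import pandas as pd

    idx = pd.date_range("2000-01-01", "2001-12-31", freq="D")
    ts = pd.Series(24 * 3600.0, index=idx)  # 86,400 m³/day == 1 m³/s

    print(mean_annual_flow_m3_per_s(ts))  # ≈ 1.0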
@@ -0,0 +1,49 @@
+ import pandas as pd
+
+ def mean_annual_rainfall_mm(ts: pd.Series) -> float:
+     """
+     Calculates the mean annual rainfall in mm from a time series of daily rainfall in mm.
+
+     Parameters
+     ----------
+     ts : pd.Series
+         Time series of daily rainfall values in mm, indexed by datetime.
+
+     Returns
+     -------
+     float
+         Mean annual rainfall in mm.
+     """
+     if ts.empty:
+         return float('nan')
+
+     # Total rainfall for each year (mm/year)
+     annual_totals_mm = ts.groupby(ts.index.year).sum()
+
+     # Return mean annual rainfall (mm/year)
+     return annual_totals_mm.mean()
+
+ def mean_annual_rainy_days(ts: pd.Series, threshold: float = 100.0) -> float:
+     """
+     Calculates the mean annual number of days on which daily rainfall exceeds a threshold.
+
+     Parameters
+     ----------
+     ts : pd.Series
+         Time series of daily rainfall values in mm, indexed by datetime.
+     threshold : float, optional
+         Rainfall threshold in mm defining a "rainy day" (default is 100 mm).
+
+     Returns
+     -------
+     float
+         Mean annual number of days exceeding the threshold.
+     """
+     if ts.empty:
+         return float('nan')
+
+     # Count days above the threshold for each year
+     rainy_days_per_year = ts.groupby(ts.index.year).apply(lambda x: (x > threshold).sum())
+
+     # Return the mean number of rainy days across years
+     return rainy_days_per_year.mean()
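
A short self-contained example with synthetic rainfall (gamma-distributed daily totals are only a convenient stand-in for real data):

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    idx = pd.date_range("2010-01-01", "2019-12-31", freq="D")
    rain = pd.Series(rng.gamma(shape=0.4, scale=8.0, size=len(idx)), index=idx)

    print(mean_annual_rainfall_mm(rain))               # mean total, mm/year
    print(mean_annual_rainy_days(rain, threshold=10))  # mean days/year above 10 mm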
@@ -0,0 +1,193 @@
+ import pandas as pd
+ import numpy as np
+
+ from scipy.stats import skew, kurtosis
+
+ def annual_mean(ts: pd.Series) -> float:
+     """
+     Calculates the mean of annual means from a time series.
+     The annual mean is computed for each year from daily values.
+
+     Parameters
+     ----------
+     ts : pd.Series
+         Time series of daily values, indexed by datetime.
+
+     Returns
+     -------
+     float
+         Mean of the annual mean values across all years.
+     """
+     if ts.empty:
+         return float('nan')
+
+     # Group by year and calculate the mean for each year
+     annual_means = ts.groupby(ts.index.year).mean()
+
+     # Return the mean of these annual means
+     return annual_means.mean()
+
+ def annual_std(ts: pd.Series) -> float:
+     """
+     Calculates the mean annual standard deviation from a time series.
+     The standard deviation is computed for each year from daily values.
+
+     Parameters
+     ----------
+     ts : pd.Series
+         Time series of daily values, indexed by datetime.
+
+     Returns
+     -------
+     float
+         Mean standard deviation across all years.
+     """
+     if ts.empty:
+         return float('nan')
+
+     # Group by year and compute the standard deviation for each year
+     annual_std_values = ts.groupby(ts.index.year).std()
+
+     # Return the mean standard deviation across years
+     return annual_std_values.mean()
+
+ # Skewness
+ def skewness(ts: pd.Series) -> float:
+     """
+     Calculates the skewness of the given time series.
+
+     Parameters
+     ----------
+     ts : pd.Series
+         Time series, indexed by datetime.
+
+     Returns
+     -------
+     float
+         Skewness of the time series (unitless).
+     """
+     if ts.empty:
+         return float('nan')
+     return skew(ts.dropna())
+
+ # Kurtosis
+ def kurtosis_val(ts: pd.Series) -> float:
+     """
+     Calculates the kurtosis of the given time series.
+
+     Parameters
+     ----------
+     ts : pd.Series
+         Time series, indexed by datetime.
+
+     Returns
+     -------
+     float
+         Kurtosis of the time series (Fisher's excess kurtosis, unitless).
+     """
+     if ts.empty:
+         return float('nan')
+     return kurtosis(ts.dropna(), fisher=True)
+
+ # Coefficient of variation
+ def coefficient_of_variation(ts: pd.Series) -> float:
+     """
+     Calculates the coefficient of variation (CV) of the given time series.
+
+     Parameters
+     ----------
+     ts : pd.Series
+         Time series, indexed by datetime.
+
+     Returns
+     -------
+     float
+         Coefficient of variation (std/mean, unitless).
+     """
+     if ts.empty:
+         return float('nan')
+     mean_val = ts.mean()
+     if mean_val == 0:
+         return float('nan')
+     return ts.std() / mean_val
+
+
+ def max_days_above_90th(ts: pd.Series) -> float:
+     """
+     Calculates the maximum number of days per year on which the daily values
+     exceed the 90th percentile threshold (computed over the entire time series).
+
+     Parameters
+     ----------
+     ts : pd.Series
+         Time series of daily values, indexed by datetime.
+
+     Returns
+     -------
+     float
+         Maximum count of days above the 90th percentile across years.
+     """
+     if ts.empty:
+         return float('nan')
+
+     # Compute the global 90th percentile threshold
+     threshold = np.nanpercentile(ts, 90)
+
+     # Boolean series: True if value > threshold
+     above_threshold = ts > threshold
+
+     # Count exceedances per year
+     annual_counts = above_threshold.groupby(ts.index.year).sum()
+
+     # Return the maximum count across years
+     return float(annual_counts.max()) if not annual_counts.empty else float('nan')
+
+ def max_annual_persistence(timeseries: pd.Series, threshold: float = 1 / np.e, min_periods: int = 30) -> float:
+     """
+     Compute the persistence (decorrelation time) of a time series year by year,
+     returning the maximum across years.
+
+     Parameters
+     ----------
+     timeseries : pd.Series
+         A datetime-indexed series of daily values.
+     threshold : float, optional
+         Autocorrelation cutoff (default 1/e ≈ 0.368).
+     min_periods : int, optional
+         Minimum number of days required in a year to compute the autocorrelation.
+
+     Returns
+     -------
+     float
+         Maximum persistence (days) across all years, or NaN if no year has enough data.
+     """
+
+     results = {}
+
+     # Group by calendar year
+     for year, group in timeseries.groupby(timeseries.index.year):
+         group = group.dropna()
+         if len(group) < min_periods or group.std() == 0:
+             # Too few values, or a constant series with no decorrelation time
+             continue
+
+         # Normalize (remove mean, divide by std)
+         x = (group - group.mean()) / group.std()
+         n = len(x)
+
+         # Biased sample autocorrelation via np.correlate
+         acf = np.correlate(x, x, mode='full') / n
+         acf = acf[n - 1:]   # keep non-negative lags
+         acf = acf / acf[0]  # normalize so the lag-0 autocorrelation equals 1
+
+         # First lag at which the autocorrelation drops below the threshold
+         persistence = np.argmax(acf < threshold)
+         if persistence == 0:  # acf never drops below the threshold
+             persistence = len(acf) - 1
+
+         results[year] = int(persistence)
+
+     if not results:
+         return float('nan')
+
+     return float(max(results.values()))
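
To see the persistence metric behave as expected, note that an AR(1) process with coefficient 0.9 has a theoretical decorrelation time of -1/ln(0.9) ≈ 9.5 days, so max_annual_persistence should report a value of that order on a synthetic series:

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(42)
    idx = pd.date_range("2000-01-01", "2004-12-31", freq="D")
    x = np.zeros(len(idx))
    for t in range(1, len(x)):
        x[t] = 0.9 * x[t - 1] + rng.normal()  # AR(1) with phi = 0.9

    print(max_annual_persistence(pd.Series(x, index=idx)))  # on the order of 10 days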