PyPI - hydroanomaly - Versions diffs - 1.0.0__py3-none-any.whl → 1.2.1__py3-none-any.whl - Mend

hydroanomaly 1.0.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

hydroanomaly/__init__.py CHANGED Viewed

@@ -5,17 +5,20 @@ A simple Python package with just 3 modules:
 1. USGS turbidity data retrieval (returns data and site coordinates)
 2. Sentinel satellite bands retrieval
 3. Time series visualization
+4. Machine learning anomaly detection (One-Class SVM and Isolation Forest)
 That's it - nothing else!
 """
-__version__ = "1.0.0"
+__version__ = "1.2.1"
 __author__ = "Ehsan Kahrizi (Ehsan.kahrizi@usu.edu)"
 # Import the 3 simple modules
 from .usgs_turbidity import get_turbidity, get_usgs_turbidity
 from .sentinel_bands import get_sentinel_bands, get_satellite_data, get_sentinel, get_sentinel_bands_gee, show_sentinel_ndwi_map
 from .visualize import plot_timeseries, plot_turbidity, plot_sentinel, plot_comparison, plot, visualize
+from .ml import run_oneclass_svm, run_isolation_forest
 # Export everything
 __all__ = [
@@ -30,14 +33,17 @@ __all__ = [
     'get_sentinel',
     'show_sentinel_ndwi_map',
     # Visualization functions
     'plot_timeseries',
     'plot_turbidity',
     'plot_sentinel',
     'plot_comparison',
     'plot',
-    'visualize'
+    'visualize',
+    # Machine learning functions
+    'run_oneclass_svm',
+    'run_isolation_forest'
 ]
 print(f"HydroAnomaly v{__version__} - Simple Water Data Package")

hydroanomaly/ml.py ADDED Viewed

@@ -0,0 +1,170 @@
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import OneClassSVM
+from sklearn.ensemble import IsolationForest
+from sklearn.metrics import f1_score, recall_score, precision_score
+import matplotlib.pyplot as plt
+# ============= Helper Functions =========================================================================
def match_nearest(row, usgs):
    """
    Return the USGS turbidity reading closest in time to a Sentinel observation.

    Only USGS readings taken on the same calendar day as ``row['datetime']``
    are considered; among those, the one with the smallest absolute time
    difference is returned.

    Args:
        row: Mapping or Series with a ``'datetime'`` entry (anything
            ``pd.Timestamp`` accepts).
        usgs: DataFrame with ``'datetime'`` and ``'turbidity'`` columns.

    Returns:
        The matched turbidity value, or ``np.nan`` when the timestamp is
        missing or no USGS reading exists for that day.

    Note:
        Both timestamp sources are assumed to share the same timezone
        awareness (both naive or both aware) -- TODO confirm against callers.
    """
    target_time = pd.Timestamp(row['datetime'])
    if pd.isna(target_time) or usgs.empty:
        return np.nan

    usgs_times = pd.to_datetime(usgs['datetime'])
    # Compare calendar days (midnight-normalized), not exact timestamps.
    same_day = (usgs_times.dt.normalize() == target_time.normalize()).to_numpy()
    if not same_day.any():
        return np.nan

    # Select positionally so a non-unique USGS index cannot return a Series.
    delta = (usgs_times[same_day] - target_time).abs().to_numpy()
    candidates = usgs['turbidity'].to_numpy()[same_day]
    return candidates[int(delta.argmin())]
+# ============= Preprocessing and Feature Engineering ========================================================
def preprocess_data(sentinel, usgs, turbidity_threshold=20, spike_sigma=2):
    """
    Join turbidity onto Sentinel rows and derive the model features.

    Steps: match each Sentinel row to a USGS turbidity value, optionally keep
    only rows flagged ``SCL == 6`` (water pixels), compute NDVI/NDWI/NDSI,
    add first/second turbidity differences and a spike flag, drop rows with
    missing values, and label rows whose turbidity exceeds the threshold.

    Args:
        sentinel: DataFrame of Sentinel band values with a ``'datetime'``
            column, or with ``'datetime'`` as its named index. Not modified.
        usgs: DataFrame with ``'datetime'`` and ``'turbidity'`` columns.
        turbidity_threshold: Turbidity above this value is labelled
            ``Classe == 1``. Defaults to 20.
        spike_sigma: A row is a spike when ``|turbidity_diff2|`` exceeds
            ``spike_sigma`` standard deviations of ``turbidity_diff2``.
            Defaults to 2.

    Returns:
        tuple: ``(df, bands)`` where ``df`` is the processed DataFrame sorted
        by ``'datetime'`` and ``bands`` is the list of band column names.

    Raises:
        KeyError: If ``sentinel`` lacks a column this function needs.
    """
    bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12']

    # Copy so the caller's frame does not gain a 'turbidity' column.
    df = sentinel.copy()
    # get_sentinel_bands_gee returns 'datetime' as the index; the matching
    # step needs it as a regular column.
    if 'datetime' not in df.columns and df.index.name == 'datetime':
        df = df.reset_index()

    required = ['datetime', 'B3', 'B4', 'B8', 'B11']
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"sentinel is missing required column(s): {missing}")

    # Add matched turbidity (a list build also works for an empty frame,
    # where DataFrame.apply(axis=1) would not return a Series).
    df['turbidity'] = np.array(
        [match_nearest(row, usgs) for _, row in df.iterrows()], dtype=float
    )
    df = df.dropna(subset=['turbidity'])

    # Water pixels filtering (only when at least one such row exists).
    if 'SCL' in df.columns and (df['SCL'] == 6).any():
        df = df[df['SCL'] == 6].drop_duplicates(subset=['B2', 'B3', 'B4'])
    # Detach from the filtered views before adding columns.
    df = df.copy()

    # Feature engineering
    df['NDVI'] = (df['B8'] - df['B4']) / (df['B8'] + df['B4'])
    df['NDWI'] = (df['B3'] - df['B8']) / (df['B3'] + df['B8'])
    df['NDSI'] = (df['B3'] - df['B11']) / (df['B3'] + df['B11'])
    # A zero denominator yields +/-inf; treat it as missing so the row is
    # dropped below instead of reaching the scaler.
    index_cols = ['NDVI', 'NDWI', 'NDSI']
    df[index_cols] = df[index_cols].replace([np.inf, -np.inf], np.nan)

    df = df.sort_values('datetime').reset_index(drop=True)
    df['turbidity_diff1'] = df['turbidity'].diff()
    df['turbidity_diff2'] = df['turbidity_diff1'].diff()
    thresh = spike_sigma * df['turbidity_diff2'].std()
    df['spike'] = (df['turbidity_diff2'].abs() > thresh).astype(int)
    # Drops the leading rows whose differences are undefined, plus any row
    # with a missing value in any column.
    df = df.dropna().copy()

    # Class label
    df['Classe'] = (df['turbidity'] > turbidity_threshold).astype(int)
    return df, bands
+# ============= Anomaly Detection Methods ================================================================
def run_oneclass_svm(sentinel, usgs, plot=True):
    """
    Apply One-Class SVM anomaly detection on Sentinel/USGS data.

    The model is trained on the first 80% of the normal (``Classe == 0``)
    rows and evaluated on every remaining row (held-out normal rows plus all
    ``Classe == 1`` rows). A small grid over ``gamma`` and ``nu`` is searched
    and the combination with the highest F1 score is kept.

    Args:
        sentinel: Sentinel band DataFrame passed to ``preprocess_data``.
        usgs: USGS turbidity DataFrame passed to ``preprocess_data``.
        plot: When True, show a plot of true vs. predicted anomalies.

    Returns:
        tuple: ``(df_out, best_params, best_f1)`` where ``df_out`` holds the
        evaluation rows with a ``'predicted'`` column (1 = anomaly),
        ``best_params`` is a dict with ``'gamma'`` and ``'nu'``, and
        ``best_f1`` is the corresponding F1 score. Returns
        ``(None, None, None)`` when no usable model could be fitted.
    """
    df, bands = preprocess_data(sentinel, usgs)
    features = bands + ['NDVI', 'NDWI', 'NDSI', 'turbidity_diff1', 'turbidity_diff2', 'spike']
    X = df[features].fillna(df[features].mean()).values
    y = df['Classe'].values

    normal_idx = np.flatnonzero(y == 0)
    if len(normal_idx) == 0:
        print("No normal (Classe == 0) samples available for training.")
        return None, None, None

    train_size = max(1, int(0.8 * len(normal_idx)))
    train_idx = normal_idx[:train_size]
    # Keep a row mask for the evaluation set so predictions can be attached
    # to exactly the rows they were computed for.
    is_test = np.ones(len(y), dtype=bool)
    is_test[train_idx] = False
    if not is_test.any():
        print("No samples left for evaluation after the training split.")
        return None, None, None

    # Fit the scaler on training rows only so evaluation rows do not leak
    # into the scaling statistics.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X[train_idx])
    X_test = scaler.transform(X[is_test])
    y_test = y[is_test]

    best_f1 = -1
    best_y_pred, best_params = None, None
    for gamma in ['auto', 'scale']:
        for nu in [0.01, 0.05, 0.1, 0.2]:
            model = OneClassSVM(kernel='rbf', gamma=gamma, nu=nu)
            model.fit(X_train)
            # predict() gives +1 for inliers; map to 0 = normal, 1 = anomaly.
            y_pred = np.where(model.predict(X_test) == 1, 0, 1)
            # Skip degenerate models that put every row in one class.
            if len(np.unique(y_pred)) < 2:
                continue
            f1 = f1_score(y_test, y_pred)
            if f1 > best_f1:
                best_f1 = f1
                best_y_pred = y_pred
                best_params = {'gamma': gamma, 'nu': nu}

    if best_params is None:
        print("Could not find a good model. Try different hyperparameters.")
        return None, None, None

    df_out = df.iloc[np.flatnonzero(is_test)].copy()
    df_out['predicted'] = best_y_pred
    if plot:
        true_anom = df_out[df_out['Classe'] == 1]
        pred_anom = df_out[df_out['predicted'] == 1]
        plt.figure(figsize=(15, 6))
        plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
        plt.scatter(true_anom['datetime'], true_anom['turbidity'],
                    color='red', marker='x', label='True Anomaly', s=100)
        plt.scatter(pred_anom['datetime'], pred_anom['turbidity'],
                    edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
        plt.title("True vs Predicted Anomalies (OneClassSVM)")
        plt.xlabel("datetime")
        plt.ylabel("turbidity")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()
    return df_out, best_params, best_f1
+# ============= Isolation Forest Method ================================================================
def run_isolation_forest(sentinel, usgs, plot=True):
    """
    Apply Isolation Forest anomaly detection on Sentinel/USGS data.

    The model is trained on the first 80% of the normal (``Classe == 0``)
    rows and evaluated on every remaining row (held-out normal rows plus all
    ``Classe == 1`` rows). Several ``contamination`` values are tried and the
    one with the highest F1 score is kept.

    Args:
        sentinel: Sentinel band DataFrame passed to ``preprocess_data``.
        usgs: USGS turbidity DataFrame passed to ``preprocess_data``.
        plot: When True, show a plot of true vs. predicted anomalies.

    Returns:
        tuple: ``(df_out, best_params, best_f1)`` where ``df_out`` holds the
        evaluation rows with a ``'predicted'`` column (1 = anomaly),
        ``best_params`` is a dict with ``'contamination'``, and ``best_f1``
        is the corresponding F1 score. Returns ``(None, None, None)`` when no
        usable model could be fitted.
    """
    df, bands = preprocess_data(sentinel, usgs)
    features = bands + ['NDVI', 'NDWI', 'NDSI', 'turbidity_diff1', 'turbidity_diff2', 'spike']
    X = df[features].fillna(df[features].mean()).values
    y = df['Classe'].values

    normal_idx = np.flatnonzero(y == 0)
    if len(normal_idx) == 0:
        print("No normal (Classe == 0) samples available for training.")
        return None, None, None

    train_size = max(1, int(0.8 * len(normal_idx)))
    train_idx = normal_idx[:train_size]
    # Keep a row mask for the evaluation set so predictions can be attached
    # to exactly the rows they were computed for.
    is_test = np.ones(len(y), dtype=bool)
    is_test[train_idx] = False
    if not is_test.any():
        print("No samples left for evaluation after the training split.")
        return None, None, None

    # Fit the scaler on training rows only so evaluation rows do not leak
    # into the scaling statistics.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X[train_idx])
    X_test = scaler.transform(X[is_test])
    y_test = y[is_test]

    best_f1 = -1
    best_y_pred, best_params = None, None
    for contamination in [0.01, 0.05, 0.1, 0.15, 0.2, 0.3]:
        model = IsolationForest(
            n_estimators=100,
            contamination=contamination,
            max_samples='auto',
            bootstrap=True,
            random_state=42
        )
        model.fit(X_train)
        # predict() gives +1 for inliers; map to 0 = normal, 1 = anomaly.
        y_pred = np.where(model.predict(X_test) == 1, 0, 1)
        # Skip degenerate models that put every row in one class.
        if len(np.unique(y_pred)) < 2:
            continue
        f1 = f1_score(y_test, y_pred)
        if f1 > best_f1:
            best_f1 = f1
            best_y_pred = y_pred
            best_params = {'contamination': contamination}

    if best_params is None:
        print("Could not find a good model. Try different hyperparameters.")
        return None, None, None

    df_out = df.iloc[np.flatnonzero(is_test)].copy()
    df_out['predicted'] = best_y_pred
    if plot:
        true_anom = df_out[df_out['Classe'] == 1]
        pred_anom = df_out[df_out['predicted'] == 1]
        plt.figure(figsize=(15, 6))
        plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
        plt.scatter(true_anom['datetime'], true_anom['turbidity'],
                    color='red', marker='x', label='True Anomaly', s=100)
        plt.scatter(pred_anom['datetime'], pred_anom['turbidity'],
                    edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
        plt.title("True vs Predicted Anomalies (Isolation Forest)")
        plt.xlabel("datetime")
        plt.ylabel("turbidity")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()
    return df_out, best_params, best_f1

hydroanomaly/sentinel_bands.py CHANGED Viewed

@@ -79,14 +79,14 @@ def get_sentinel_bands_gee(
     s2_masked = s2.map(dynamic_scl_mask)
     def extract_features(image):
-        date = image.date().format('YYYY-MM-dd HH:mm:ss')
+        datetime = image.date().format('YYYY-MM-dd HH:mm:ss')
         values = image.reduceRegion(
             reducer=ee.Reducer.mean(),
             geometry=buffered_point,
             scale=20,
             maxPixels=1e8
         )
-        return ee.Feature(None, values.set('date', date))
+        return ee.Feature(None, values.set('datetime', datetime))
     features = s2_masked.map(extract_features)
     fc = ee.FeatureCollection(features).filter(ee.Filter.notNull(['B2']))
@@ -95,9 +95,9 @@ def get_sentinel_bands_gee(
     rows = [f['properties'] for f in data['features']]
     df = pd.DataFrame(rows)
     if not df.empty:
-        df['date'] = pd.to_datetime(df['date'])
-        df = df.sort_values('date')
-        df = df.set_index('date')
+        df['datetime'] = pd.to_datetime(df['datetime'])
+        df = df.sort_values('datetime')
+        df = df.set_index('datetime')
     return df

{hydroanomaly-1.0.0.dist-info → hydroanomaly-1.2.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydroanomaly
-Version: 1.0.0
+Version: 1.2.1
 Summary: A Python package for hydro anomaly detection with simple USGS data retrieval
 Author-email: Ehsan Kahrizi <ehsan.kahrizi@usu.edu>
 License: MIT License
@@ -28,7 +28,7 @@ License: MIT License
 Project-URL: Homepage, https://github.com/yourusername/hydroanomaly
 Project-URL: Bug Reports, https://github.com/yourusername/hydroanomaly/issues
 Project-URL: Source, https://github.com/yourusername/hydroanomaly
-Keywords: python,package,hydro,anomaly,detection
+Keywords: python,package,hydrology,anomaly detection,remote sensing
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.6

hydroanomaly-1.2.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+hydroanomaly/__init__.py,sha256=HgiUaNbWp_hAHaOvzCJXRBDwe89orkscx3Sd7lvgnQo,1664
+hydroanomaly/ml.py,sha256=X2bYinNwRzQz0yFtRSzmN8OIZaX7vfT4BPSE6KMoYAM,7066
+hydroanomaly/sentinel_bands.py,sha256=XdpXUsJ8VeRQp9akDeQaVBefuuMrQIabslu8tg_FTpk,5399
+hydroanomaly/usgs_turbidity.py,sha256=k0cXRXpTe1YgjfR0Htw77SLD8hM--43jiEiJwx1vRg0,5664
+hydroanomaly/visualize.py,sha256=d_Ou1sTr648TdAW-94NXwNbLPL4rvYVYb5pw4Xux3aE,7228
+hydroanomaly-1.2.1.dist-info/licenses/LICENSE,sha256=OphKV48tcMv6ep-7j-8T6nycykPT0g8ZlMJ9zbGvdPs,1066
+hydroanomaly-1.2.1.dist-info/METADATA,sha256=keF0Y92CYoZxOnU9vYaGfnefWr8s9mN7ncA2Qn2bp4I,12981
+hydroanomaly-1.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hydroanomaly-1.2.1.dist-info/top_level.txt,sha256=t-5Lc-eTLlkxIhR_N1Cpp6_YZafKS3xLLk9D2CtbE7o,13
+hydroanomaly-1.2.1.dist-info/RECORD,,

hydroanomaly-1.0.0.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-hydroanomaly/__init__.py,sha256=PTaMZkFNuJnuA9v6tPIll0NoELl3AKJqpi5R-Y_kW9A,1442
-hydroanomaly/sentinel_bands.py,sha256=Y6RAunVJDYLs13WemSSQNEu07GqmhR64fC2mLPxwh2k,5371
-hydroanomaly/usgs_turbidity.py,sha256=k0cXRXpTe1YgjfR0Htw77SLD8hM--43jiEiJwx1vRg0,5664
-hydroanomaly/visualize.py,sha256=d_Ou1sTr648TdAW-94NXwNbLPL4rvYVYb5pw4Xux3aE,7228
-hydroanomaly-1.0.0.dist-info/licenses/LICENSE,sha256=OphKV48tcMv6ep-7j-8T6nycykPT0g8ZlMJ9zbGvdPs,1066
-hydroanomaly-1.0.0.dist-info/METADATA,sha256=yij922cCRRXtvyel5t1r45B5pNkg71h8jufNu07T-14,12962
-hydroanomaly-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-hydroanomaly-1.0.0.dist-info/top_level.txt,sha256=t-5Lc-eTLlkxIhR_N1Cpp6_YZafKS3xLLk9D2CtbE7o,13
-hydroanomaly-1.0.0.dist-info/RECORD,,

{hydroanomaly-1.0.0.dist-info → hydroanomaly-1.2.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{hydroanomaly-1.0.0.dist-info → hydroanomaly-1.2.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{hydroanomaly-1.0.0.dist-info → hydroanomaly-1.2.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

hydroanomaly 1.0.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

hydroanomaly 1.0.0__py3-none-any.whl → 1.2.1__py3-none-any.whl