PyPI - hydroanomaly - Versions diffs - 1.2.1__tar.gz → 1.2.2__tar.gz - Mend

hydroanomaly-1.2.1.tar.gz → hydroanomaly-1.2.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

{hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydroanomaly
-Version: 1.2.1
+Version: 1.2.2
 Summary: A Python package for hydro anomaly detection with simple USGS data retrieval
 Author-email: Ehsan Kahrizi <ehsan.kahrizi@usu.edu>
 License: MIT License

{hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly/__init__.py RENAMED Viewed

@@ -10,7 +10,7 @@ A simple Python package with just 3 modules:
 That's it - nothing else!
 """
-__version__ = "1.2.1"
+__version__ = "1.2.2"
 __author__ = "Ehsan Kahrizi (Ehsan.kahrizi@usu.edu)"
 # Import the 3 simple modules

{hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly/ml.py RENAMED Viewed

@@ -6,24 +6,30 @@ from sklearn.ensemble import IsolationForest
 from sklearn.metrics import f1_score, recall_score, precision_score
 import matplotlib.pyplot as plt
-# ============= Helper Functions =========================================================================
-def match_nearest(row, usgs):
-    target_time = row['datetime']
-    same_day = usgs[usgs['datetime'] == target_time.datetime()]
-    if same_day.empty:
+# ============= Helper Function: Match nearest USGS turbidity by datetime ================================
+def match_nearest_datetime(sentinel_dt, usgs):
+    # usgs is indexed by 'datetime'
+    if usgs.empty:
         return np.nan
-    delta = (same_day['datetime'] - target_time).abs()
-    return same_day.loc[delta.idxmin(), 'turbidity']
+    # Find the closest datetime in usgs to sentinel_dt
+    i = usgs.index.get_indexer([sentinel_dt], method='nearest')[0]
+    return usgs.iloc[i]['Turbidity']
-# ============= Preprocessing and Feature Engineering ========================================================
+# ============= Preprocessing and Feature Engineering ====================================================
 def preprocess_data(sentinel, usgs):
-    # Add matched turbidity
-    sentinel['turbidity'] = sentinel.apply(lambda row: match_nearest(row, usgs), axis=1)
+    # Expect both to have DatetimeIndex named 'datetime'
+    if sentinel.index.name != 'datetime':
+        raise ValueError("Sentinel dataframe must have DatetimeIndex named 'datetime'")
+    if usgs.index.name != 'datetime':
+        raise ValueError("USGS dataframe must have DatetimeIndex named 'datetime'")
+    # Add matched turbidity to sentinel (by nearest datetime)
+    sentinel = sentinel.copy()
+    usgs = usgs.copy()
+    sentinel['turbidity'] = [match_nearest_datetime(dt, usgs) for dt in sentinel.index]
     df = sentinel.dropna(subset=['turbidity'])
-    # Water pixels filtering
+    # Water pixel filtering
     if 'SCL' in df.columns and (df['SCL'] == 6).sum() > 0:
         df = df[df['SCL'] == 6].drop_duplicates(subset=['B2', 'B3', 'B4'])
@@ -33,7 +39,7 @@ def preprocess_data(sentinel, usgs):
     df['NDWI'] = (df['B3'] - df['B8']) / (df['B3'] + df['B8'])
     df['NDSI'] = (df['B3'] - df['B11']) / (df['B3'] + df['B11'])
-    df = df.sort_values('datetime').reset_index(drop=True)
+    df = df.sort_index()
     df['turbidity_diff1'] = df['turbidity'].diff()
     df['turbidity_diff2'] = df['turbidity_diff1'].diff()
     thresh = 2 * df['turbidity_diff2'].std()
@@ -44,12 +50,11 @@ def preprocess_data(sentinel, usgs):
     df['Classe'] = (df['turbidity'] > 20).astype(int)
     return df, bands
-# ============= Anomaly Detection Methods ================================================================
+# ============= Anomaly Detection: One-Class SVM ========================================================
 def run_oneclass_svm(sentinel, usgs, plot=True):
     """
     Apply One-Class SVM anomaly detection on Sentinel/USGS data.
-    Returns: DataFrame with predictions, and best model parameters.
+    Inputs must have DatetimeIndex named 'datetime'.
     """
     df, bands = preprocess_data(sentinel, usgs)
     features = bands + ['NDVI','NDWI','NDSI','turbidity_diff1','turbidity_diff2','spike']
@@ -87,14 +92,14 @@ def run_oneclass_svm(sentinel, usgs, plot=True):
         df_out['predicted'] = best_y_pred
         if plot:
             plt.figure(figsize=(15,6))
-            plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
-            plt.scatter(df_out[df_out['Classe']==1]['datetime'], df_out[df_out['Classe']==1]['turbidity'],
+            plt.plot(df_out.index, df_out['turbidity'], label='Turbidity', color='blue')
+            plt.scatter(df_out[df_out['Classe']==1].index, df_out[df_out['Classe']==1]['turbidity'],
                         color='red', marker='x', label='True Anomaly', s=100)
-            plt.scatter(df_out[df_out['predicted']==1]['datetime'], df_out[df_out['predicted']==1]['turbidity'],
+            plt.scatter(df_out[df_out['predicted']==1].index, df_out[df_out['predicted']==1]['turbidity'],
                         edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
             plt.title("True vs Predicted Anomalies (OneClassSVM)")
-            plt.xlabel("datetime")
-            plt.ylabel("turbidity")
+            plt.xlabel("Datetime")
+            plt.ylabel("Turbidity")
             plt.legend()
             plt.grid(True)
             plt.tight_layout()
@@ -104,12 +109,11 @@ def run_oneclass_svm(sentinel, usgs, plot=True):
         print("Could not find a good model. Try different hyperparameters.")
         return None, None, None
-# ============= Isolation Forest Method ================================================================
+# ============= Anomaly Detection: Isolation Forest ======================================================
 def run_isolation_forest(sentinel, usgs, plot=True):
     """
     Apply Isolation Forest anomaly detection on Sentinel/USGS data.
-    Returns: DataFrame with predictions, and best model parameters.
+    Inputs must have DatetimeIndex named 'datetime'.
     """
     df, bands = preprocess_data(sentinel, usgs)
     features = bands + ['NDVI','NDWI','NDSI','turbidity_diff1','turbidity_diff2','spike']
@@ -152,14 +156,14 @@ def run_isolation_forest(sentinel, usgs, plot=True):
         df_out['predicted'] = best_y_pred
         if plot:
             plt.figure(figsize=(15,6))
-            plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
-            plt.scatter(df_out[df_out['Classe']==1]['datetime'], df_out[df_out['Classe']==1]['turbidity'],
+            plt.plot(df_out.index, df_out['turbidity'], label='Turbidity', color='blue')
+            plt.scatter(df_out[df_out['Classe']==1].index, df_out[df_out['Classe']==1]['turbidity'],
                         color='red', marker='x', label='True Anomaly', s=100)
-            plt.scatter(df_out[df_out['predicted']==1]['datetime'], df_out[df_out['predicted']==1]['turbidity'],
+            plt.scatter(df_out[df_out['predicted']==1].index, df_out[df_out['predicted']==1]['turbidity'],
                         edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
             plt.title("True vs Predicted Anomalies (Isolation Forest)")
-            plt.xlabel("datetime")
-            plt.ylabel("turbidity")
+            plt.xlabel("Datetime")
+            plt.ylabel("Turbidity")
             plt.legend()
             plt.grid(True)
             plt.tight_layout()

{hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydroanomaly
-Version: 1.2.1
+Version: 1.2.2
 Summary: A Python package for hydro anomaly detection with simple USGS data retrieval
 Author-email: Ehsan Kahrizi <ehsan.kahrizi@usu.edu>
 License: MIT License

{hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "hydroanomaly"
-version = "1.2.1"
+version = "1.2.2"
 authors = [
     {name = "Ehsan Kahrizi", email = "ehsan.kahrizi@usu.edu"},
 ]