hydroanomaly 1.2.1__tar.gz → 1.2.2__tar.gz

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hydroanomaly
3
- Version: 1.2.1
3
+ Version: 1.2.2
4
4
  Summary: A Python package for hydro anomaly detection with simple USGS data retrieval
5
5
  Author-email: Ehsan Kahrizi <ehsan.kahrizi@usu.edu>
6
6
  License: MIT License
@@ -10,7 +10,7 @@ A simple Python package with just 3 modules:
10
10
  That's it - nothing else!
11
11
  """
12
12
 
13
- __version__ = "1.2.1"
13
+ __version__ = "1.2.2"
14
14
  __author__ = "Ehsan Kahrizi (Ehsan.kahrizi@usu.edu)"
15
15
 
16
16
  # Import the 3 simple modules
@@ -6,24 +6,30 @@ from sklearn.ensemble import IsolationForest
6
6
  from sklearn.metrics import f1_score, recall_score, precision_score
7
7
  import matplotlib.pyplot as plt
8
8
 
9
- # ============= Helper Functions =========================================================================
10
- def match_nearest(row, usgs):
11
- target_time = row['datetime']
12
- same_day = usgs[usgs['datetime'] == target_time.datetime()]
13
- if same_day.empty:
9
+ # ============= Helper Function: Match nearest USGS turbidity by datetime ================================
10
+ def match_nearest_datetime(sentinel_dt, usgs):
11
+ # usgs is indexed by 'datetime'
12
+ if usgs.empty:
14
13
  return np.nan
15
- delta = (same_day['datetime'] - target_time).abs()
16
- return same_day.loc[delta.idxmin(), 'turbidity']
14
+ # Find the closest datetime in usgs to sentinel_dt
15
+ i = usgs.index.get_indexer([sentinel_dt], method='nearest')[0]
16
+ return usgs.iloc[i]['Turbidity']
17
17
 
18
-
19
-
20
- # ============= Preprocessing and Feature Engineering ========================================================
18
+ # ============= Preprocessing and Feature Engineering ====================================================
21
19
  def preprocess_data(sentinel, usgs):
22
- # Add matched turbidity
23
- sentinel['turbidity'] = sentinel.apply(lambda row: match_nearest(row, usgs), axis=1)
20
+ # Expect both to have DatetimeIndex named 'datetime'
21
+ if sentinel.index.name != 'datetime':
22
+ raise ValueError("Sentinel dataframe must have DatetimeIndex named 'datetime'")
23
+ if usgs.index.name != 'datetime':
24
+ raise ValueError("USGS dataframe must have DatetimeIndex named 'datetime'")
25
+
26
+ # Add matched turbidity to sentinel (by nearest datetime)
27
+ sentinel = sentinel.copy()
28
+ usgs = usgs.copy()
29
+ sentinel['turbidity'] = [match_nearest_datetime(dt, usgs) for dt in sentinel.index]
24
30
  df = sentinel.dropna(subset=['turbidity'])
25
31
 
26
- # Water pixels filtering
32
+ # Water pixel filtering
27
33
  if 'SCL' in df.columns and (df['SCL'] == 6).sum() > 0:
28
34
  df = df[df['SCL'] == 6].drop_duplicates(subset=['B2', 'B3', 'B4'])
29
35
 
@@ -33,7 +39,7 @@ def preprocess_data(sentinel, usgs):
33
39
  df['NDWI'] = (df['B3'] - df['B8']) / (df['B3'] + df['B8'])
34
40
  df['NDSI'] = (df['B3'] - df['B11']) / (df['B3'] + df['B11'])
35
41
 
36
- df = df.sort_values('datetime').reset_index(drop=True)
42
+ df = df.sort_index()
37
43
  df['turbidity_diff1'] = df['turbidity'].diff()
38
44
  df['turbidity_diff2'] = df['turbidity_diff1'].diff()
39
45
  thresh = 2 * df['turbidity_diff2'].std()
@@ -44,12 +50,11 @@ def preprocess_data(sentinel, usgs):
44
50
  df['Classe'] = (df['turbidity'] > 20).astype(int)
45
51
  return df, bands
46
52
 
47
-
48
- # ============= Anomaly Detection Methods ================================================================
53
+ # ============= Anomaly Detection: One-Class SVM ========================================================
49
54
  def run_oneclass_svm(sentinel, usgs, plot=True):
50
55
  """
51
56
  Apply One-Class SVM anomaly detection on Sentinel/USGS data.
52
- Returns: DataFrame with predictions, and best model parameters.
57
+ Inputs must have DatetimeIndex named 'datetime'.
53
58
  """
54
59
  df, bands = preprocess_data(sentinel, usgs)
55
60
  features = bands + ['NDVI','NDWI','NDSI','turbidity_diff1','turbidity_diff2','spike']
@@ -87,14 +92,14 @@ def run_oneclass_svm(sentinel, usgs, plot=True):
87
92
  df_out['predicted'] = best_y_pred
88
93
  if plot:
89
94
  plt.figure(figsize=(15,6))
90
- plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
91
- plt.scatter(df_out[df_out['Classe']==1]['datetime'], df_out[df_out['Classe']==1]['turbidity'],
95
+ plt.plot(df_out.index, df_out['turbidity'], label='Turbidity', color='blue')
96
+ plt.scatter(df_out[df_out['Classe']==1].index, df_out[df_out['Classe']==1]['turbidity'],
92
97
  color='red', marker='x', label='True Anomaly', s=100)
93
- plt.scatter(df_out[df_out['predicted']==1]['datetime'], df_out[df_out['predicted']==1]['turbidity'],
98
+ plt.scatter(df_out[df_out['predicted']==1].index, df_out[df_out['predicted']==1]['turbidity'],
94
99
  edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
95
100
  plt.title("True vs Predicted Anomalies (OneClassSVM)")
96
- plt.xlabel("datetime")
97
- plt.ylabel("turbidity")
101
+ plt.xlabel("Datetime")
102
+ plt.ylabel("Turbidity")
98
103
  plt.legend()
99
104
  plt.grid(True)
100
105
  plt.tight_layout()
@@ -104,12 +109,11 @@ def run_oneclass_svm(sentinel, usgs, plot=True):
104
109
  print("Could not find a good model. Try different hyperparameters.")
105
110
  return None, None, None
106
111
 
107
-
108
- # ============= Isolation Forest Method ================================================================
112
+ # ============= Anomaly Detection: Isolation Forest ======================================================
109
113
  def run_isolation_forest(sentinel, usgs, plot=True):
110
114
  """
111
115
  Apply Isolation Forest anomaly detection on Sentinel/USGS data.
112
- Returns: DataFrame with predictions, and best model parameters.
116
+ Inputs must have DatetimeIndex named 'datetime'.
113
117
  """
114
118
  df, bands = preprocess_data(sentinel, usgs)
115
119
  features = bands + ['NDVI','NDWI','NDSI','turbidity_diff1','turbidity_diff2','spike']
@@ -152,14 +156,14 @@ def run_isolation_forest(sentinel, usgs, plot=True):
152
156
  df_out['predicted'] = best_y_pred
153
157
  if plot:
154
158
  plt.figure(figsize=(15,6))
155
- plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
156
- plt.scatter(df_out[df_out['Classe']==1]['datetime'], df_out[df_out['Classe']==1]['turbidity'],
159
+ plt.plot(df_out.index, df_out['turbidity'], label='Turbidity', color='blue')
160
+ plt.scatter(df_out[df_out['Classe']==1].index, df_out[df_out['Classe']==1]['turbidity'],
157
161
  color='red', marker='x', label='True Anomaly', s=100)
158
- plt.scatter(df_out[df_out['predicted']==1]['datetime'], df_out[df_out['predicted']==1]['turbidity'],
162
+ plt.scatter(df_out[df_out['predicted']==1].index, df_out[df_out['predicted']==1]['turbidity'],
159
163
  edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
160
164
  plt.title("True vs Predicted Anomalies (Isolation Forest)")
161
- plt.xlabel("datetime")
162
- plt.ylabel("turbidity")
165
+ plt.xlabel("Datetime")
166
+ plt.ylabel("Turbidity")
163
167
  plt.legend()
164
168
  plt.grid(True)
165
169
  plt.tight_layout()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hydroanomaly
3
- Version: 1.2.1
3
+ Version: 1.2.2
4
4
  Summary: A Python package for hydro anomaly detection with simple USGS data retrieval
5
5
  Author-email: Ehsan Kahrizi <ehsan.kahrizi@usu.edu>
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hydroanomaly"
7
- version = "1.2.1"
7
+ version = "1.2.2"
8
8
  authors = [
9
9
  {name = "Ehsan Kahrizi", email = "ehsan.kahrizi@usu.edu"},
10
10
  ]
File without changes
File without changes
File without changes