hydroanomaly 1.0.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hydroanomaly/__init__.py CHANGED
@@ -5,17 +5,20 @@ A simple Python package with just 3 modules:
5
5
  1. USGS turbidity data retrieval (returns data and site coordinates)
6
6
  2. Sentinel satellite bands retrieval
7
7
  3. Time series visualization
8
+ 4. Machine learning anomaly detection (One-Class SVM and Isolation Forest)
8
9
 
9
10
  That's it - nothing else!
10
11
  """
11
12
 
12
- __version__ = "1.0.0"
13
+ __version__ = "1.2.1"
13
14
  __author__ = "Ehsan Kahrizi (Ehsan.kahrizi@usu.edu)"
14
15
 
15
16
  # Import the public functions from each module
16
17
  from .usgs_turbidity import get_turbidity, get_usgs_turbidity
17
18
  from .sentinel_bands import get_sentinel_bands, get_satellite_data, get_sentinel, get_sentinel_bands_gee, show_sentinel_ndwi_map
18
19
  from .visualize import plot_timeseries, plot_turbidity, plot_sentinel, plot_comparison, plot, visualize
20
+ from .ml import run_oneclass_svm, run_isolation_forest
21
+
19
22
 
20
23
  # Export everything
21
24
  __all__ = [
@@ -30,14 +33,17 @@ __all__ = [
30
33
  'get_sentinel',
31
34
  'show_sentinel_ndwi_map',
32
35
 
33
-
34
36
  # Visualization functions
35
37
  'plot_timeseries',
36
38
  'plot_turbidity',
37
39
  'plot_sentinel',
38
40
  'plot_comparison',
39
41
  'plot',
40
- 'visualize'
42
+ 'visualize',
43
+
44
+ # Machine learning functions
45
+ 'run_oneclass_svm',
46
+ 'run_isolation_forest'
41
47
  ]
42
48
 
43
49
  print(f"HydroAnomaly v{__version__} - Simple Water Data Package")
hydroanomaly/ml.py ADDED
@@ -0,0 +1,170 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.preprocessing import StandardScaler
4
+ from sklearn.svm import OneClassSVM
5
+ from sklearn.ensemble import IsolationForest
6
+ from sklearn.metrics import f1_score, recall_score, precision_score
7
+ import matplotlib.pyplot as plt
8
+
9
+ # ============= Helper Functions =========================================================================
10
def match_nearest(row, usgs):
    """Return the USGS turbidity reading closest in time to a Sentinel row.

    Only USGS readings taken on the same calendar day as the Sentinel
    acquisition are considered.

    Args:
        row: Mapping/Series with a 'datetime' entry (the Sentinel timestamp).
        usgs: DataFrame with 'datetime' and 'turbidity' columns.

    Returns:
        The 'turbidity' value of the same-day reading nearest to
        ``row['datetime']``, or ``np.nan`` when there is no same-day reading.
    """
    target_time = pd.Timestamp(row['datetime'])
    usgs_times = pd.to_datetime(usgs['datetime'])

    # Compare calendar days, not exact timestamps: Timestamp has no
    # .datetime() method, and an exact match would almost never succeed.
    on_same_day = usgs_times.dt.normalize() == target_time.normalize()
    same_day = usgs[on_same_day]
    if same_day.empty:
        return np.nan

    delta = (usgs_times[on_same_day] - target_time).abs()
    # Positional lookup keeps the result a scalar even if usgs has
    # duplicate index labels.
    nearest_pos = int(delta.to_numpy().argmin())
    return same_day['turbidity'].iloc[nearest_pos]
17
+
18
+
19
+
20
+ # ============= Preprocessing and Feature Engineering ========================================================
21
def _with_datetime_column(frame):
    """Return a copy of *frame* exposing 'datetime' as a regular column."""
    if 'datetime' not in frame.columns and frame.index.name == 'datetime':
        # get_sentinel_bands_gee returns its frame indexed by 'datetime'.
        return frame.reset_index()
    return frame.copy()


def preprocess_data(sentinel, usgs):
    """Join Sentinel samples with USGS turbidity and derive model features.

    Each Sentinel row gets the nearest same-day turbidity reading (via
    ``match_nearest``); rows without a match are dropped. If an 'SCL' column
    exists and contains the value 6, only those rows are kept (de-duplicated
    on B2/B3/B4). NDVI, NDWI and NDSI are computed from the bands, then the
    first and second turbidity differences and a spike flag are added, and
    rows with turbidity above 20 are labelled class 1 in 'Classe'.

    Args:
        sentinel: DataFrame with the Sentinel band columns and a 'datetime'
            column (or a 'datetime'-named index). It is not modified.
        usgs: DataFrame with 'datetime' and 'turbidity' (a 'datetime'-named
            index is also accepted).

    Returns:
        Tuple ``(df, bands)``: the feature DataFrame sorted by 'datetime',
        and the list of band column names used as features.
    """
    # Work on copies so the caller's frames are left untouched.
    sentinel = _with_datetime_column(sentinel)
    usgs = _with_datetime_column(usgs)

    # Add matched turbidity. A list is used instead of DataFrame.apply so an
    # empty input yields an empty column rather than an empty DataFrame.
    sentinel['turbidity'] = [match_nearest(row, usgs) for _, row in sentinel.iterrows()]
    df = sentinel.dropna(subset=['turbidity']).copy()

    # Water pixels filtering
    if 'SCL' in df.columns and (df['SCL'] == 6).sum() > 0:
        df = df[df['SCL'] == 6].drop_duplicates(subset=['B2', 'B3', 'B4']).copy()

    # Feature engineering
    bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12']
    df['NDVI'] = (df['B8'] - df['B4']) / (df['B8'] + df['B4'])
    df['NDWI'] = (df['B3'] - df['B8']) / (df['B3'] + df['B8'])
    df['NDSI'] = (df['B3'] - df['B11']) / (df['B3'] + df['B11'])

    df = df.sort_values('datetime').reset_index(drop=True)
    df['turbidity_diff1'] = df['turbidity'].diff()
    df['turbidity_diff2'] = df['turbidity_diff1'].diff()
    # A spike is a second difference larger than two standard deviations.
    thresh = 2 * df['turbidity_diff2'].std()
    df['spike'] = (df['turbidity_diff2'].abs() > thresh).astype(int)
    # Drops the two leading rows whose differences are undefined, and any
    # other row containing a missing value.
    df = df.dropna()

    # Class label
    df['Classe'] = (df['turbidity'] > 20).astype(int)
    return df, bands
46
+
47
+
48
+ # ============= Anomaly Detection Methods ================================================================
49
def run_oneclass_svm(sentinel, usgs, plot=True):
    """Apply One-Class SVM anomaly detection on Sentinel/USGS data.

    The model is trained on the first 80% of class-0 rows (the 'Classe'
    column from ``preprocess_data``) and evaluated on the remaining class-0
    rows plus all class-1 rows. A small grid over ``gamma`` and ``nu`` is
    searched and the setting with the best F1 score is kept.

    Args:
        sentinel: Sentinel band DataFrame, passed to ``preprocess_data``.
        usgs: USGS turbidity DataFrame, passed to ``preprocess_data``.
        plot: When True, plot true vs. predicted anomalies.

    Returns:
        Tuple ``(df_out, best_params, best_f1)`` where ``df_out`` holds the
        evaluated rows (sorted by 'datetime') with a 'predicted' column
        (1 = anomaly). Returns ``(None, None, None)`` when no usable model
        was found or there is not enough data.
    """
    df, bands = preprocess_data(sentinel, usgs)
    features = bands + ['NDVI', 'NDWI', 'NDSI', 'turbidity_diff1', 'turbidity_diff2', 'spike']
    if len(df) == 0:
        print("Not enough data to train and evaluate a model.")
        return None, None, None

    X = df[features].fillna(df[features].mean()).values
    y = df['Classe'].values
    X_scaled = StandardScaler().fit_transform(X)

    # Track row positions so predictions can be attached to the rows that
    # were actually evaluated (they are not simply the tail of df).
    idx_class0 = np.flatnonzero(y == 0)
    idx_class1 = np.flatnonzero(y == 1)
    train_size = max(1, int(0.8 * len(idx_class0)))
    train_idx = idx_class0[:train_size]
    test_idx = np.concatenate([idx_class0[train_size:], idx_class1])
    if len(train_idx) == 0 or len(test_idx) == 0:
        print("Not enough data to train and evaluate a model.")
        return None, None, None

    X_train = X_scaled[train_idx]
    X_test = X_scaled[test_idx]
    y_test = y[test_idx]

    best_f1 = -1
    best_y_pred, best_params = None, None
    for gamma in ['auto', 'scale']:
        for nu in [0.01, 0.05, 0.1, 0.2]:
            model = OneClassSVM(kernel='rbf', gamma=gamma, nu=nu)
            model.fit(X_train)
            # predict() returns +1 for inliers and -1 for outliers.
            y_pred = np.where(model.predict(X_test) == 1, 0, 1)
            # Skip degenerate models that put every sample in one class.
            if len(np.unique(y_pred)) > 1:
                f1 = f1_score(y_test, y_pred)
                if f1 > best_f1:
                    best_f1 = f1
                    best_y_pred = y_pred
                    best_params = {'gamma': gamma, 'nu': nu}

    if best_f1 <= -1:
        print("Could not find a good model. Try different hyperparameters.")
        return None, None, None

    df_out = df.iloc[test_idx].copy()
    df_out['predicted'] = best_y_pred
    # Restore chronological order for the time-series plot and the caller.
    df_out = df_out.sort_values('datetime')

    if plot:
        true_anomalies = df_out[df_out['Classe'] == 1]
        predicted_anomalies = df_out[df_out['predicted'] == 1]
        plt.figure(figsize=(15, 6))
        plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
        plt.scatter(true_anomalies['datetime'], true_anomalies['turbidity'],
                    color='red', marker='x', label='True Anomaly', s=100)
        plt.scatter(predicted_anomalies['datetime'], predicted_anomalies['turbidity'],
                    edgecolors='orange', facecolors='none', marker='o',
                    label='Predicted Anomaly', s=80)
        plt.title("True vs Predicted Anomalies (OneClassSVM)")
        plt.xlabel("datetime")
        plt.ylabel("turbidity")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()
    return df_out, best_params, best_f1
106
+
107
+
108
+ # ============= Isolation Forest Method ================================================================
109
def run_isolation_forest(sentinel, usgs, plot=True):
    """Apply Isolation Forest anomaly detection on Sentinel/USGS data.

    The model is trained on the first 80% of class-0 rows (the 'Classe'
    column from ``preprocess_data``) and evaluated on the remaining class-0
    rows plus all class-1 rows. Several ``contamination`` values are tried
    and the one with the best F1 score is kept.

    Args:
        sentinel: Sentinel band DataFrame, passed to ``preprocess_data``.
        usgs: USGS turbidity DataFrame, passed to ``preprocess_data``.
        plot: When True, plot true vs. predicted anomalies.

    Returns:
        Tuple ``(df_out, best_params, best_f1)`` where ``df_out`` holds the
        evaluated rows (sorted by 'datetime') with a 'predicted' column
        (1 = anomaly). Returns ``(None, None, None)`` when no usable model
        was found or there is not enough data.
    """
    df, bands = preprocess_data(sentinel, usgs)
    features = bands + ['NDVI', 'NDWI', 'NDSI', 'turbidity_diff1', 'turbidity_diff2', 'spike']
    if len(df) == 0:
        print("Not enough data to train and evaluate a model.")
        return None, None, None

    X = df[features].fillna(df[features].mean()).values
    y = df['Classe'].values
    X_scaled = StandardScaler().fit_transform(X)

    # Track row positions so predictions can be attached to the rows that
    # were actually evaluated (they are not simply the tail of df).
    idx_class0 = np.flatnonzero(y == 0)
    idx_class1 = np.flatnonzero(y == 1)
    train_size = max(1, int(0.8 * len(idx_class0)))
    train_idx = idx_class0[:train_size]
    test_idx = np.concatenate([idx_class0[train_size:], idx_class1])
    if len(train_idx) == 0 or len(test_idx) == 0:
        print("Not enough data to train and evaluate a model.")
        return None, None, None

    X_train = X_scaled[train_idx]
    X_test = X_scaled[test_idx]
    y_test = y[test_idx]

    best_f1 = -1
    best_y_pred, best_params = None, None
    for contamination in [0.01, 0.05, 0.1, 0.15, 0.2, 0.3]:
        model = IsolationForest(
            n_estimators=100,
            contamination=contamination,
            max_samples='auto',
            bootstrap=True,
            random_state=42,
        )
        model.fit(X_train)
        # predict() returns +1 for inliers and -1 for outliers.
        y_pred = np.where(model.predict(X_test) == 1, 0, 1)
        # Skip degenerate models that put every sample in one class.
        if len(np.unique(y_pred)) > 1:
            f1 = f1_score(y_test, y_pred)
            if f1 > best_f1:
                best_f1 = f1
                best_y_pred = y_pred
                best_params = {'contamination': contamination}

    if best_f1 <= -1:
        print("Could not find a good model. Try different hyperparameters.")
        return None, None, None

    df_out = df.iloc[test_idx].copy()
    df_out['predicted'] = best_y_pred
    # Restore chronological order for the time-series plot and the caller.
    df_out = df_out.sort_values('datetime')

    if plot:
        true_anomalies = df_out[df_out['Classe'] == 1]
        predicted_anomalies = df_out[df_out['predicted'] == 1]
        plt.figure(figsize=(15, 6))
        plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
        plt.scatter(true_anomalies['datetime'], true_anomalies['turbidity'],
                    color='red', marker='x', label='True Anomaly', s=100)
        plt.scatter(predicted_anomalies['datetime'], predicted_anomalies['turbidity'],
                    edgecolors='orange', facecolors='none', marker='o',
                    label='Predicted Anomaly', s=80)
        plt.title("True vs Predicted Anomalies (Isolation Forest)")
        plt.xlabel("datetime")
        plt.ylabel("turbidity")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()
    return df_out, best_params, best_f1
@@ -79,14 +79,14 @@ def get_sentinel_bands_gee(
79
79
  s2_masked = s2.map(dynamic_scl_mask)
80
80
 
81
81
  def extract_features(image):
82
- date = image.date().format('YYYY-MM-dd HH:mm:ss')
82
+ datetime = image.date().format('YYYY-MM-dd HH:mm:ss')
83
83
  values = image.reduceRegion(
84
84
  reducer=ee.Reducer.mean(),
85
85
  geometry=buffered_point,
86
86
  scale=20,
87
87
  maxPixels=1e8
88
88
  )
89
- return ee.Feature(None, values.set('date', date))
89
+ return ee.Feature(None, values.set('datetime', datetime))
90
90
 
91
91
  features = s2_masked.map(extract_features)
92
92
  fc = ee.FeatureCollection(features).filter(ee.Filter.notNull(['B2']))
@@ -95,9 +95,9 @@ def get_sentinel_bands_gee(
95
95
  rows = [f['properties'] for f in data['features']]
96
96
  df = pd.DataFrame(rows)
97
97
  if not df.empty:
98
- df['date'] = pd.to_datetime(df['date'])
99
- df = df.sort_values('date')
100
- df = df.set_index('date')
98
+ df['datetime'] = pd.to_datetime(df['datetime'])
99
+ df = df.sort_values('datetime')
100
+ df = df.set_index('datetime')
101
101
  return df
102
102
 
103
103
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hydroanomaly
3
- Version: 1.0.0
3
+ Version: 1.2.1
4
4
  Summary: A Python package for hydro anomaly detection with simple USGS data retrieval
5
5
  Author-email: Ehsan Kahrizi <ehsan.kahrizi@usu.edu>
6
6
  License: MIT License
@@ -28,7 +28,7 @@ License: MIT License
28
28
  Project-URL: Homepage, https://github.com/yourusername/hydroanomaly
29
29
  Project-URL: Bug Reports, https://github.com/yourusername/hydroanomaly/issues
30
30
  Project-URL: Source, https://github.com/yourusername/hydroanomaly
31
- Keywords: python,package,hydro,anomaly,detection
31
+ Keywords: python,package,hydrology,anomaly detection,remote sensing
32
32
  Classifier: Programming Language :: Python :: 3
33
33
  Classifier: Operating System :: OS Independent
34
34
  Requires-Python: >=3.6
@@ -0,0 +1,10 @@
1
+ hydroanomaly/__init__.py,sha256=HgiUaNbWp_hAHaOvzCJXRBDwe89orkscx3Sd7lvgnQo,1664
2
+ hydroanomaly/ml.py,sha256=X2bYinNwRzQz0yFtRSzmN8OIZaX7vfT4BPSE6KMoYAM,7066
3
+ hydroanomaly/sentinel_bands.py,sha256=XdpXUsJ8VeRQp9akDeQaVBefuuMrQIabslu8tg_FTpk,5399
4
+ hydroanomaly/usgs_turbidity.py,sha256=k0cXRXpTe1YgjfR0Htw77SLD8hM--43jiEiJwx1vRg0,5664
5
+ hydroanomaly/visualize.py,sha256=d_Ou1sTr648TdAW-94NXwNbLPL4rvYVYb5pw4Xux3aE,7228
6
+ hydroanomaly-1.2.1.dist-info/licenses/LICENSE,sha256=OphKV48tcMv6ep-7j-8T6nycykPT0g8ZlMJ9zbGvdPs,1066
7
+ hydroanomaly-1.2.1.dist-info/METADATA,sha256=keF0Y92CYoZxOnU9vYaGfnefWr8s9mN7ncA2Qn2bp4I,12981
8
+ hydroanomaly-1.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ hydroanomaly-1.2.1.dist-info/top_level.txt,sha256=t-5Lc-eTLlkxIhR_N1Cpp6_YZafKS3xLLk9D2CtbE7o,13
10
+ hydroanomaly-1.2.1.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- hydroanomaly/__init__.py,sha256=PTaMZkFNuJnuA9v6tPIll0NoELl3AKJqpi5R-Y_kW9A,1442
2
- hydroanomaly/sentinel_bands.py,sha256=Y6RAunVJDYLs13WemSSQNEu07GqmhR64fC2mLPxwh2k,5371
3
- hydroanomaly/usgs_turbidity.py,sha256=k0cXRXpTe1YgjfR0Htw77SLD8hM--43jiEiJwx1vRg0,5664
4
- hydroanomaly/visualize.py,sha256=d_Ou1sTr648TdAW-94NXwNbLPL4rvYVYb5pw4Xux3aE,7228
5
- hydroanomaly-1.0.0.dist-info/licenses/LICENSE,sha256=OphKV48tcMv6ep-7j-8T6nycykPT0g8ZlMJ9zbGvdPs,1066
6
- hydroanomaly-1.0.0.dist-info/METADATA,sha256=yij922cCRRXtvyel5t1r45B5pNkg71h8jufNu07T-14,12962
7
- hydroanomaly-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
- hydroanomaly-1.0.0.dist-info/top_level.txt,sha256=t-5Lc-eTLlkxIhR_N1Cpp6_YZafKS3xLLk9D2CtbE7o,13
9
- hydroanomaly-1.0.0.dist-info/RECORD,,