hydroanomaly 1.0.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydroanomaly/__init__.py +9 -3
- hydroanomaly/ml.py +170 -0
- hydroanomaly/sentinel_bands.py +5 -5
- {hydroanomaly-1.0.0.dist-info → hydroanomaly-1.2.1.dist-info}/METADATA +2 -2
- hydroanomaly-1.2.1.dist-info/RECORD +10 -0
- hydroanomaly-1.0.0.dist-info/RECORD +0 -9
- {hydroanomaly-1.0.0.dist-info → hydroanomaly-1.2.1.dist-info}/WHEEL +0 -0
- {hydroanomaly-1.0.0.dist-info → hydroanomaly-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {hydroanomaly-1.0.0.dist-info → hydroanomaly-1.2.1.dist-info}/top_level.txt +0 -0
hydroanomaly/__init__.py
CHANGED
@@ -5,17 +5,20 @@ A simple Python package with just 3 modules:
|
|
5
5
|
1. USGS turbidity data retrieval (returns data and site coordinates)
|
6
6
|
2. Sentinel satellite bands retrieval
|
7
7
|
3. Time series visualization
|
8
|
+
4. Machine learning anomaly detection (One-Class SVM and Isolation Forest)
|
8
9
|
|
9
10
|
That's it - nothing else!
|
10
11
|
"""
|
11
12
|
|
12
|
-
__version__ = "1.0.0"
|
13
|
+
__version__ = "1.2.1"
|
13
14
|
__author__ = "Ehsan Kahrizi (Ehsan.kahrizi@usu.edu)"
|
14
15
|
|
15
16
|
# Import the 4 simple modules
|
16
17
|
from .usgs_turbidity import get_turbidity, get_usgs_turbidity
|
17
18
|
from .sentinel_bands import get_sentinel_bands, get_satellite_data, get_sentinel, get_sentinel_bands_gee, show_sentinel_ndwi_map
|
18
19
|
from .visualize import plot_timeseries, plot_turbidity, plot_sentinel, plot_comparison, plot, visualize
|
20
|
+
from .ml import run_oneclass_svm, run_isolation_forest
|
21
|
+
|
19
22
|
|
20
23
|
# Export everything
|
21
24
|
__all__ = [
|
@@ -30,14 +33,17 @@ __all__ = [
|
|
30
33
|
'get_sentinel',
|
31
34
|
'show_sentinel_ndwi_map',
|
32
35
|
|
33
|
-
|
34
36
|
# Visualization functions
|
35
37
|
'plot_timeseries',
|
36
38
|
'plot_turbidity',
|
37
39
|
'plot_sentinel',
|
38
40
|
'plot_comparison',
|
39
41
|
'plot',
|
40
|
-
'visualize'
|
42
|
+
'visualize',
|
43
|
+
|
44
|
+
# Machine learning functions
|
45
|
+
'run_oneclass_svm',
|
46
|
+
'run_isolation_forest'
|
41
47
|
]
|
42
48
|
|
43
49
|
print(f"HydroAnomaly v{__version__} - Simple Water Data Package")
|
hydroanomaly/ml.py
ADDED
@@ -0,0 +1,170 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import pandas as pd
|
3
|
+
from sklearn.preprocessing import StandardScaler
|
4
|
+
from sklearn.svm import OneClassSVM
|
5
|
+
from sklearn.ensemble import IsolationForest
|
6
|
+
from sklearn.metrics import f1_score, recall_score, precision_score
|
7
|
+
import matplotlib.pyplot as plt
|
8
|
+
|
9
|
+
# ============= Helper Functions =========================================================================
|
10
|
+
def match_nearest(row, usgs):
    """Return the USGS turbidity reading closest in time to one Sentinel row.

    Only USGS readings taken on the same calendar day as ``row['datetime']``
    are candidates; among those, the reading with the smallest absolute time
    difference is selected.

    Args:
        row: Mapping or ``pandas.Series`` holding a ``'datetime'`` entry.
        usgs: DataFrame with ``'datetime'`` and ``'turbidity'`` columns.

    Returns:
        The matched turbidity value, or ``np.nan`` when ``usgs`` has no
        reading on that day.
    """
    target_time = pd.Timestamp(row['datetime'])
    usgs_times = pd.to_datetime(usgs['datetime'])

    # Compare calendar days (timestamps truncated to midnight), not exact
    # instants: an exact-equality filter would make the nearest-time step
    # below pointless.
    # NOTE(review): both sides are assumed to share timezone-awareness.
    on_same_day = usgs_times.dt.normalize() == target_time.normalize()
    same_day = usgs[on_same_day]
    if same_day.empty:
        return np.nan

    delta = (usgs_times[on_same_day] - target_time).abs()
    return same_day.loc[delta.idxmin(), 'turbidity']
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
# ============= Preprocessing and Feature Engineering ========================================================
|
21
|
+
def _ensure_datetime_column(frame):
    """Return a copy of *frame* that exposes ``'datetime'`` as a column."""
    if 'datetime' not in frame.columns and frame.index.name == 'datetime':
        return frame.reset_index()
    return frame.copy()


def preprocess_data(sentinel, usgs):
    """Join Sentinel band data with USGS turbidity and derive model features.

    Steps, in order:
      1. Attach to every Sentinel row the same-day nearest USGS turbidity
         (via ``match_nearest``) and drop rows without a match.
      2. If an ``'SCL'`` column exists and contains at least one row equal
         to 6, keep only those rows and drop duplicates on B2/B3/B4;
         otherwise keep all rows.
      3. Add the normalized-difference indices NDVI, NDWI and NDSI.
      4. Sort by time and add first/second turbidity differences plus a
         ``'spike'`` flag (|second difference| > 2 standard deviations).
      5. Drop every row that still contains a missing value in any column.
      6. Add the binary label ``'Classe'`` (1 when turbidity > 20).

    Args:
        sentinel: DataFrame with the band columns listed in ``bands`` and a
            ``'datetime'`` column, or a ``'datetime'``-named index as
            returned by ``get_sentinel_bands_gee``.
        usgs: DataFrame with ``'datetime'`` (column or index) and
            ``'turbidity'``.

    Returns:
        Tuple ``(df, bands)``: the processed DataFrame and the list of band
        column names used as features. Neither input frame is modified.
    """
    # Work on copies: adding the turbidity column must not leak into the
    # caller's frame, and a datetime index is promoted to a regular column.
    sentinel = _ensure_datetime_column(sentinel)
    usgs = _ensure_datetime_column(usgs)

    # Add matched turbidity
    if sentinel.empty:
        # DataFrame.apply(axis=1) on an empty frame yields a DataFrame,
        # which cannot be assigned to a single column.
        sentinel['turbidity'] = np.nan
    else:
        sentinel['turbidity'] = sentinel.apply(match_nearest, axis=1, args=(usgs,))
    df = sentinel.dropna(subset=['turbidity'])

    # Water pixels filtering
    if 'SCL' in df.columns and (df['SCL'] == 6).any():
        df = df[df['SCL'] == 6].drop_duplicates(subset=['B2', 'B3', 'B4'])
    # Detach from the filtered view before adding columns.
    df = df.copy()

    # Feature engineering
    bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12']
    df['NDVI'] = (df['B8'] - df['B4']) / (df['B8'] + df['B4'])
    df['NDWI'] = (df['B3'] - df['B8']) / (df['B3'] + df['B8'])
    df['NDSI'] = (df['B3'] - df['B11']) / (df['B3'] + df['B11'])
    # A zero denominator produces +/-inf, which dropna() would keep and the
    # downstream scaler cannot handle; treat it as missing instead.
    index_cols = ['NDVI', 'NDWI', 'NDSI']
    df[index_cols] = df[index_cols].replace([np.inf, -np.inf], np.nan)

    df = df.sort_values('datetime').reset_index(drop=True)
    df['turbidity_diff1'] = df['turbidity'].diff()
    df['turbidity_diff2'] = df['turbidity_diff1'].diff()
    thresh = 2 * df['turbidity_diff2'].std()
    df['spike'] = (df['turbidity_diff2'].abs() > thresh).astype(int)
    # Removes the leading rows whose differences are undefined, along with
    # any row that has a missing value in another column.
    df = df.dropna().copy()

    # Class label
    df['Classe'] = (df['turbidity'] > 20).astype(int)
    return df, bands
|
46
|
+
|
47
|
+
|
48
|
+
# ============= Anomaly Detection Methods ================================================================
|
49
|
+
def run_oneclass_svm(sentinel, usgs, plot=True):
    """
    Apply One-Class SVM anomaly detection on Sentinel/USGS data.

    The model is trained on the first 80% of the normal samples
    (``Classe == 0``) and evaluated on the remaining normal samples plus all
    anomalous samples (``Classe == 1``). A small grid over ``gamma`` and
    ``nu`` is searched and the combination with the highest F1 score is kept.

    Args:
        sentinel: Sentinel band DataFrame, passed to ``preprocess_data``.
        usgs: USGS turbidity DataFrame, passed to ``preprocess_data``.
        plot: When True, plot true versus predicted anomalies.

    Returns:
        Tuple ``(df_out, best_params, best_f1)``. ``df_out`` holds the
        evaluated rows in chronological order with a ``'predicted'`` column
        (1 = anomaly). Returns ``(None, None, None)`` when there is not
        enough data or no parameter set predicts both classes.
    """
    df, bands = preprocess_data(sentinel, usgs)
    if df.empty:
        print("No usable samples after preprocessing.")
        return None, None, None

    features = bands + ['NDVI', 'NDWI', 'NDSI', 'turbidity_diff1', 'turbidity_diff2', 'spike']
    X = df[features].fillna(df[features].mean()).values
    y = df['Classe'].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Keep row positions (not just values) so predictions can be attached to
    # the rows they were actually computed for.
    idx_class0 = np.flatnonzero(y == 0)
    idx_class1 = np.flatnonzero(y == 1)
    if len(idx_class0) == 0:
        print("No normal samples available for training.")
        return None, None, None

    train_size = max(1, int(0.8 * len(idx_class0)))
    test_idx = np.concatenate([idx_class0[train_size:], idx_class1])
    if len(test_idx) == 0:
        print("No samples left for evaluation.")
        return None, None, None

    X_train = X_scaled[idx_class0[:train_size]]
    X_test = X_scaled[test_idx]
    y_test = y[test_idx]

    best_f1 = -1
    best_y_pred, best_params = None, None
    for gamma in ['auto', 'scale']:
        for nu in [0.01, 0.05, 0.1, 0.2]:
            model = OneClassSVM(kernel='rbf', gamma=gamma, nu=nu)
            model.fit(X_train)
            # The model predicts +1 for inliers and -1 for outliers; map to 0/1.
            y_pred = np.where(model.predict(X_test) == 1, 0, 1)
            # Skip degenerate models that label every sample the same.
            if len(np.unique(y_pred)) > 1:
                f1 = f1_score(y_test, y_pred)
                if f1 > best_f1:
                    best_f1 = f1
                    best_y_pred = y_pred
                    best_params = {'gamma': gamma, 'nu': nu}

    if best_y_pred is None:
        print("Could not find a good model. Try different hyperparameters.")
        return None, None, None

    # Restore chronological order (df is sorted by datetime) while keeping
    # each prediction paired with its own row.
    order = np.argsort(test_idx)
    df_out = df.iloc[test_idx[order]].copy()
    df_out['predicted'] = best_y_pred[order]

    if plot:
        true_anomalies = df_out[df_out['Classe'] == 1]
        predicted_anomalies = df_out[df_out['predicted'] == 1]
        plt.figure(figsize=(15, 6))
        plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
        plt.scatter(true_anomalies['datetime'], true_anomalies['turbidity'],
                    color='red', marker='x', label='True Anomaly', s=100)
        plt.scatter(predicted_anomalies['datetime'], predicted_anomalies['turbidity'],
                    edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
        plt.title("True vs Predicted Anomalies (OneClassSVM)")
        plt.xlabel("datetime")
        plt.ylabel("turbidity")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()
    return df_out, best_params, best_f1
|
106
|
+
|
107
|
+
|
108
|
+
# ============= Isolation Forest Method ================================================================
|
109
|
+
def run_isolation_forest(sentinel, usgs, plot=True):
    """
    Apply Isolation Forest anomaly detection on Sentinel/USGS data.

    The model is trained on the first 80% of the normal samples
    (``Classe == 0``) and evaluated on the remaining normal samples plus all
    anomalous samples (``Classe == 1``). Several ``contamination`` values are
    tried and the one with the highest F1 score is kept.

    Args:
        sentinel: Sentinel band DataFrame, passed to ``preprocess_data``.
        usgs: USGS turbidity DataFrame, passed to ``preprocess_data``.
        plot: When True, plot true versus predicted anomalies.

    Returns:
        Tuple ``(df_out, best_params, best_f1)``. ``df_out`` holds the
        evaluated rows in chronological order with a ``'predicted'`` column
        (1 = anomaly). Returns ``(None, None, None)`` when there is not
        enough data or no setting predicts both classes.
    """
    df, bands = preprocess_data(sentinel, usgs)
    if df.empty:
        print("No usable samples after preprocessing.")
        return None, None, None

    features = bands + ['NDVI', 'NDWI', 'NDSI', 'turbidity_diff1', 'turbidity_diff2', 'spike']
    X = df[features].fillna(df[features].mean()).values
    y = df['Classe'].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Keep row positions (not just values) so predictions can be attached to
    # the rows they were actually computed for.
    idx_class0 = np.flatnonzero(y == 0)
    idx_class1 = np.flatnonzero(y == 1)
    if len(idx_class0) == 0:
        print("No normal samples available for training.")
        return None, None, None

    train_size = max(1, int(0.8 * len(idx_class0)))
    test_idx = np.concatenate([idx_class0[train_size:], idx_class1])
    if len(test_idx) == 0:
        print("No samples left for evaluation.")
        return None, None, None

    X_train = X_scaled[idx_class0[:train_size]]
    X_test = X_scaled[test_idx]
    y_test = y[test_idx]

    best_f1 = -1
    best_y_pred, best_params = None, None
    for contamination in [0.01, 0.05, 0.1, 0.15, 0.2, 0.3]:
        model = IsolationForest(
            n_estimators=100,
            contamination=contamination,
            max_samples='auto',
            bootstrap=True,
            random_state=42
        )
        model.fit(X_train)
        # The model predicts +1 for inliers and -1 for outliers; map to 0/1.
        y_pred = np.where(model.predict(X_test) == 1, 0, 1)
        # Skip degenerate models that label every sample the same.
        if len(np.unique(y_pred)) > 1:
            f1 = f1_score(y_test, y_pred)
            if f1 > best_f1:
                best_f1 = f1
                best_y_pred = y_pred
                best_params = {'contamination': contamination}

    if best_y_pred is None:
        print("Could not find a good model. Try different hyperparameters.")
        return None, None, None

    # Restore chronological order (df is sorted by datetime) while keeping
    # each prediction paired with its own row.
    order = np.argsort(test_idx)
    df_out = df.iloc[test_idx[order]].copy()
    df_out['predicted'] = best_y_pred[order]

    if plot:
        true_anomalies = df_out[df_out['Classe'] == 1]
        predicted_anomalies = df_out[df_out['predicted'] == 1]
        plt.figure(figsize=(15, 6))
        plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
        plt.scatter(true_anomalies['datetime'], true_anomalies['turbidity'],
                    color='red', marker='x', label='True Anomaly', s=100)
        plt.scatter(predicted_anomalies['datetime'], predicted_anomalies['turbidity'],
                    edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
        plt.title("True vs Predicted Anomalies (Isolation Forest)")
        plt.xlabel("datetime")
        plt.ylabel("turbidity")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()
    return df_out, best_params, best_f1
|
hydroanomaly/sentinel_bands.py
CHANGED
@@ -79,14 +79,14 @@ def get_sentinel_bands_gee(
|
|
79
79
|
s2_masked = s2.map(dynamic_scl_mask)
|
80
80
|
|
81
81
|
def extract_features(image):
|
82
|
-
|
82
|
+
datetime = image.date().format('YYYY-MM-dd HH:mm:ss')
|
83
83
|
values = image.reduceRegion(
|
84
84
|
reducer=ee.Reducer.mean(),
|
85
85
|
geometry=buffered_point,
|
86
86
|
scale=20,
|
87
87
|
maxPixels=1e8
|
88
88
|
)
|
89
|
-
return ee.Feature(None, values.set('
|
89
|
+
return ee.Feature(None, values.set('datetime', datetime))
|
90
90
|
|
91
91
|
features = s2_masked.map(extract_features)
|
92
92
|
fc = ee.FeatureCollection(features).filter(ee.Filter.notNull(['B2']))
|
@@ -95,9 +95,9 @@ def get_sentinel_bands_gee(
|
|
95
95
|
rows = [f['properties'] for f in data['features']]
|
96
96
|
df = pd.DataFrame(rows)
|
97
97
|
if not df.empty:
|
98
|
-
df['
|
99
|
-
df = df.sort_values('
|
100
|
-
df = df.set_index('
|
98
|
+
df['datetime'] = pd.to_datetime(df['datetime'])
|
99
|
+
df = df.sort_values('datetime')
|
100
|
+
df = df.set_index('datetime')
|
101
101
|
return df
|
102
102
|
|
103
103
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: hydroanomaly
|
3
|
-
Version: 1.0.0
|
3
|
+
Version: 1.2.1
|
4
4
|
Summary: A Python package for hydro anomaly detection with simple USGS data retrieval
|
5
5
|
Author-email: Ehsan Kahrizi <ehsan.kahrizi@usu.edu>
|
6
6
|
License: MIT License
|
@@ -28,7 +28,7 @@ License: MIT License
|
|
28
28
|
Project-URL: Homepage, https://github.com/yourusername/hydroanomaly
|
29
29
|
Project-URL: Bug Reports, https://github.com/yourusername/hydroanomaly/issues
|
30
30
|
Project-URL: Source, https://github.com/yourusername/hydroanomaly
|
31
|
-
Keywords: python,package,
|
31
|
+
Keywords: python,package,hydrology,anomaly detection,remote sensing
|
32
32
|
Classifier: Programming Language :: Python :: 3
|
33
33
|
Classifier: Operating System :: OS Independent
|
34
34
|
Requires-Python: >=3.6
|
@@ -0,0 +1,10 @@
|
|
1
|
+
hydroanomaly/__init__.py,sha256=HgiUaNbWp_hAHaOvzCJXRBDwe89orkscx3Sd7lvgnQo,1664
|
2
|
+
hydroanomaly/ml.py,sha256=X2bYinNwRzQz0yFtRSzmN8OIZaX7vfT4BPSE6KMoYAM,7066
|
3
|
+
hydroanomaly/sentinel_bands.py,sha256=XdpXUsJ8VeRQp9akDeQaVBefuuMrQIabslu8tg_FTpk,5399
|
4
|
+
hydroanomaly/usgs_turbidity.py,sha256=k0cXRXpTe1YgjfR0Htw77SLD8hM--43jiEiJwx1vRg0,5664
|
5
|
+
hydroanomaly/visualize.py,sha256=d_Ou1sTr648TdAW-94NXwNbLPL4rvYVYb5pw4Xux3aE,7228
|
6
|
+
hydroanomaly-1.2.1.dist-info/licenses/LICENSE,sha256=OphKV48tcMv6ep-7j-8T6nycykPT0g8ZlMJ9zbGvdPs,1066
|
7
|
+
hydroanomaly-1.2.1.dist-info/METADATA,sha256=keF0Y92CYoZxOnU9vYaGfnefWr8s9mN7ncA2Qn2bp4I,12981
|
8
|
+
hydroanomaly-1.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
9
|
+
hydroanomaly-1.2.1.dist-info/top_level.txt,sha256=t-5Lc-eTLlkxIhR_N1Cpp6_YZafKS3xLLk9D2CtbE7o,13
|
10
|
+
hydroanomaly-1.2.1.dist-info/RECORD,,
|
@@ -1,9 +0,0 @@
|
|
1
|
-
hydroanomaly/__init__.py,sha256=PTaMZkFNuJnuA9v6tPIll0NoELl3AKJqpi5R-Y_kW9A,1442
|
2
|
-
hydroanomaly/sentinel_bands.py,sha256=Y6RAunVJDYLs13WemSSQNEu07GqmhR64fC2mLPxwh2k,5371
|
3
|
-
hydroanomaly/usgs_turbidity.py,sha256=k0cXRXpTe1YgjfR0Htw77SLD8hM--43jiEiJwx1vRg0,5664
|
4
|
-
hydroanomaly/visualize.py,sha256=d_Ou1sTr648TdAW-94NXwNbLPL4rvYVYb5pw4Xux3aE,7228
|
5
|
-
hydroanomaly-1.0.0.dist-info/licenses/LICENSE,sha256=OphKV48tcMv6ep-7j-8T6nycykPT0g8ZlMJ9zbGvdPs,1066
|
6
|
-
hydroanomaly-1.0.0.dist-info/METADATA,sha256=yij922cCRRXtvyel5t1r45B5pNkg71h8jufNu07T-14,12962
|
7
|
-
hydroanomaly-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
8
|
-
hydroanomaly-1.0.0.dist-info/top_level.txt,sha256=t-5Lc-eTLlkxIhR_N1Cpp6_YZafKS3xLLk9D2CtbE7o,13
|
9
|
-
hydroanomaly-1.0.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|