hydroanomaly 1.2.1__tar.gz → 1.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/PKG-INFO +1 -1
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly/__init__.py +1 -1
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly/ml.py +34 -30
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly.egg-info/PKG-INFO +1 -1
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/pyproject.toml +1 -1
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/LICENSE +0 -0
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/README.md +0 -0
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly/sentinel_bands.py +0 -0
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly/usgs_turbidity.py +0 -0
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly/visualize.py +0 -0
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly.egg-info/SOURCES.txt +0 -0
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly.egg-info/dependency_links.txt +0 -0
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly.egg-info/requires.txt +0 -0
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly.egg-info/top_level.txt +0 -0
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/setup.cfg +0 -0
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/tests/test_hello.py +0 -0
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/tests/test_math_utils.py +0 -0
- {hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/tests/test_usgs_data.py +0 -0
{hydroanomaly-1.2.1 → hydroanomaly-1.2.2}/hydroanomaly/ml.py
@@ -6,24 +6,30 @@ from sklearn.ensemble import IsolationForest
 from sklearn.metrics import f1_score, recall_score, precision_score
 import matplotlib.pyplot as plt
 
-# ============= Helper
-def
-
-
-    if same_day.empty:
+# ============= Helper Function: Match nearest USGS turbidity by datetime ================================
+def match_nearest_datetime(sentinel_dt, usgs):
+    # usgs is indexed by 'datetime'
+    if usgs.empty:
         return np.nan
-
-
+    # Find the closest datetime in usgs to sentinel_dt
+    i = usgs.index.get_indexer([sentinel_dt], method='nearest')[0]
+    return usgs.iloc[i]['Turbidity']
 
-
-
-# ============= Preprocessing and Feature Engineering ========================================================
+# ============= Preprocessing and Feature Engineering ====================================================
 def preprocess_data(sentinel, usgs):
-    #
-
+    # Expect both to have DatetimeIndex named 'datetime'
+    if sentinel.index.name != 'datetime':
+        raise ValueError("Sentinel dataframe must have DatetimeIndex named 'datetime'")
+    if usgs.index.name != 'datetime':
+        raise ValueError("USGS dataframe must have DatetimeIndex named 'datetime'")
+
+    # Add matched turbidity to sentinel (by nearest datetime)
+    sentinel = sentinel.copy()
+    usgs = usgs.copy()
+    sentinel['turbidity'] = [match_nearest_datetime(dt, usgs) for dt in sentinel.index]
     df = sentinel.dropna(subset=['turbidity'])
 
-    # Water
+    # Water pixel filtering
     if 'SCL' in df.columns and (df['SCL'] == 6).sum() > 0:
         df = df[df['SCL'] == 6].drop_duplicates(subset=['B2', 'B3', 'B4'])
 
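The matching trick in the new helper leans on pandas' index lookup: `Index.get_indexer(..., method='nearest')` returns the position of the closest timestamp in a sorted DatetimeIndex. A minimal sketch of that idea (the data below is made up; only the column and index names follow the diff):

```python
# Sketch of nearest-datetime matching, assuming pandas; illustrative data only.
import pandas as pd

usgs = pd.DataFrame(
    {"Turbidity": [5.0, 18.0, 42.0]},
    index=pd.to_datetime(["2024-06-01 10:00", "2024-06-01 10:15", "2024-06-01 10:30"]),
)
usgs.index.name = "datetime"

sentinel_dt = pd.Timestamp("2024-06-01 10:21")

# get_indexer with method='nearest' gives the positional index of the closest timestamp
i = usgs.index.get_indexer([sentinel_dt], method="nearest")[0]
print(usgs.iloc[i]["Turbidity"])  # -> 18.0 (10:15 is closer than 10:30)
```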
@@ -33,7 +39,7 @@ def preprocess_data(sentinel, usgs):
     df['NDWI'] = (df['B3'] - df['B8']) / (df['B3'] + df['B8'])
     df['NDSI'] = (df['B3'] - df['B11']) / (df['B3'] + df['B11'])
 
-    df = df.
+    df = df.sort_index()
     df['turbidity_diff1'] = df['turbidity'].diff()
     df['turbidity_diff2'] = df['turbidity_diff1'].diff()
     thresh = 2 * df['turbidity_diff2'].std()
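The first and second differences of turbidity feed a spike feature used later in the feature list. The exact 'spike' definition falls outside the visible hunks, so the thresholding below (flagging rows where the absolute second difference exceeds `2 * std`, i.e. the `thresh` line above) is an assumption sketched with made-up data:

```python
# Assumed spike flag based on the second-difference threshold shown in the hunk.
import pandas as pd

turbidity = pd.Series([10, 11, 12, 60, 13, 12, 11], dtype=float)
diff1 = turbidity.diff()    # first difference
diff2 = diff1.diff()        # second difference (abrupt changes)
thresh = 2 * diff2.std()
spike = (diff2.abs() > thresh).astype(int)  # 1 where the series jumps sharply
print(spike.tolist())
```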
@@ -44,12 +50,11 @@ def preprocess_data(sentinel, usgs):
     df['Classe'] = (df['turbidity'] > 20).astype(int)
     return df, bands
 
-
-# ============= Anomaly Detection Methods ================================================================
+# ============= Anomaly Detection: One-Class SVM ========================================================
 def run_oneclass_svm(sentinel, usgs, plot=True):
     """
     Apply One-Class SVM anomaly detection on Sentinel/USGS data.
-
+    Inputs must have DatetimeIndex named 'datetime'.
     """
     df, bands = preprocess_data(sentinel, usgs)
     features = bands + ['NDVI','NDWI','NDSI','turbidity_diff1','turbidity_diff2','spike']
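The practical effect of this change (mirrored in the Isolation Forest docstring below) is a new input contract: both DataFrames must arrive with a DatetimeIndex named 'datetime', since turbidity is now matched by nearest timestamp rather than by day. A hedged usage sketch; the file and raw column names are hypothetical:

```python
# Preparing inputs for the 'datetime'-index contract; a sketch, not package code.
import pandas as pd
from hydroanomaly.ml import run_oneclass_svm

sentinel = pd.read_csv("sentinel_bands.csv", parse_dates=["datetime"]).set_index("datetime")
usgs = pd.read_csv("usgs_turbidity.csv", parse_dates=["datetime"]).set_index("datetime")

# Both indexes are now DatetimeIndex objects named 'datetime', as preprocess_data expects.
# The function returns a 3-tuple; per the diff it returns (None, None, None) if no good model is found.
result = run_oneclass_svm(sentinel, usgs, plot=False)
```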
@@ -87,14 +92,14 @@ def run_oneclass_svm(sentinel, usgs, plot=True):
     df_out['predicted'] = best_y_pred
     if plot:
         plt.figure(figsize=(15,6))
-        plt.plot(df_out
-        plt.scatter(df_out[df_out['Classe']==1]
+        plt.plot(df_out.index, df_out['turbidity'], label='Turbidity', color='blue')
+        plt.scatter(df_out[df_out['Classe']==1].index, df_out[df_out['Classe']==1]['turbidity'],
                     color='red', marker='x', label='True Anomaly', s=100)
-        plt.scatter(df_out[df_out['predicted']==1]
+        plt.scatter(df_out[df_out['predicted']==1].index, df_out[df_out['predicted']==1]['turbidity'],
                     edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
         plt.title("True vs Predicted Anomalies (OneClassSVM)")
-        plt.xlabel("
-        plt.ylabel("
+        plt.xlabel("Datetime")
+        plt.ylabel("Turbidity")
         plt.legend()
         plt.grid(True)
         plt.tight_layout()
@@ -104,12 +109,11 @@ def run_oneclass_svm(sentinel, usgs, plot=True):
         print("Could not find a good model. Try different hyperparameters.")
         return None, None, None
 
-
-# ============= Isolation Forest Method ================================================================
+# ============= Anomaly Detection: Isolation Forest ======================================================
 def run_isolation_forest(sentinel, usgs, plot=True):
     """
     Apply Isolation Forest anomaly detection on Sentinel/USGS data.
-
+    Inputs must have DatetimeIndex named 'datetime'.
     """
     df, bands = preprocess_data(sentinel, usgs)
     features = bands + ['NDVI','NDWI','NDSI','turbidity_diff1','turbidity_diff2','spike']
@@ -152,14 +156,14 @@ def run_isolation_forest(sentinel, usgs, plot=True):
     df_out['predicted'] = best_y_pred
     if plot:
         plt.figure(figsize=(15,6))
-        plt.plot(df_out
-        plt.scatter(df_out[df_out['Classe']==1]
+        plt.plot(df_out.index, df_out['turbidity'], label='Turbidity', color='blue')
+        plt.scatter(df_out[df_out['Classe']==1].index, df_out[df_out['Classe']==1]['turbidity'],
                     color='red', marker='x', label='True Anomaly', s=100)
-        plt.scatter(df_out[df_out['predicted']==1]
+        plt.scatter(df_out[df_out['predicted']==1].index, df_out[df_out['predicted']==1]['turbidity'],
                     edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
         plt.title("True vs Predicted Anomalies (Isolation Forest)")
-        plt.xlabel("
-        plt.ylabel("
+        plt.xlabel("Datetime")
+        plt.ylabel("Turbidity")
         plt.legend()
         plt.grid(True)
         plt.tight_layout()
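Both detectors plot `predicted == 1` as an anomaly, while scikit-learn's `IsolationForest.fit_predict` returns -1 for outliers and 1 for inliers. The mapping below is therefore an assumption about the package internals, not code taken from the diff; the scikit-learn behaviour itself is standard:

```python
# Assumed mapping from sklearn's -1/1 output to the 0/1 'predicted' labels plotted above.
import numpy as np
from sklearn.ensemble import IsolationForest

X = np.random.RandomState(0).normal(size=(100, 4))
X[:5] += 6  # inject a few obvious outliers

raw = IsolationForest(contamination=0.05, random_state=0).fit_predict(X)  # -1 = outlier, 1 = inlier
predicted = (raw == -1).astype(int)  # 1 = anomaly, 0 = normal
print(predicted[:10])
```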