hydroanomaly 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydroanomaly/__init__.py +1 -1
- hydroanomaly/ml.py +23 -23
- hydroanomaly/sentinel_bands.py +5 -5
- {hydroanomaly-1.2.0.dist-info → hydroanomaly-1.2.1.dist-info}/METADATA +1 -1
- hydroanomaly-1.2.1.dist-info/RECORD +10 -0
- hydroanomaly-1.2.0.dist-info/RECORD +0 -10
- {hydroanomaly-1.2.0.dist-info → hydroanomaly-1.2.1.dist-info}/WHEEL +0 -0
- {hydroanomaly-1.2.0.dist-info → hydroanomaly-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {hydroanomaly-1.2.0.dist-info → hydroanomaly-1.2.1.dist-info}/top_level.txt +0 -0
hydroanomaly/__init__.py
CHANGED
hydroanomaly/ml.py
CHANGED
@@ -8,20 +8,20 @@ import matplotlib.pyplot as plt
|
|
8
8
|
|
9
9
|
# ============= Helper Functions =========================================================================
|
10
10
|
def match_nearest(row, usgs):
|
11
|
-
target_time = row['
|
12
|
-
same_day = usgs[usgs['
|
11
|
+
target_time = row['datetime']
|
12
|
+
same_day = usgs[usgs['datetime'] == target_time.datetime()]
|
13
13
|
if same_day.empty:
|
14
14
|
return np.nan
|
15
15
|
delta = (same_day['datetime'] - target_time).abs()
|
16
|
-
return same_day.loc[delta.idxmin(), '
|
16
|
+
return same_day.loc[delta.idxmin(), 'turbidity']
|
17
17
|
|
18
18
|
|
19
19
|
|
20
20
|
# ============= Preprocessing and Feature Engineering ========================================================
|
21
21
|
def preprocess_data(sentinel, usgs):
|
22
22
|
# Add matched turbidity
|
23
|
-
sentinel['
|
24
|
-
df = sentinel.dropna(subset=['
|
23
|
+
sentinel['turbidity'] = sentinel.apply(lambda row: match_nearest(row, usgs), axis=1)
|
24
|
+
df = sentinel.dropna(subset=['turbidity'])
|
25
25
|
|
26
26
|
# Water pixels filtering
|
27
27
|
if 'SCL' in df.columns and (df['SCL'] == 6).sum() > 0:
|
@@ -33,15 +33,15 @@ def preprocess_data(sentinel, usgs):
|
|
33
33
|
df['NDWI'] = (df['B3'] - df['B8']) / (df['B3'] + df['B8'])
|
34
34
|
df['NDSI'] = (df['B3'] - df['B11']) / (df['B3'] + df['B11'])
|
35
35
|
|
36
|
-
df = df.sort_values('
|
37
|
-
df['
|
38
|
-
df['
|
39
|
-
thresh = 2 * df['
|
40
|
-
df['spike'] = (df['
|
36
|
+
df = df.sort_values('datetime').reset_index(drop=True)
|
37
|
+
df['turbidity_diff1'] = df['turbidity'].diff()
|
38
|
+
df['turbidity_diff2'] = df['turbidity_diff1'].diff()
|
39
|
+
thresh = 2 * df['turbidity_diff2'].std()
|
40
|
+
df['spike'] = (df['turbidity_diff2'].abs() > thresh).astype(int)
|
41
41
|
df = df.dropna()
|
42
42
|
|
43
43
|
# Class label
|
44
|
-
df['Classe'] = (df['
|
44
|
+
df['Classe'] = (df['turbidity'] > 20).astype(int)
|
45
45
|
return df, bands
|
46
46
|
|
47
47
|
|
@@ -52,7 +52,7 @@ def run_oneclass_svm(sentinel, usgs, plot=True):
|
|
52
52
|
Returns: DataFrame with predictions, and best model parameters.
|
53
53
|
"""
|
54
54
|
df, bands = preprocess_data(sentinel, usgs)
|
55
|
-
features = bands + ['NDVI','NDWI','NDSI','
|
55
|
+
features = bands + ['NDVI','NDWI','NDSI','turbidity_diff1','turbidity_diff2','spike']
|
56
56
|
X = df[features].fillna(df[features].mean()).values
|
57
57
|
y = df['Classe'].values
|
58
58
|
|
@@ -87,14 +87,14 @@ def run_oneclass_svm(sentinel, usgs, plot=True):
|
|
87
87
|
df_out['predicted'] = best_y_pred
|
88
88
|
if plot:
|
89
89
|
plt.figure(figsize=(15,6))
|
90
|
-
plt.plot(df_out['
|
91
|
-
plt.scatter(df_out[df_out['Classe']==1]['
|
90
|
+
plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
|
91
|
+
plt.scatter(df_out[df_out['Classe']==1]['datetime'], df_out[df_out['Classe']==1]['turbidity'],
|
92
92
|
color='red', marker='x', label='True Anomaly', s=100)
|
93
|
-
plt.scatter(df_out[df_out['predicted']==1]['
|
93
|
+
plt.scatter(df_out[df_out['predicted']==1]['datetime'], df_out[df_out['predicted']==1]['turbidity'],
|
94
94
|
edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
|
95
95
|
plt.title("True vs Predicted Anomalies (OneClassSVM)")
|
96
|
-
plt.xlabel("
|
97
|
-
plt.ylabel("
|
96
|
+
plt.xlabel("datetime")
|
97
|
+
plt.ylabel("turbidity")
|
98
98
|
plt.legend()
|
99
99
|
plt.grid(True)
|
100
100
|
plt.tight_layout()
|
@@ -112,7 +112,7 @@ def run_isolation_forest(sentinel, usgs, plot=True):
|
|
112
112
|
Returns: DataFrame with predictions, and best model parameters.
|
113
113
|
"""
|
114
114
|
df, bands = preprocess_data(sentinel, usgs)
|
115
|
-
features = bands + ['NDVI','NDWI','NDSI','
|
115
|
+
features = bands + ['NDVI','NDWI','NDSI','turbidity_diff1','turbidity_diff2','spike']
|
116
116
|
X = df[features].fillna(df[features].mean()).values
|
117
117
|
y = df['Classe'].values
|
118
118
|
|
@@ -152,14 +152,14 @@ def run_isolation_forest(sentinel, usgs, plot=True):
|
|
152
152
|
df_out['predicted'] = best_y_pred
|
153
153
|
if plot:
|
154
154
|
plt.figure(figsize=(15,6))
|
155
|
-
plt.plot(df_out['
|
156
|
-
plt.scatter(df_out[df_out['Classe']==1]['
|
155
|
+
plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
|
156
|
+
plt.scatter(df_out[df_out['Classe']==1]['datetime'], df_out[df_out['Classe']==1]['turbidity'],
|
157
157
|
color='red', marker='x', label='True Anomaly', s=100)
|
158
|
-
plt.scatter(df_out[df_out['predicted']==1]['
|
158
|
+
plt.scatter(df_out[df_out['predicted']==1]['datetime'], df_out[df_out['predicted']==1]['turbidity'],
|
159
159
|
edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
|
160
160
|
plt.title("True vs Predicted Anomalies (Isolation Forest)")
|
161
|
-
plt.xlabel("
|
162
|
-
plt.ylabel("
|
161
|
+
plt.xlabel("datetime")
|
162
|
+
plt.ylabel("turbidity")
|
163
163
|
plt.legend()
|
164
164
|
plt.grid(True)
|
165
165
|
plt.tight_layout()
|
hydroanomaly/sentinel_bands.py
CHANGED
@@ -79,14 +79,14 @@ def get_sentinel_bands_gee(
|
|
79
79
|
s2_masked = s2.map(dynamic_scl_mask)
|
80
80
|
|
81
81
|
def extract_features(image):
|
82
|
-
|
82
|
+
datetime = image.date().format('YYYY-MM-dd HH:mm:ss')
|
83
83
|
values = image.reduceRegion(
|
84
84
|
reducer=ee.Reducer.mean(),
|
85
85
|
geometry=buffered_point,
|
86
86
|
scale=20,
|
87
87
|
maxPixels=1e8
|
88
88
|
)
|
89
|
-
return ee.Feature(None, values.set('
|
89
|
+
return ee.Feature(None, values.set('datetime', datetime))
|
90
90
|
|
91
91
|
features = s2_masked.map(extract_features)
|
92
92
|
fc = ee.FeatureCollection(features).filter(ee.Filter.notNull(['B2']))
|
@@ -95,9 +95,9 @@ def get_sentinel_bands_gee(
|
|
95
95
|
rows = [f['properties'] for f in data['features']]
|
96
96
|
df = pd.DataFrame(rows)
|
97
97
|
if not df.empty:
|
98
|
-
df['
|
99
|
-
df = df.sort_values('
|
100
|
-
df = df.set_index('
|
98
|
+
df['datetime'] = pd.to_datetime(df['datetime'])
|
99
|
+
df = df.sort_values('datetime')
|
100
|
+
df = df.set_index('datetime')
|
101
101
|
return df
|
102
102
|
|
103
103
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
hydroanomaly/__init__.py,sha256=HgiUaNbWp_hAHaOvzCJXRBDwe89orkscx3Sd7lvgnQo,1664
|
2
|
+
hydroanomaly/ml.py,sha256=X2bYinNwRzQz0yFtRSzmN8OIZaX7vfT4BPSE6KMoYAM,7066
|
3
|
+
hydroanomaly/sentinel_bands.py,sha256=XdpXUsJ8VeRQp9akDeQaVBefuuMrQIabslu8tg_FTpk,5399
|
4
|
+
hydroanomaly/usgs_turbidity.py,sha256=k0cXRXpTe1YgjfR0Htw77SLD8hM--43jiEiJwx1vRg0,5664
|
5
|
+
hydroanomaly/visualize.py,sha256=d_Ou1sTr648TdAW-94NXwNbLPL4rvYVYb5pw4Xux3aE,7228
|
6
|
+
hydroanomaly-1.2.1.dist-info/licenses/LICENSE,sha256=OphKV48tcMv6ep-7j-8T6nycykPT0g8ZlMJ9zbGvdPs,1066
|
7
|
+
hydroanomaly-1.2.1.dist-info/METADATA,sha256=keF0Y92CYoZxOnU9vYaGfnefWr8s9mN7ncA2Qn2bp4I,12981
|
8
|
+
hydroanomaly-1.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
9
|
+
hydroanomaly-1.2.1.dist-info/top_level.txt,sha256=t-5Lc-eTLlkxIhR_N1Cpp6_YZafKS3xLLk9D2CtbE7o,13
|
10
|
+
hydroanomaly-1.2.1.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
hydroanomaly/__init__.py,sha256=IGPsOv-xfQGQpr_HxhrcZXjvVVkAfMBxh97e6S8Rfac,1664
|
2
|
-
hydroanomaly/ml.py,sha256=vqfnmGijjxGgtqJ2rzOmnMMrrVAVlYOPe1AnuX4EuG4,7018
|
3
|
-
hydroanomaly/sentinel_bands.py,sha256=Y6RAunVJDYLs13WemSSQNEu07GqmhR64fC2mLPxwh2k,5371
|
4
|
-
hydroanomaly/usgs_turbidity.py,sha256=k0cXRXpTe1YgjfR0Htw77SLD8hM--43jiEiJwx1vRg0,5664
|
5
|
-
hydroanomaly/visualize.py,sha256=d_Ou1sTr648TdAW-94NXwNbLPL4rvYVYb5pw4Xux3aE,7228
|
6
|
-
hydroanomaly-1.2.0.dist-info/licenses/LICENSE,sha256=OphKV48tcMv6ep-7j-8T6nycykPT0g8ZlMJ9zbGvdPs,1066
|
7
|
-
hydroanomaly-1.2.0.dist-info/METADATA,sha256=bh51WnUxEbk3azZY2IN9a9Io1Pav7knU90qMiiiTGDU,12981
|
8
|
-
hydroanomaly-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
9
|
-
hydroanomaly-1.2.0.dist-info/top_level.txt,sha256=t-5Lc-eTLlkxIhR_N1Cpp6_YZafKS3xLLk9D2CtbE7o,13
|
10
|
-
hydroanomaly-1.2.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|