hydroanomaly 1.2.0.tar.gz → 1.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydroanomaly
-Version: 1.2.0
+Version: 1.2.1
 Summary: A Python package for hydro anomaly detection with simple USGS data retrieval
 Author-email: Ehsan Kahrizi <ehsan.kahrizi@usu.edu>
 License: MIT License
@@ -10,7 +10,7 @@ A simple Python package with just 3 modules:
 That's it - nothing else!
 """
 
-__version__ = "1.2.0"
+__version__ = "1.2.1"
 __author__ = "Ehsan Kahrizi (Ehsan.kahrizi@usu.edu)"
 
 # Import the 3 simple modules
@@ -8,20 +8,20 @@ import matplotlib.pyplot as plt
 
 # ============= Helper Functions =========================================================================
 def match_nearest(row, usgs):
-    target_time = row['date']
-    same_day = usgs[usgs['date'] == target_time.date()]
+    target_time = row['datetime']
+    same_day = usgs[usgs['datetime'] == target_time.datetime()]
     if same_day.empty:
         return np.nan
     delta = (same_day['datetime'] - target_time).abs()
-    return same_day.loc[delta.idxmin(), 'Turbidity']
+    return same_day.loc[delta.idxmin(), 'turbidity']
 
 
 
 # ============= Preprocessing and Feature Engineering ========================================================
 def preprocess_data(sentinel, usgs):
     # Add matched turbidity
-    sentinel['Turbidity'] = sentinel.apply(lambda row: match_nearest(row, usgs), axis=1)
-    df = sentinel.dropna(subset=['Turbidity'])
+    sentinel['turbidity'] = sentinel.apply(lambda row: match_nearest(row, usgs), axis=1)
+    df = sentinel.dropna(subset=['turbidity'])
 
     # Water pixels filtering
     if 'SCL' in df.columns and (df['SCL'] == 6).sum() > 0:
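
The hunk above renames the join columns from 'date'/'Turbidity' to 'datetime'/'turbidity'. For reference, a self-contained sketch of the same same-day, nearest-time lookup is shown below on toy data; note that pandas Timestamp objects expose .date() rather than .datetime(), so the day filter here is written against .dt.date. This is a sketch, not the package's code.

import numpy as np
import pandas as pd

# Standalone sketch of the same-day, nearest-time match (column names
# 'datetime' and 'turbidity' follow the renamed schema in this diff).
def match_nearest_sketch(row, usgs):
    target_time = row['datetime']
    # pandas Timestamps expose .date(); compare against the calendar day of each USGS reading
    same_day = usgs[usgs['datetime'].dt.date == target_time.date()]
    if same_day.empty:
        return np.nan
    delta = (same_day['datetime'] - target_time).abs()
    return same_day.loc[delta.idxmin(), 'turbidity']

# Toy frames: one Sentinel observation, two USGS readings on the same day
usgs_toy = pd.DataFrame({
    'datetime': pd.to_datetime(['2024-06-01 10:00', '2024-06-01 16:00']),
    'turbidity': [12.0, 35.0],
})
sentinel_toy = pd.DataFrame({'datetime': pd.to_datetime(['2024-06-01 10:30'])})
sentinel_toy['turbidity'] = sentinel_toy.apply(lambda r: match_nearest_sketch(r, usgs_toy), axis=1)
print(sentinel_toy)  # nearest same-day reading -> 12.0
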
@@ -33,15 +33,15 @@ def preprocess_data(sentinel, usgs):
     df['NDWI'] = (df['B3'] - df['B8']) / (df['B3'] + df['B8'])
     df['NDSI'] = (df['B3'] - df['B11']) / (df['B3'] + df['B11'])
 
-    df = df.sort_values('date').reset_index(drop=True)
-    df['Turbidity_diff1'] = df['Turbidity'].diff()
-    df['Turbidity_diff2'] = df['Turbidity_diff1'].diff()
-    thresh = 2 * df['Turbidity_diff2'].std()
-    df['spike'] = (df['Turbidity_diff2'].abs() > thresh).astype(int)
+    df = df.sort_values('datetime').reset_index(drop=True)
+    df['turbidity_diff1'] = df['turbidity'].diff()
+    df['turbidity_diff2'] = df['turbidity_diff1'].diff()
+    thresh = 2 * df['turbidity_diff2'].std()
+    df['spike'] = (df['turbidity_diff2'].abs() > thresh).astype(int)
     df = df.dropna()
 
     # Class label
-    df['Classe'] = (df['Turbidity'] > 20).astype(int)
+    df['Classe'] = (df['turbidity'] > 20).astype(int)
     return df, bands
 
 
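
Beyond the renames, this hunk keeps the same spike feature: the second difference of turbidity, flagged against twice its standard deviation. A toy illustration with made-up values:

import pandas as pd

# Made-up turbidity series with one abrupt jump
turbidity = pd.Series([10.0, 11.0, 10.0, 11.0, 10.0, 11.0, 10.0, 55.0, 11.0, 10.0, 11.0, 10.0])
diff1 = turbidity.diff()                      # first difference
diff2 = diff1.diff()                          # second difference
thresh = 2 * diff2.std()                      # same 2*std rule as preprocess_data
spike = (diff2.abs() > thresh).astype(int)    # flags the abrupt change around the injected jump
print(pd.DataFrame({'turbidity': turbidity, 'diff2': diff2, 'spike': spike}))
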
@@ -52,7 +52,7 @@ def run_oneclass_svm(sentinel, usgs, plot=True):
     Returns: DataFrame with predictions, and best model parameters.
     """
     df, bands = preprocess_data(sentinel, usgs)
-    features = bands + ['NDVI','NDWI','NDSI','Turbidity_diff1','Turbidity_diff2','spike']
+    features = bands + ['NDVI','NDWI','NDSI','turbidity_diff1','turbidity_diff2','spike']
     X = df[features].fillna(df[features].mean()).values
     y = df['Classe'].values
 
@@ -87,14 +87,14 @@ def run_oneclass_svm(sentinel, usgs, plot=True):
     df_out['predicted'] = best_y_pred
     if plot:
         plt.figure(figsize=(15,6))
-        plt.plot(df_out['date'], df_out['Turbidity'], label='Turbidity', color='blue')
-        plt.scatter(df_out[df_out['Classe']==1]['date'], df_out[df_out['Classe']==1]['Turbidity'],
+        plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
+        plt.scatter(df_out[df_out['Classe']==1]['datetime'], df_out[df_out['Classe']==1]['turbidity'],
                     color='red', marker='x', label='True Anomaly', s=100)
-        plt.scatter(df_out[df_out['predicted']==1]['date'], df_out[df_out['predicted']==1]['Turbidity'],
+        plt.scatter(df_out[df_out['predicted']==1]['datetime'], df_out[df_out['predicted']==1]['turbidity'],
                     edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
         plt.title("True vs Predicted Anomalies (OneClassSVM)")
-        plt.xlabel("Date")
-        plt.ylabel("Turbidity")
+        plt.xlabel("datetime")
+        plt.ylabel("turbidity")
         plt.legend()
         plt.grid(True)
         plt.tight_layout()
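
The plotting hunk above only switches to the new column names. For readers unfamiliar with the detector itself, here is a minimal, self-contained one-class SVM sketch on synthetic features (random data and fixed hyperparameters; the package's own grid search and feature set are not reproduced):

import numpy as np
from sklearn.svm import OneClassSVM

rng = np.random.default_rng(0)
X = rng.normal(0.0, 1.0, size=(200, 6))       # stand-in for the band/index feature matrix
X[:10] += 6.0                                  # inject a few obvious outliers

model = OneClassSVM(kernel='rbf', nu=0.05, gamma='scale').fit(X)
predicted = (model.predict(X) == -1).astype(int)   # 1 = flagged anomaly, as in the 'predicted' column
print(predicted[:10], predicted.sum())
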
@@ -112,7 +112,7 @@ def run_isolation_forest(sentinel, usgs, plot=True):
     Returns: DataFrame with predictions, and best model parameters.
     """
     df, bands = preprocess_data(sentinel, usgs)
-    features = bands + ['NDVI','NDWI','NDSI','Turbidity_diff1','Turbidity_diff2','spike']
+    features = bands + ['NDVI','NDWI','NDSI','turbidity_diff1','turbidity_diff2','spike']
     X = df[features].fillna(df[features].mean()).values
     y = df['Classe'].values
 
@@ -152,14 +152,14 @@ def run_isolation_forest(sentinel, usgs, plot=True):
     df_out['predicted'] = best_y_pred
     if plot:
         plt.figure(figsize=(15,6))
-        plt.plot(df_out['date'], df_out['Turbidity'], label='Turbidity', color='blue')
-        plt.scatter(df_out[df_out['Classe']==1]['date'], df_out[df_out['Classe']==1]['Turbidity'],
+        plt.plot(df_out['datetime'], df_out['turbidity'], label='turbidity', color='blue')
+        plt.scatter(df_out[df_out['Classe']==1]['datetime'], df_out[df_out['Classe']==1]['turbidity'],
                     color='red', marker='x', label='True Anomaly', s=100)
-        plt.scatter(df_out[df_out['predicted']==1]['date'], df_out[df_out['predicted']==1]['Turbidity'],
+        plt.scatter(df_out[df_out['predicted']==1]['datetime'], df_out[df_out['predicted']==1]['turbidity'],
                     edgecolors='orange', facecolors='none', marker='o', label='Predicted Anomaly', s=80)
         plt.title("True vs Predicted Anomalies (Isolation Forest)")
-        plt.xlabel("Date")
-        plt.ylabel("Turbidity")
+        plt.xlabel("datetime")
+        plt.ylabel("turbidity")
         plt.legend()
         plt.grid(True)
         plt.tight_layout()
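
The Isolation Forest path mirrors the one-class SVM changes. A similarly hedged standalone sketch, this time also showing the anomaly scores the model exposes (again synthetic data and arbitrary hyperparameters, not the package's tuning loop):

import numpy as np
from sklearn.ensemble import IsolationForest

rng = np.random.default_rng(1)
X = rng.normal(0.0, 1.0, size=(200, 6))
X[:10] += 6.0                                  # same kind of injected outliers

model = IsolationForest(n_estimators=200, contamination=0.05, random_state=0).fit(X)
predicted = (model.predict(X) == -1).astype(int)   # 1 = flagged anomaly
scores = model.decision_function(X)                # lower scores = more anomalous
print(predicted.sum(), scores[:3])
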
@@ -79,14 +79,14 @@ def get_sentinel_bands_gee(
     s2_masked = s2.map(dynamic_scl_mask)
 
     def extract_features(image):
-        date = image.date().format('YYYY-MM-dd HH:mm:ss')
+        datetime = image.date().format('YYYY-MM-dd HH:mm:ss')
        values = image.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=buffered_point,
            scale=20,
            maxPixels=1e8
        )
-        return ee.Feature(None, values.set('date', date))
+        return ee.Feature(None, values.set('datetime', datetime))
 
     features = s2_masked.map(extract_features)
     fc = ee.FeatureCollection(features).filter(ee.Filter.notNull(['B2']))
@@ -95,9 +95,9 @@ def get_sentinel_bands_gee(
     rows = [f['properties'] for f in data['features']]
     df = pd.DataFrame(rows)
     if not df.empty:
-        df['date'] = pd.to_datetime(df['date'])
-        df = df.sort_values('date')
-        df = df.set_index('date')
+        df['datetime'] = pd.to_datetime(df['datetime'])
+        df = df.sort_values('datetime')
+        df = df.set_index('datetime')
     return df
 
 
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydroanomaly
-Version: 1.2.0
+Version: 1.2.1
 Summary: A Python package for hydro anomaly detection with simple USGS data retrieval
 Author-email: Ehsan Kahrizi <ehsan.kahrizi@usu.edu>
 License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "hydroanomaly"
-version = "1.2.0"
+version = "1.2.1"
 authors = [
     {name = "Ehsan Kahrizi", email = "ehsan.kahrizi@usu.edu"},
 ]