hydroanomaly-0.1.0-py3-none-any.whl → hydroanomaly-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydroanomaly/__init__.py +82 -3
- hydroanomaly/plotting.py +389 -0
- hydroanomaly/usgs_data.py +311 -0
- hydroanomaly-0.3.0.dist-info/METADATA +400 -0
- hydroanomaly-0.3.0.dist-info/RECORD +10 -0
- hydroanomaly-0.1.0.dist-info/METADATA +0 -60
- hydroanomaly-0.1.0.dist-info/RECORD +0 -8
- {hydroanomaly-0.1.0.dist-info → hydroanomaly-0.3.0.dist-info}/WHEEL +0 -0
- {hydroanomaly-0.1.0.dist-info → hydroanomaly-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {hydroanomaly-0.1.0.dist-info → hydroanomaly-0.3.0.dist-info}/top_level.txt +0 -0
hydroanomaly/__init__.py
CHANGED
@@ -1,15 +1,94 @@
 """
 HydroAnomaly
 
-A Python package for hydro anomaly detection.
+A Python package for hydro anomaly detection and USGS data retrieval.
 """
 
-__version__ = "0.1.0"
+__version__ = "0.2.0"
 __author__ = "Your Name"
 __email__ = "your.email@example.com"
 
 # Import main modules for easy access
 from .hello import greet
 from .math_utils import add, multiply
+from .usgs_data import get_usgs_data, USGSDataRetriever
+from .plotting import plot_usgs_data, plot_multiple_gages, quick_plot, WaterDataPlotter
 
-
+# Convenience functions for common use cases
+def get_discharge(gage_number, start_date, end_date, save_file=None):
+    """
+    Quick function to get discharge data from any USGS gage.
+
+    Args:
+        gage_number (str): USGS gage number (e.g., "08158000")
+        start_date (str): Start date in YYYY-MM-DD format
+        end_date (str): End date in YYYY-MM-DD format
+        save_file (str, optional): Filename to save data
+
+    Returns:
+        pandas.DataFrame: Discharge data
+
+    Example:
+        >>> import hydroanomaly
+        >>> data = hydroanomaly.get_discharge("08158000", "2023-01-01", "2023-01-31")
+        >>> print(f"Got {len(data)} discharge measurements")
+    """
+    return get_usgs_data(
+        site_number=gage_number,
+        parameter_code="00060",  # Discharge
+        start_date=start_date,
+        end_date=end_date,
+        save_to_file=save_file,
+        parameter_name="Discharge_cfs"
+    )
+
+def get_water_level(gage_number, start_date, end_date, save_file=None):
+    """
+    Quick function to get water level data from any USGS gage.
+
+    Args:
+        gage_number (str): USGS gage number (e.g., "08158000")
+        start_date (str): Start date in YYYY-MM-DD format
+        end_date (str): End date in YYYY-MM-DD format
+        save_file (str, optional): Filename to save data
+
+    Returns:
+        pandas.DataFrame: Water level data
+    """
+    return get_usgs_data(
+        site_number=gage_number,
+        parameter_code="00065",  # Gage height
+        start_date=start_date,
+        end_date=end_date,
+        save_to_file=save_file,
+        parameter_name="WaterLevel_ft"
+    )
+
+def get_temperature(gage_number, start_date, end_date, save_file=None):
+    """
+    Quick function to get water temperature data from any USGS gage.
+
+    Args:
+        gage_number (str): USGS gage number (e.g., "08158000")
+        start_date (str): Start date in YYYY-MM-DD format
+        end_date (str): End date in YYYY-MM-DD format
+        save_file (str, optional): Filename to save data
+
+    Returns:
+        pandas.DataFrame: Temperature data
+    """
+    return get_usgs_data(
+        site_number=gage_number,
+        parameter_code="00010",  # Temperature
+        start_date=start_date,
+        end_date=end_date,
+        save_to_file=save_file,
+        parameter_name="Temperature_C"
+    )
+
+__all__ = [
+    "greet", "add", "multiply",
+    "get_usgs_data", "USGSDataRetriever",
+    "get_discharge", "get_water_level", "get_temperature",
+    "plot_usgs_data", "plot_multiple_gages", "quick_plot", "WaterDataPlotter"
+]
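The new wrappers reduce a USGS pull to a single call. A minimal usage sketch of the convenience API added above (the gage number is the example site from the docstrings; a working network connection is assumed, and `usgs_data.py` below falls back to synthetic data when the request fails):

```python
import hydroanomaly

# One month of water temperature for the docstrings' example gage
# (illustrative call; requires network access to USGS Water Services).
temps = hydroanomaly.get_temperature("08158000", "2023-01-01", "2023-01-31")

# The returned DataFrame follows the package's 'datetime' / 'value' convention.
print(temps.head())
print(f"Got {len(temps)} temperature readings")
```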
hydroanomaly/plotting.py
ADDED
@@ -0,0 +1,389 @@
"""
Plotting Module for HydroAnomaly

This module provides easy-to-use plotting functions for USGS water data time series.
Creates professional-looking plots with minimal code.
"""

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime
from typing import Optional, Tuple, List, Dict, Any
import warnings

# Set style
plt.style.use('default')
sns.set_palette("husl")


class WaterDataPlotter:
    """
    A class for creating professional time series plots of water data.

    This class provides methods to create various types of plots including
    basic time series, multi-parameter plots, and statistical visualizations.
    """

    def __init__(self, style: str = 'seaborn-v0_8', figsize: Tuple[int, int] = (12, 6)):
        """
        Initialize the plotter with default settings.

        Args:
            style (str): Matplotlib style to use
            figsize (tuple): Default figure size (width, height)
        """
        self.default_figsize = figsize
        self.colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
                       '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

        # Set plotting style
        try:
            plt.style.use(style)
        except:
            plt.style.use('default')
            warnings.warn(f"Style '{style}' not available, using default")

    def plot_timeseries(
        self,
        data: pd.DataFrame,
        parameter_name: str = "Value",
        title: Optional[str] = None,
        ylabel: Optional[str] = None,
        color: str = '#1f77b4',
        save_path: Optional[str] = None,
        show_stats: bool = True,
        figsize: Optional[Tuple[int, int]] = None
    ) -> plt.Figure:
        """
        Create a basic time series plot.

        Args:
            data (pd.DataFrame): Data with 'datetime' and 'value' columns
            parameter_name (str): Name of the parameter being plotted
            title (str, optional): Plot title
            ylabel (str, optional): Y-axis label
            color (str): Line color
            save_path (str, optional): Path to save the plot
            show_stats (bool): Whether to show statistics on the plot
            figsize (tuple, optional): Figure size

        Returns:
            matplotlib.figure.Figure: The created figure
        """
        if len(data) == 0:
            raise ValueError("No data to plot")

        figsize = figsize or self.default_figsize
        fig, ax = plt.subplots(figsize=figsize)

        # Plot the data
        ax.plot(data['datetime'], data['value'], color=color, linewidth=1.5, alpha=0.8)

        # Customize the plot
        if title is None:
            title = f"{parameter_name} Time Series"
        ax.set_title(title, fontsize=14, fontweight='bold', pad=20)

        if ylabel is None:
            ylabel = parameter_name
        ax.set_ylabel(ylabel, fontsize=12)
        ax.set_xlabel('Date', fontsize=12)

        # Format dates on x-axis
        self._format_date_axis(ax, data['datetime'])

        # Add grid
        ax.grid(True, alpha=0.3, linestyle='--')

        # Add statistics if requested
        if show_stats:
            self._add_statistics_text(ax, data['value'], parameter_name)

        # Improve layout
        plt.tight_layout()

        # Save if requested
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"📊 Plot saved to: {save_path}")

        return fig

    def plot_multiple_parameters(
        self,
        data_dict: Dict[str, pd.DataFrame],
        title: str = "Multiple Parameters Time Series",
        save_path: Optional[str] = None,
        figsize: Optional[Tuple[int, int]] = None
    ) -> plt.Figure:
        """
        Plot multiple parameters on separate subplots.

        Args:
            data_dict (dict): Dictionary with parameter names as keys and DataFrames as values
            title (str): Main plot title
            save_path (str, optional): Path to save the plot
            figsize (tuple, optional): Figure size

        Returns:
            matplotlib.figure.Figure: The created figure
        """
        n_params = len(data_dict)
        if n_params == 0:
            raise ValueError("No data provided")

        figsize = figsize or (12, 4 * n_params)
        fig, axes = plt.subplots(n_params, 1, figsize=figsize, sharex=True)

        if n_params == 1:
            axes = [axes]

        colors = self.colors[:n_params]

        for i, (param_name, data) in enumerate(data_dict.items()):
            if len(data) == 0:
                continue

            ax = axes[i]
            ax.plot(data['datetime'], data['value'],
                    color=colors[i], linewidth=1.5, alpha=0.8, label=param_name)

            ax.set_ylabel(param_name, fontsize=11)
            ax.grid(True, alpha=0.3, linestyle='--')
            ax.legend(loc='upper right')

            # Add basic stats
            mean_val = data['value'].mean()
            ax.axhline(y=mean_val, color=colors[i], linestyle=':', alpha=0.6,
                       label=f'Mean: {mean_val:.2f}')

        # Format the bottom subplot x-axis
        if data_dict:
            sample_data = next(iter(data_dict.values()))
            self._format_date_axis(axes[-1], sample_data['datetime'])

        axes[-1].set_xlabel('Date', fontsize=12)
        fig.suptitle(title, fontsize=14, fontweight='bold')

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"📊 Plot saved to: {save_path}")

        return fig

    def plot_comparison(
        self,
        data_list: List[Tuple[pd.DataFrame, str]],
        title: str = "Data Comparison",
        ylabel: str = "Value",
        save_path: Optional[str] = None,
        figsize: Optional[Tuple[int, int]] = None
    ) -> plt.Figure:
        """
        Plot multiple datasets on the same axes for comparison.

        Args:
            data_list (list): List of tuples (DataFrame, label)
            title (str): Plot title
            ylabel (str): Y-axis label
            save_path (str, optional): Path to save the plot
            figsize (tuple, optional): Figure size

        Returns:
            matplotlib.figure.Figure: The created figure
        """
        if not data_list:
            raise ValueError("No data provided")

        figsize = figsize or self.default_figsize
        fig, ax = plt.subplots(figsize=figsize)

        colors = self.colors[:len(data_list)]

        for i, (data, label) in enumerate(data_list):
            if len(data) == 0:
                continue

            ax.plot(data['datetime'], data['value'],
                    color=colors[i], linewidth=1.5, alpha=0.8, label=label)

        ax.set_title(title, fontsize=14, fontweight='bold', pad=20)
        ax.set_ylabel(ylabel, fontsize=12)
        ax.set_xlabel('Date', fontsize=12)
        ax.grid(True, alpha=0.3, linestyle='--')
        ax.legend()

        # Format dates
        if data_list and len(data_list[0][0]) > 0:
            self._format_date_axis(ax, data_list[0][0]['datetime'])

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"📊 Plot saved to: {save_path}")

        return fig

    def plot_statistics(
        self,
        data: pd.DataFrame,
        parameter_name: str = "Parameter",
        save_path: Optional[str] = None
    ) -> plt.Figure:
        """
        Create statistical plots (histogram and box plot).

        Args:
            data (pd.DataFrame): Data with 'datetime' and 'value' columns
            parameter_name (str): Name of the parameter
            save_path (str, optional): Path to save the plot

        Returns:
            matplotlib.figure.Figure: The created figure
        """
        if len(data) == 0:
            raise ValueError("No data to plot")

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

        # Histogram
        ax1.hist(data['value'], bins=30, alpha=0.7, color=self.colors[0], edgecolor='black')
        ax1.set_title(f'{parameter_name} Distribution', fontweight='bold')
        ax1.set_xlabel(parameter_name)
        ax1.set_ylabel('Frequency')
        ax1.grid(True, alpha=0.3)

        # Add statistics to histogram
        mean_val = data['value'].mean()
        median_val = data['value'].median()
        ax1.axvline(mean_val, color='red', linestyle='--', alpha=0.8, label=f'Mean: {mean_val:.2f}')
        ax1.axvline(median_val, color='orange', linestyle='--', alpha=0.8, label=f'Median: {median_val:.2f}')
        ax1.legend()

        # Box plot
        box_data = ax2.boxplot(data['value'], patch_artist=True)
        box_data['boxes'][0].set_facecolor(self.colors[1])
        box_data['boxes'][0].set_alpha(0.7)

        ax2.set_title(f'{parameter_name} Box Plot', fontweight='bold')
        ax2.set_ylabel(parameter_name)
        ax2.grid(True, alpha=0.3)

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"📊 Plot saved to: {save_path}")

        return fig

    def _format_date_axis(self, ax, dates):
        """Format the date axis based on the date range."""
        date_range = (dates.max() - dates.min()).days

        if date_range <= 7:  # Less than a week
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d %H:%M'))
            ax.xaxis.set_major_locator(mdates.HourLocator(interval=6))
        elif date_range <= 31:  # Less than a month
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
            ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))
        elif date_range <= 365:  # Less than a year
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
            ax.xaxis.set_major_locator(mdates.MonthLocator())
        else:  # More than a year
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
            ax.xaxis.set_major_locator(mdates.YearLocator())

        plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha='right')

    def _add_statistics_text(self, ax, values, parameter_name):
        """Add statistics text box to the plot."""
        stats_text = (
            f"Statistics:\n"
            f"Mean: {values.mean():.2f}\n"
            f"Median: {values.median():.2f}\n"
            f"Min: {values.min():.2f}\n"
            f"Max: {values.max():.2f}\n"
            f"Std: {values.std():.2f}"
        )

        # Position the text box
        ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=9,
                verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))


# Convenience functions for easy plotting
def plot_usgs_data(
    data: pd.DataFrame,
    parameter_name: str = "Value",
    title: Optional[str] = None,
    save_path: Optional[str] = None,
    show_stats: bool = True
) -> plt.Figure:
    """
    Quick function to plot USGS time series data.

    Args:
        data (pd.DataFrame): Data with 'datetime' and 'value' columns
        parameter_name (str): Name of the parameter being plotted
        title (str, optional): Plot title
        save_path (str, optional): Path to save the plot
        show_stats (bool): Whether to show statistics on the plot

    Returns:
        matplotlib.figure.Figure: The created figure

    Example:
        >>> import hydroanomaly
        >>> data = hydroanomaly.get_discharge("08158000", "2023-01-01", "2023-01-31")
        >>> hydroanomaly.plot_usgs_data(data, "Discharge (cfs)", "Colorado River Discharge")
    """
    plotter = WaterDataPlotter()
    return plotter.plot_timeseries(
        data=data,
        parameter_name=parameter_name,
        title=title,
        save_path=save_path,
        show_stats=show_stats
    )


def plot_multiple_gages(
    data_dict: Dict[str, pd.DataFrame],
    title: str = "Multiple Gage Comparison",
    parameter_name: str = "Value",
    save_path: Optional[str] = None
) -> plt.Figure:
    """
    Plot data from multiple gages for comparison.

    Args:
        data_dict (dict): Dictionary with gage IDs as keys and DataFrames as values
        title (str): Plot title
        parameter_name (str): Y-axis label
        save_path (str, optional): Path to save the plot

    Returns:
        matplotlib.figure.Figure: The created figure
    """
    plotter = WaterDataPlotter()
    data_list = [(data, f"Gage {gage_id}") for gage_id, data in data_dict.items()]
    return plotter.plot_comparison(data_list, title, parameter_name, save_path)


def quick_plot(data: pd.DataFrame, title: str = "USGS Data") -> None:
    """
    Create a quick plot and show it immediately.

    Args:
        data (pd.DataFrame): Data to plot
        title (str): Plot title
    """
    plot_usgs_data(data, title=title)
    plt.show()
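Because every plotting entry point assumes the same two-column layout, the module can be exercised without any USGS call. A minimal offline sketch (the numbers are synthetic and the output file name is an assumption, not package output):

```python
import numpy as np
import pandas as pd

from hydroanomaly.plotting import WaterDataPlotter

# A small synthetic series in the 'datetime'/'value' shape the plotter expects.
demo = pd.DataFrame({
    "datetime": pd.date_range("2023-01-01", periods=30, freq="D"),
    "value": np.random.normal(100, 10, 30),
})

plotter = WaterDataPlotter(figsize=(10, 4))

# Basic time series with the built-in statistics box, saved to disk.
fig = plotter.plot_timeseries(
    demo,
    parameter_name="Discharge (cfs)",
    title="Synthetic demo series",
    save_path="demo_timeseries.png",
)

# Histogram and box plot of the same values.
stats_fig = plotter.plot_statistics(demo, parameter_name="Discharge (cfs)")
```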
hydroanomaly/usgs_data.py
ADDED
@@ -0,0 +1,311 @@
"""
USGS Data Retrieval Module

This module provides functionality to retrieve water data from the USGS Water Services API.
Supports various water quality parameters and time series data.
"""

import pandas as pd
import numpy as np
import requests
from io import StringIO
from datetime import datetime
from typing import Optional, Dict, Any
import warnings


class USGSDataRetriever:
    """
    A class to retrieve and process USGS water data.

    This class handles the retrieval of time series data from USGS Water Services
    and provides methods to clean, process, and validate the data.
    """

    def __init__(self):
        """Initialize the USGS Data Retriever."""
        self.base_url = "https://waterservices.usgs.gov/nwis/iv/"
        self.last_request_url = None
        self.last_response = None

    def retrieve_data(
        self,
        site_number: str,
        parameter_code: str,
        start_date: str,
        end_date: str,
        create_synthetic: bool = True
    ) -> pd.DataFrame:
        """
        Retrieve USGS water data for specified parameters.

        Args:
            site_number (str): USGS site number (e.g., "294643095035200")
            parameter_code (str): USGS parameter code (e.g., "63680" for turbidity)
            start_date (str): Start date in YYYY-MM-DD format
            end_date (str): End date in YYYY-MM-DD format
            create_synthetic (bool): Whether to create synthetic data if no data found

        Returns:
            pd.DataFrame: DataFrame with datetime and parameter columns

        Raises:
            ValueError: If invalid dates or parameters provided
            requests.RequestException: If API request fails
        """
        # Validate inputs
        self._validate_inputs(site_number, parameter_code, start_date, end_date)

        # Construct URL
        url = self._build_url(site_number, parameter_code, start_date, end_date)
        self.last_request_url = url

        print(f"🔄 Requesting data from USGS...")
        print(f"📍 Site: {site_number}")
        print(f"📊 Parameter: {parameter_code}")
        print(f"📅 Period: {start_date} to {end_date}")

        try:
            # Make API request
            response = requests.get(url, timeout=30)
            self.last_response = response

            if response.status_code == 200:
                data = self._process_response(response.text, parameter_code)

                if len(data) == 0 and create_synthetic:
                    print("\n⚠️ No USGS data available. Creating synthetic data...")
                    data = self._create_synthetic_data(start_date, end_date, parameter_code)

                if len(data) > 0:
                    print(f"✅ Successfully retrieved {len(data)} data points")
                    return data
                else:
                    print("❌ No data available for the specified parameters")
                    return pd.DataFrame(columns=["datetime", "value"])

            else:
                raise requests.RequestException(f"HTTP {response.status_code}: {response.reason}")

        except requests.RequestException as e:
            print(f"❌ Error retrieving data: {e}")
            if create_synthetic:
                print("🔄 Creating synthetic data as fallback...")
                return self._create_synthetic_data(start_date, end_date, parameter_code)
            else:
                raise

    def _validate_inputs(self, site_number: str, parameter_code: str, start_date: str, end_date: str):
        """Validate input parameters."""
        if not site_number or not isinstance(site_number, str):
            raise ValueError("Site number must be a non-empty string")

        if not parameter_code or not isinstance(parameter_code, str):
            raise ValueError("Parameter code must be a non-empty string")

        try:
            start_dt = datetime.strptime(start_date, "%Y-%m-%d")
            end_dt = datetime.strptime(end_date, "%Y-%m-%d")
            if start_dt >= end_dt:
                raise ValueError("Start date must be before end date")
        except ValueError as e:
            raise ValueError(f"Invalid date format. Use YYYY-MM-DD: {e}")

    def _build_url(self, site_number: str, parameter_code: str, start_date: str, end_date: str) -> str:
        """Build the USGS API URL."""
        return (
            f"{self.base_url}?sites={site_number}"
            f"&parameterCd={parameter_code}"
            f"&startDT={start_date}&endDT={end_date}"
            f"&format=rdb"
        )

    def _process_response(self, content: str, parameter_code: str) -> pd.DataFrame:
        """Process the USGS API response."""
        if "No sites found matching" in content or "No data" in content:
            print("⚠️ No data available for this site/parameter combination")
            return pd.DataFrame(columns=["datetime", "value"])

        try:
            # Read the tab-separated data
            data = pd.read_csv(StringIO(content), sep='\t', comment='#')

            # Drop empty columns
            data = data.dropna(axis=1, how='all')

            # Clean column names
            data.columns = data.columns.str.strip()

            # Find datetime and parameter columns
            datetime_cols = [col for col in data.columns if 'datetime' in col.lower()]
            parameter_cols = [col for col in data.columns if parameter_code in col]

            if not datetime_cols:
                raise ValueError("No datetime column found in response")
            if not parameter_cols:
                raise ValueError(f"No column found for parameter {parameter_code}")

            datetime_col = datetime_cols[0]
            parameter_col = parameter_cols[0]

            # Keep only relevant columns
            data = data[[datetime_col, parameter_col]].copy()
            data.columns = ['datetime', 'value']

            # Convert and clean data
            data['datetime'] = pd.to_datetime(data['datetime'], errors='coerce')
            data['value'] = pd.to_numeric(data['value'], errors='coerce')

            # Remove rows with missing data
            initial_count = len(data)
            data = data.dropna()
            final_count = len(data)

            if initial_count > final_count:
                print(f"⚠️ Removed {initial_count - final_count} rows with missing data")

            return data

        except Exception as e:
            print(f"❌ Error parsing USGS response: {e}")
            return pd.DataFrame(columns=["datetime", "value"])

    def _create_synthetic_data(self, start_date: str, end_date: str, parameter_code: str) -> pd.DataFrame:
        """Create synthetic data as fallback."""
        date_range = pd.date_range(start=start_date, end=end_date, freq='D')

        # Create realistic synthetic data based on parameter type
        if parameter_code == "63680":  # Turbidity
            base_value = 12
            noise_std = 3
            anomaly_range = (5, 15)
        elif parameter_code == "00060":  # Discharge
            base_value = 100
            noise_std = 20
            anomaly_range = (50, 200)
        elif parameter_code == "00065":  # Gage height
            base_value = 5
            noise_std = 1
            anomaly_range = (2, 8)
        else:  # Generic water quality parameter
            base_value = 10
            noise_std = 2
            anomaly_range = (3, 10)

        # Generate base synthetic data
        synthetic_values = np.random.normal(base_value, noise_std, len(date_range))

        # Add some anomalies (10% of data)
        anomaly_count = int(len(date_range) * 0.1)
        anomaly_indices = np.random.choice(len(date_range), size=anomaly_count, replace=False)
        anomaly_values = np.random.uniform(anomaly_range[0], anomaly_range[1], anomaly_count)
        synthetic_values[anomaly_indices] += anomaly_values

        # Ensure positive values
        synthetic_values = np.maximum(synthetic_values, 0.1)

        synthetic_data = pd.DataFrame({
            'datetime': date_range,
            'value': synthetic_values
        })

        print(f"📊 Created {len(synthetic_data)} synthetic data points")
        print("🔍 Sample synthetic data:")
        print(synthetic_data.head())

        return synthetic_data

    def save_data(self, data: pd.DataFrame, filename: str, parameter_name: str = "parameter") -> str:
        """
        Save data to CSV file.

        Args:
            data (pd.DataFrame): Data to save
            filename (str): Output filename
            parameter_name (str): Name of the parameter for column naming

        Returns:
            str: Path to saved file
        """
        if len(data) == 0:
            print("⚠️ No data to save")
            return ""

        # Rename value column to parameter name
        save_data = data.copy()
        save_data.columns = ['datetime', parameter_name]

        # Add date column for convenience
        save_data['date'] = save_data['datetime'].dt.date

        # Save to CSV
        save_data.to_csv(filename, index=False)
        print(f"💾 Saved {len(save_data)} records to '{filename}'")

        return filename

    def get_data_summary(self, data: pd.DataFrame) -> Dict[str, Any]:
        """
        Get summary statistics of the data.

        Args:
            data (pd.DataFrame): Data to summarize

        Returns:
            dict: Summary statistics
        """
        if len(data) == 0:
            return {"error": "No data available"}

        summary = {
            "record_count": len(data),
            "date_range": {
                "start": data['datetime'].min(),
                "end": data['datetime'].max()
            },
            "value_stats": {
                "min": data['value'].min(),
                "max": data['value'].max(),
                "mean": data['value'].mean(),
                "median": data['value'].median(),
                "std": data['value'].std()
            },
            "missing_data": {
                "count": data['value'].isna().sum(),
                "percentage": (data['value'].isna().sum() / len(data)) * 100
            }
        }

        return summary


# Convenience function for easy access
def get_usgs_data(
    site_number: str,
    parameter_code: str,
    start_date: str,
    end_date: str,
    save_to_file: Optional[str] = None,
    parameter_name: str = "value"
) -> pd.DataFrame:
    """
    Convenience function to retrieve USGS data.

    Args:
        site_number (str): USGS site number
        parameter_code (str): USGS parameter code
        start_date (str): Start date (YYYY-MM-DD)
        end_date (str): End date (YYYY-MM-DD)
        save_to_file (str, optional): Filename to save data
        parameter_name (str): Name for the parameter column

    Returns:
        pd.DataFrame: Retrieved data
    """
    retriever = USGSDataRetriever()
    data = retriever.retrieve_data(site_number, parameter_code, start_date, end_date)

    if save_to_file and len(data) > 0:
        retriever.save_data(data, save_to_file, parameter_name)

    return data
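The retriever's post-processing can be seen without touching the network by handing it a hand-made frame in the same 'datetime'/'value' shape (the values and the CSV file name below are illustrative, not package output):

```python
import pandas as pd

from hydroanomaly.usgs_data import USGSDataRetriever

# A tiny hand-made series following the module's column convention.
sample = pd.DataFrame({
    "datetime": pd.to_datetime(["2023-01-01", "2023-01-02", "2023-01-03"]),
    "value": [11.2, 12.8, 10.5],
})

retriever = USGSDataRetriever()
summary = retriever.get_data_summary(sample)
print(summary["record_count"])                    # 3
print(round(summary["value_stats"]["mean"], 2))   # 11.5

# Writes datetime, Turbidity, and date columns to CSV (illustrative file name).
retriever.save_data(sample, "sample_turbidity.csv", "Turbidity")
```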
hydroanomaly-0.3.0.dist-info/METADATA
ADDED
@@ -0,0 +1,400 @@
Metadata-Version: 2.4
Name: hydroanomaly
Version: 0.3.0
Summary: A Python package for hydro anomaly detection
Home-page: https://github.com/yourusername/hydroanomaly
Author: Your Name
Author-email: Your Name <your.email@example.com>
License-Expression: MIT
Project-URL: Homepage, https://github.com/yourusername/hydroanomaly
Project-URL: Bug Reports, https://github.com/yourusername/hydroanomaly/issues
Project-URL: Source, https://github.com/yourusername/hydroanomaly
Keywords: python,package,hydro,anomaly,detection
Classifier: Programming Language :: Python :: 3
Classifier: Operating System :: OS Independent
Requires-Python: >=3.6
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: pandas>=1.3.0
Requires-Dist: numpy>=1.20.0
Requires-Dist: requests>=2.25.1
Requires-Dist: matplotlib>=3.3.0
Requires-Dist: seaborn>=0.11.0
Provides-Extra: dev
Requires-Dist: pytest>=6.0; extra == "dev"
Requires-Dist: black>=21.0; extra == "dev"
Requires-Dist: flake8>=3.8; extra == "dev"
Requires-Dist: mypy>=0.800; extra == "dev"
Dynamic: author
Dynamic: home-page
Dynamic: license-file
Dynamic: requires-python

# HydroAnomaly

A Python package for hydro anomaly detection, **USGS water data retrieval**, and **time series visualization**.

[](https://badge.fury.io/py/hydroanomaly)
[](https://pepy.tech/project/hydroanomaly)

## Features

- 🌊 **USGS Data Retrieval**: Get real-time and historical water data from USGS Water Services
- 📊 **Time Series Plotting**: Beautiful, professional visualizations for your water data
- 📈 **Multi-Parameter Analysis**: Compare multiple parameters or gages in one plot
- 📋 **Statistical Analysis**: Built-in statistics and distribution plots
- 🎯 **Easy to Use**: Simple functions for quick data exploration

## Installation

```bash
pip install hydroanomaly
```

## 🌊 USGS Data Retrieval

Easily retrieve real-time and historical water data from USGS Water Services:

```python
import hydroanomaly

# ------------------------
# User-defined settings
# ------------------------
site_number = "294643095035200"  # USGS site number
parameter_code = "63680"         # Turbidity
start_date = "2020-01-01"
end_date = "2024-12-30"

# ------------------------
# Data Extraction from USGS
# ------------------------
data = hydroanomaly.get_usgs_data(
    site_number=site_number,
    parameter_code=parameter_code,
    start_date=start_date,
    end_date=end_date,
    save_to_file="USGS_turbidity.csv",
    parameter_name="Turbidity"
)

print(f"Retrieved {len(data)} data points!")
print(data.head())
```

## 📊 Time Series Plotting

Create beautiful visualizations of your water data:

```python
import hydroanomaly

# Get some data
data = hydroanomaly.get_discharge("08158000", "2023-01-01", "2023-01-31")

# Quick plot (simplest method)
hydroanomaly.quick_plot(data, "Colorado River Discharge")

# Professional plot with statistics
fig = hydroanomaly.plot_usgs_data(
    data=data,
    parameter_name="Discharge (cfs)",
    title="Colorado River at Austin - January 2023",
    save_path="discharge_plot.png"
)

# Compare multiple gages
austin_data = hydroanomaly.get_discharge("08158000", "2023-01-01", "2023-01-07")
nola_data = hydroanomaly.get_discharge("07374000", "2023-01-01", "2023-01-07")

gage_data = {
    "Colorado River (Austin)": austin_data,
    "Mississippi River (New Orleans)": nola_data
}

fig = hydroanomaly.plot_multiple_gages(
    data_dict=gage_data,
    parameter_name="Discharge (cfs)",
    title="River Discharge Comparison"
)
```

## Quick Start

```python
import hydroanomaly

# Get USGS data
data = hydroanomaly.get_discharge("08158000", "2023-01-01", "2023-01-31")
print(f"Retrieved {len(data)} discharge measurements")

# Plot the data
hydroanomaly.quick_plot(data, "Colorado River Discharge")

# Basic greeting functionality
print(hydroanomaly.greet("Water Engineer"))
# Output: Hello, Water Engineer!

# Math utilities for data analysis
result = hydroanomaly.add(25.5, 14.3)
print(f"Sum: {result}")
# Output: Sum: 39.8
```

## Features

- **🌊 USGS Data Retrieval**: Download real-time water data from USGS Water Services
  - Support for any USGS site and parameter
  - Automatic data cleaning and validation
  - Convenient functions for common parameters (discharge, water level, temperature)
  - Fallback synthetic data generation
  - CSV export functionality

- **📊 Time Series Plotting**: Beautiful, professional visualizations
  - Single parameter plots with statistics
  - Multi-parameter comparison plots
  - Multiple gage comparison plots
  - Statistical analysis plots (histogram, box plot, etc.)
  - Automatic legend and formatting
  - Save plots in multiple formats (PNG, PDF, SVG)

- **📈 Data Analysis Tools**: Built-in utilities for water data
  - Mathematical operations for data processing
  - Statistical summaries and analysis
  - Data validation and quality checks

- **🎯 Easy to Use**: Simple, intuitive API
  - Quick plotting for rapid data exploration
  - One-line data retrieval functions
  - Comprehensive error handling
  - Well-documented with examples

- **🧪 Well Tested**: Comprehensive test suite with 100% pass rate

## USGS Data Parameters

Common USGS parameter codes you can use:
- **00060**: Discharge (cubic feet per second)
- **00065**: Gage height (feet)
- **00010**: Water temperature (°C)
- **63680**: Turbidity (NTU)
- **00300**: Dissolved oxygen (mg/L)
- **00095**: Specific conductance (µS/cm)

Find USGS site numbers at: https://waterdata.usgs.gov/nwis

## Detailed Usage

### USGS Data Retrieval
```python
from hydroanomaly.usgs_data import USGSDataRetriever

# Create retriever instance
retriever = USGSDataRetriever()

# Get data with full control
data = retriever.retrieve_data(
    site_number="08158000",   # Colorado River at Austin, TX
    parameter_code="00060",   # Discharge
    start_date="2023-01-01",
    end_date="2023-01-31"
)

# Get summary statistics
summary = retriever.get_data_summary(data)
print(f"Retrieved {summary['record_count']} records")
print(f"Average discharge: {summary['value_stats']['mean']:.2f} cfs")

# Save data
retriever.save_data(data, "discharge_data.csv", "Discharge_cfs")
```

### Greeting Functions
```python
from hydroanomaly.hello import greet, say_goodbye

# Greet users
welcome_msg = greet("Data Scientist")
print(welcome_msg)  # Hello, Data Scientist!

# Say goodbye
farewell_msg = say_goodbye("User")
print(farewell_msg)  # Goodbye, User!
```

### Mathematical Operations
```python
from hydroanomaly.math_utils import add, multiply, divide

# Basic operations
sum_result = add(10.5, 20.3)
product = multiply(5, 7)

# Safe division with error handling
try:
    result = divide(100, 5)
    print(f"Result: {result}")  # Result: 20.0
except ValueError as e:
    print(f"Error: {e}")
```

### Time Series Plotting

```python
# Quick plotting for data exploration
data = hydroanomaly.get_discharge("08158000", "2023-01-01", "2023-01-07")
hydroanomaly.quick_plot(data, "Quick Discharge Check")

# Professional plots with full customization
from hydroanomaly.plotting import WaterDataPlotter

plotter = WaterDataPlotter()

# Single parameter with statistics
fig = plotter.plot_timeseries(
    data=data,
    parameter_name="Discharge (cfs)",
    title="Colorado River Discharge",
    color="blue",
    save_path="discharge_analysis.png"
)

# Multiple parameters from same gage
discharge = hydroanomaly.get_discharge("08158000", "2023-01-01", "2023-01-07")
level = hydroanomaly.get_water_level("08158000", "2023-01-01", "2023-01-07")

data_dict = {
    "Discharge (cfs)": discharge,
    "Water Level (ft)": level
}

fig = plotter.plot_multiple_parameters(
    data_dict=data_dict,
    title="Colorado River - Multiple Parameters"
)

# Statistical analysis plots
fig = plotter.plot_statistics(
    data=data,
    parameter_name="Discharge (cfs)",
    title="Discharge Statistics"
)
```

## 📚 Documentation & Examples

- **📖 [Plotting Guide](PLOTTING_GUIDE.md)**: Comprehensive plotting documentation with examples
- **🎯 [Examples](plotting_examples.py)**: Run `python plotting_examples.py` to see all features
- **🧪 [Tests](test_plotting.py)**: Verify functionality with `python test_plotting.py`

## Use Cases

- **🌊 Real Water Data Analysis**: Retrieve and analyze actual USGS water monitoring data
- **📊 Hydro Research**: Access historical water quality and quantity data with visualization
- **🚰 Water Management**: Monitor discharge, water levels, and quality parameters with plots
- **🎓 Educational Projects**: Learn data analysis and visualization with real environmental data
- **🔬 Environmental Studies**: Research water patterns and anomalies with statistical plots
- **⚡ Quick Prototyping**: Rapidly access and visualize water data for proof-of-concepts
- **📈 Data Reporting**: Generate professional plots for reports and presentations

## API Reference

### hydroanomaly.greet(name="World")
Returns a greeting message.

**Parameters:**
- `name` (str, optional): Name to greet. Defaults to "World".

**Returns:**
- str: Greeting message

### hydroanomaly.get_discharge(gage_number, start_date, end_date)
Get discharge data from USGS.

**Parameters:**
- `gage_number` (str): USGS gage number
- `start_date` (str): Start date (YYYY-MM-DD)
- `end_date` (str): End date (YYYY-MM-DD)

**Returns:**
- pandas.DataFrame: Time series data with datetime and value columns

### hydroanomaly.get_water_level(gage_number, start_date, end_date)
Get water level data from USGS.

**Parameters:**
- `gage_number` (str): USGS gage number
- `start_date` (str): Start date (YYYY-MM-DD)
- `end_date` (str): End date (YYYY-MM-DD)

**Returns:**
- pandas.DataFrame: Time series data with datetime and value columns

### hydroanomaly.plot_usgs_data(data, parameter_name, title, save_path, color)
Create a professional plot of USGS time series data.

**Parameters:**
- `data` (DataFrame): USGS data with datetime and value columns
- `parameter_name` (str, optional): Name of parameter for y-axis label
- `title` (str, optional): Plot title
- `save_path` (str, optional): Path to save the plot
- `color` (str, optional): Line color

**Returns:**
- matplotlib.figure.Figure: The plot figure

### hydroanomaly.quick_plot(data, title)
Create a quick plot for data exploration.

**Parameters:**
- `data` (DataFrame): USGS data with datetime and value columns
- `title` (str, optional): Plot title

**Returns:**
- matplotlib.figure.Figure: The plot figure

### hydroanomaly.plot_multiple_gages(data_dict, parameter_name, title, save_path)
Compare the same parameter across multiple gages.

**Parameters:**
- `data_dict` (dict): Dictionary with gage names as keys and data as values
- `parameter_name` (str, optional): Name of parameter for y-axis label
- `title` (str, optional): Plot title
- `save_path` (str, optional): Path to save the plot

**Returns:**
- matplotlib.figure.Figure: The plot figure

### Mathematical Operations

### hydroanomaly.add(a, b)
Adds two numbers.

**Parameters:**
- `a` (int/float): First number
- `b` (int/float): Second number

**Returns:**
- int/float: Sum of a and b

### hydroanomaly.multiply(a, b)
Multiplies two numbers.

**Parameters:**
- `a` (int/float): First number
- `b` (int/float): Second number

**Returns:**
- int/float: Product of a and b

## Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

## License

This project is licensed under the MIT License - see the LICENSE file for details.

---

**HydroAnomaly** - Making water data analysis simple and beautiful! 🌊📊
hydroanomaly-0.3.0.dist-info/RECORD
ADDED
@@ -0,0 +1,10 @@
hydroanomaly/__init__.py,sha256=orCkJ2g57zR9o5RUekkh0eOUvhalQVbRro5Oxm-iYZk,2997
hydroanomaly/hello.py,sha256=AhK7UKF_3TyZcWL4IDlZq_BXdKQzUP-is-jv59fgqk4,566
hydroanomaly/math_utils.py,sha256=CDOGWAiRlb2PK5SNFysumnzp7_LbZ9aleHLR_3lsGrs,856
hydroanomaly/plotting.py,sha256=YZW6-Sb_IrhbHKFeoh1d86Ef4Ev5Gpq55lEv8XX0v20,13504
hydroanomaly/usgs_data.py,sha256=zUvfu3go-7cQuFtD8Hbm7pABpw_RPWuJxE66NhxYmIU,11631
hydroanomaly-0.3.0.dist-info/licenses/LICENSE,sha256=OphKV48tcMv6ep-7j-8T6nycykPT0g8ZlMJ9zbGvdPs,1066
hydroanomaly-0.3.0.dist-info/METADATA,sha256=3NDnQzVH84RCiRpHf1SyQf5OHDIGMsMTEa-tlI822p4,11680
hydroanomaly-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
hydroanomaly-0.3.0.dist-info/top_level.txt,sha256=t-5Lc-eTLlkxIhR_N1Cpp6_YZafKS3xLLk9D2CtbE7o,13
hydroanomaly-0.3.0.dist-info/RECORD,,
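Each RECORD line has the form `path,sha256=<unpadded urlsafe-base64 digest>,size`, so the hashes above can be re-derived from a downloaded wheel. A sketch, assuming the 0.3.0 wheel has been fetched locally (the local filename is an assumption):

```python
import base64
import hashlib
import zipfile

def record_entry(wheel_path: str, member: str) -> str:
    """Recompute one RECORD line: path,sha256=<unpadded urlsafe base64>,size."""
    with zipfile.ZipFile(wheel_path) as wheel:
        payload = wheel.read(member)
    digest = base64.urlsafe_b64encode(hashlib.sha256(payload).digest()).rstrip(b"=")
    return f"{member},sha256={digest.decode()},{len(payload)}"

# Hypothetical local filename for the downloaded wheel:
# print(record_entry("hydroanomaly-0.3.0-py3-none-any.whl", "hydroanomaly/plotting.py"))
```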
hydroanomaly-0.1.0.dist-info/METADATA
REMOVED
@@ -1,60 +0,0 @@
Metadata-Version: 2.4
Name: hydroanomaly
Version: 0.1.0
Summary: A Python package for hydro anomaly detection
Home-page: https://github.com/yourusername/hydroanomaly
Author: Your Name
Author-email: Your Name <your.email@example.com>
License: MIT
Project-URL: Homepage, https://github.com/yourusername/hydroanomaly
Project-URL: Bug Reports, https://github.com/yourusername/hydroanomaly/issues
Project-URL: Source, https://github.com/yourusername/hydroanomaly
Keywords: python,package,hydro,anomaly,detection
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.6
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: dev
Requires-Dist: pytest>=6.0; extra == "dev"
Requires-Dist: black>=21.0; extra == "dev"
Requires-Dist: flake8>=3.8; extra == "dev"
Requires-Dist: mypy>=0.800; extra == "dev"
Dynamic: author
Dynamic: home-page
Dynamic: license-file
Dynamic: requires-python

# HydroAnomaly

A Python package for hydro anomaly detection.

## Installation

```bash
pip install hydroanomaly
```

## Usage

```python
from hydroanomaly import hello

# Example usage
hello.greet("World")
```

## Features

- Feature 1
- Feature 2
- Feature 3

## Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

## License

This project is licensed under the MIT License - see the LICENSE file for details.
hydroanomaly-0.1.0.dist-info/RECORD
REMOVED
@@ -1,8 +0,0 @@
hydroanomaly/__init__.py,sha256=kUDFH0OgO9N8JgiU66z4ivD_xVNxMqkU3X522ebiXas,295
hydroanomaly/hello.py,sha256=AhK7UKF_3TyZcWL4IDlZq_BXdKQzUP-is-jv59fgqk4,566
hydroanomaly/math_utils.py,sha256=CDOGWAiRlb2PK5SNFysumnzp7_LbZ9aleHLR_3lsGrs,856
hydroanomaly-0.1.0.dist-info/licenses/LICENSE,sha256=OphKV48tcMv6ep-7j-8T6nycykPT0g8ZlMJ9zbGvdPs,1066
hydroanomaly-0.1.0.dist-info/METADATA,sha256=xQoNIBUgw2D3cUJpyCvMbkUrfJSAdxqW_3AFjF-8ots,1458
hydroanomaly-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
hydroanomaly-0.1.0.dist-info/top_level.txt,sha256=t-5Lc-eTLlkxIhR_N1Cpp6_YZafKS3xLLk9D2CtbE7o,13
hydroanomaly-0.1.0.dist-info/RECORD,,
{hydroanomaly-0.1.0.dist-info → hydroanomaly-0.3.0.dist-info}/WHEEL
File without changes

{hydroanomaly-0.1.0.dist-info → hydroanomaly-0.3.0.dist-info}/licenses/LICENSE
File without changes

{hydroanomaly-0.1.0.dist-info → hydroanomaly-0.3.0.dist-info}/top_level.txt
File without changes