mpcaHydro 2.2.8__tar.gz → 2.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/.gitignore +9 -6
  2. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/PKG-INFO +6 -1
  3. mpcahydro-2.2.10/README.md +223 -0
  4. mpcahydro-2.2.10/docs/Makefile +20 -0
  5. mpcahydro-2.2.10/docs/_static/.gitkeep +0 -0
  6. mpcahydro-2.2.10/docs/_templates/.gitkeep +0 -0
  7. mpcahydro-2.2.10/docs/api.rst +93 -0
  8. mpcahydro-2.2.10/docs/conf.py +62 -0
  9. mpcahydro-2.2.10/docs/index.rst +18 -0
  10. mpcahydro-2.2.10/docs/make.bat +35 -0
  11. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/pyproject.toml +15 -1
  12. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/data/outlet.duckdb +0 -0
  13. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/data/stations_EQUIS.gpkg +0 -0
  14. mpcahydro-2.2.10/src/mpcaHydro/data/stations_EQUIS.gpkg-shm +0 -0
  15. mpcahydro-2.2.10/src/mpcaHydro/data/stations_EQUIS.gpkg-wal +0 -0
  16. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/data/stations_wiski.gpkg +0 -0
  17. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/equis.py +472 -44
  18. mpcahydro-2.2.10/src/mpcaHydro/outlets.py +772 -0
  19. mpcahydro-2.2.10/src/mpcaHydro/pywisk.py +323 -0
  20. mpcahydro-2.2.10/src/mpcaHydro/warehouse.py +802 -0
  21. mpcahydro-2.2.10/src/mpcaHydro/warehouse_functions.py +1071 -0
  22. mpcahydro-2.2.10/src/mpcaHydro/wiski.py +904 -0
  23. mpcahydro-2.2.10/src/mpcaHydro/xref.py +244 -0
  24. mpcahydro-2.2.10/tests/fixtures/wiski_H67014001_2020.parquet +0 -0
  25. mpcahydro-2.2.10/tests/integration/__init__.py +0 -0
  26. mpcahydro-2.2.10/tests/integration/test_equis.py +167 -0
  27. mpcahydro-2.2.10/tests/integration/test_wiski.py +148 -0
  28. mpcahydro-2.2.10/tests/pixi.toml +25 -0
  29. mpcahydro-2.2.10/tests/unit/__init__.py +0 -0
  30. mpcahydro-2.2.10/tests/unit/test_wiski.py +53 -0
  31. mpcahydro-2.2.8/README.md +0 -2
  32. mpcahydro-2.2.8/demo.py +0 -226
  33. mpcahydro-2.2.8/src/mpcaHydro/outlets.py +0 -330
  34. mpcahydro-2.2.8/src/mpcaHydro/pywisk.py +0 -388
  35. mpcahydro-2.2.8/src/mpcaHydro/warehouse.py +0 -368
  36. mpcahydro-2.2.8/src/mpcaHydro/warehouse_functions.py +0 -527
  37. mpcahydro-2.2.8/src/mpcaHydro/wiski.py +0 -401
  38. mpcahydro-2.2.8/src/mpcaHydro/xref.py +0 -74
  39. mpcahydro-2.2.8/tests/integration/README.md +0 -48
  40. mpcahydro-2.2.8/tests/integration/conftest.py +0 -17
  41. mpcahydro-2.2.8/tests/integration/test_dataManager.py +0 -61
  42. mpcahydro-2.2.8/tests/integration/test_data_manager.py +0 -20
  43. mpcahydro-2.2.8/tests/integration/test_data_manager_integration.py +0 -220
  44. mpcahydro-2.2.8/tests/integration/test_equis_integration.py +0 -165
  45. mpcahydro-2.2.8/tests/integration/test_warehouse.duckdb +0 -0
  46. mpcahydro-2.2.8/tests/integration/test_warehouse.py +0 -216
  47. mpcahydro-2.2.8/tests/integration/test_wiski.py +0 -31
  48. mpcahydro-2.2.8/tests/integration/test_wiski_integration.py +0 -155
  49. mpcahydro-2.2.8/tests/test_data_manager_functions.py +0 -194
  50. mpcahydro-2.2.8/tests/unit/test_equis.py +0 -19
  51. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/.gitattributes +0 -0
  52. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/__init__.py +0 -0
  53. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/csg.py +0 -0
  54. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/data/EQUIS_PARAMETER_XREF.csv +0 -0
  55. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/data/WISKI_EQUIS_XREF.csv +0 -0
  56. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/data/WISKI_QUALITY_CODES.csv +0 -0
  57. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/data/wiskiweb01.pca.state.mn.us.crt +0 -0
  58. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/reports.py +0 -0
  59. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/sql/analytics_tables.sql +0 -0
  60. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/sql/outlets_schema.sql +0 -0
  61. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/sql/schemas.sql +0 -0
  62. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/sql/staging_tables.sql +0 -0
  63. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/sql/views_analytics.sql +0 -0
  64. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/sql/views_outlets.sql +0 -0
  65. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/sql/views_reports.sql +0 -0
  66. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/sql_loader.py +0 -0
  67. {mpcahydro-2.2.8 → mpcahydro-2.2.10}/src/mpcaHydro/swd.py +0 -0
@@ -1,6 +1,9 @@
1
- # pixi environments
2
- .pixi
3
- *.pyc
4
- *.whl
5
- *.gz
6
- /.vscode
1
+ # pixi environments
2
+ .pixi
3
+ *.pyc
4
+ *.whl
5
+ *.gz
6
+ /.vscode
7
+
8
+ # Sphinx build output
9
+ docs/_build/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mpcaHydro
3
- Version: 2.2.8
3
+ Version: 2.2.10
4
4
  Summary: Python package for downloading MPCA hydrology data
5
5
  Project-URL: Homepage, https://github.com/mfratkin1/mpcaHydro
6
6
  Author-email: Mulu Fratkin <michael.fratkin@state.mn.us>
@@ -16,3 +16,8 @@ Requires-Dist: oracledb
16
16
  Requires-Dist: pandas
17
17
  Requires-Dist: pathlib
18
18
  Requires-Dist: requests
19
+ Provides-Extra: docs
20
+ Requires-Dist: sphinx; extra == 'docs'
21
+ Requires-Dist: sphinx-autodoc-typehints; extra == 'docs'
22
+ Provides-Extra: tests
23
+ Requires-Dist: pytest; extra == 'tests'
@@ -0,0 +1,223 @@
1
+ # mpcaHydro
2
+
3
+ Modules for downloading hydrology data from MPCA servers and databases.
4
+
5
+ ---
6
+
7
+ ## Overview
8
+
9
+ **mpcaHydro** is a Python package that downloads, stores, and retrieves water-quality and streamflow data from multiple Minnesota Pollution Control Agency (MPCA) data sources. It brings together information from several independent systems into a single local database so you can query and analyze data from one place instead of connecting to each system separately.
10
+
11
+ ### Data Sources
12
+
13
+ The package pulls from four data sources:
14
+
15
+ | Source | What It Contains | How It Connects |
16
+ |--------|-----------------|-----------------|
17
+ | **WISKI** | Continuous streamflow (discharge), water temperature, dissolved oxygen, and other sensor-based time-series data collected by MPCA monitoring stations | Public web API (KiWIS) — no credentials required |
18
+ | **EQuIS** | Lab-analyzed water-quality sample results (nutrients, solids, chlorophyll, etc.) from MPCA's environmental database | Oracle database — requires MPCA username and password |
19
+ | **SWD (Surface Water Data)** | Similar water-quality sample results available through MPCA's public Surface Water Data portal | Public web API — no credentials required |
20
+ | **CSG (Cooperative Stream Gauging)** | Discharge, water temperature, and dissolved oxygen data from the Minnesota DNR's stream gauging network | Public web API — no credentials required |
21
+
22
+ ### Supported Constituents
23
+
24
+ The package works with these water-quality and hydrological measurements:
25
+
26
+ | Abbreviation | Full Name |
27
+ |-------------|-----------|
28
+ | **Q** | Discharge (streamflow) |
29
+ | **QB** | Baseflow (the portion of streamflow from groundwater) |
30
+ | **TSS** | Total Suspended Solids |
31
+ | **TP** | Total Phosphorus |
32
+ | **OP** | Orthophosphate |
33
+ | **TKN** | Total Kjeldahl Nitrogen |
34
+ | **N** | Nitrate + Nitrite Nitrogen |
35
+ | **WT** | Water Temperature |
36
+ | **DO** | Dissolved Oxygen |
37
+ | **WL** | Water Level |
38
+ | **CHLA** | Chlorophyll-a |
39
+
40
+ ---
41
+
42
+ ## Warehouse Structure
43
+
44
+ The warehouse is a local DuckDB database file (named `observations.duckdb`) that organizes all downloaded data into a structured layout with five separate areas called **schemas**. Each schema serves a different purpose in the data pipeline:
45
+
46
+ ```
47
+ observations.duckdb
48
+ ├── staging — Raw data as it was first downloaded (temporary holding area)
49
+ ├── analytics — Cleaned and standardized data ready for analysis
50
+ ├── reports — Pre-built summary statistics and aggregated views
51
+ ├── outlets — Definitions linking monitoring stations to model reaches
52
+ └── mappings — Reference lookup tables (parameter codes, station cross-references, quality codes)
53
+ ```
54
+
55
+ ### How Data Flows Through the Warehouse
56
+
57
+ The warehouse download-and-process pipeline currently supports **WISKI** and **EQuIS** data. SWD and CSG modules can download and transform data independently but are not yet integrated into the warehouse pipeline.
58
+
59
+ 1. **Download** — Raw data is fetched from a source (WISKI or EQuIS).
60
+ 2. **Staging** — The raw data is loaded directly into the `staging` schema, preserving all original columns exactly as they came from the source system.
61
+ 3. **Transform** — The raw data goes through cleaning steps: unit conversions (e.g., micrograms to milligrams, Celsius to Fahrenheit), timezone normalization to Central Standard Time (UTC-6), mapping of technical parameter codes to readable constituent names, removal of invalid quality codes, and averaging of duplicate measurements within the same hour.
62
+ 4. **Analytics** — The transformed data is loaded into the `analytics` schema in a standardized format with consistent columns: `datetime`, `value`, `station_id`, `station_origin`, `constituent`, and `unit`.
63
+ 5. **Reports** — Summary views are automatically built on top of the analytics data.
64
+
65
+ ### Warehouse Schemas in Detail
66
+
67
+ #### Staging Schema
68
+
69
+ The staging schema preserves the raw data exactly as downloaded from each source system. This gives you access to every original field for troubleshooting or custom analysis. It contains:
70
+
71
+ - **`staging.equis`** — Raw EQuIS data with all original Oracle columns (latitude, longitude, sample methods, lab qualifiers, detection limits, etc.).
72
+ - **`staging.wiski`** — Raw WISKI data with all original KiWIS fields (timestamps, quality codes, quality code names, parameter types, station metadata, etc.).
73
+
74
+ #### Analytics Schema
75
+
76
+ The analytics schema holds the final, cleaned data that you query for analysis. All data here follows a standard format. It contains:
77
+
78
+ - **`analytics.equis`** — Processed EQuIS data with standardized columns: `datetime`, `value`, `station_id`, `station_origin`, `constituent`, and `unit`.
79
+ - **`analytics.wiski`** — Processed WISKI data with the same standardized columns.
80
+ - **`analytics.observations`** — A combined view that merges both EQuIS and WISKI data into a single table. When you query this view, you get all observations from both sources together, with each row tagged by its `station_origin` so you can tell where it came from.
81
+ - **`analytics.outlet_observations`** — A view that links observations to model outlets by joining observation data with the outlet station mappings. Results are grouped by outlet, constituent, and datetime, with values averaged when multiple stations feed the same outlet.
82
+ - **`analytics.outlet_observations_with_flow`** — Extends outlet observations by joining in corresponding flow (discharge) and baseflow data for each outlet and timestamp. This makes it easy to analyze water-quality data alongside the flow conditions at the time of measurement.
83
+
84
+ #### Reports Schema
85
+
86
+ The reports schema provides pre-computed summaries so you do not have to write your own aggregation queries:
87
+
88
+ - **`reports.constituent_summary`** — Groups all observations by station and constituent. For each station–constituent combination, it shows: sample count, average value, minimum and maximum values, and the first and last year of available data.
89
+ - **`reports.outlet_constituent_summary`** — Same type of summary but organized by outlet instead of by individual station. Shows sample count, average, min, max, and date range for each outlet–constituent combination.
90
+ - **`reports.wiski_qc_count`** — Summarizes the quality codes present in the raw WISKI data for each station and parameter, along with descriptions of what each quality code means. Useful for understanding data quality before analysis.
91
+
92
+ #### Outlets Schema
93
+
94
+ The outlets schema manages the relationship between monitoring stations and hydrological model reaches. In HSPF modeling, an "outlet" is a point where you compare model predictions against observed data. A single outlet can be associated with multiple monitoring stations (e.g., a WISKI gauge and an EQuIS sampling site at the same location) and multiple model reaches. It contains:
95
+
96
+ - **`outlets.outlet_groups`** — Defines each outlet with an ID, name, and the model repository it belongs to.
97
+ - **`outlets.outlet_stations`** — Links stations to outlets (which stations feed data to which outlet).
98
+ - **`outlets.outlet_reaches`** — Links model reaches to outlets (which model segments correspond to which outlet).
99
+ - **`outlets.station_reach_pairs`** — A view that derives the many-to-many relationship between stations and reaches through their shared outlets.
100
+
101
+ #### Mappings Schema
102
+
103
+ The mappings schema stores reference lookup tables used during data processing:
104
+
105
+ - **`mappings.wiski_parametertype`** — Maps WISKI internal parameter type IDs (e.g., `11500`) to readable constituent names (e.g., `Q` for discharge).
106
+ - **`mappings.equis_casrn`** — Maps EQuIS chemical CAS registry numbers (e.g., `7723-14-0`) to constituent names (e.g., `TP` for Total Phosphorus).
107
+ - **`mappings.station_xref`** — Cross-reference table linking WISKI station numbers to EQuIS station IDs, along with Watershed IDs (WID).
108
+ - **`mappings.wiski_quality_codes`** — Reference table with descriptions of WISKI quality codes and whether each code is currently active.
109
+
110
+ ---
111
+
112
+ ## DataManagerWrapper
113
+
114
+ The `DataManagerWrapper` class is the main interface for users of this package. It manages the database connection and provides methods for downloading data, processing it, and querying results. You create one instance by pointing it at the path where your database lives.
115
+
116
+ ### Creating a DataManagerWrapper
117
+
118
+ ```python
119
+ from mpcaHydro.warehouse_functions import DataManagerWrapper
120
+
121
+ # Create a new wrapper (initializes the database if it doesn't exist)
122
+ dm = DataManagerWrapper('/path/to/my/observations.duckdb')
123
+
124
+ # Or reset the database and start fresh
125
+ dm = DataManagerWrapper('/path/to/my/observations.duckdb', reset=True)
126
+ ```
127
+
128
+ ### Methods Reference
129
+
130
+ #### Downloading Data
131
+
132
+ | Method | What It Does |
133
+ |--------|-------------|
134
+ | **`download_wiski_data(station_ids, ...)`** | Downloads time-series data for a list of WISKI station IDs from the KiWIS web service. The raw data is stored in `staging.wiski`, then automatically transformed (unit conversions, quality filtering, baseflow calculation) and loaded into `analytics.wiski`. You can specify a year range, choose whether to filter by quality codes, and select the baseflow separation method. If `replace=True`, existing data for those stations is removed first. |
135
+ | **`download_equis_data(station_ids, oracle_username, oracle_password, ...)`** | Downloads lab sample data for a list of EQuIS station IDs from the MPCA Oracle database. Requires Oracle credentials. The raw data is stored in `staging.equis`, then transformed (non-detect handling, timezone normalization, unit conversion, constituent mapping, hourly averaging) and loaded into `analytics.equis`. If `replace=True`, existing data for those stations is removed first. |
136
+
137
+ #### Processing Data
138
+
139
+ | Method | What It Does |
140
+ |--------|-------------|
141
+ | **`process_wiski_data(...)`** | Re-processes the raw WISKI data already in the staging schema. Reads from `staging.wiski`, applies the full transformation pipeline (quality filtering, unit conversion, column normalization, hourly averaging, baseflow calculation), and writes the result to `analytics.wiski`. Useful if you want to reprocess data with different quality code filters or a different baseflow method without re-downloading. |
142
+ | **`process_equis_data()`** | Re-processes the raw EQuIS data already in the staging schema. Reads from `staging.equis`, applies the full transformation pipeline (constituent mapping, timezone normalization, unit conversion, non-detect handling, hourly averaging), and writes the result to `analytics.equis`. |
143
+ | **`process_all_data(...)`** | Convenience method that runs both `process_wiski_data` and `process_equis_data` in sequence. |
144
+ | **`update_views()`** | Refreshes all database views (analytics, reports, and outlets). Call this after making manual changes to ensure views reflect the current data. |
145
+
146
+ #### Retrieving Observation Data
147
+
148
+ | Method | What It Does |
149
+ |--------|-------------|
150
+ | **`get_observation_data(station_ids, constituent, agg_period=None)`** | Retrieves time-series data from the combined `analytics.observations` view for the specified stations and constituent (e.g., `'TP'`, `'Q'`). Optionally aggregates to a time period — `'D'` for daily, `'h'` for hourly, `'W'` for weekly, `'ME'` for monthly, etc. The aggregation method is chosen automatically: averages for flow and concentration data, sums for load data. Returns a pandas DataFrame with a datetime index. |
151
+ | **`get_outlet_data(outlet_id, constituent, agg_period='D')`** | Retrieves observation data for a specific outlet, including matching flow and baseflow values at each timestamp. The result includes columns for the constituent value, observed flow, and observed baseflow. This is the primary method for getting data formatted for model calibration comparison. |
152
+ | **`get_station_data(station_id, station_origin)`** | Retrieves all processed observation data for a single station from `analytics.observations`. Returns every constituent and time step available for that station. |
153
+ | **`get_raw_data(station_id, station_origin)`** | Retrieves the original, un-transformed data from the staging schema for a specific station. Useful for inspecting the raw data before processing or debugging transformation issues. |
154
+ | **`get_station_ids(station_origin=None)`** | Returns a list of all station IDs that have data in the analytics schema. Optionally filter by origin (`'wiski'` or `'equis'`) to see only stations from one source. |
155
+ | **`get_outlets(model_name)`** | Returns a table of all outlet definitions (station-to-reach mappings) for a specific model repository. |
156
+
157
+ #### Summary Reports
158
+
159
+ | Method | What It Does |
160
+ |--------|-------------|
161
+ | **`get_constituent_summary()`** | Returns a table showing, for each station and constituent: how many observations exist, and the first and last year of data. Queries the observations directly from the analytics schema. |
162
+ | **`station_summary(constituent=None)`** | Returns the pre-computed constituent summary from `reports.constituent_summary`, which includes sample count, average value, min/max values, and date range for each station–constituent combination. Optionally filter to a single constituent. |
163
+ | **`outlet_summary()`** | Returns the pre-computed outlet summary from `reports.outlet_constituent_summary`, showing sample count, average, min, max, and date range for each outlet–constituent combination. |
164
+ | **`wiski_qc_counts()`** | Returns a breakdown of WISKI quality codes by station and parameter from `reports.wiski_qc_count`. Shows how many observations fall under each quality code, with human-readable descriptions. Useful for assessing data quality before analysis. |
165
+ | **`station_reach_pairs()`** | Returns the full mapping of stations to model reaches from the `outlets.station_reach_pairs` view, showing which stations are connected to which model reach segments. |
166
+
167
+ #### Data Export
168
+
169
+ | Method | What It Does |
170
+ |--------|-------------|
171
+ | **`export_station_to_csv(station_id, station_origin, output_path)`** | Exports all processed analytics data for a station to a CSV file. |
172
+ | **`export_raw_to_csv(station_id, station_origin, output_path)`** | Exports raw staging data for a station to a CSV file. |
173
+
174
+ #### Schema Templates
175
+
176
+ | Method | What It Does |
177
+ |--------|-------------|
178
+ | **`get_equis_template()`** | Returns an empty DataFrame with the exact column names and types of the `staging.equis` table. Useful if you need to manually prepare data for loading into the warehouse. |
179
+ | **`get_wiski_template()`** | Returns an empty DataFrame with the exact column names and types of the `staging.wiski` table. |
180
+
181
+ ---
182
+
183
+ ## Station Cross-Reference (xref)
184
+
185
+ The MPCA uses different station ID systems in WISKI and EQuIS. The `xref` module provides standalone functions to translate between them using a built-in cross-reference table (`WISKI_EQUIS_XREF.csv`). Each row in the cross-reference table links a WISKI station number to its corresponding EQuIS station ID(s), along with a Watershed ID (WID).
186
+
187
+ | Function | What It Does |
188
+ |----------|-------------|
189
+ | **`get_wiski_stations()`** | Returns a list of all WISKI station numbers in the cross-reference table. |
190
+ | **`get_equis_stations()`** | Returns a list of all EQuIS station IDs in the cross-reference table. |
191
+ | **`wiski_equis_alias(wiski_station_id)`** | Given a WISKI station number, returns the single primary EQuIS station ID it maps to. Raises an error if there are multiple matches. |
192
+ | **`wiski_equis_associations(wiski_station_id)`** | Given a WISKI station number, returns all EQuIS station IDs associated with it (may be more than one). |
193
+ | **`equis_wiski_alias(equis_station_id)`** | Given an EQuIS station ID, returns the single WISKI station number it maps to. Raises an error if multiple matches exist. |
194
+ | **`equis_wiski_associations(equis_station_id)`** | Given an EQuIS station ID, returns all WISKI station numbers associated with it. |
195
+
196
+ ---
197
+
198
+ ## How Data Is Processed
199
+
200
+ Each data source goes through its own transformation pipeline before being stored in the analytics schema. Here is a summary of what happens during processing:
201
+
202
+ ### WISKI Data Processing
203
+ 1. **Convert units** — Standardizes temperature from Celsius to Fahrenheit, mass from kilograms to pounds, and renames cubic-feet-per-second to "cfs".
204
+ 2. **Map parameters** — Converts internal WISKI parameter type IDs (e.g., `11500`) and station parameter numbers (e.g., `262`) to readable constituent names (e.g., `Q` for discharge).
205
+ 3. **Normalize columns** — Renames raw column names (e.g., `Value`, `Timestamp`, `Quality Code`) to standard names (`value`, `datetime`, `quality_code`).
206
+ 4. **Filter quality codes** — Removes observations with invalid or unreliable quality flags, keeping only codes that represent valid measurements.
207
+ 5. **Average by hour** — Groups measurements within the same hour and takes the average to produce a consistent hourly time-series.
208
+ 6. **Calculate baseflow** — For stations with discharge data, estimates the baseflow component using the Boughton method (separating groundwater contribution from surface runoff).
209
+
210
+ ### EQuIS Data Processing
211
+ 1. **Map constituents** — Converts chemical CAS registry numbers (e.g., `7723-14-0`) to readable names (e.g., `TP` for Total Phosphorus).
212
+ 2. **Normalize timezones** — Converts all timestamps to a consistent Central Standard Time offset (UTC-6), accounting for daylight saving time.
213
+ 3. **Normalize columns** — Selects and renames relevant columns to the standard format.
214
+ 4. **Convert units** — Standardizes micrograms/L to milligrams/L, milligrams/gram to milligrams/L, and Celsius to Fahrenheit.
215
+ 5. **Handle non-detects** — Replaces lab results below the detection limit with zero.
216
+ 6. **Average by hour** — Groups samples taken within the same hour at the same station and averages them.
217
+
218
+ ### SWD Data Processing
219
+ 1. **Filter parameters** — Keeps only observations for supported constituents.
220
+ 2. **Parse dates** — Combines separate date and time columns into a single datetime.
221
+ 3. **Convert units** — Standardizes micrograms to milligrams, Celsius to Fahrenheit, and kilograms to pounds.
222
+ 4. **Map constituents** — Converts parameter names to standard abbreviations.
223
+ 5. **Average by hour** — Groups and averages within the same hour.
@@ -0,0 +1,20 @@
1
+ # Minimal makefile for Sphinx documentation
2
+ #
3
+
4
+ # You can set these variables from the command line, and also
5
+ # from the environment for the first two.
6
+ SPHINXOPTS ?=
7
+ SPHINXBUILD ?= sphinx-build
8
+ SOURCEDIR = .
9
+ BUILDDIR = _build
10
+
11
+ # Put it first so that "make" without argument is like "make help".
12
+ help:
13
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14
+
15
+ .PHONY: help Makefile
16
+
17
+ # Catch-all target: route all unknown targets to Sphinx using the new
18
+ # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19
+ %: Makefile
20
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
File without changes
File without changes
@@ -0,0 +1,93 @@
1
+ API Reference
2
+ =============
3
+
4
+ .. contents:: Modules
5
+ :local:
6
+
7
+ warehouse_functions
8
+ -------------------
9
+
10
+ .. automodule:: mpcaHydro.warehouse_functions
11
+ :members:
12
+ :undoc-members:
13
+ :show-inheritance:
14
+
15
+ warehouse
16
+ ---------
17
+
18
+ .. automodule:: mpcaHydro.warehouse
19
+ :members:
20
+ :undoc-members:
21
+ :show-inheritance:
22
+
23
+ wiski
24
+ -----
25
+
26
+ .. automodule:: mpcaHydro.wiski
27
+ :members:
28
+ :undoc-members:
29
+ :show-inheritance:
30
+
31
+ equis
32
+ -----
33
+
34
+ .. automodule:: mpcaHydro.equis
35
+ :members:
36
+ :undoc-members:
37
+ :show-inheritance:
38
+
39
+ outlets
40
+ -------
41
+
42
+ .. automodule:: mpcaHydro.outlets
43
+ :members:
44
+ :undoc-members:
45
+ :show-inheritance:
46
+
47
+ reports
48
+ -------
49
+
50
+ .. automodule:: mpcaHydro.reports
51
+ :members:
52
+ :undoc-members:
53
+ :show-inheritance:
54
+
55
+ csg
56
+ ---
57
+
58
+ .. automodule:: mpcaHydro.csg
59
+ :members:
60
+ :undoc-members:
61
+ :show-inheritance:
62
+
63
+ pywisk
64
+ ------
65
+
66
+ .. automodule:: mpcaHydro.pywisk
67
+ :members:
68
+ :undoc-members:
69
+ :show-inheritance:
70
+
71
+ swd
72
+ ---
73
+
74
+ .. automodule:: mpcaHydro.swd
75
+ :members:
76
+ :undoc-members:
77
+ :show-inheritance:
78
+
79
+ sql_loader
80
+ ----------
81
+
82
+ .. automodule:: mpcaHydro.sql_loader
83
+ :members:
84
+ :undoc-members:
85
+ :show-inheritance:
86
+
87
+ xref
88
+ ----
89
+
90
+ .. automodule:: mpcaHydro.xref
91
+ :members:
92
+ :undoc-members:
93
+ :show-inheritance:
@@ -0,0 +1,62 @@
1
+ # Configuration file for the Sphinx documentation builder.
2
+ #
3
+ # For the full list of built-in configuration values, see the documentation:
4
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html
5
+
6
+ import os
7
+ import sys
8
+
9
+ # -- Path setup --------------------------------------------------------------
10
+ # Add the project source directory so autodoc can find the package.
11
+ sys.path.insert(0, os.path.abspath(os.path.join("..", "src")))
12
+
13
+ # -- Project information -----------------------------------------------------
14
+ project = "mpcaHydro"
15
+ copyright = "2026, Mulu Fratkin"
16
+ author = "Mulu Fratkin"
17
+ release = "2.2.3"
18
+
19
+ # -- General configuration ---------------------------------------------------
20
+ extensions = [
21
+ "sphinx.ext.autodoc",
22
+ "sphinx.ext.napoleon",
23
+ "sphinx.ext.viewcode",
24
+ "sphinx.ext.intersphinx",
25
+ ]
26
+
27
+ # Napoleon settings (NumPy-style docstrings)
28
+ napoleon_google_docstring = False
29
+ napoleon_numpy_docstring = True
30
+ napoleon_include_init_with_doc = True
31
+
32
+ templates_path = ["_templates"]
33
+ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
34
+
35
+ # -- Options for HTML output -------------------------------------------------
36
+ html_theme = "alabaster"
37
+ html_static_path = ["_static"]
38
+
39
+ # -- Intersphinx configuration -----------------------------------------------
40
+ intersphinx_mapping = {
41
+ "python": ("https://docs.python.org/3", None),
42
+ "pandas": ("https://pandas.pydata.org/docs/", None),
43
+ }
44
+
45
+ # -- Autodoc configuration ---------------------------------------------------
46
+ autodoc_member_order = "bysource"
47
+ autodoc_typehints = "description"
48
+
49
+ # Mock imports for packages that may not be installed in the docs build
50
+ # environment (e.g. geopandas requires system-level GDAL libraries).
51
+ # NOTE: outlets.py executes geopandas code at module level, so the mock
52
+ # alone is not sufficient for outlets, warehouse, and warehouse_functions.
53
+ # Install geopandas in your docs build environment to include those modules.
54
+ autodoc_mock_imports = [
55
+ "baseflow",
56
+ "duckdb",
57
+ "geopandas",
58
+ "numpy",
59
+ "oracledb",
60
+ "pandas",
61
+ "requests",
62
+ ]
@@ -0,0 +1,18 @@
1
+ mpcaHydro documentation
2
+ =======================
3
+
4
+ **mpcaHydro** is a Python package for downloading and managing MPCA
5
+ hydrology data.
6
+
7
+ .. toctree::
8
+ :maxdepth: 2
9
+ :caption: Contents:
10
+
11
+ api
12
+
13
+ Indices and tables
14
+ ==================
15
+
16
+ * :ref:`genindex`
17
+ * :ref:`modindex`
18
+ * :ref:`search`
@@ -0,0 +1,35 @@
1
+ @ECHO OFF
2
+
3
+ pushd %~dp0
4
+
5
+ REM Command file for Sphinx documentation
6
+
7
+ if "%SPHINXBUILD%" == "" (
8
+ set SPHINXBUILD=sphinx-build
9
+ )
10
+ set SOURCEDIR=.
11
+ set BUILDDIR=_build
12
+
13
+ %SPHINXBUILD% >NUL 2>NUL
14
+ if errorlevel 9009 (
15
+ echo.
16
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17
+ echo.installed, then set the SPHINXBUILD environment variable to point
18
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
19
+ echo.may add the Sphinx directory to PATH.
20
+ echo.
21
+ echo.If you don't have Sphinx installed, grab it from
22
+ echo.https://www.sphinx-doc.org/
23
+ exit /b 1
24
+ )
25
+
26
+ if "%1" == "" goto help
27
+
28
+ %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29
+ goto end
30
+
31
+ :help
32
+ %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33
+
34
+ :end
35
+ popd
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
5
5
  [project]
6
6
  name = "mpcaHydro"
7
7
  urls = { "Homepage" = "https://github.com/mfratkin1/mpcaHydro" } # Project homepage link shown on the package index page
8
- version = "2.2.8"
8
+ version = "2.2.10"
9
9
  dependencies = [
10
10
  "pandas",
11
11
  "requests",
@@ -29,3 +29,17 @@ classifiers = [
29
29
  "Development Status :: 3 - Alpha",
30
30
  "Programming Language :: Python"
31
31
  ]
32
+
33
+ [project.optional-dependencies]
34
+ tests = ["pytest"]
35
+ docs = [
36
+ "sphinx",
37
+ "sphinx-autodoc-typehints",
38
+ ]
39
+
40
+ [tool.pytest.ini_options]
41
+ markers = [
42
+ "network: test requires network access to WISKI/KISTERS API",
43
+ "credentials: test requires Oracle database credentials",
44
+ "integration: mark test as integration test",
45
+ ]