mpcaHydro 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mpcaHydro/etlCSG.py ADDED
@@ -0,0 +1,88 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Tue Oct 10 14:13:23 2023
4
+
5
+ @author: mfratki
6
+ """
7
+
8
+ import pandas as pd
9
+ # import geopandas as gpd
10
+
11
+
12
# Maps the value-column headers of the CSG CSV export to short constituent
# codes (the same codes transform() writes to its 'constituent' column).
CONSTITUENT_MAP = {
    'Water Temp. (C)': 'WT',
    'Discharge (cfs)': 'Q',
    'DO (mg/L)': 'DO',
}

# Backward-compatible alias: the constant was first published under this
# misspelled name, so it stays importable for existing callers.
CONSITUENT_MAP = CONSTITUENT_MAP
16
+
17
def download(station_no, start_date='1996-1-1', end_date='2050-1-1'):
    """Download the CSG hydro data dump for one station as a DataFrame.

    Parameters
    ----------
    station_no : str
        Station identifier. Its first character is dropped to build the
        ``site`` query value; the full identifier is stored in the
        ``station_id`` column of the result.
    start_date, end_date : str, optional
        Values sent as the ``startdate`` / ``enddate`` query parameters.
        The defaults reproduce the window that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        The CSV returned by the service plus a ``station_id`` column.
    """
    station = station_no[1:]
    url = (
        'https://maps2.dnr.state.mn.us/cgi-bin/csg.cgi'
        f'?mode=dump_hydro_data_as_csv&site={station}'
        f'&startdate={start_date}&enddate={end_date}'
    )
    df = pd.read_csv(url)
    df['station_id'] = station_no
    return df
26
+
27
+
28
+
29
+ # def process(df):
30
+ # df['Timestamp'] = pd.to_datetime(df['Timestamp'])
31
+ # df.set_index('Timestamp',inplace=True)
32
+ # value_variables = [column for column in df.columns if (column not in ['Site','Timestamp','station_no']) & ~(column.endswith('Quality'))]
33
+
34
+ # test = df[value_variables].resample(rule='1H', kind='interval').mean().dropna()
35
+ # df = df['Value'].resample(rule='1H', kind='interval').mean().to_frame()
36
+
37
def transform(data):
    """Reshape a wide CSG download into long format, one row per observation.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``Timestamp`` and ``station_id`` columns plus any of the
        value columns ``'Water Temp. (C)'``, ``'Discharge (cfs)'``,
        ``'DO (mg/L)'`` and, optionally, their ``'<name> Quality'``
        companions. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``datetime``, ``station_id``, ``variable``, ``value``,
        ``quality``, ``unit``, ``constituent`` and ``source``. Rows whose
        ``value`` is missing are dropped. ``quality`` is NA for a value
        column that has no quality column in ``data``.
    """
    units = {'Water Temp. (C)': 'C',
             'Discharge (cfs)': 'cfs',
             'DO (mg/L)': 'mg/L'}
    constituents = {'Water Temp. (C)': 'WT',
                    'Discharge (cfs)': 'Q',
                    'DO (mg/L)': 'DO'}
    output_columns = ['datetime', 'station_id', 'variable', 'value',
                      'quality', 'unit', 'constituent', 'source']

    # The localized result must be kept: tz_localize returns a new Series.
    # NOTE(review): timestamps are labelled UTC because that is what the
    # original call requested - confirm the feed really reports UTC.
    timestamps = pd.to_datetime(data['Timestamp']).dt.tz_localize('UTC')

    # Pair every value column with its own quality column by name, so the
    # flags cannot drift out of step when one quality column is absent.
    frames = []
    for column in data.columns:
        if column not in units:
            continue
        quality_column = f'{column} Quality'
        if quality_column in data.columns:
            quality = data[quality_column]
        else:
            quality = pd.Series(pd.NA, index=data.index, dtype=object)
        frames.append(pd.DataFrame({'datetime': timestamps,
                                    'station_id': data['station_id'],
                                    'variable': column,
                                    'value': data[column],
                                    'quality': quality}))

    if not frames:
        return pd.DataFrame(columns=output_columns)

    data_melt = pd.concat(frames, ignore_index=True)
    data_melt['unit'] = data_melt['variable'].map(units)
    data_melt['constituent'] = data_melt['variable'].map(constituents)
    # Tag the returned frame (not the caller's input) with its origin.
    data_melt['source'] = 'csg'
    return data_melt.dropna(subset=['value'])
77
+
78
+
79
+
80
+
81
+
82
def load(data, file_path):
    """Write ``data`` to ``file_path`` as CSV using ``DataFrame.to_csv`` defaults."""
    destination = file_path
    data.to_csv(destination)
85
+
86
+
87
+
88
+
mpcaHydro/etlSWD.py ADDED
@@ -0,0 +1,187 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Tue Oct 10 14:13:23 2023
4
+
5
+ @author: mfratki
6
+ """
7
+
8
+ import pandas as pd
9
+ #from hspf_tools.orm.models import Station
10
+ # import geopandas as gpd
11
+
12
+
13
+
14
+
15
+
16
# Maps the 'parameter' names found in the surface-water results to short
# constituent codes. Names are grouped by the code they share; the key order
# is the order in which the groups are listed.
CONSTITUENT_MAP = {
    **dict.fromkeys(('Total suspended solids',
                     'Residue - nonfilterable (TSS)'), 'TSS'),
    **dict.fromkeys(('Kjeldahl nitrogen as N',
                     'Nitrogen, Total Kjeldahl (TKN) as N'), 'TKN'),
    **dict.fromkeys(('Nitrate + Nitrite Nitrogen, Total as N',
                     'Nitrate/Nitrite as N (N+N) as N',
                     'Nutrient-nitrogen as N'), 'N'),
    **dict.fromkeys(('Phosphorus, Total as P as P',
                     'Phosphorus as P'), 'TP'),
    **dict.fromkeys(('Carbonaceous biochemical oxygen demand, standard conditions',
                     'Chemical oxygen demand'), 'BOD'),
    **dict.fromkeys(('Chlorophyll a, corrected for pheophytin',
                     'Chlorophyll-A',
                     'Chlorophyll-a, Pheophytin Corrected'), 'CHLA'),
    'Flow': 'Q',
    'Temperature, water': 'WT',
    **dict.fromkeys(('Dissolved oxygen',
                     'Dissolved oxygen (DO)'), 'DO'),
    'Suspended Sediment Concentration': 'SSC',
}
35
+
36
+ # station_no = 'S010-822'
37
+ # data = download(station_no)
38
+ # data = transform(data)
39
+
40
+
41
+ # def download(station_nos):
42
+ # df = pd.concat([_download(station_no) for station_no in station_nos])
43
+ # return df
44
+ import requests
45
+
46
def _download(station_no):
    """Fetch the raw monitoring results for one station from the MPCA API.

    Parameters
    ----------
    station_no : str
        Station identifier, sent as the ``stationId`` query parameter.

    Returns
    -------
    pandas.DataFrame or None
        A frame built from the response's ``data`` entry; an empty frame
        whose columns are the response's ``column_names`` when
        ``recordCount`` is 0; ``None`` when the request fails (the error is
        printed rather than raised).
    """
    url = 'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results'
    # Let requests build the query string so the station id is URL-encoded.
    query = {'stationId': station_no, 'format': 'json'}

    try:
        # Without a timeout a stalled connection would block indefinitely.
        response = requests.get(url, params=query, timeout=60)
        response.raise_for_status()  # Raise exception for HTTP errors
        # Decode the body once and reuse it below.
        payload = response.json()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None

    if payload['recordCount'] == 0:
        return pd.DataFrame(columns=payload['column_names'])
    return pd.DataFrame(payload['data'])
63
+
64
+
65
+
66
def download(station_no):
    """Download and transform the surface-water results for one station.

    Parameters
    ----------
    station_no : str
        Station identifier passed to ``_download`` and stored in the
        ``station_id`` column before transforming.

    Returns
    -------
    pandas.DataFrame
        The output of ``transform`` for the station. When the service
        reports no records the untransformed empty frame is returned, and
        when the request failed (``_download`` returned ``None``) an empty
        frame with no columns is returned.
    """
    df = _download(station_no)
    if df is None:
        # _download has already printed the failure; hand back an empty
        # frame instead of crashing on ``None.empty``.
        return pd.DataFrame()
    if df.empty:
        return df
    df['station_id'] = station_no
    return transform(df)
74
+
75
def info(station_no):
    """Return a single descriptive row for a station from its raw results.

    Parameters
    ----------
    station_no : str
        Station identifier passed to ``_download`` and stored in the
        ``station_id`` column.

    Returns
    -------
    pandas.DataFrame
        The raw results reduced to the first row per ``station_id``.
        ``resultUnit`` is lower-cased and relabelled (kg -> lb,
        ug/l -> mg/l, deg c -> degF); only the label changes here, the
        result values are left as downloaded. An empty frame with no
        columns is returned when the request failed.
    """
    df = _download(station_no)
    if df is None:
        # _download has already printed the failure; avoid indexing None.
        return pd.DataFrame()

    df['station_id'] = station_no
    unit_labels = {'kg': 'lb', 'ug/l': 'mg/l', 'deg c': 'degF'}
    df['resultUnit'] = df['resultUnit'].str.lower().replace(unit_labels)

    return df.drop_duplicates(subset='station_id')
86
+
87
+
88
+ # def _info(station_nos):
89
+ # station_info = info(station_nos)
90
+ # if station_info.empty:
91
+ # return Station(station_nos,
92
+ # 'equis',
93
+ # station_type = 'River')
94
+ # else:
95
+ # return Station(station_info.iloc[0]['stationId'],
96
+ # 'equis',
97
+ # station_name = station_info.iloc[0]['stationName'],
98
+ # station_type = 'River')
99
+
100
+
101
+
102
def transform(df):
    """Convert raw surface-water results into hourly, unit-normalised records.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw results with at least ``parameter``, ``sampleDate``,
        ``sampleTime``, ``result``, ``resultUnit``, ``stationName`` and
        ``station_id`` columns.

    Returns
    -------
    pandas.DataFrame
        Indexed by timezone-aware ``datetime`` rounded to the hour, with
        columns ``variable``, ``unit``, ``station_id``, ``station_name``,
        ``constituent``, ``data_format``, ``data_type``, ``source``,
        ``value`` (mean of the samples sharing all of the former) and
        ``quality_id`` (always NA). If nothing survives the filters, the
        filtered raw frame (plus a ``datetime`` column) is returned as is.
    """
    # Copy after each filter so later assignments act on an independent
    # frame instead of a slice of the caller's data.
    df = df.loc[df['parameter'].isin(CONSTITUENT_MAP)].copy()
    df['datetime'] = pd.to_datetime(df['sampleDate'] + ' ' + df['sampleTime'])
    keep = (df['datetime'] > '1996') & (df['result'] != '(null)')
    df = df.loc[keep].copy()

    if df.empty:
        return df

    df['result'] = pd.to_numeric(df['result'], errors='coerce')
    df = df.rename(columns={'result': 'value',
                            'parameter': 'variable',
                            'stationName': 'station_name',
                            'stationID': 'station_id',
                            'resultUnit': 'unit'})

    df['constituent'] = df['variable'].map(CONSTITUENT_MAP)
    df['source'] = 'swd'
    df['data_type'] = 'discrete'
    df['data_format'] = 'instantaneous'
    # Remember the first row's name; it is written back after aggregation.
    station_name = df['station_name'].iloc[0]

    key_columns = ['datetime', 'variable', 'unit', 'station_id', 'station_name',
                   'constituent', 'data_format', 'data_type', 'source']
    df = df.loc[:, key_columns + ['value']].astype({'value': float,
                                                    'unit': str,
                                                    'station_id': str,
                                                    'station_name': str,
                                                    'constituent': str})

    # Convert values first (ug/l -> mg/l, kg -> lb, deg C -> deg F), then
    # relabel the units to match.
    df['unit'] = df['unit'].str.lower()
    is_ug = df['unit'] == 'ug/l'
    is_kg = df['unit'] == 'kg'
    is_celsius = df['unit'] == 'deg c'
    df.loc[is_ug, 'value'] = df.loc[is_ug, 'value'] * .001
    df.loc[is_kg, 'value'] = df.loc[is_kg, 'value'] * 2.20462
    df.loc[is_celsius, 'value'] = df.loc[is_celsius, 'value'] * 9 / 5 + 32
    df['unit'] = df['unit'].replace({'kg': 'lb', 'ug/l': 'mg/l', 'deg c': 'degF'})

    # NOTE(review): pandas reads 'UTC+06:00' as a fixed offset six hours
    # *ahead* of UTC; if local Minnesota standard time was intended this
    # should probably be 'UTC-06:00' - confirm before changing.
    # 'h' is the non-deprecated spelling of the hourly alias.
    df['datetime'] = df['datetime'].dt.tz_localize('UTC+06:00').dt.round('h')

    # Average samples that fall in the same hour for the same series.
    df = df.groupby(key_columns).mean().reset_index().set_index('datetime')
    df['quality_id'] = pd.NA
    df['station_name'] = station_name
    return df
156
+
157
def load(df, file_path):
    """Write ``df`` to ``file_path`` as CSV using ``DataFrame.to_csv`` defaults."""
    destination = file_path
    df.to_csv(destination)
159
+
160
+
161
+
162
+
163
+ # base_url = 'https://webapp.pca.state.mn.us/surface-water/search?'
164
+
165
+
166
+ # https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?
167
+
168
+
169
+ # dataType
170
+ # geographicType
171
+ # specificGeoAreaCode
172
+ # wuType
173
+ # stationType
174
+ # stationId
175
+
176
+
177
+
178
+ # CONSTITUENT_MAP = {'TSS': ['Total suspended solids'],
179
+ # 'TKN': ['Kjeldahl nitrogen as N','Nitrogen, Total Kjeldahl (TKN) as N'],
180
+ # 'N' : ['Nitrate + Nitrite Nitrogen, Total as N','Nitrate/Nitrite as N (N+N) as N'],
181
+ # 'TP' : ['Phosphorus, Total as P as P'],
182
+ # 'BOD': ['Carbonaceous biochemical oxygen demand, standard conditions',
183
+ # 'Chemical oxygen demand'],
184
+ # 'CHLA': ['Chlorophyll a, corrected for pheophytin',
185
+ # 'Chlorophyll-A',
186
+ # 'Chlorophyll-a, Pheophytin Corrected'],
187
+ # 'Q': ['Flow']}