hydroserverpy-0.2.5-py3-none-any.whl → hydroserverpy-0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hydroserverpy might be problematic. Click here for more details.
- hydroserverpy/__init__.py +6 -15
- hydroserverpy/core/endpoints/__init__.py +9 -0
- hydroserverpy/core/endpoints/base.py +146 -0
- hydroserverpy/core/endpoints/data_loaders.py +93 -0
- hydroserverpy/core/endpoints/data_sources.py +93 -0
- hydroserverpy/core/endpoints/datastreams.py +225 -0
- hydroserverpy/core/endpoints/observed_properties.py +111 -0
- hydroserverpy/core/endpoints/processing_levels.py +111 -0
- hydroserverpy/core/endpoints/result_qualifiers.py +111 -0
- hydroserverpy/core/endpoints/sensors.py +111 -0
- hydroserverpy/core/endpoints/things.py +261 -0
- hydroserverpy/core/endpoints/units.py +111 -0
- hydroserverpy/{components → core/schemas}/__init__.py +1 -2
- hydroserverpy/core/schemas/base.py +124 -0
- hydroserverpy/core/schemas/data_loaders.py +73 -0
- hydroserverpy/core/schemas/data_sources.py +223 -0
- hydroserverpy/core/schemas/datastreams.py +330 -0
- hydroserverpy/core/schemas/observed_properties.py +43 -0
- hydroserverpy/core/schemas/processing_levels.py +31 -0
- hydroserverpy/core/schemas/result_qualifiers.py +26 -0
- hydroserverpy/core/schemas/sensors.py +68 -0
- hydroserverpy/core/schemas/things.py +346 -0
- hydroserverpy/core/schemas/units.py +29 -0
- hydroserverpy/core/service.py +200 -0
- hydroserverpy/etl/__init__.py +21 -0
- hydroserverpy/etl/extractors/__init__.py +0 -0
- hydroserverpy/etl/extractors/base.py +13 -0
- hydroserverpy/etl/extractors/ftp_extractor.py +50 -0
- hydroserverpy/etl/extractors/http_extractor.py +84 -0
- hydroserverpy/etl/extractors/local_file_extractor.py +25 -0
- hydroserverpy/etl/hydroserver_etl.py +40 -0
- hydroserverpy/etl/loaders/__init__.py +0 -0
- hydroserverpy/etl/loaders/base.py +13 -0
- hydroserverpy/etl/loaders/hydroserver_loader.py +68 -0
- hydroserverpy/etl/transformers/__init__.py +0 -0
- hydroserverpy/etl/transformers/base.py +52 -0
- hydroserverpy/etl/transformers/csv_transformer.py +88 -0
- hydroserverpy/etl/transformers/json_transformer.py +62 -0
- hydroserverpy/etl/types.py +7 -0
- hydroserverpy/etl_csv/__init__.py +0 -0
- hydroserverpy/{etl.py → etl_csv/hydroserver_etl_csv.py} +118 -95
- hydroserverpy/quality/__init__.py +1 -0
- hydroserverpy/quality/service.py +405 -0
- hydroserverpy-0.4.0.dist-info/METADATA +18 -0
- hydroserverpy-0.4.0.dist-info/RECORD +51 -0
- {hydroserverpy-0.2.5.dist-info → hydroserverpy-0.4.0.dist-info}/WHEEL +1 -1
- hydroserverpy/components/data_loaders.py +0 -67
- hydroserverpy/components/data_sources.py +0 -98
- hydroserverpy/components/datastreams.py +0 -47
- hydroserverpy/components/observed_properties.py +0 -48
- hydroserverpy/components/processing_levels.py +0 -48
- hydroserverpy/components/result_qualifiers.py +0 -48
- hydroserverpy/components/sensors.py +0 -48
- hydroserverpy/components/things.py +0 -48
- hydroserverpy/components/units.py +0 -48
- hydroserverpy/components/users.py +0 -28
- hydroserverpy/main.py +0 -62
- hydroserverpy/models.py +0 -218
- hydroserverpy/schemas/data_loaders.py +0 -27
- hydroserverpy/schemas/data_sources.py +0 -58
- hydroserverpy/schemas/datastreams.py +0 -56
- hydroserverpy/schemas/observed_properties.py +0 -33
- hydroserverpy/schemas/processing_levels.py +0 -33
- hydroserverpy/schemas/result_qualifiers.py +0 -32
- hydroserverpy/schemas/sensors.py +0 -39
- hydroserverpy/schemas/things.py +0 -107
- hydroserverpy/schemas/units.py +0 -32
- hydroserverpy/schemas/users.py +0 -28
- hydroserverpy/service.py +0 -170
- hydroserverpy/utils.py +0 -37
- hydroserverpy-0.2.5.dist-info/METADATA +0 -15
- hydroserverpy-0.2.5.dist-info/RECORD +0 -35
- /hydroserverpy/{schemas → core}/__init__.py +0 -0
- /hydroserverpy/{exceptions.py → etl_csv/exceptions.py} +0 -0
- {hydroserverpy-0.2.5.dist-info → hydroserverpy-0.4.0.dist-info}/LICENSE +0 -0
- {hydroserverpy-0.2.5.dist-info → hydroserverpy-0.4.0.dist-info}/top_level.txt +0 -0
- {hydroserverpy-0.2.5.dist-info → hydroserverpy-0.4.0.dist-info}/zip-safe +0 -0
|
@@ -1,33 +1,39 @@
|
|
|
1
1
|
import csv
|
|
2
2
|
import logging
|
|
3
|
-
import frost_sta_client as fsc
|
|
4
3
|
import croniter
|
|
5
|
-
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from typing import IO, List, TYPE_CHECKING
|
|
6
|
+
from requests import HTTPError
|
|
6
7
|
from datetime import datetime, timezone, timedelta
|
|
7
8
|
from dateutil.parser import isoparse
|
|
8
|
-
from .schemas.data_sources import DataSourceGetResponse
|
|
9
|
-
from .schemas.datastreams import DatastreamGetResponse
|
|
10
9
|
from .exceptions import HeaderParsingError, TimestampParsingError
|
|
11
|
-
|
|
10
|
+
import warnings
|
|
12
11
|
|
|
13
|
-
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from ..core.schemas import DataSource
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger("hydroserver_etl")
|
|
14
16
|
logger.addHandler(logging.NullHandler())
|
|
15
17
|
|
|
16
18
|
|
|
17
|
-
class
|
|
19
|
+
class HydroServerETLCSV:
|
|
18
20
|
|
|
19
21
|
def __init__(
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
datastreams: List[DatastreamGetResponse]
|
|
22
|
+
self,
|
|
23
|
+
service,
|
|
24
|
+
data_file: IO[str],
|
|
25
|
+
data_source: "DataSource",
|
|
25
26
|
):
|
|
27
|
+
warnings.warn(
|
|
28
|
+
"HydroServerETLCSV is deprecated and will be removed in a future version. "
|
|
29
|
+
"Please use the new HydroServerETL class.",
|
|
30
|
+
DeprecationWarning,
|
|
31
|
+
)
|
|
26
32
|
self._service = service
|
|
27
33
|
self._data_file = data_file
|
|
28
34
|
self._data_source = data_source
|
|
29
35
|
self._datastreams = {
|
|
30
|
-
datastream.
|
|
36
|
+
datastream.uid: datastream for datastream in data_source.datastreams
|
|
31
37
|
}
|
|
32
38
|
|
|
33
39
|
self._timestamp_column_index = None
|
|
@@ -66,12 +72,12 @@ class HydroServerETL:
|
|
|
66
72
|
self._failed_datastreams.extend(self._post_observations())
|
|
67
73
|
|
|
68
74
|
except HeaderParsingError as e:
|
|
69
|
-
self._message = f
|
|
75
|
+
self._message = f"Failed to parse header for {self._data_source.name} with error: {str(e)}"
|
|
70
76
|
logger.error(self._message)
|
|
71
77
|
self._file_header_error = True
|
|
72
78
|
|
|
73
79
|
except TimestampParsingError as e:
|
|
74
|
-
self._message = f
|
|
80
|
+
self._message = f"Failed to parse one or more timestamps for {self._data_source.name} with error: {str(e)}"
|
|
75
81
|
logger.error(self._message)
|
|
76
82
|
self._file_timestamp_error = True
|
|
77
83
|
|
|
@@ -79,7 +85,7 @@ class HydroServerETL:
|
|
|
79
85
|
self._failed_datastreams.extend(self._post_observations())
|
|
80
86
|
|
|
81
87
|
if not self._message and len(self._failed_datastreams) > 0:
|
|
82
|
-
self._message = f
|
|
88
|
+
self._message = f"One or more datastreams failed to sync with HydroServer for {self._data_source.name}."
|
|
83
89
|
|
|
84
90
|
self._update_data_source()
|
|
85
91
|
|
|
@@ -99,7 +105,8 @@ class HydroServerETL:
|
|
|
99
105
|
"""
|
|
100
106
|
|
|
101
107
|
if index == self._data_source.header_row or (
|
|
102
|
-
|
|
108
|
+
index == self._data_source.data_start_row
|
|
109
|
+
and self._timestamp_column_index is None
|
|
103
110
|
):
|
|
104
111
|
self._parse_file_header(row)
|
|
105
112
|
|
|
@@ -109,19 +116,30 @@ class HydroServerETL:
|
|
|
109
116
|
timestamp = self._parse_row_timestamp(row)
|
|
110
117
|
|
|
111
118
|
for datastream in self._datastreams.values():
|
|
112
|
-
if str(datastream.
|
|
113
|
-
if
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
self.
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
119
|
+
if str(datastream.uid) not in self._datastream_start_row_indexes.keys():
|
|
120
|
+
if (
|
|
121
|
+
not datastream.phenomenon_end_time
|
|
122
|
+
or timestamp > datastream.phenomenon_end_time
|
|
123
|
+
):
|
|
124
|
+
self._datastream_start_row_indexes[str(datastream.uid)] = index
|
|
125
|
+
|
|
126
|
+
if (
|
|
127
|
+
str(datastream.uid) in self._datastream_start_row_indexes.keys()
|
|
128
|
+
and self._datastream_start_row_indexes[str(datastream.uid)] <= index
|
|
129
|
+
):
|
|
130
|
+
if str(datastream.uid) not in self._observations.keys():
|
|
131
|
+
self._observations[str(datastream.uid)] = []
|
|
132
|
+
|
|
133
|
+
self._observations[str(datastream.uid)].append(
|
|
134
|
+
{
|
|
135
|
+
"phenomenon_time": timestamp,
|
|
136
|
+
"result": row[
|
|
137
|
+
self._datastream_column_indexes[
|
|
138
|
+
datastream.data_source_column
|
|
139
|
+
]
|
|
140
|
+
],
|
|
141
|
+
}
|
|
142
|
+
)
|
|
125
143
|
|
|
126
144
|
def _parse_file_header(self, row: List[str]) -> None:
|
|
127
145
|
"""
|
|
@@ -136,22 +154,29 @@ class HydroServerETL:
|
|
|
136
154
|
"""
|
|
137
155
|
|
|
138
156
|
try:
|
|
139
|
-
self._timestamp_column_index =
|
|
140
|
-
|
|
157
|
+
self._timestamp_column_index = (
|
|
158
|
+
row.index(self._data_source.timestamp_column)
|
|
159
|
+
if isinstance(self._data_source.timestamp_column, str)
|
|
141
160
|
else int(self._data_source.timestamp_column) - 1
|
|
161
|
+
)
|
|
142
162
|
if self._timestamp_column_index > len(row):
|
|
143
163
|
raise ValueError
|
|
144
164
|
self._datastream_column_indexes = {
|
|
145
|
-
datastream.data_source_column:
|
|
146
|
-
|
|
147
|
-
|
|
165
|
+
datastream.data_source_column: (
|
|
166
|
+
row.index(datastream.data_source_column)
|
|
167
|
+
if not datastream.data_source_column.isdigit()
|
|
168
|
+
else int(datastream.data_source_column) - 1
|
|
169
|
+
)
|
|
148
170
|
for datastream in self._datastreams.values()
|
|
149
171
|
}
|
|
150
|
-
if len(self._datastream_column_indexes.values()) > 0 and
|
|
151
|
-
|
|
172
|
+
if len(self._datastream_column_indexes.values()) > 0 and max(
|
|
173
|
+
self._datastream_column_indexes.values()
|
|
174
|
+
) > len(row):
|
|
152
175
|
raise ValueError
|
|
153
176
|
except ValueError as e:
|
|
154
|
-
logger.error(
|
|
177
|
+
logger.error(
|
|
178
|
+
f'Failed to load data from data source: "{self._data_source.name}"'
|
|
179
|
+
)
|
|
155
180
|
raise HeaderParsingError(str(e)) from e
|
|
156
181
|
|
|
157
182
|
def _parse_row_timestamp(self, row: List[str]) -> datetime:
|
|
@@ -164,32 +189,36 @@ class HydroServerETL:
|
|
|
164
189
|
"""
|
|
165
190
|
|
|
166
191
|
try:
|
|
167
|
-
if
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
192
|
+
if (
|
|
193
|
+
self._data_source.timestamp_format == "iso"
|
|
194
|
+
or self._data_source.timestamp_format is None
|
|
195
|
+
):
|
|
196
|
+
timestamp = isoparse(row[self._timestamp_column_index])
|
|
171
197
|
else:
|
|
172
198
|
timestamp = datetime.strptime(
|
|
173
199
|
row[self._timestamp_column_index],
|
|
174
|
-
self._data_source.timestamp_format
|
|
200
|
+
self._data_source.timestamp_format,
|
|
175
201
|
)
|
|
176
202
|
except ValueError as e:
|
|
177
203
|
raise TimestampParsingError(str(e)) from e
|
|
178
204
|
|
|
179
205
|
if timestamp.tzinfo is None:
|
|
180
206
|
if not self._data_source.timestamp_offset:
|
|
181
|
-
timestamp = timestamp.replace(
|
|
182
|
-
tzinfo=timezone.utc
|
|
183
|
-
)
|
|
207
|
+
timestamp = timestamp.replace(tzinfo=timezone.utc)
|
|
184
208
|
else:
|
|
185
209
|
try:
|
|
186
210
|
timestamp = timestamp.replace(
|
|
187
211
|
tzinfo=datetime.strptime(
|
|
188
|
-
self._data_source.timestamp_offset[:-2]
|
|
212
|
+
self._data_source.timestamp_offset[:-2]
|
|
213
|
+
+ ":"
|
|
214
|
+
+ self._data_source.timestamp_offset[3:],
|
|
215
|
+
"%z",
|
|
189
216
|
).tzinfo
|
|
190
217
|
)
|
|
191
218
|
except ValueError as e:
|
|
192
|
-
logger.error(
|
|
219
|
+
logger.error(
|
|
220
|
+
f'Failed to load data from data source: "{self._data_source.name}"'
|
|
221
|
+
)
|
|
193
222
|
raise TimestampParsingError(str(e)) from e
|
|
194
223
|
|
|
195
224
|
return timestamp
|
|
@@ -213,47 +242,41 @@ class HydroServerETL:
|
|
|
213
242
|
if datastream_id not in self._failed_datastreams and len(observations) > 0:
|
|
214
243
|
|
|
215
244
|
logger.info(
|
|
216
|
-
f
|
|
217
|
-
f'{observations[0]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} to '
|
|
218
|
-
f'{observations[-1]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} for datastream: '
|
|
219
|
-
f'{str(datastream_id)} in data source "{self._data_source.name}".'
|
|
245
|
+
f"Loading observations from "
|
|
246
|
+
+ f'{observations[0]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} to '
|
|
247
|
+
+ f'{observations[-1]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} for datastream: '
|
|
248
|
+
+ f'{str(datastream_id)} in data source "{self._data_source.name}".'
|
|
220
249
|
)
|
|
221
250
|
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
for observation in observations:
|
|
231
|
-
data_array_value.add_observation(fsc.Observation(
|
|
232
|
-
phenomenon_time=observation['phenomenon_time'].strftime('%Y-%m-%dT%H:%M:%S%z'),
|
|
233
|
-
result=observation['result'],
|
|
234
|
-
datastream=datastream
|
|
235
|
-
))
|
|
236
|
-
|
|
237
|
-
data_array_document = getattr(fsc.model, 'ext').data_array_document.DataArrayDocument()
|
|
238
|
-
data_array_document.add_data_array_value(data_array_value)
|
|
251
|
+
observations_df = pd.DataFrame(
|
|
252
|
+
[
|
|
253
|
+
[observation["phenomenon_time"], observation["result"]]
|
|
254
|
+
for observation in observations
|
|
255
|
+
],
|
|
256
|
+
columns=["timestamp", "value"],
|
|
257
|
+
)
|
|
239
258
|
|
|
240
259
|
try:
|
|
241
|
-
self._service.
|
|
242
|
-
|
|
260
|
+
self._service.datastreams.load_observations(
|
|
261
|
+
uid=datastream_id,
|
|
262
|
+
observations=observations_df,
|
|
263
|
+
)
|
|
264
|
+
except HTTPError:
|
|
243
265
|
failed_datastreams.append(datastream_id)
|
|
244
266
|
|
|
245
267
|
if not self._last_loaded_timestamp or (
|
|
246
|
-
|
|
247
|
-
|
|
268
|
+
observations[-1]["phenomenon_time"]
|
|
269
|
+
and observations[-1]["phenomenon_time"]
|
|
270
|
+
> self._last_loaded_timestamp
|
|
248
271
|
):
|
|
249
|
-
self._last_loaded_timestamp = observations[-1][
|
|
272
|
+
self._last_loaded_timestamp = observations[-1]["phenomenon_time"]
|
|
250
273
|
elif datastream_id in self._failed_datastreams:
|
|
251
274
|
logger.info(
|
|
252
|
-
f
|
|
253
|
-
f'{observations[0]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} to '
|
|
254
|
-
f'{observations[-1]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} for datastream: '
|
|
255
|
-
f'{str(datastream_id)} in data source "{self._data_source.name}",'
|
|
256
|
-
f
|
|
275
|
+
f"Skipping observations POST request from "
|
|
276
|
+
+ f'{observations[0]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} to '
|
|
277
|
+
+ f'{observations[-1]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} for datastream: '
|
|
278
|
+
+ f'{str(datastream_id)} in data source "{self._data_source.name}",'
|
|
279
|
+
+ f"due to previous failed POST request."
|
|
257
280
|
)
|
|
258
281
|
|
|
259
282
|
self._observations = {}
|
|
@@ -270,28 +293,28 @@ class HydroServerETL:
|
|
|
270
293
|
|
|
271
294
|
if self._data_source.crontab is not None:
|
|
272
295
|
next_sync = croniter.croniter(
|
|
273
|
-
self._data_source.crontab,
|
|
274
|
-
datetime.now()
|
|
296
|
+
self._data_source.crontab, datetime.now()
|
|
275
297
|
).get_next(datetime)
|
|
276
|
-
elif
|
|
298
|
+
elif (
|
|
299
|
+
self._data_source.interval is not None
|
|
300
|
+
and self._data_source.interval_units is not None
|
|
301
|
+
):
|
|
277
302
|
next_sync = datetime.now() + timedelta(
|
|
278
303
|
**{self._data_source.interval_units: self._data_source.interval}
|
|
279
304
|
)
|
|
280
305
|
else:
|
|
281
306
|
next_sync = None
|
|
282
307
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
)
|
|
289
|
-
|
|
290
|
-
last_synced=datetime.now(timezone.utc),
|
|
291
|
-
next_sync=next_sync
|
|
308
|
+
self._data_source.data_source_thru = self._last_loaded_timestamp
|
|
309
|
+
self._data_source.last_sync_successful = (
|
|
310
|
+
True
|
|
311
|
+
if not self._file_timestamp_error
|
|
312
|
+
and not self._file_header_error
|
|
313
|
+
and len(self._failed_datastreams) == 0
|
|
314
|
+
else False
|
|
292
315
|
)
|
|
316
|
+
self._data_source.last_sync_message = self._message
|
|
317
|
+
self._data_source.last_synced = datetime.now(timezone.utc)
|
|
318
|
+
self._data_source.next_sync = next_sync
|
|
293
319
|
|
|
294
|
-
self.
|
|
295
|
-
data_source_id=str(self._data_source.id),
|
|
296
|
-
data_source_body=updated_data_source
|
|
297
|
-
)
|
|
320
|
+
self._data_source.save()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .service import HydroServerQualityControl, TimeUnit, FilterOperation, Operator
|