hydroserverpy 0.2.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hydroserverpy might be problematic. Click here for more details.

Files changed (77):
  1. hydroserverpy/__init__.py +6 -15
  2. hydroserverpy/core/endpoints/__init__.py +9 -0
  3. hydroserverpy/core/endpoints/base.py +146 -0
  4. hydroserverpy/core/endpoints/data_loaders.py +93 -0
  5. hydroserverpy/core/endpoints/data_sources.py +93 -0
  6. hydroserverpy/core/endpoints/datastreams.py +225 -0
  7. hydroserverpy/core/endpoints/observed_properties.py +111 -0
  8. hydroserverpy/core/endpoints/processing_levels.py +111 -0
  9. hydroserverpy/core/endpoints/result_qualifiers.py +111 -0
  10. hydroserverpy/core/endpoints/sensors.py +111 -0
  11. hydroserverpy/core/endpoints/things.py +261 -0
  12. hydroserverpy/core/endpoints/units.py +111 -0
  13. hydroserverpy/{components → core/schemas}/__init__.py +1 -2
  14. hydroserverpy/core/schemas/base.py +124 -0
  15. hydroserverpy/core/schemas/data_loaders.py +73 -0
  16. hydroserverpy/core/schemas/data_sources.py +223 -0
  17. hydroserverpy/core/schemas/datastreams.py +330 -0
  18. hydroserverpy/core/schemas/observed_properties.py +43 -0
  19. hydroserverpy/core/schemas/processing_levels.py +31 -0
  20. hydroserverpy/core/schemas/result_qualifiers.py +26 -0
  21. hydroserverpy/core/schemas/sensors.py +68 -0
  22. hydroserverpy/core/schemas/things.py +346 -0
  23. hydroserverpy/core/schemas/units.py +29 -0
  24. hydroserverpy/core/service.py +200 -0
  25. hydroserverpy/etl/__init__.py +21 -0
  26. hydroserverpy/etl/extractors/__init__.py +0 -0
  27. hydroserverpy/etl/extractors/base.py +13 -0
  28. hydroserverpy/etl/extractors/ftp_extractor.py +50 -0
  29. hydroserverpy/etl/extractors/http_extractor.py +84 -0
  30. hydroserverpy/etl/extractors/local_file_extractor.py +25 -0
  31. hydroserverpy/etl/hydroserver_etl.py +40 -0
  32. hydroserverpy/etl/loaders/__init__.py +0 -0
  33. hydroserverpy/etl/loaders/base.py +13 -0
  34. hydroserverpy/etl/loaders/hydroserver_loader.py +68 -0
  35. hydroserverpy/etl/transformers/__init__.py +0 -0
  36. hydroserverpy/etl/transformers/base.py +52 -0
  37. hydroserverpy/etl/transformers/csv_transformer.py +88 -0
  38. hydroserverpy/etl/transformers/json_transformer.py +62 -0
  39. hydroserverpy/etl/types.py +7 -0
  40. hydroserverpy/etl_csv/__init__.py +0 -0
  41. hydroserverpy/{etl.py → etl_csv/hydroserver_etl_csv.py} +118 -95
  42. hydroserverpy/quality/__init__.py +1 -0
  43. hydroserverpy/quality/service.py +405 -0
  44. hydroserverpy-0.4.0.dist-info/METADATA +18 -0
  45. hydroserverpy-0.4.0.dist-info/RECORD +51 -0
  46. {hydroserverpy-0.2.5.dist-info → hydroserverpy-0.4.0.dist-info}/WHEEL +1 -1
  47. hydroserverpy/components/data_loaders.py +0 -67
  48. hydroserverpy/components/data_sources.py +0 -98
  49. hydroserverpy/components/datastreams.py +0 -47
  50. hydroserverpy/components/observed_properties.py +0 -48
  51. hydroserverpy/components/processing_levels.py +0 -48
  52. hydroserverpy/components/result_qualifiers.py +0 -48
  53. hydroserverpy/components/sensors.py +0 -48
  54. hydroserverpy/components/things.py +0 -48
  55. hydroserverpy/components/units.py +0 -48
  56. hydroserverpy/components/users.py +0 -28
  57. hydroserverpy/main.py +0 -62
  58. hydroserverpy/models.py +0 -218
  59. hydroserverpy/schemas/data_loaders.py +0 -27
  60. hydroserverpy/schemas/data_sources.py +0 -58
  61. hydroserverpy/schemas/datastreams.py +0 -56
  62. hydroserverpy/schemas/observed_properties.py +0 -33
  63. hydroserverpy/schemas/processing_levels.py +0 -33
  64. hydroserverpy/schemas/result_qualifiers.py +0 -32
  65. hydroserverpy/schemas/sensors.py +0 -39
  66. hydroserverpy/schemas/things.py +0 -107
  67. hydroserverpy/schemas/units.py +0 -32
  68. hydroserverpy/schemas/users.py +0 -28
  69. hydroserverpy/service.py +0 -170
  70. hydroserverpy/utils.py +0 -37
  71. hydroserverpy-0.2.5.dist-info/METADATA +0 -15
  72. hydroserverpy-0.2.5.dist-info/RECORD +0 -35
  73. /hydroserverpy/{schemas → core}/__init__.py +0 -0
  74. /hydroserverpy/{exceptions.py → etl_csv/exceptions.py} +0 -0
  75. {hydroserverpy-0.2.5.dist-info → hydroserverpy-0.4.0.dist-info}/LICENSE +0 -0
  76. {hydroserverpy-0.2.5.dist-info → hydroserverpy-0.4.0.dist-info}/top_level.txt +0 -0
  77. {hydroserverpy-0.2.5.dist-info → hydroserverpy-0.4.0.dist-info}/zip-safe +0 -0
@@ -1,33 +1,39 @@
1
1
  import csv
2
2
  import logging
3
- import frost_sta_client as fsc
4
3
  import croniter
5
- from typing import IO, List
4
+ import pandas as pd
5
+ from typing import IO, List, TYPE_CHECKING
6
+ from requests import HTTPError
6
7
  from datetime import datetime, timezone, timedelta
7
8
  from dateutil.parser import isoparse
8
- from .schemas.data_sources import DataSourceGetResponse
9
- from .schemas.datastreams import DatastreamGetResponse
10
9
  from .exceptions import HeaderParsingError, TimestampParsingError
11
- from .schemas.data_sources import DataSourcePatchBody
10
+ import warnings
12
11
 
13
- logger = logging.getLogger('hydroserver_etl')
12
+ if TYPE_CHECKING:
13
+ from ..core.schemas import DataSource
14
+
15
+ logger = logging.getLogger("hydroserver_etl")
14
16
  logger.addHandler(logging.NullHandler())
15
17
 
16
18
 
17
- class HydroServerETL:
19
+ class HydroServerETLCSV:
18
20
 
19
21
  def __init__(
20
- self,
21
- service,
22
- data_file: IO[str],
23
- data_source: DataSourceGetResponse,
24
- datastreams: List[DatastreamGetResponse]
22
+ self,
23
+ service,
24
+ data_file: IO[str],
25
+ data_source: "DataSource",
25
26
  ):
27
+ warnings.warn(
28
+ "HydroServerETLCSV is deprecated and will be removed in a future version. "
29
+ "Please use the new HydroServerETL class.",
30
+ DeprecationWarning,
31
+ )
26
32
  self._service = service
27
33
  self._data_file = data_file
28
34
  self._data_source = data_source
29
35
  self._datastreams = {
30
- datastream.id: datastream for datastream in datastreams
36
+ datastream.uid: datastream for datastream in data_source.datastreams
31
37
  }
32
38
 
33
39
  self._timestamp_column_index = None
@@ -66,12 +72,12 @@ class HydroServerETL:
66
72
  self._failed_datastreams.extend(self._post_observations())
67
73
 
68
74
  except HeaderParsingError as e:
69
- self._message = f'Failed to parse header for {self._data_source.name} with error: {str(e)}'
75
+ self._message = f"Failed to parse header for {self._data_source.name} with error: {str(e)}"
70
76
  logger.error(self._message)
71
77
  self._file_header_error = True
72
78
 
73
79
  except TimestampParsingError as e:
74
- self._message = f'Failed to parse one or more timestamps for {self._data_source.name} with error: {str(e)}'
80
+ self._message = f"Failed to parse one or more timestamps for {self._data_source.name} with error: {str(e)}"
75
81
  logger.error(self._message)
76
82
  self._file_timestamp_error = True
77
83
 
@@ -79,7 +85,7 @@ class HydroServerETL:
79
85
  self._failed_datastreams.extend(self._post_observations())
80
86
 
81
87
  if not self._message and len(self._failed_datastreams) > 0:
82
- self._message = f'One or more datastreams failed to sync with HydroServer for {self._data_source.name}.'
88
+ self._message = f"One or more datastreams failed to sync with HydroServer for {self._data_source.name}."
83
89
 
84
90
  self._update_data_source()
85
91
 
@@ -99,7 +105,8 @@ class HydroServerETL:
99
105
  """
100
106
 
101
107
  if index == self._data_source.header_row or (
102
- index == self._data_source.data_start_row and self._timestamp_column_index is None
108
+ index == self._data_source.data_start_row
109
+ and self._timestamp_column_index is None
103
110
  ):
104
111
  self._parse_file_header(row)
105
112
 
@@ -109,19 +116,30 @@ class HydroServerETL:
109
116
  timestamp = self._parse_row_timestamp(row)
110
117
 
111
118
  for datastream in self._datastreams.values():
112
- if str(datastream.id) not in self._datastream_start_row_indexes.keys():
113
- if not datastream.phenomenon_end_time or timestamp > datastream.phenomenon_end_time:
114
- self._datastream_start_row_indexes[str(datastream.id)] = index
115
-
116
- if str(datastream.id) in self._datastream_start_row_indexes.keys() \
117
- and self._datastream_start_row_indexes[str(datastream.id)] <= index:
118
- if str(datastream.id) not in self._observations.keys():
119
- self._observations[str(datastream.id)] = []
120
-
121
- self._observations[str(datastream.id)].append({
122
- 'phenomenon_time': timestamp,
123
- 'result': row[self._datastream_column_indexes[datastream.data_source_column]]
124
- })
119
+ if str(datastream.uid) not in self._datastream_start_row_indexes.keys():
120
+ if (
121
+ not datastream.phenomenon_end_time
122
+ or timestamp > datastream.phenomenon_end_time
123
+ ):
124
+ self._datastream_start_row_indexes[str(datastream.uid)] = index
125
+
126
+ if (
127
+ str(datastream.uid) in self._datastream_start_row_indexes.keys()
128
+ and self._datastream_start_row_indexes[str(datastream.uid)] <= index
129
+ ):
130
+ if str(datastream.uid) not in self._observations.keys():
131
+ self._observations[str(datastream.uid)] = []
132
+
133
+ self._observations[str(datastream.uid)].append(
134
+ {
135
+ "phenomenon_time": timestamp,
136
+ "result": row[
137
+ self._datastream_column_indexes[
138
+ datastream.data_source_column
139
+ ]
140
+ ],
141
+ }
142
+ )
125
143
 
126
144
  def _parse_file_header(self, row: List[str]) -> None:
127
145
  """
@@ -136,22 +154,29 @@ class HydroServerETL:
136
154
  """
137
155
 
138
156
  try:
139
- self._timestamp_column_index = row.index(self._data_source.timestamp_column) \
140
- if isinstance(self._data_source.timestamp_column, str) \
157
+ self._timestamp_column_index = (
158
+ row.index(self._data_source.timestamp_column)
159
+ if isinstance(self._data_source.timestamp_column, str)
141
160
  else int(self._data_source.timestamp_column) - 1
161
+ )
142
162
  if self._timestamp_column_index > len(row):
143
163
  raise ValueError
144
164
  self._datastream_column_indexes = {
145
- datastream.data_source_column: row.index(datastream.data_source_column)
146
- if not datastream.data_source_column.isdigit()
147
- else int(datastream.data_source_column) - 1
165
+ datastream.data_source_column: (
166
+ row.index(datastream.data_source_column)
167
+ if not datastream.data_source_column.isdigit()
168
+ else int(datastream.data_source_column) - 1
169
+ )
148
170
  for datastream in self._datastreams.values()
149
171
  }
150
- if len(self._datastream_column_indexes.values()) > 0 and \
151
- max(self._datastream_column_indexes.values()) > len(row):
172
+ if len(self._datastream_column_indexes.values()) > 0 and max(
173
+ self._datastream_column_indexes.values()
174
+ ) > len(row):
152
175
  raise ValueError
153
176
  except ValueError as e:
154
- logger.error(f'Failed to load data from data source: "{self._data_source.name}"')
177
+ logger.error(
178
+ f'Failed to load data from data source: "{self._data_source.name}"'
179
+ )
155
180
  raise HeaderParsingError(str(e)) from e
156
181
 
157
182
  def _parse_row_timestamp(self, row: List[str]) -> datetime:
@@ -164,32 +189,36 @@ class HydroServerETL:
164
189
  """
165
190
 
166
191
  try:
167
- if self._data_source.timestamp_format == 'iso' or self._data_source.timestamp_format is None:
168
- timestamp = isoparse(
169
- row[self._timestamp_column_index]
170
- )
192
+ if (
193
+ self._data_source.timestamp_format == "iso"
194
+ or self._data_source.timestamp_format is None
195
+ ):
196
+ timestamp = isoparse(row[self._timestamp_column_index])
171
197
  else:
172
198
  timestamp = datetime.strptime(
173
199
  row[self._timestamp_column_index],
174
- self._data_source.timestamp_format
200
+ self._data_source.timestamp_format,
175
201
  )
176
202
  except ValueError as e:
177
203
  raise TimestampParsingError(str(e)) from e
178
204
 
179
205
  if timestamp.tzinfo is None:
180
206
  if not self._data_source.timestamp_offset:
181
- timestamp = timestamp.replace(
182
- tzinfo=timezone.utc
183
- )
207
+ timestamp = timestamp.replace(tzinfo=timezone.utc)
184
208
  else:
185
209
  try:
186
210
  timestamp = timestamp.replace(
187
211
  tzinfo=datetime.strptime(
188
- self._data_source.timestamp_offset[:-2] + ':' + self._data_source.timestamp_offset[3:], '%z'
212
+ self._data_source.timestamp_offset[:-2]
213
+ + ":"
214
+ + self._data_source.timestamp_offset[3:],
215
+ "%z",
189
216
  ).tzinfo
190
217
  )
191
218
  except ValueError as e:
192
- logger.error(f'Failed to load data from data source: "{self._data_source.name}"')
219
+ logger.error(
220
+ f'Failed to load data from data source: "{self._data_source.name}"'
221
+ )
193
222
  raise TimestampParsingError(str(e)) from e
194
223
 
195
224
  return timestamp
@@ -213,47 +242,41 @@ class HydroServerETL:
213
242
  if datastream_id not in self._failed_datastreams and len(observations) > 0:
214
243
 
215
244
  logger.info(
216
- f'Loading observations from ' +
217
- f'{observations[0]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} to ' +
218
- f'{observations[-1]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} for datastream: ' +
219
- f'{str(datastream_id)} in data source "{self._data_source.name}".'
245
+ f"Loading observations from "
246
+ + f'{observations[0]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} to '
247
+ + f'{observations[-1]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} for datastream: '
248
+ + f'{str(datastream_id)} in data source "{self._data_source.name}".'
220
249
  )
221
250
 
222
- data_array_value = getattr(fsc.model, 'ext').data_array_value.DataArrayValue()
223
-
224
- datastream = fsc.Datastream(id=datastream_id)
225
- components = {data_array_value.Property.PHENOMENON_TIME, data_array_value.Property.RESULT}
226
-
227
- data_array_value.datastream = datastream
228
- data_array_value.components = components
229
-
230
- for observation in observations:
231
- data_array_value.add_observation(fsc.Observation(
232
- phenomenon_time=observation['phenomenon_time'].strftime('%Y-%m-%dT%H:%M:%S%z'),
233
- result=observation['result'],
234
- datastream=datastream
235
- ))
236
-
237
- data_array_document = getattr(fsc.model, 'ext').data_array_document.DataArrayDocument()
238
- data_array_document.add_data_array_value(data_array_value)
251
+ observations_df = pd.DataFrame(
252
+ [
253
+ [observation["phenomenon_time"], observation["result"]]
254
+ for observation in observations
255
+ ],
256
+ columns=["timestamp", "value"],
257
+ )
239
258
 
240
259
  try:
241
- self._service.sensorthings.observations().create(data_array_document)
242
- except KeyError:
260
+ self._service.datastreams.load_observations(
261
+ uid=datastream_id,
262
+ observations=observations_df,
263
+ )
264
+ except HTTPError:
243
265
  failed_datastreams.append(datastream_id)
244
266
 
245
267
  if not self._last_loaded_timestamp or (
246
- observations[-1]['phenomenon_time'] and observations[-1]['phenomenon_time'] >
247
- self._last_loaded_timestamp
268
+ observations[-1]["phenomenon_time"]
269
+ and observations[-1]["phenomenon_time"]
270
+ > self._last_loaded_timestamp
248
271
  ):
249
- self._last_loaded_timestamp = observations[-1]['phenomenon_time']
272
+ self._last_loaded_timestamp = observations[-1]["phenomenon_time"]
250
273
  elif datastream_id in self._failed_datastreams:
251
274
  logger.info(
252
- f'Skipping observations POST request from ' +
253
- f'{observations[0]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} to ' +
254
- f'{observations[-1]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} for datastream: ' +
255
- f'{str(datastream_id)} in data source "{self._data_source.name}",' +
256
- f'due to previous failed POST request.'
275
+ f"Skipping observations POST request from "
276
+ + f'{observations[0]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} to '
277
+ + f'{observations[-1]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} for datastream: '
278
+ + f'{str(datastream_id)} in data source "{self._data_source.name}",'
279
+ + f"due to previous failed POST request."
257
280
  )
258
281
 
259
282
  self._observations = {}
@@ -270,28 +293,28 @@ class HydroServerETL:
270
293
 
271
294
  if self._data_source.crontab is not None:
272
295
  next_sync = croniter.croniter(
273
- self._data_source.crontab,
274
- datetime.now()
296
+ self._data_source.crontab, datetime.now()
275
297
  ).get_next(datetime)
276
- elif self._data_source.interval is not None and self._data_source.interval_units is not None:
298
+ elif (
299
+ self._data_source.interval is not None
300
+ and self._data_source.interval_units is not None
301
+ ):
277
302
  next_sync = datetime.now() + timedelta(
278
303
  **{self._data_source.interval_units: self._data_source.interval}
279
304
  )
280
305
  else:
281
306
  next_sync = None
282
307
 
283
- updated_data_source = DataSourcePatchBody(
284
- data_source_thru=self._last_loaded_timestamp,
285
- last_sync_successful=(
286
- True if not self._file_timestamp_error and not self._file_header_error
287
- and len(self._failed_datastreams) == 0 else False
288
- ),
289
- last_sync_message=self._message,
290
- last_synced=datetime.now(timezone.utc),
291
- next_sync=next_sync
308
+ self._data_source.data_source_thru = self._last_loaded_timestamp
309
+ self._data_source.last_sync_successful = (
310
+ True
311
+ if not self._file_timestamp_error
312
+ and not self._file_header_error
313
+ and len(self._failed_datastreams) == 0
314
+ else False
292
315
  )
316
+ self._data_source.last_sync_message = self._message
317
+ self._data_source.last_synced = datetime.now(timezone.utc)
318
+ self._data_source.next_sync = next_sync
293
319
 
294
- self._service.data_sources.update(
295
- data_source_id=str(self._data_source.id),
296
- data_source_body=updated_data_source
297
- )
320
+ self._data_source.save()
@@ -0,0 +1 @@
1
+ from .service import HydroServerQualityControl, TimeUnit, FilterOperation, Operator