hydroserverpy 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff shows the contents of two publicly released versions of the package as they appear in their public registries, and is provided for informational purposes only.
Potentially problematic release: this version of hydroserverpy might be problematic.
- hydroserverpy/__init__.py +6 -15
- hydroserverpy/core/endpoints/__init__.py +9 -0
- hydroserverpy/core/endpoints/base.py +133 -0
- hydroserverpy/core/endpoints/data_loaders.py +92 -0
- hydroserverpy/core/endpoints/data_sources.py +92 -0
- hydroserverpy/core/endpoints/datastreams.py +188 -0
- hydroserverpy/core/endpoints/observed_properties.py +93 -0
- hydroserverpy/core/endpoints/processing_levels.py +93 -0
- hydroserverpy/core/endpoints/result_qualifiers.py +93 -0
- hydroserverpy/core/endpoints/sensors.py +93 -0
- hydroserverpy/core/endpoints/things.py +240 -0
- hydroserverpy/core/endpoints/units.py +93 -0
- hydroserverpy/{components → core/schemas}/__init__.py +1 -2
- hydroserverpy/core/schemas/base.py +117 -0
- hydroserverpy/core/schemas/data_loaders.py +71 -0
- hydroserverpy/core/schemas/data_sources.py +206 -0
- hydroserverpy/core/schemas/datastreams.py +299 -0
- hydroserverpy/core/schemas/observed_properties.py +35 -0
- hydroserverpy/core/schemas/processing_levels.py +27 -0
- hydroserverpy/core/schemas/result_qualifiers.py +23 -0
- hydroserverpy/core/schemas/sensors.py +53 -0
- hydroserverpy/core/schemas/things.py +309 -0
- hydroserverpy/core/schemas/units.py +30 -0
- hydroserverpy/core/service.py +186 -0
- hydroserverpy/etl/__init__.py +0 -0
- hydroserverpy/{etl.py → etl/service.py} +32 -47
- hydroserverpy/quality/__init__.py +1 -0
- hydroserverpy/quality/service.py +391 -0
- {hydroserverpy-0.2.3.dist-info → hydroserverpy-0.3.0.dist-info}/METADATA +6 -3
- hydroserverpy-0.3.0.dist-info/RECORD +36 -0
- {hydroserverpy-0.2.3.dist-info → hydroserverpy-0.3.0.dist-info}/WHEEL +1 -1
- hydroserverpy/components/data_loaders.py +0 -67
- hydroserverpy/components/data_sources.py +0 -98
- hydroserverpy/components/datastreams.py +0 -47
- hydroserverpy/components/observed_properties.py +0 -48
- hydroserverpy/components/processing_levels.py +0 -48
- hydroserverpy/components/result_qualifiers.py +0 -48
- hydroserverpy/components/sensors.py +0 -48
- hydroserverpy/components/things.py +0 -48
- hydroserverpy/components/units.py +0 -48
- hydroserverpy/components/users.py +0 -28
- hydroserverpy/main.py +0 -62
- hydroserverpy/models.py +0 -218
- hydroserverpy/schemas/data_loaders.py +0 -27
- hydroserverpy/schemas/data_sources.py +0 -58
- hydroserverpy/schemas/datastreams.py +0 -56
- hydroserverpy/schemas/observed_properties.py +0 -33
- hydroserverpy/schemas/processing_levels.py +0 -33
- hydroserverpy/schemas/result_qualifiers.py +0 -32
- hydroserverpy/schemas/sensors.py +0 -39
- hydroserverpy/schemas/things.py +0 -108
- hydroserverpy/schemas/units.py +0 -32
- hydroserverpy/schemas/users.py +0 -28
- hydroserverpy/service.py +0 -170
- hydroserverpy/utils.py +0 -37
- hydroserverpy-0.2.3.dist-info/RECORD +0 -35
- /hydroserverpy/{schemas → core}/__init__.py +0 -0
- /hydroserverpy/{exceptions.py → etl/exceptions.py} +0 -0
- {hydroserverpy-0.2.3.dist-info → hydroserverpy-0.3.0.dist-info}/LICENSE +0 -0
- {hydroserverpy-0.2.3.dist-info → hydroserverpy-0.3.0.dist-info}/top_level.txt +0 -0
- {hydroserverpy-0.2.3.dist-info → hydroserverpy-0.3.0.dist-info}/zip-safe +0 -0
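
Taken together, the file list above shows the 0.3.0 restructuring: the flat components/, schemas/, main.py, and service.py modules are replaced by a core package (service, endpoints, schemas), the ETL code moves into an etl package, and a new quality package is added. A minimal usage sketch of the reorganized client follows; the HydroServer entry point and its constructor arguments are assumptions, since core/service.py itself is not shown in this diff:

    # Hypothetical sketch; the HydroServer name and its host/username/password
    # parameters are assumptions not confirmed by the hunks below.
    from hydroserverpy import HydroServer

    service = HydroServer(
        host='https://example.hydroserver.org',
        username='user@example.com',
        password='********',
    )

    # Endpoint collections mirror core/endpoints/*; the ETL hunks below call
    # service.datastreams.load_observations(...) on one of them.
    datastream = service.datastreams.get(uid='...')  # assumed accessor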
hydroserverpy/{etl.py → etl/service.py}

@@ -1,14 +1,15 @@
 import csv
 import logging
-import frost_sta_client as fsc
 import croniter
-
+import pandas as pd
+from typing import IO, List, TYPE_CHECKING
+from requests import HTTPError
 from datetime import datetime, timezone, timedelta
 from dateutil.parser import isoparse
-from .schemas.data_sources import DataSourceGetResponse
-from .schemas.datastreams import DatastreamGetResponse
 from .exceptions import HeaderParsingError, TimestampParsingError
-
+
+if TYPE_CHECKING:
+    from ..core.schemas import DataSource
 
 logger = logging.getLogger('hydroserver_etl')
 logger.addHandler(logging.NullHandler())
@@ -20,14 +21,13 @@ class HydroServerETL:
         self,
         service,
         data_file: IO[str],
-        data_source: DataSourceGetResponse,
-        datastreams: List[DatastreamGetResponse]
+        data_source: 'DataSource',
     ):
         self._service = service
         self._data_file = data_file
         self._data_source = data_source
         self._datastreams = {
-            datastream.id: datastream for datastream in datastreams
+            datastream.uid: datastream for datastream in data_source.datastreams
         }
 
         self._timestamp_column_index = None
@@ -109,16 +109,16 @@ class HydroServerETL:
             timestamp = self._parse_row_timestamp(row)
 
             for datastream in self._datastreams.values():
-                if str(datastream.id) not in self._datastream_start_row_indexes.keys():
+                if str(datastream.uid) not in self._datastream_start_row_indexes.keys():
                     if not datastream.phenomenon_end_time or timestamp > datastream.phenomenon_end_time:
-                        self._datastream_start_row_indexes[str(datastream.id)] = index
+                        self._datastream_start_row_indexes[str(datastream.uid)] = index
 
-                if str(datastream.id) in self._datastream_start_row_indexes.keys() \
-                        and self._datastream_start_row_indexes[str(datastream.id)] <= index:
-                    if str(datastream.id) not in self._observations.keys():
-                        self._observations[str(datastream.id)] = []
+                if str(datastream.uid) in self._datastream_start_row_indexes.keys() \
+                        and self._datastream_start_row_indexes[str(datastream.uid)] <= index:
+                    if str(datastream.uid) not in self._observations.keys():
+                        self._observations[str(datastream.uid)] = []
 
-                    self._observations[str(datastream.id)].append({
+                    self._observations[str(datastream.uid)].append({
                         'phenomenon_time': timestamp,
                         'result': row[self._datastream_column_indexes[datastream.data_source_column]]
                     })
@@ -219,27 +219,17 @@ class HydroServerETL:
                 f'{str(datastream_id)} in data source "{self._data_source.name}".'
             )
 
-
-
-
-
-
-            data_array_value.datastream = datastream
-            data_array_value.components = components
-
-            for observation in observations:
-                data_array_value.add_observation(fsc.Observation(
-                    phenomenon_time=observation['phenomenon_time'].strftime('%Y-%m-%dT%H:%M:%S%z'),
-                    result=observation['result'],
-                    datastream=datastream
-                ))
-
-            data_array_document = getattr(fsc.model, 'ext').data_array_document.DataArrayDocument()
-            data_array_document.add_data_array_value(data_array_value)
+            observations_df = pd.DataFrame(
+                [[observation['phenomenon_time'], observation['result']] for observation in observations],
+                columns=['timestamp', 'value']
+            )
 
             try:
-                self._service.
-
+                self._service.datastreams.load_observations(
+                    uid=datastream_id,
+                    observations=observations_df,
+                )
+            except HTTPError:
                 failed_datastreams.append(datastream_id)
 
             if not self._last_loaded_timestamp or (
@@ -280,18 +270,13 @@
         else:
            next_sync = None
 
-
-
-
-
-                and len(self._failed_datastreams) == 0 else False
-            ),
-            last_sync_message=self._message,
-            last_synced=datetime.now(timezone.utc),
-            next_sync=next_sync
+        self._data_source.data_source_thru = self._last_loaded_timestamp
+        self._data_source.last_sync_successful = (
+            True if not self._file_timestamp_error and not self._file_header_error
+            and len(self._failed_datastreams) == 0 else False
         )
+        self._data_source.last_sync_message = self._message
+        self._data_source.last_synced = datetime.now(timezone.utc)
+        self._data_source.next_sync = next_sync
 
-        self.
-            data_source_id=str(self._data_source.id),
-            data_source_body=updated_data_source
-        )
+        self._data_source.save()
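
As the etl/service.py hunks above show, 0.3.0 drops the frost_sta_client data-array upload path: parsed rows are collected into a pandas DataFrame with 'timestamp' and 'value' columns, posted through service.datastreams.load_observations, and sync state is persisted with data_source.save(). A minimal sketch of that loading call from user code, assuming the hypothetical connected service client from the sketch above:

    import pandas as pd

    # Two-column frame matching the shape built in the hunk above.
    observations_df = pd.DataFrame(
        [['2024-01-01 00:00:00', 4.2], ['2024-01-01 00:15:00', 4.3]],
        columns=['timestamp', 'value'],
    )
    observations_df['timestamp'] = pd.to_datetime(observations_df['timestamp'])

    # Method name and uid/observations parameters are taken verbatim from the
    # diff; the datastream UUID here is a placeholder.
    service.datastreams.load_observations(
        uid='11111111-1111-1111-1111-111111111111',
        observations=observations_df,
    )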
hydroserverpy/quality/__init__.py

@@ -0,0 +1 @@
+from .service import HydroServerQualityControl, TimeUnit, FilterOperation, Operator
hydroserverpy/quality/service.py

@@ -0,0 +1,391 @@
+import pandas as pd
+import numpy as np
+from uuid import UUID
+from typing import Union, Optional, List, Tuple, Dict
+from enum import Enum
+
+# Tools
+# [x] Interpolate
+# [x] Set a value to a constant
+# [x] Change value by applying arithmetic (+, -, *, /)
+# [x] Shift
+# [x] Drift correction (linear)
+# [x] Delete values
+# [x] Fill values
+
+# Automation
+# [x] Gap filling
+
+
+class TimeUnit(Enum):
+    """Enumeration for time units."""
+
+    SECOND = 's'
+    MINUTE = 'm'
+    HOUR = 'h'
+    DAY = 'D'
+    WEEK = 'W'
+    MONTH = 'M'
+    YEAR = 'Y'
+
+
+class FilterOperation(Enum):
+    """Enumeration for filter operations."""
+
+    LT = 'LT'
+    LTE = 'LTE'
+    GT = 'GT'
+    GTE = 'GTE'
+    E = 'E'
+
+
+class Operator(Enum):
+    """Enumeration for mathematical operations."""
+
+    MULT = 'MULT'
+    DIV = 'DIV'
+    ADD = 'ADD'
+    SUB = 'SUB'
+    ASSIGN = 'ASSIGN'
+
+
+class HydroServerQualityControl:
+    """
+    Quality control operations for HydroServer observations.
+
+    :param datastream_id: The ID of the datastream.
+    :type datastream_id: Union[UUID, str]
+    :param observations: DataFrame containing 'timestamp' and 'value' columns.
+    :type observations: pd.DataFrame
+    """
+
+    datastream_id: Union[UUID, str]
+
+    def __init__(self, datastream_id: Union[UUID, str], observations: pd.DataFrame) -> None:
+
+        assert 'timestamp' in observations.columns, "Observations must have a 'timestamp' column"
+        assert pd.api.types.is_datetime64_any_dtype(observations['timestamp']), \
+            "Observations 'timestamp' column must be of datetime type"
+
+        assert 'value' in observations.columns, "Observations must have a 'value' column"
+        assert pd.api.types.is_float_dtype(observations['value']), \
+            "Observations 'value' column must be of float type"
+
+        self.datastream_id = str(datastream_id)
+        self._df = observations
+        self._filtered_observations = None
+
+    @property
+    def observations(self) -> pd.DataFrame:
+        """
+        Returns the observations DataFrame, filtered if a filter has been applied.
+
+        :return: Observations DataFrame.
+        :rtype: pd.DataFrame
+        """
+
+        if self._filtered_observations is None:
+            return self._df
+
+        return self._filtered_observations
+
+    ###################
+    # Filters
+    ###################
+
+    @staticmethod
+    def _has_filter(data_filter: Dict[str, Union[float, int]], key: FilterOperation) -> bool:
+        """
+        Checks if a given filter operation exists in the filter dictionary.
+
+        :param data_filter: Dictionary containing the filters.
+        :type data_filter: Dict[str, Union[float, int]]
+        :param key: Filter operation to check for.
+        :type key: FilterOperation
+        :return: True if the filter operation exists, False otherwise.
+        :rtype: bool
+        """
+
+        return (
+            key.value in data_filter and
+            (isinstance(data_filter[key.value], float) or isinstance(data_filter[key.value], int))
+        )
+
+    def filter(self, data_filter: Dict[str, Union[float, int]]) -> None:
+        """
+        Executes the applied filters and returns the resulting DataFrame.
+
+        :param data_filter: Dictionary containing filter operations and their values.
+        :type data_filter: Dict[str, Union[float, int]]
+        """
+
+        query = []
+
+        if self._has_filter(data_filter, FilterOperation.LT):
+            query.append(
+                f'`value` < {data_filter[FilterOperation.LT.value]}')
+
+        if self._has_filter(data_filter, FilterOperation.LTE):
+            query.append(
+                f'`value` <= {data_filter[FilterOperation.LTE.value]}')
+
+        if self._has_filter(data_filter, FilterOperation.GT):
+            query.append(
+                f'`value` > {data_filter[FilterOperation.GT.value]}')
+
+        if self._has_filter(data_filter, FilterOperation.GTE):
+            query.append(
+                f'`value` >= {data_filter[FilterOperation.GTE.value]}')
+
+        if self._has_filter(data_filter, FilterOperation.E):
+            query.append(
+                f'`value` == {data_filter[FilterOperation.E.value]}')
+
+        if len(query):
+            self._filtered_observations = self._df.query(" | ".join(query))
+        else:
+            self._filtered_observations = None
+
+    ###################
+    # Gap Analysis
+    ###################
+
+    def find_gaps(self, time_value: int, time_unit: str) -> pd.DataFrame:
+        """
+        Identifies gaps in the observations based on the specified time value and unit.
+
+        :param time_value: The time value for detecting gaps.
+        :type time_value: int
+        :param time_unit: The unit of time (e.g., 's', 'm', 'h').
+        :type time_unit: str
+        :return: DataFrame containing the observations with gaps.
+        :rtype: pd.DataFrame
+        """
+
+        return self.observations[self._df['timestamp'].diff() > np.timedelta64(time_value, time_unit)]
+
+    def fill_gap(self, gap: Tuple[int, str], fill: Tuple[int, str], interpolate_values: bool) -> pd.DataFrame:
+        """
+        Fills identified gaps in the observations with placeholder values and optionally interpolates the values.
+
+        :param gap: Tuple containing the time value and unit for identifying gaps.
+        :type gap: Tuple[int, str]
+        :param fill: Tuple containing the time value and unit for filling gaps.
+        :type fill: Tuple[int, str]
+        :param interpolate_values: Whether to interpolate values for the filled gaps.
+        :type interpolate_values: bool
+        :return: DataFrame of points that filled the gaps.
+        :rtype: pd.DataFrame
+        """
+
+        gaps_df = self.find_gaps(gap[0], gap[1])
+        time_gap = np.timedelta64(fill[0], fill[1])
+        points = []
+        index = []
+        added_index = []
+
+        for gap_row in gaps_df.iterrows():
+            gap_end_index = gap_row[0]
+            gap_start_index = gap_end_index - 1
+
+            gap_start_date = self._df.iloc[gap_start_index]['timestamp']
+            gap_end_date = self._df.iloc[gap_end_index]['timestamp']
+
+            start = gap_start_date + time_gap
+            end = gap_end_date
+
+            # Annotate the points that will fill this gap
+            while start < end:
+                points.append([start, -9999])
+                index.append(gap_start_index)
+                start = start + time_gap
+
+                if interpolate_values:
+                    # Keep an index of the position where the points will end up
+                    added_index.append(gap_start_index + len(added_index) + 1)
+
+        self.add_points(points, index)
+
+        if interpolate_values:
+            self.interpolate(added_index)
+
+        # Return the list of points that filled the gaps
+        return pd.DataFrame(
+            points, columns=['timestamp', 'value']
+        )
+
+    ######################################
+    # Data point operations
+    ######################################
+
+    def add_points(self, points: List[List[Union[str, float]]], index: Optional[List[int]] = None) -> None:
+        """
+        Adds new points to the observations, optionally at specified indices.
+
+        :param points: List of points to be added.
+        :type points: List[List[Union[str, float]]]
+        :param index: Optional list of indices at which to insert the points.
+        :type index: Optional[List[int]]
+        """
+
+        # If an index list was provided, insert the points to the DataFrame at the corresponding index.
+        # We do this by creating a dictionary of slices where the key is the index to insert at, and the value is an
+        # array of points to insert at that index
+        # We iterate through the dictionary keys in reverse order, so that we can insert without altering the position
+        # of elements before
+        if index is not None:
+            # This is the most efficient way to insert into a DataFrame for a large dataset.
+
+            # create a dictionary of points to insert at each index
+            slices = {}
+            for idx, value in enumerate(index):
+                if value not in slices:
+                    slices[value] = []
+
+                slices[value].append(points[idx])
+
+            for s in sorted(slices.items(), reverse=True):
+                # Split DataFrame and insert new row.
+                idx = s[0] + 1
+                val = s[1]
+                df1 = self._df.iloc[:idx, :]
+                df2 = self._df.iloc[idx:, :]
+
+                points_df = pd.DataFrame(
+                    val, columns=['timestamp', 'value'])
+                self._df = pd.concat([df1, points_df, df2]).reset_index(drop=True)
+
+        else:
+            # This way of inserting is not as efficient, but performance should be good enough given that the existing
+            # data in the DataFrame is pre-sorted.
+
+            # Create a new dataframe with the points
+            points_df = pd.DataFrame(
+                points, columns=['timestamp', 'value'])
+
+            # Concatenate both dataframes. New rows will be at the end.
+            self._df = pd.concat([self._df, points_df])
+
+        # Sort and reset index
+        self._df = self._df.sort_values('timestamp')
+        self._df.reset_index(drop=True, inplace=True)
+
+    def change_values(self, index_list: List[int], operator: str, value: Union[int, float]) -> None:
+        """
+        Changes the values of observations based on the specified operator and value.
+
+        :param index_list: List of indices for which values will be changed.
+        :type index_list: List[int]
+        :param operator: The operation to perform ('MULT', 'DIV', 'ADD', 'SUB', 'ASSIGN').
+        :type operator: str
+        :param value: The value to use in the operation.
+        :type value: Union[int, float]
+        """
+
+        def operation(x):
+            if operator == Operator.MULT.value:
+                return x * value
+            elif operator == Operator.DIV.value:
+                if value == 0:
+                    print("Error: cannot divide by 0")
+                    return x
+                return x / value
+            elif operator == Operator.ADD.value:
+                return x + value
+            elif operator == Operator.SUB.value:
+                return x - value
+            elif operator == Operator.ASSIGN.value:
+                return value
+            else:
+                return x
+
+        self._df.loc[index_list, 'value'] = self._df.loc[index_list, 'value'].apply(operation)
+
+    def delete_points(self, index_list: List[int]) -> None:
+        """
+        Deletes points from the observations at the specified indices.
+
+        :param index_list: List of indices for which points will be deleted.
+        :type index_list: List[int]
+        """
+
+        self._df.drop(index=index_list, inplace=True)
+        self._df.reset_index(drop=True, inplace=True)
+
+    def shift_points(self, index_list: List[int], time_value: int, time_unit: str) -> None:
+        """
+        Shifts the timestamps of the observations at the specified indices by a given time value and unit.
+
+        :param index_list: List of indices where timestamps will be shifted.
+        :type index_list: List[int]
+        :param time_value: The amount of time to shift the timestamps.
+        :type time_value: int
+        :param time_unit: The unit of time (e.g., 's' for seconds, 'm' for minutes).
+        :type time_unit: str
+        """
+
+        shift_value = np.timedelta64(time_value, time_unit)
+        condition = self._df.index.isin(index_list)
+
+        # Apply the shift
+        self._df.loc[condition, 'timestamp'] = self._df.loc[condition, 'timestamp'] + shift_value
+        self._df = self._df.sort_values('timestamp')
+        self._df.reset_index(drop=True, inplace=True)
+
+    def interpolate(self, index_list: List[int]) -> None:
+        """
+        Interpolates the values of observations at the specified indices using linear interpolation.
+
+        :param index_list: List of indices where values will be interpolated.
+        :type index_list: list[int]
+        """
+
+        condition = self._df.index.isin(index_list)
+        self._df['value'].mask(condition, inplace=True)
+        self._df['value'].interpolate(method='linear', inplace=True)
+
+    def drift_correction(self, start: int, end: int, gap_width: float) -> pd.DataFrame:
+        """
+        Applies drift correction to the values of observations within the specified index range.
+
+        :param start: Start index of the range to apply drift correction.
+        :type start: int
+        :param end: End index of the range to apply drift correction.
+        :type end: int
+        :param gap_width: The width of the drift gap to correct.
+        :type gap_width: float
+        :return: DataFrame after applying drift correction.
+        :rtype: pd.DataFrame
+        """
+
+        # validate range
+        if start >= end:
+            print('Start and end index cannot overlap')
+            return self._df
+        elif end > len(self._df) - 1:
+            print('End index out of range')
+            return self._df
+        elif start < 0:
+            print('Start index must be greater than or equal to 0')
+            return self._df
+
+        points = self._df.iloc[start:end + 1]
+        start_date = points.iloc[0]['timestamp']
+        end_date = points.iloc[-1]['timestamp']
+
+        x_l = (end_date - start_date).total_seconds()
+        ndv = -9999
+        # y_n = y_0 + G(x_i / x_l)
+
+        def f(row):
+            if row['value'] != ndv:
+                return (
+                    row['value'] +
+                    (gap_width * ((row['timestamp'] - start_date).total_seconds() / x_l))
+                )
+            else:
+                return row['value']
+
+        self._df.loc[points.index, 'value'] = points.apply(f, axis=1)
+
+        return self._df
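
The new quality module operates on a plain pandas DataFrame, so it can be exercised without a server connection. A short usage sketch assembled only from the class above; the sample data and datastream ID are illustrative:

    import pandas as pd
    from hydroserverpy.quality import HydroServerQualityControl

    # 'timestamp' must be datetime-typed and 'value' float-typed, per the
    # asserts in __init__.
    df = pd.DataFrame({
        'timestamp': pd.to_datetime(
            ['2024-01-01 00:00', '2024-01-01 00:15', '2024-01-01 01:30']),
        'value': [1.0, 2.0, 5.0],
    })

    qc = HydroServerQualityControl(
        datastream_id='22222222-2222-2222-2222-222222222222',  # placeholder ID
        observations=df,
    )

    gaps = qc.find_gaps(15, 'm')   # rows preceded by more than a 15-minute gap
    qc.fill_gap((15, 'm'), (15, 'm'), interpolate_values=True)
    qc.filter({'GT': 1.5})         # keys are FilterOperation values, OR-combined
    print(qc.observations)         # filtered view until filter() clears it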
{hydroserverpy-0.2.3.dist-info → hydroserverpy-0.3.0.dist-info}/METADATA

@@ -1,15 +1,18 @@
 Metadata-Version: 2.1
 Name: hydroserverpy
-Version: 0.2.3
+Version: 0.3.0
+Requires-Python: <4,>=3.9
 License-File: LICENSE
 Requires-Dist: requests >=2
-Requires-Dist: pydantic
+Requires-Dist: pydantic >=2.6
+Requires-Dist: pandas >=2.2
+Requires-Dist: numpy >=2.0
 Requires-Dist: pyyaml >=5
 Requires-Dist: simplejson >=3
 Requires-Dist: crontab >=1
 Requires-Dist: python-dateutil >=2.8.2
 Requires-Dist: croniter >=2.0.1
-Requires-Dist:
+Requires-Dist: country-list >=1.1.0
 Provides-Extra: docs
 Requires-Dist: sphinx-autodoc-typehints ; extra == 'docs'
 
hydroserverpy-0.3.0.dist-info/RECORD

@@ -0,0 +1,36 @@
+hydroserverpy/__init__.py,sha256=oVxU_pSr5RZx4s5UHYYNCrjXQAP5lujlosB2a3qG260,223
+hydroserverpy/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hydroserverpy/core/service.py,sha256=ch6rwNM8oM3L-9phE474grofCR3QkTQ8AopkMyn9sjo,5339
+hydroserverpy/core/endpoints/__init__.py,sha256=SpN1WLX0R8HOLHBv1cp3VZcIEiKD4BBbgFGQppdA_mw,404
+hydroserverpy/core/endpoints/base.py,sha256=Ay0a8rucNo9x8h1MMMuMaISkBAgc9DK_kouLGtnHPjk,4925
+hydroserverpy/core/endpoints/data_loaders.py,sha256=boUdmn9_9QDJS38nBXwKXYiX_We_rfK1OFjCU8ptQUE,3009
+hydroserverpy/core/endpoints/data_sources.py,sha256=c9hwlFUUY-Ae84TrlB2boLjKIIhN_fzxHSYvPSb9PKA,3007
+hydroserverpy/core/endpoints/datastreams.py,sha256=z8NX21L2qcj07Cotm5sTT3GrNEV2jS0-AjLQDRHeRq8,6899
+hydroserverpy/core/endpoints/observed_properties.py,sha256=7_BtjsCyyGaQ1EMhlQXhWupTs85q7i85LdRLejVUb9o,3309
+hydroserverpy/core/endpoints/processing_levels.py,sha256=pDolo_BqkSufs3w_ykRbsg-93xWME7vYIA6BAwzdT7A,3286
+hydroserverpy/core/endpoints/result_qualifiers.py,sha256=977r3Vf6Y3kkoRcHQwTbQPSCxxeWsJkcDCPhFI30Tss,3301
+hydroserverpy/core/endpoints/sensors.py,sha256=niF6eY8dFdfowqW_dLq9-7CPolWO__lFwElMDJTYtsM,3122
+hydroserverpy/core/endpoints/things.py,sha256=n0nSq2AgJ7zFwAerkTtYlFvc1EzgUfHsu_nNTcVxqr0,8357
+hydroserverpy/core/endpoints/units.py,sha256=UZKizV8ZLMCPZQ0ol5U8UON_MB80uhxL2PUwDbtQLXA,3084
+hydroserverpy/core/schemas/__init__.py,sha256=gUFAim1jcUWAvj0IqpR8UKaCdXT1v11SmFnOzEmcwJM,353
+hydroserverpy/core/schemas/base.py,sha256=k5ctiVXLm5A0TI-Nkz9l3oDGsq16B3yKcQgnZcJVDds,3683
+hydroserverpy/core/schemas/data_loaders.py,sha256=BJv0-IUkgsNisuwyOooUAHaAv1DLCF5SVVnBjpfahYM,2470
+hydroserverpy/core/schemas/data_sources.py,sha256=oearfow10p-JJ5d_3-S2IEJ4039d4SMQlVA0N4dNOH0,8058
+hydroserverpy/core/schemas/datastreams.py,sha256=OEVc7xMVaFSHvLMyunt2g0u6xAfv5xg0fO6Uz8VjOSU,11413
+hydroserverpy/core/schemas/observed_properties.py,sha256=1ibOAYbsf1DQAos68V8KEmvBUoS2WO73dd5MhSWCC90,1161
+hydroserverpy/core/schemas/processing_levels.py,sha256=K4N5by5bfSD3iPcx6Jv1x-xOPxW3_gwhZcEfaqwSDmQ,867
+hydroserverpy/core/schemas/result_qualifiers.py,sha256=ejUMPCDOq3cuq3a6j2uvQNRdVNEVQ1jJAacYv-o9NZI,722
+hydroserverpy/core/schemas/sensors.py,sha256=8nXSEyD1kGAL2Wi76kHvjXNSgh2VFTfMgBkFP8ACsZw,1835
+hydroserverpy/core/schemas/things.py,sha256=Pkr3bIo4VjLML7o53_R2lXwBmx0Z7aoGlvJKC9Diw60,11139
+hydroserverpy/core/schemas/units.py,sha256=EB4HATC-C2RxPC0EpcVN--aQa6nl1O9Lj1jkpXlJ-8Y,826
+hydroserverpy/etl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hydroserverpy/etl/exceptions.py,sha256=0UY8YUlNepG0y6FfH36hJyR1bOhwYHSZIdUSSMTg7GA,314
+hydroserverpy/etl/service.py,sha256=g-zp89KK4MDIdcTWQDNKbCg485ByAg0Ui01ooFldxhI,12414
+hydroserverpy/quality/__init__.py,sha256=GGBMkFSXciJLYrbV-NraFrj_mXWCy_GTcy9KKrKXU4c,84
+hydroserverpy/quality/service.py,sha256=3y6hjt1jcRcQSx9xRw37YLFBNCch488MHhn-p5nmR6E,13830
+hydroserverpy-0.3.0.dist-info/LICENSE,sha256=xVqFxDw3QOEJukakL7gQCqIMTQ1dlSCTo6Oc1otNW80,1508
+hydroserverpy-0.3.0.dist-info/METADATA,sha256=8n1ZsmvItJmbNl7EgTtOGE4zr-MqY4xE-H5CUMZ3zgU,488
+hydroserverpy-0.3.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+hydroserverpy-0.3.0.dist-info/top_level.txt,sha256=Zf37hrncXLOYvXhgCrf5mZdeq81G9fShdE2LfYbtb7w,14
+hydroserverpy-0.3.0.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+hydroserverpy-0.3.0.dist-info/RECORD,,
hydroserverpy/components/data_loaders.py

@@ -1,67 +0,0 @@
-from typing import List
-from uuid import UUID
-from hydroserverpy.schemas.data_loaders import DataLoaderGetResponse, DataLoaderPostBody, DataLoaderPatchBody
-from hydroserverpy.schemas.data_sources import DataSourceGetResponse
-
-
-class DataLoader:
-
-    def __init__(self, service):
-        self._service = service
-
-    def list(self):
-
-        return self._service.get(
-            'data-loaders',
-            response_schema=List[DataLoaderGetResponse]
-        )
-
-    def list_data_sources(self, data_loader_id: UUID):
-
-        return self._service.get(
-            f'data-loaders/{data_loader_id}/data-sources',
-            response_schema=List[DataSourceGetResponse]
-        )
-
-    def get(self, data_loader_id: UUID):
-
-        return self._service.get(
-            f'data-loaders/{str(data_loader_id)}',
-            response_schema=DataLoaderGetResponse
-        )
-
-    def create(self, data_loader_body: DataLoaderPostBody):
-
-        return self._service.post(
-            f'data-loaders',
-            headers={'Content-type': 'application/json'},
-            data=data_loader_body.json(by_alias=True),
-            response_schema=DataLoaderGetResponse
-        )
-
-    def update(self, data_loader_id: UUID, data_loader_body: DataLoaderPatchBody):
-
-        return self._service.patch(
-            f'data-loaders/{str(data_loader_id)}',
-            headers={'Content-type': 'application/json'},
-            data=data_loader_body.json(exclude_unset=True, by_alias=True),
-            response_schema=DataLoaderGetResponse
-        )
-
-    def delete(self, data_loader_id: UUID):
-
-        return self._service.delete(
-            f'data-loaders/{str(data_loader_id)}'
-        )
-
-    def load_data(self, data_loader_id: str):
-
-        data_sources_response = self.list_data_sources(data_loader_id=UUID(data_loader_id))
-
-        if data_sources_response.data:
-            data_sources = data_sources_response.data
-        else:
-            return None
-
-        for data_source in data_sources:
-            self._service.data_sources.load_data(data_source_id=data_source.id)