hydroserverpy 0.3.0__py3-none-any.whl → 0.5.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of hydroserverpy has been flagged as potentially problematic.
Files changed (83)
  1. hydroserverpy/__init__.py +3 -4
  2. hydroserverpy/api/http.py +24 -0
  3. hydroserverpy/api/main.py +152 -0
  4. hydroserverpy/api/models/__init__.py +18 -0
  5. hydroserverpy/api/models/base.py +74 -0
  6. hydroserverpy/api/models/etl/__init__.py +0 -0
  7. hydroserverpy/api/models/iam/__init__.py +0 -0
  8. hydroserverpy/api/models/iam/account.py +12 -0
  9. hydroserverpy/api/models/iam/collaborator.py +34 -0
  10. hydroserverpy/api/models/iam/role.py +10 -0
  11. hydroserverpy/api/models/iam/workspace.py +203 -0
  12. hydroserverpy/api/models/sta/__init__.py +0 -0
  13. hydroserverpy/api/models/sta/datastream.py +336 -0
  14. hydroserverpy/api/models/sta/observed_property.py +72 -0
  15. hydroserverpy/api/models/sta/processing_level.py +50 -0
  16. hydroserverpy/api/models/sta/result_qualifier.py +49 -0
  17. hydroserverpy/api/models/sta/sensor.py +105 -0
  18. hydroserverpy/api/models/sta/thing.py +217 -0
  19. hydroserverpy/api/models/sta/unit.py +49 -0
  20. hydroserverpy/api/services/__init__.py +8 -0
  21. hydroserverpy/api/services/base.py +92 -0
  22. hydroserverpy/api/services/etl/__init__.py +0 -0
  23. hydroserverpy/api/services/iam/__init__.py +0 -0
  24. hydroserverpy/api/services/iam/workspace.py +126 -0
  25. hydroserverpy/api/services/sta/__init__.py +0 -0
  26. hydroserverpy/api/services/sta/datastream.py +354 -0
  27. hydroserverpy/api/services/sta/observed_property.py +98 -0
  28. hydroserverpy/api/services/sta/processing_level.py +78 -0
  29. hydroserverpy/api/services/sta/result_qualifier.py +74 -0
  30. hydroserverpy/api/services/sta/sensor.py +116 -0
  31. hydroserverpy/api/services/sta/thing.py +188 -0
  32. hydroserverpy/api/services/sta/unit.py +82 -0
  33. hydroserverpy/etl/__init__.py +21 -0
  34. hydroserverpy/etl/extractors/__init__.py +0 -0
  35. hydroserverpy/etl/extractors/base.py +13 -0
  36. hydroserverpy/etl/extractors/ftp_extractor.py +50 -0
  37. hydroserverpy/etl/extractors/http_extractor.py +84 -0
  38. hydroserverpy/etl/extractors/local_file_extractor.py +25 -0
  39. hydroserverpy/etl/hydroserver_etl.py +40 -0
  40. hydroserverpy/etl/loaders/__init__.py +0 -0
  41. hydroserverpy/etl/loaders/base.py +13 -0
  42. hydroserverpy/etl/loaders/hydroserver_loader.py +68 -0
  43. hydroserverpy/etl/transformers/__init__.py +0 -0
  44. hydroserverpy/etl/transformers/base.py +52 -0
  45. hydroserverpy/etl/transformers/csv_transformer.py +88 -0
  46. hydroserverpy/etl/transformers/json_transformer.py +62 -0
  47. hydroserverpy/etl/types.py +7 -0
  48. hydroserverpy/etl_csv/__init__.py +0 -0
  49. hydroserverpy/{etl/service.py → etl_csv/hydroserver_etl_csv.py} +93 -55
  50. hydroserverpy/quality/service.py +84 -70
  51. hydroserverpy-0.5.0b1.dist-info/METADATA +19 -0
  52. hydroserverpy-0.5.0b1.dist-info/RECORD +59 -0
  53. {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.5.0b1.dist-info}/WHEEL +1 -1
  54. hydroserverpy/core/endpoints/__init__.py +0 -9
  55. hydroserverpy/core/endpoints/base.py +0 -133
  56. hydroserverpy/core/endpoints/data_loaders.py +0 -92
  57. hydroserverpy/core/endpoints/data_sources.py +0 -92
  58. hydroserverpy/core/endpoints/datastreams.py +0 -188
  59. hydroserverpy/core/endpoints/observed_properties.py +0 -93
  60. hydroserverpy/core/endpoints/processing_levels.py +0 -93
  61. hydroserverpy/core/endpoints/result_qualifiers.py +0 -93
  62. hydroserverpy/core/endpoints/sensors.py +0 -93
  63. hydroserverpy/core/endpoints/things.py +0 -240
  64. hydroserverpy/core/endpoints/units.py +0 -93
  65. hydroserverpy/core/schemas/__init__.py +0 -9
  66. hydroserverpy/core/schemas/base.py +0 -117
  67. hydroserverpy/core/schemas/data_loaders.py +0 -71
  68. hydroserverpy/core/schemas/data_sources.py +0 -206
  69. hydroserverpy/core/schemas/datastreams.py +0 -299
  70. hydroserverpy/core/schemas/observed_properties.py +0 -35
  71. hydroserverpy/core/schemas/processing_levels.py +0 -27
  72. hydroserverpy/core/schemas/result_qualifiers.py +0 -23
  73. hydroserverpy/core/schemas/sensors.py +0 -53
  74. hydroserverpy/core/schemas/things.py +0 -309
  75. hydroserverpy/core/schemas/units.py +0 -30
  76. hydroserverpy/core/service.py +0 -186
  77. hydroserverpy-0.3.0.dist-info/METADATA +0 -18
  78. hydroserverpy-0.3.0.dist-info/RECORD +0 -36
  79. /hydroserverpy/{core → api}/__init__.py +0 -0
  80. /hydroserverpy/{etl → etl_csv}/exceptions.py +0 -0
  81. {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.5.0b1.dist-info/licenses}/LICENSE +0 -0
  82. {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.5.0b1.dist-info}/top_level.txt +0 -0
  83. {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.5.0b1.dist-info}/zip-safe +0 -0
hydroserverpy/etl/loaders/hydroserver_loader.py (new file)
@@ -0,0 +1,68 @@
+from hydroserverpy import HydroServer
+from typing import Dict, Optional
+from .base import Loader
+import logging
+import pandas as pd
+
+
+class HydroServerLoader(HydroServer, Loader):
+    """
+    A class that extends the HydroServer client with ETL-specific functionality.
+    """
+
+    def __init__(
+        self,
+        host: str,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+        apikey: Optional[str] = None,
+        api_route: str = "api",
+    ):
+        super().__init__(host, username, password, apikey, api_route)
+
+    def load(self, data: pd.DataFrame, source_target_map) -> None:
+        """
+        Load observations from a DataFrame to the HydroServer.
+
+        :param data: A Pandas DataFrame where each column corresponds to a datastream.
+        """
+        data_requirements = self.get_data_requirements(source_target_map)
+        for ds_id in data.columns:
+            if ds_id == "timestamp":
+                continue
+
+            df = data[["timestamp", ds_id]].copy()
+            df.rename(columns={ds_id: "value"}, inplace=True)
+            df.dropna(subset=["value"], inplace=True)
+
+            phenomenon_end_time = data_requirements[ds_id]["start_time"]
+            if phenomenon_end_time:
+                df = df[df["timestamp"] > phenomenon_end_time]
+            if df.empty:
+                logging.warning(
+                    f"No new data to upload for datastream {ds_id}. Skipping."
+                )
+                continue
+            self.datastreams.load_observations(uid=ds_id, observations=df)
+
+    def get_data_requirements(
+        self, source_target_map
+    ) -> Dict[str, Dict[str, pd.Timestamp]]:
+        """
+        Each target system needs to be able to answer the question: 'What data do you need?'
+        and return a time range for each target time series. Usually the answer will be
+        'anything newer than my most recent observation'.
+        """
+        data_requirements = {}
+        for ds_id in source_target_map.values():
+            datastream = self.datastreams.get(uid=ds_id)
+            if not datastream:
+                message = "Couldn't fetch target datastream. ETL process aborted."
+                logging.error(message)
+                raise ValueError(message)
+            start_time = pd.Timestamp(
+                datastream.phenomenon_end_time or "1970-01-01T00:00:00Z"
+            )
+            end_time = pd.Timestamp.now()
+            data_requirements[ds_id] = {"start_time": start_time, "end_time": end_time}
+        return data_requirements
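To make the new loader's contract concrete, here is a minimal usage sketch. The host, credentials, column name, and datastream ID are placeholders, and the import path is inferred from the file listing above, so treat this as an illustration rather than confirmed API documentation.

```python
import pandas as pd

# Import path inferred from this release's file layout; values below are made up.
from hydroserverpy.etl.loaders.hydroserver_loader import HydroServerLoader

loader = HydroServerLoader(
    host="https://hydroserver.example.com",  # placeholder host
    username="user@example.com",             # placeholder credentials
    password="secret",
)

# One 'timestamp' column plus one column per target datastream ID.
data = pd.DataFrame(
    {
        "timestamp": pd.to_datetime(
            ["2024-01-01T00:00:00Z", "2024-01-01T01:00:00Z"]
        ),
        "datastream-uuid-1": [1.2, 1.3],
    }
)

# source_target_map values name the target datastreams; load() drops rows at or
# before each datastream's current phenomenon_end_time before posting.
loader.load(data, source_target_map={"source_column": "datastream-uuid-1"})
```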
File without changes
hydroserverpy/etl/transformers/base.py (new file)
@@ -0,0 +1,52 @@
+from abc import ABC, abstractmethod
+import logging
+import pandas as pd
+
+
+class Transformer(ABC):
+    @abstractmethod
+    def transform(self, *args, **kwargs) -> None:
+        pass
+
+    @property
+    def needs_datastreams(self) -> bool:
+        return False
+
+    @staticmethod
+    def standardize_dataframe(
+        df,
+        datastream_ids,
+        timestamp_column: str = "timestamp",
+        timestamp_format: str = "ISO8601",
+    ):
+        df.rename(
+            columns={timestamp_column: "timestamp", **datastream_ids},
+            inplace=True,
+        )
+
+        # Verify timestamp column is present in the DataFrame
+        if "timestamp" not in df.columns:
+            message = f"Timestamp column '{timestamp_column}' not found in data."
+            logging.error(message)
+            raise ValueError(message)
+
+        # Verify that all datastream_ids are present in the DataFrame
+        expected_columns = set(datastream_ids.values())
+        actual_columns = set(df.columns)
+        missing_datastream_ids = expected_columns - actual_columns
+
+        if missing_datastream_ids:
+            raise ValueError(
+                "The following datastream IDs are specified in the config file but their related keys could not be "
+                f"found in the source system's extracted data: {missing_datastream_ids}"
+            )
+
+        # Keep only 'timestamp' and datastream_id columns
+        columns_to_keep = ["timestamp"] + list(expected_columns)
+        df = df[columns_to_keep]
+
+        # Convert timestamp column to datetime if not already
+        if not pd.api.types.is_datetime64_any_dtype(df["timestamp"]):
+            df["timestamp"] = pd.to_datetime(df["timestamp"], format=timestamp_format)
+
+        return df
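Since standardize_dataframe defines the shape every transformer must produce, a small self-contained illustration may help; the column names and datastream ID here are made up.

```python
import pandas as pd

# standardize_dataframe is a @staticmethod, so it can be called on the class.
from hydroserverpy.etl.transformers.base import Transformer

df = pd.DataFrame(
    {
        "datetime": ["2024-01-01T00:00:00Z", "2024-01-01T01:00:00Z"],
        "temp_c": [21.4, 21.9],
        "battery_v": [12.6, 12.5],  # not mapped to a datastream, so it is dropped
    }
)

standardized = Transformer.standardize_dataframe(
    df,
    datastream_ids={"temp_c": "datastream-uuid-1"},
    timestamp_column="datetime",
)

# Columns are now ['timestamp', 'datastream-uuid-1'] and 'timestamp' is datetime64.
print(standardized.dtypes)
```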
hydroserverpy/etl/transformers/csv_transformer.py (new file)
@@ -0,0 +1,88 @@
+import logging
+import pandas as pd
+from typing import Dict, Optional, Union
+from .base import Transformer
+
+
+class CSVTransformer(Transformer):
+    def __init__(
+        self,
+        header_row: Optional[int],
+        data_start_row: int,
+        timestamp_column: Union[str, int],
+        datastream_ids: Dict[Union[str, int], str],
+        delimiter: Optional[str] = ",",
+        timestamp_format: Optional[str] = "ISO8601",
+    ):
+        # Pandas is zero-based while CSV is one-based so convert
+        self.header_row = None if header_row is None else header_row - 1
+        self.data_start_row = data_start_row - 1
+        self.timestamp_column = self.convert_to_zero_based(timestamp_column)
+        self.datastream_ids = datastream_ids
+        self.timestamp_format = timestamp_format
+        self.delimiter = delimiter
+
+    def transform(self, data_file) -> Union[pd.DataFrame, None]:
+        """
+        Transforms a CSV file-like object into a Pandas DataFrame whose column
+        names are replaced with their target datastream IDs.
+
+        Parameters:
+            data_file: File-like object containing CSV data.
+        Returns:
+            pd.DataFrame or None: Standardized DataFrame of observations, or
+            None if the CSV data could not be read.
+        """
+
+        try:
+            df = pd.read_csv(
+                data_file,
+                delimiter=self.delimiter,
+                header=self.header_row,
+                parse_dates=[self.timestamp_column],
+                date_format=self.timestamp_format,
+                skiprows=self.calculate_skiprows(),
+                usecols=[self.timestamp_column] + list(self.datastream_ids.keys()),
+            )
+        except Exception as e:
+            logging.error(f"Error reading CSV data: {e}")
+            return None
+
+        if self.header_row is None:
+            df.columns = list(range(1, len(df.columns) + 1))
+
+        return self.standardize_dataframe(
+            df, self.datastream_ids, self.timestamp_column, self.timestamp_format
+        )
+
+    def calculate_skiprows(self):
+        """
+        Calculates the skiprows parameter for pd.read_csv.
+
+        Returns:
+            skiprows (list or None): List of row indices to skip, or None if no rows need to be skipped.
+        Raises:
+            ValueError: If header_row is not compatible with data_start_row.
+        """
+        if self.data_start_row == 0:
+            if self.header_row is not None:
+                # Cannot have a header row if data starts at the first row
+                raise ValueError(
+                    "header_row must be None when data_start_row is 1 (first row)"
+                )
+            return None  # No rows to skip
+
+        skiprows = list(range(self.data_start_row))
+
+        if self.header_row is not None:
+            if self.header_row >= self.data_start_row:
+                raise ValueError("header_row must be less than data_start_row")
+            if self.header_row in skiprows:
+                # Do not skip the header row
+                skiprows.remove(self.header_row)
+        return skiprows
+
+    @staticmethod
+    def convert_to_zero_based(index: Union[str, int]) -> Union[str, int]:
+        if isinstance(index, int):
+            return index - 1
+        return index
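A short end-to-end check of the CSV path, with hypothetical data; note that header_row and data_start_row are one-based, as the constructor comment says, and are converted internally.

```python
import io

from hydroserverpy.etl.transformers.csv_transformer import CSVTransformer

csv_text = (
    "time,stage\n"
    "2024-01-01T00:00:00Z,0.42\n"
    "2024-01-01T01:00:00Z,0.45\n"
)

# Header on row 1, data starting on row 2 (one-based).
transformer = CSVTransformer(
    header_row=1,
    data_start_row=2,
    timestamp_column="time",
    datastream_ids={"stage": "datastream-uuid-1"},  # made-up target ID
)

df = transformer.transform(io.StringIO(csv_text))
# df columns: ['timestamp', 'datastream-uuid-1']
print(df)
```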
hydroserverpy/etl/transformers/json_transformer.py (new file)
@@ -0,0 +1,62 @@
+import logging
+import pandas as pd
+from typing import Dict, Optional, Any, List
+from .base import Transformer
+import json
+import jmespath
+
+
+class JSONTransformer(Transformer):
+    def __init__(
+        self,
+        query_string: str,
+        datastream_ids: Dict[str, str],
+        timestamp_format: Optional[str] = "ISO8601",
+    ):
+        """
+        Initializes the JSONTransformer.
+
+        Parameters:
+            query_string (str): JMESPath to the data array containing time series data.
+                Since JMESPath can natively rename column names, the assumption is the timestamp column
+                is always named 'timestamp' or converted to 'timestamp' in the JMESPath query.
+            datastream_ids (dict): Mapping from JSON field names to datastream IDs.
+            timestamp_format (str, optional): The format of the timestamp, if it needs special parsing.
+        """
+        self.query_string = query_string
+        self.datastream_ids = datastream_ids
+        self.timestamp_format = timestamp_format
+
+    def transform(self, data_file):
+        """
+        Transforms a JSON file-like object into the standard Pandas DataFrame format.
+        Since JMESPath can natively rename column names, the assumption is the timestamp column
+        is always named 'timestamp' for JSON data or converted to 'timestamp' in the JMESPath query.
+
+        Parameters:
+            data_file: File-like object containing JSON data.
+
+        Returns:
+            pd.DataFrame or None: DataFrame with a 'timestamp' column followed by
+            one column per datastream ID, or None if no data points were found.
+        """
+        json_data = json.load(data_file)
+        data_points = self.extract_data_points(json_data)
+        if not data_points:
+            logging.warning("No data points found in the JSON data.")
+            return None
+
+        df = pd.DataFrame(data_points)
+
+        return self.standardize_dataframe(
+            df,
+            self.datastream_ids,
+            timestamp_format=self.timestamp_format,
+        )
+
+    def extract_data_points(self, json_data: Any) -> Optional[List[dict]]:
+        """Extracts data points from the JSON data using the JMESPath query string."""
+        data_points = jmespath.search(self.query_string, json_data)
+
+        if isinstance(data_points, dict):
+            data_points = [data_points]
+        return data_points
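And the JSON path, where the JMESPath query itself renames the time field to 'timestamp' as the docstring requires; the payload and datastream ID are hypothetical.

```python
import io

from hydroserverpy.etl.transformers.json_transformer import JSONTransformer

payload = """
{
  "results": [
    {"observedAt": "2024-01-01T00:00:00Z", "stage": 0.42},
    {"observedAt": "2024-01-01T01:00:00Z", "stage": 0.45}
  ]
}
"""

# The multiselect hash renames 'observedAt' to 'timestamp' in the query itself.
transformer = JSONTransformer(
    query_string="results[].{timestamp: observedAt, stage: stage}",
    datastream_ids={"stage": "datastream-uuid-1"},  # made-up target ID
)

df = transformer.transform(io.StringIO(payload))
# df columns: ['timestamp', 'datastream-uuid-1']
print(df)
```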
hydroserverpy/etl/types.py (new file)
@@ -0,0 +1,7 @@
+from typing import TypedDict
+import pandas as pd
+
+
+class TimeRange(TypedDict):
+    start_time: pd.Timestamp
+    end_time: pd.Timestamp
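This TimeRange shape matches what HydroServerLoader.get_data_requirements returns per datastream; for example, with illustrative values:

```python
import pandas as pd

from hydroserverpy.etl.types import TimeRange

# "Anything newer than my most recent observation": start at the last known
# phenomenon_end_time (or the epoch), end now.
needed: TimeRange = {
    "start_time": pd.Timestamp("1970-01-01T00:00:00Z"),
    "end_time": pd.Timestamp.now(tz="UTC"),
}
```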
File without changes
hydroserverpy/{etl/service.py → etl_csv/hydroserver_etl_csv.py}
@@ -7,22 +7,28 @@ from requests import HTTPError
 from datetime import datetime, timezone, timedelta
 from dateutil.parser import isoparse
 from .exceptions import HeaderParsingError, TimestampParsingError
+import warnings
 
 if TYPE_CHECKING:
-    from ..core.schemas import DataSource
+    from hydroserverpy_old.core.schemas import DataSource
 
-logger = logging.getLogger('hydroserver_etl')
+logger = logging.getLogger("hydroserver_etl")
 logger.addHandler(logging.NullHandler())
 
 
-class HydroServerETL:
+class HydroServerETLCSV:
 
     def __init__(
-            self,
-            service,
-            data_file: IO[str],
-            data_source: 'DataSource',
+        self,
+        service,
+        data_file: IO[str],
+        data_source: "DataSource",
     ):
+        warnings.warn(
+            "HydroServerETLCSV is deprecated and will be removed in a future version. "
+            "Please use the new HydroServerETL class.",
+            DeprecationWarning,
+        )
         self._service = service
         self._data_file = data_file
         self._data_source = data_source
@@ -66,12 +72,12 @@ class HydroServerETL:
             self._failed_datastreams.extend(self._post_observations())
 
         except HeaderParsingError as e:
-            self._message = f'Failed to parse header for {self._data_source.name} with error: {str(e)}'
+            self._message = f"Failed to parse header for {self._data_source.name} with error: {str(e)}"
             logger.error(self._message)
             self._file_header_error = True
 
         except TimestampParsingError as e:
-            self._message = f'Failed to parse one or more timestamps for {self._data_source.name} with error: {str(e)}'
+            self._message = f"Failed to parse one or more timestamps for {self._data_source.name} with error: {str(e)}"
             logger.error(self._message)
             self._file_timestamp_error = True
 
@@ -79,7 +85,7 @@
             self._failed_datastreams.extend(self._post_observations())
 
         if not self._message and len(self._failed_datastreams) > 0:
-            self._message = f'One or more datastreams failed to sync with HydroServer for {self._data_source.name}.'
+            self._message = f"One or more datastreams failed to sync with HydroServer for {self._data_source.name}."
 
         self._update_data_source()
 
@@ -99,7 +105,8 @@
         """
 
         if index == self._data_source.header_row or (
-                index == self._data_source.data_start_row and self._timestamp_column_index is None
+            index == self._data_source.data_start_row
+            and self._timestamp_column_index is None
         ):
             self._parse_file_header(row)
 
@@ -110,18 +117,29 @@
 
         for datastream in self._datastreams.values():
             if str(datastream.uid) not in self._datastream_start_row_indexes.keys():
-                if not datastream.phenomenon_end_time or timestamp > datastream.phenomenon_end_time:
+                if (
+                    not datastream.phenomenon_end_time
+                    or timestamp > datastream.phenomenon_end_time
+                ):
                     self._datastream_start_row_indexes[str(datastream.uid)] = index
 
-            if str(datastream.uid) in self._datastream_start_row_indexes.keys() \
-                    and self._datastream_start_row_indexes[str(datastream.uid)] <= index:
+            if (
+                str(datastream.uid) in self._datastream_start_row_indexes.keys()
+                and self._datastream_start_row_indexes[str(datastream.uid)] <= index
+            ):
                 if str(datastream.uid) not in self._observations.keys():
                     self._observations[str(datastream.uid)] = []
 
-                self._observations[str(datastream.uid)].append({
-                    'phenomenon_time': timestamp,
-                    'result': row[self._datastream_column_indexes[datastream.data_source_column]]
-                })
+                self._observations[str(datastream.uid)].append(
+                    {
+                        "phenomenon_time": timestamp,
+                        "result": row[
+                            self._datastream_column_indexes[
+                                datastream.data_source_column
+                            ]
+                        ],
+                    }
+                )
 
     def _parse_file_header(self, row: List[str]) -> None:
         """
@@ -136,22 +154,29 @@
         """
 
         try:
-            self._timestamp_column_index = row.index(self._data_source.timestamp_column) \
-                if isinstance(self._data_source.timestamp_column, str) \
+            self._timestamp_column_index = (
+                row.index(self._data_source.timestamp_column)
+                if isinstance(self._data_source.timestamp_column, str)
                 else int(self._data_source.timestamp_column) - 1
+            )
             if self._timestamp_column_index > len(row):
                 raise ValueError
             self._datastream_column_indexes = {
-                datastream.data_source_column: row.index(datastream.data_source_column)
-                if not datastream.data_source_column.isdigit()
-                else int(datastream.data_source_column) - 1
+                datastream.data_source_column: (
+                    row.index(datastream.data_source_column)
+                    if not datastream.data_source_column.isdigit()
+                    else int(datastream.data_source_column) - 1
+                )
                 for datastream in self._datastreams.values()
             }
-            if len(self._datastream_column_indexes.values()) > 0 and \
-                    max(self._datastream_column_indexes.values()) > len(row):
+            if len(self._datastream_column_indexes.values()) > 0 and max(
+                self._datastream_column_indexes.values()
+            ) > len(row):
                 raise ValueError
         except ValueError as e:
-            logger.error(f'Failed to load data from data source: "{self._data_source.name}"')
+            logger.error(
+                f'Failed to load data from data source: "{self._data_source.name}"'
+            )
             raise HeaderParsingError(str(e)) from e
 
     def _parse_row_timestamp(self, row: List[str]) -> datetime:
@@ -164,32 +189,36 @@
         """
 
        try:
-            if self._data_source.timestamp_format == 'iso' or self._data_source.timestamp_format is None:
-                timestamp = isoparse(
-                    row[self._timestamp_column_index]
-                )
+            if (
+                self._data_source.timestamp_format == "iso"
+                or self._data_source.timestamp_format is None
+            ):
+                timestamp = isoparse(row[self._timestamp_column_index])
             else:
                 timestamp = datetime.strptime(
                     row[self._timestamp_column_index],
-                    self._data_source.timestamp_format
+                    self._data_source.timestamp_format,
                 )
         except ValueError as e:
             raise TimestampParsingError(str(e)) from e
 
         if timestamp.tzinfo is None:
             if not self._data_source.timestamp_offset:
-                timestamp = timestamp.replace(
-                    tzinfo=timezone.utc
-                )
+                timestamp = timestamp.replace(tzinfo=timezone.utc)
             else:
                 try:
                     timestamp = timestamp.replace(
                         tzinfo=datetime.strptime(
-                            self._data_source.timestamp_offset[:-2] + ':' + self._data_source.timestamp_offset[3:], '%z'
+                            self._data_source.timestamp_offset[:-2]
+                            + ":"
+                            + self._data_source.timestamp_offset[3:],
+                            "%z",
                         ).tzinfo
                     )
                 except ValueError as e:
-                    logger.error(f'Failed to load data from data source: "{self._data_source.name}"')
+                    logger.error(
+                        f'Failed to load data from data source: "{self._data_source.name}"'
+                    )
                     raise TimestampParsingError(str(e)) from e
 
         return timestamp
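The timestamp_offset branch above is easier to see standalone: an offset string like '+0530' is reshaped to '+05:30' and parsed with '%z'. A minimal sketch with an illustrative offset value:

```python
from datetime import datetime

offset = "+0530"  # example timestamp_offset value
reshaped = offset[:-2] + ":" + offset[3:]  # '+05:30'

# '%z' accepts colon-separated offsets in Python 3.7+.
tz = datetime.strptime(reshaped, "%z").tzinfo
print(datetime(2024, 1, 1, tzinfo=tz).isoformat())  # 2024-01-01T00:00:00+05:30
```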
@@ -213,15 +242,18 @@
         if datastream_id not in self._failed_datastreams and len(observations) > 0:
 
             logger.info(
-                f'Loading observations from ' +
-                f'{observations[0]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} to ' +
-                f'{observations[-1]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} for datastream: ' +
-                f'{str(datastream_id)} in data source "{self._data_source.name}".'
+                f"Loading observations from "
+                + f'{observations[0]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} to '
+                + f'{observations[-1]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} for datastream: '
+                + f'{str(datastream_id)} in data source "{self._data_source.name}".'
             )
 
             observations_df = pd.DataFrame(
-                [[observation['phenomenon_time'], observation['result']] for observation in observations],
-                columns=['timestamp', 'value']
+                [
+                    [observation["phenomenon_time"], observation["result"]]
+                    for observation in observations
+                ],
+                columns=["timestamp", "value"],
             )
 
             try:
@@ -233,17 +265,18 @@
                 failed_datastreams.append(datastream_id)
 
             if not self._last_loaded_timestamp or (
-                observations[-1]['phenomenon_time'] and observations[-1]['phenomenon_time'] >
-                self._last_loaded_timestamp
+                observations[-1]["phenomenon_time"]
+                and observations[-1]["phenomenon_time"]
+                > self._last_loaded_timestamp
             ):
-                self._last_loaded_timestamp = observations[-1]['phenomenon_time']
+                self._last_loaded_timestamp = observations[-1]["phenomenon_time"]
         elif datastream_id in self._failed_datastreams:
             logger.info(
-                f'Skipping observations POST request from ' +
-                f'{observations[0]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} to ' +
-                f'{observations[-1]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} for datastream: ' +
-                f'{str(datastream_id)} in data source "{self._data_source.name}",' +
-                f'due to previous failed POST request.'
+                f"Skipping observations POST request from "
+                + f'{observations[0]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} to '
+                + f'{observations[-1]["phenomenon_time"].strftime("%Y-%m-%dT%H:%M:%S%z")} for datastream: '
+                + f'{str(datastream_id)} in data source "{self._data_source.name}",'
+                + f"due to previous failed POST request."
             )
 
         self._observations = {}
@@ -260,10 +293,12 @@
 
         if self._data_source.crontab is not None:
             next_sync = croniter.croniter(
-                self._data_source.crontab,
-                datetime.now()
+                self._data_source.crontab, datetime.now()
             ).get_next(datetime)
-        elif self._data_source.interval is not None and self._data_source.interval_units is not None:
+        elif (
+            self._data_source.interval is not None
+            and self._data_source.interval_units is not None
+        ):
             next_sync = datetime.now() + timedelta(
                 **{self._data_source.interval_units: self._data_source.interval}
             )
@@ -272,8 +307,11 @@
 
         self._data_source.data_source_thru = self._last_loaded_timestamp
         self._data_source.last_sync_successful = (
-            True if not self._file_timestamp_error and not self._file_header_error
-            and len(self._failed_datastreams) == 0 else False
+            True
+            if not self._file_timestamp_error
+            and not self._file_header_error
+            and len(self._failed_datastreams) == 0
+            else False
         )
         self._data_source.last_sync_message = self._message
         self._data_source.last_synced = datetime.now(timezone.utc)