hydroserverpy 1.1.0b1__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hydroserverpy might be problematic. Click here for more details.

@@ -178,7 +178,7 @@ class WorkspaceService(EndpointService):
178
178
  None,
179
179
  ...,
180
180
  )
181
- else None
181
+ else expires_at
182
182
  )
183
183
  }
184
184
  headers = {"Content-type": "application/json"}
@@ -195,7 +195,7 @@ class DatastreamService(SensorThingsService):
195
195
  None,
196
196
  ...,
197
197
  )
198
- else None
198
+ else phenomenon_begin_time
199
199
  ),
200
200
  "phenomenonEndTime": (
201
201
  phenomenon_end_time.isoformat()
@@ -204,7 +204,7 @@ class DatastreamService(SensorThingsService):
204
204
  None,
205
205
  ...,
206
206
  )
207
- else None
207
+ else phenomenon_end_time
208
208
  ),
209
209
  "resultBeginTime": (
210
210
  result_begin_time.isoformat()
@@ -213,7 +213,7 @@ class DatastreamService(SensorThingsService):
213
213
  None,
214
214
  ...,
215
215
  )
216
- else None
216
+ else result_begin_time
217
217
  ),
218
218
  "resultEndTime": (
219
219
  result_end_time.isoformat()
@@ -222,7 +222,7 @@ class DatastreamService(SensorThingsService):
222
222
  None,
223
223
  ...,
224
224
  )
225
- else None
225
+ else result_end_time
226
226
  ),
227
227
  "isPrivate": is_private,
228
228
  "isVisible": is_visible,
@@ -7,64 +7,79 @@ from .base import Extractor
7
7
 
8
8
 
9
9
  class HTTPExtractor(Extractor):
10
- def __init__(
11
- self,
12
- url: str,
13
- url_variables: dict = None,
14
- params: dict = None,
15
- headers: dict = None,
16
- auth: tuple = None,
17
- ):
18
- self.url = self.format_url(url, url_variables or {})
19
- self.params = params
20
- self.headers = headers
21
- self.auth = auth
22
- self.start_date = None
10
+ def __init__(self, settings: object):
11
+ self.url = settings["urlTemplate"]
12
+ # self.url = self.format_url(url, url_variables or {})
13
+ # self.params = settings.get('params', )
14
+ # self.headers = headers
15
+ # self.auth = auth
23
16
 
24
17
  def prepare_params(self, data_requirements: Dict[str, TimeRange]):
25
- start_times = [
26
- req["start_time"] for req in data_requirements.values() if req["start_time"]
27
- ]
18
+ pass
19
+ # TODO: Uncomment this once url templates work on in the Data Management App
20
+ # start_times = [
21
+ # req["start_time"] for req in data_requirements.values() if req["start_time"]
22
+ # ]
28
23
 
29
- if start_times:
30
- oldest_start_time = min(start_times).isoformat()
31
- start_time_key = self.params.pop("start_time_key", None)
32
- if start_time_key:
33
- self.params[start_time_key] = oldest_start_time
34
- logging.info(
35
- f"Set start_time to {oldest_start_time} and removed 'start_time_key'"
36
- )
37
- else:
38
- logging.warning("'start_time_key' not found in params.")
24
+ # if start_times:
25
+ # oldest_start_time = min(start_times)
26
+ # start_time_key = self.params.pop("start_time_key", None)
27
+ # if start_time_key:
28
+ # self.params[start_time_key] = oldest_start_time
29
+ # logging.info(
30
+ # f"Set start_time to {oldest_start_time} and removed 'start_time_key'"
31
+ # )
32
+ # else:
33
+ # logging.warning("'start_time_key' not found in params.")
39
34
 
40
- end_times = [
41
- req["end_time"] for req in data_requirements.values() if req["end_time"]
42
- ]
35
+ # end_times = [
36
+ # req["end_time"] for req in data_requirements.values() if req["end_time"]
37
+ # ]
43
38
 
44
- if end_times:
45
- newest_end_time = max(end_times).isoformat()
46
- end_time_key = self.params.pop("end_time_key", None)
47
- if end_time_key:
48
- self.params[end_time_key] = newest_end_time
49
- logging.info(
50
- f"Set end_time to {newest_end_time} and removed 'end_time_key'"
51
- )
52
- else:
53
- logging.warning("'end_time_key' not found in params.")
39
+ # if end_times:
40
+ # newest_end_time = max(end_times)
41
+ # end_time_key = self.params.pop("end_time_key", None)
42
+ # if end_time_key:
43
+ # self.params[end_time_key] = newest_end_time
44
+ # logging.info(
45
+ # f"Set end_time to {newest_end_time} and removed 'end_time_key'"
46
+ # )
47
+ # else:
48
+ # logging.warning("'end_time_key' not found in params.")
54
49
 
55
50
  def extract(self):
56
51
  """
57
52
  Downloads the file from the HTTP/HTTPS server and returns a file-like object.
58
53
  """
59
- response = requests.get(
60
- url=self.url,
61
- params=self.params,
62
- headers=self.headers,
63
- auth=self.auth,
64
- stream=True,
65
- )
66
- response.raise_for_status()
67
- logging.info(f"Successfully downloaded file from {response.url}")
54
+
55
+ logging.info(f"Requesting data from → {self.url}")
56
+
57
+ # endpoints = [
58
+ # "https://httpbin.org/get",
59
+ # "https://jsonplaceholder.typicode.com/posts/1",
60
+ # "https://api.github.com",
61
+ # "https://api.ipify.org?format=json",
62
+ # "https://www.python.org/",
63
+ # "https://waterservices.usgs.gov/nwis/iv/?&format=json&sites=01646500&parameterCd=00060",
64
+ # "https://datahub.io/core/country-list/r/data.csv",
65
+ # "https://raw.githubusercontent.com/cs109/2014_data/master/countries.csv",
66
+ # # "https://rain-flow.slco.org/export/file/?delimiter=comma&site_id=68&data_start=2025-04-09&data_end=2025-05-09&device_id=2",
67
+ # # "https://rain-flow.slco.org/export/file/?mime=txt&delimiter=comma&site_id=68&data_start=2025-05-09%2000:00:00&data_end=2025-05-09%2023:59:59&device_id=2"
68
+ # ]
69
+ # for url in endpoints:
70
+ # try:
71
+ # r = requests.get(url, timeout=10)
72
+ # print(f"{url:50} → {r.status_code}")
73
+ # except Exception as e:
74
+ # print(f"{url:50} → ERROR: {e}")
75
+
76
+ try:
77
+ response = requests.get(self.url)
78
+ except Exception as e:
79
+ logging.error(f"Failed to fetch {repr(self.url)}: {e}")
80
+ raise
81
+
82
+ logging.info(f"Received response")
68
83
 
69
84
  data = BytesIO()
70
85
  for chunk in response.iter_content(chunk_size=8192):
@@ -6,8 +6,12 @@ from ..types import TimeRange
6
6
 
7
7
 
8
8
  class LocalFileExtractor(Extractor):
9
- def __init__(self, filepath: str):
10
- self.filepath = filepath
9
+ def __init__(self, settings: object):
10
+ if "path" not in settings:
11
+ message = "Missing required setting 'path' in LocalFileExtractor settings."
12
+ logging.error(message)
13
+ raise ValueError(message)
14
+ self.path = settings["path"]
11
15
 
12
16
  def prepare_params(self, data_requirements: Dict[str, TimeRange]):
13
17
  pass
@@ -17,9 +21,9 @@ class LocalFileExtractor(Extractor):
17
21
  Opens the file and returns a file-like object.
18
22
  """
19
23
  try:
20
- file_handle = open(self.filepath, "r")
21
- logging.info(f"Successfully opened file '{self.filepath}'.")
24
+ file_handle = open(self.path, "r")
25
+ logging.info(f"Successfully opened file '{self.path}'.")
22
26
  return file_handle
23
27
  except Exception as e:
24
- logging.error(f"Error opening file '{self.filepath}': {e}")
28
+ logging.error(f"Error opening file '{self.path}': {e}")
25
29
  return None
@@ -1,5 +1,8 @@
1
+ import datetime
1
2
  from hydroserverpy import HydroServer
2
3
  from typing import Dict, Optional
4
+
5
+ from hydroserverpy.etl.types import TimeRange
3
6
  from .base import Loader
4
7
  import logging
5
8
  import pandas as pd
@@ -13,20 +16,25 @@ class HydroServerLoader(HydroServer, Loader):
13
16
  def __init__(
14
17
  self,
15
18
  host: str,
16
- username: Optional[str] = None,
19
+ email: Optional[str] = None,
17
20
  password: Optional[str] = None,
18
21
  apikey: Optional[str] = None,
19
- api_route: str = "api",
20
22
  ):
21
- super().__init__(host, username, password, apikey, api_route)
23
+ super().__init__(
24
+ host=host,
25
+ email=email,
26
+ password=password,
27
+ apikey=apikey,
28
+ )
22
29
 
23
- def load(self, data: pd.DataFrame, source_target_map) -> None:
30
+ def load(self, data: pd.DataFrame, payload_settings) -> None:
24
31
  """
25
32
  Load observations from a DataFrame to the HydroServer.
26
33
 
27
34
  :param data: A Pandas DataFrame where each column corresponds to a datastream.
28
35
  """
29
- data_requirements = self.get_data_requirements(source_target_map)
36
+ mappings = payload_settings["mappings"]
37
+ time_ranges = self.get_data_requirements(mappings)
30
38
  for ds_id in data.columns:
31
39
  if ds_id == "timestamp":
32
40
  continue
@@ -35,9 +43,17 @@ class HydroServerLoader(HydroServer, Loader):
35
43
  df.rename(columns={ds_id: "value"}, inplace=True)
36
44
  df.dropna(subset=["value"], inplace=True)
37
45
 
38
- phenomenon_end_time = data_requirements[ds_id]["start_time"]
39
- if phenomenon_end_time:
40
- df = df[df["timestamp"] > phenomenon_end_time]
46
+ # ensure the timestamp column is UTC‑aware
47
+ timestamp_column = df["timestamp"]
48
+ if timestamp_column.dt.tz is None:
49
+ df["timestamp"] = timestamp_column.dt.tz_localize("UTC")
50
+
51
+ time_range = time_ranges[ds_id]
52
+ start_ts = pd.to_datetime(time_range["start_time"], utc=True)
53
+
54
+ if start_ts:
55
+ df = df[df["timestamp"] > start_ts]
56
+ logging.info(f"start cutoff for data loading {start_ts}")
41
57
  if df.empty:
42
58
  logging.warning(
43
59
  f"No new data to upload for datastream {ds_id}. Skipping."
@@ -45,24 +61,31 @@ class HydroServerLoader(HydroServer, Loader):
45
61
  continue
46
62
  self.datastreams.load_observations(uid=ds_id, observations=df)
47
63
 
48
- def get_data_requirements(
49
- self, source_target_map
50
- ) -> Dict[str, Dict[str, pd.Timestamp]]:
64
+ def get_data_requirements(self, source_target_map) -> Dict[str, TimeRange]:
51
65
  """
52
66
  Each target system needs to be able to answer the question: 'What data do you need?'
53
67
  and return a time range for each target time series. Usually the answer will be
54
68
  'anything newer than my most recent observation'.
55
69
  """
56
70
  data_requirements = {}
57
- for ds_id in source_target_map.values():
58
- datastream = self.datastreams.get(uid=ds_id)
71
+ target_ids = [mapping["targetIdentifier"] for mapping in source_target_map]
72
+ for id in target_ids:
73
+ datastream = self.datastreams.get(uid=id)
59
74
  if not datastream:
60
75
  message = "Couldn't fetch target datastream. ETL process aborted."
61
76
  logging.error(message)
62
77
  raise message
63
- start_time = pd.Timestamp(
78
+
79
+ start_ts = pd.Timestamp(
64
80
  datastream.phenomenon_end_time or "1970-01-01T00:00:00Z"
65
81
  )
66
- end_time = pd.Timestamp.now()
67
- data_requirements[ds_id] = {"start_time": start_time, "end_time": end_time}
82
+ if start_ts.tzinfo is None:
83
+ start_ts = start_ts.tz_localize("UTC")
84
+
85
+ end_ts = pd.Timestamp.now(tz="UTC")
86
+
87
+ data_requirements[id] = {
88
+ "start_time": start_ts.isoformat(),
89
+ "end_time": end_ts.isoformat(),
90
+ }
68
91
  return data_requirements
@@ -1,9 +1,22 @@
1
1
  from abc import ABC, abstractmethod
2
+ from datetime import timedelta, timezone
2
3
  import logging
4
+ from typing import Union
3
5
  import pandas as pd
4
6
 
5
7
 
6
8
  class Transformer(ABC):
9
+ def __init__(self, settings: object):
10
+ # timestampFormat will be the strs: 'utc', 'ISO8601', 'constant', or some custom openStrftime.
11
+ # If 'constant', then the system will append the timestamp_offset to the end of it.
12
+ self.timestamp_format = settings.get("timestampFormat", "ISO8601")
13
+ self.timestamp_offset: str = settings.get("timestampOffset", "+0000")
14
+ self.timestamp_key: Union[str, int] = settings["timestampKey"]
15
+
16
+ if isinstance(self.timestamp_key, int):
17
+ # Users will always interact in 1-based, so if the key is a column index, convert to 0-based
18
+ self.timestamp_key = self.timestamp_key - 1
19
+
7
20
  @abstractmethod
8
21
  def transform(self, *args, **kwargs) -> None:
9
22
  pass
@@ -12,41 +25,93 @@ class Transformer(ABC):
12
25
  def needs_datastreams(self) -> bool:
13
26
  return False
14
27
 
15
- @staticmethod
16
- def standardize_dataframe(
17
- df,
18
- datastream_ids,
19
- timestamp_column: str = "timestamp",
20
- timestamp_format: str = "ISO8601",
21
- ):
28
+ def standardize_dataframe(self, df, payload_mappings):
29
+ rename_map = {
30
+ mapping["sourceIdentifier"]: mapping["targetIdentifier"]
31
+ for mapping in payload_mappings
32
+ }
33
+
22
34
  df.rename(
23
- columns={timestamp_column: "timestamp", **datastream_ids},
35
+ columns={self.timestamp_key: "timestamp", **rename_map},
24
36
  inplace=True,
25
37
  )
26
38
 
27
39
  # Verify timestamp column is present in the DataFrame
28
40
  if "timestamp" not in df.columns:
29
- message = f"Timestamp column '{timestamp_column}' not found in data."
41
+ message = f"Timestamp column '{self.timestamp_key}' not found in data."
30
42
  logging.error(message)
31
43
  raise ValueError(message)
32
44
 
33
- # Verify that all datastream_ids are present in the DataFrame
34
- expected_columns = set(datastream_ids.values())
35
- actual_columns = set(df.columns)
36
- missing_datastream_ids = expected_columns - actual_columns
37
-
38
- if missing_datastream_ids:
45
+ # verify datastream columns
46
+ expected = set(rename_map.values())
47
+ missing = expected - set(df.columns)
48
+ if missing:
39
49
  raise ValueError(
40
50
  "The following datastream IDs are specified in the config file but their related keys could not be "
41
- f"found in the source system's extracted data: {missing_datastream_ids}"
51
+ f"found in the source system's extracted data: {missing}"
42
52
  )
43
53
 
44
- # Keep only 'timestamp' and datastream_id columns
45
- columns_to_keep = ["timestamp"] + list(expected_columns)
46
- df = df[columns_to_keep]
54
+ # keep only timestamp + datastream columns; remove the rest inplace
55
+ to_keep = ["timestamp", *expected]
56
+ df.drop(columns=df.columns.difference(to_keep), inplace=True)
57
+
58
+ df["timestamp"] = self._parse_timestamps(df["timestamp"])
47
59
 
48
- # Convert timestamp column to datetime if not already
49
- if not pd.api.types.is_datetime64_any_dtype(df["timestamp"]):
50
- df["timestamp"] = pd.to_datetime(df["timestamp"], format=timestamp_format)
60
+ df.drop_duplicates(subset=["timestamp"], keep="last")
61
+ logging.info(f"standardized dataframe created: {df.shape}")
62
+ logging.info(f"{df.info()}")
63
+ logging.info(f"{df.head()}")
51
64
 
52
65
  return df
66
+
67
+ def _parse_timestamps(self, raw_series: pd.Series) -> pd.Series:
68
+ """Return a Series of pandas UTC datetimes for the four supported modes."""
69
+ logging.info(f"parsing timestamps. Format: {self.timestamp_format}")
70
+
71
+ fmt = self.timestamp_format.lower()
72
+
73
+ VALID_KEYS = {"utc", "iso8601", "constant"}
74
+ if fmt not in VALID_KEYS and "%" not in self.timestamp_format:
75
+ raise ValueError(
76
+ f"timestamp_format must be one of {', '.join(VALID_KEYS)} "
77
+ "or a valid strftime pattern."
78
+ )
79
+
80
+ series = raw_series.str.strip()
81
+
82
+ if fmt == "utc":
83
+ # Accept Z-suffix, no offset, fractional seconds, etc.
84
+ parsed = pd.to_datetime(series, utc=True, errors="coerce")
85
+
86
+ elif fmt == "iso8601":
87
+ # pandas reads the embedded offset, then we shift to UTC
88
+ parsed = pd.to_datetime(series, errors="coerce").dt.tz_convert("UTC")
89
+
90
+ elif fmt == "constant":
91
+ offset = str(self.timestamp_offset).strip()
92
+ if not (len(offset) == 5 and offset[0] in "+-"):
93
+ raise ValueError(f"Invalid timestampOffset: {self.timestamp_offset}")
94
+
95
+ sign_multiplier = 1 if offset[0] == "+" else -1
96
+ hours = int(offset[1:3])
97
+ minutes = int(offset[3:5])
98
+ total_minutes = sign_multiplier * (hours * 60 + minutes)
99
+ local_timezone = timezone(timedelta(minutes=total_minutes))
100
+
101
+ naive_times = pd.to_datetime(series, errors="coerce")
102
+ localized_times = naive_times.dt.tz_localize(local_timezone)
103
+ parsed = localized_times.dt.tz_convert("UTC")
104
+
105
+ else:
106
+ logging.info(f"timestamp format is custom {self.timestamp_format}")
107
+ parsed = pd.to_datetime(
108
+ series, format=self.timestamp_format, errors="coerce"
109
+ ).dt.tz_localize("UTC")
110
+
111
+ if parsed.isna().any():
112
+ bad_rows = series[parsed.isna()].head(5).tolist()
113
+ logging.warning(
114
+ f"{parsed.isna().sum()} timestamps failed to parse. Sample bad values: {bad_rows}"
115
+ )
116
+
117
+ return parsed
@@ -1,28 +1,25 @@
1
+ from io import StringIO
1
2
  import logging
2
3
  import pandas as pd
3
- from typing import Dict, Optional, Union
4
+ from typing import Iterable, Union
4
5
  from .base import Transformer
5
6
 
6
7
 
7
8
  class CSVTransformer(Transformer):
8
- def __init__(
9
- self,
10
- header_row: Optional[int],
11
- data_start_row: int,
12
- timestamp_column: Union[str, int],
13
- datastream_ids: Dict[Union[str, int], str],
14
- delimiter: Optional[str] = ",",
15
- timestamp_format: Optional[str] = "ISO8601",
16
- ):
9
+ def __init__(self, settings: object):
10
+ super().__init__(settings)
11
+
17
12
  # Pandas is zero-based while CSV is one-based so convert
18
- self.header_row = None if header_row is None else header_row - 1
19
- self.data_start_row = data_start_row - 1
20
- self.timestamp_column = self.convert_to_zero_based(timestamp_column)
21
- self.datastream_ids = datastream_ids
22
- self.timestamp_format = timestamp_format
23
- self.delimiter = delimiter
13
+ self.header_row = (
14
+ None if settings.get("headerRow") is None else settings["headerRow"] - 1
15
+ )
16
+ self.data_start_row = (
17
+ settings["dataStartRow"] - 1 if "dataStartRow" in settings else 0
18
+ )
19
+ self.delimiter = settings.get("delimiter", ",")
20
+ self.identifier_type = settings.get("identifierType", "name")
24
21
 
25
- def transform(self, data_file) -> Union[pd.DataFrame, None]:
22
+ def transform(self, data_file, mappings) -> Union[pd.DataFrame, None]:
26
23
  """
27
24
  Transforms a CSV file-like object into a Pandas DataFrame where the column
28
25
  names are replaced with their target datastream ids.
@@ -33,16 +30,22 @@ class CSVTransformer(Transformer):
33
30
  observations_map (dict): Dict mapping datastream IDs to pandas DataFrames.
34
31
  """
35
32
 
33
+ clean_file = self._strip_comments(data_file)
34
+ source_identifiers = [mapping["sourceIdentifier"] for mapping in mappings]
35
+
36
36
  try:
37
+ # Pandas’ heuristics strip offsets and silently coerce failures to strings.
38
+ # Reading as pure text guarantees we always start with exactly what was in the file.
39
+ # Timestamps will be parsed at df standardization time.
37
40
  df = pd.read_csv(
38
- data_file,
39
- delimiter=self.delimiter,
41
+ clean_file,
42
+ sep=self.delimiter,
40
43
  header=self.header_row,
41
- parse_dates=[self.timestamp_column],
42
- date_format=self.timestamp_format,
43
- skiprows=self.calculate_skiprows(),
44
- usecols=[self.timestamp_column] + list(self.datastream_ids.keys()),
44
+ skiprows=self._build_skiprows(),
45
+ usecols=[self.timestamp_key] + source_identifiers,
46
+ dtype={self.timestamp_key: "string"},
45
47
  )
48
+ logging.info(f"CSV file read into dataframe: {df.shape}")
46
49
  except Exception as e:
47
50
  logging.error(f"Error reading CSV data: {e}")
48
51
  return None
@@ -50,39 +53,25 @@ class CSVTransformer(Transformer):
50
53
  if self.header_row is None:
51
54
  df.columns = list(range(1, len(df.columns) + 1))
52
55
 
53
- return self.standardize_dataframe(
54
- df, self.datastream_ids, self.timestamp_column, self.timestamp_format
55
- )
56
+ return self.standardize_dataframe(df, mappings)
56
57
 
57
- def calculate_skiprows(self):
58
+ def _strip_comments(self, stream: Iterable[Union[str, bytes]]) -> StringIO:
58
59
  """
59
- Calculates the skiprows parameter for pd.read_csv.
60
-
61
- Returns:
62
- skiprows (list or None): List of row indices to skip, or None if no rows need to be skipped.
63
- Raises:
64
- ValueError: If header_row is not compatible with data_start_row.
60
+ Remove lines whose first non-blank char is '#'.
61
+ Works for both text and binary iterables.
65
62
  """
66
- if self.data_start_row == 0:
67
- if self.header_row is not None:
68
- # Cannot have a header row if data starts at the first row
69
- raise ValueError(
70
- "header_row must be None when data_start_row is 1 (first row)"
71
- )
72
- return None # No rows to skip
63
+ clean: list[str] = []
73
64
 
74
- skiprows = list(range(self.data_start_row))
65
+ for raw in stream:
66
+ # normalize to bytes
67
+ b = raw if isinstance(raw, bytes) else raw.encode("utf-8", "ignore")
68
+ if b.lstrip().startswith(b"#"):
69
+ continue
70
+ clean.append(
71
+ raw.decode("utf-8", "ignore") if isinstance(raw, bytes) else raw
72
+ )
75
73
 
76
- if self.header_row is not None:
77
- if self.header_row >= self.data_start_row:
78
- raise ValueError("header_row must be less than data_start_row")
79
- if self.header_row in skiprows:
80
- # Do not skip the header row
81
- skiprows.remove(self.header_row)
82
- return skiprows
74
+ return StringIO("".join(clean))
83
75
 
84
- @staticmethod
85
- def convert_to_zero_based(index: Union[str, int]) -> Union[str, int]:
86
- if isinstance(index, int):
87
- return index - 1
88
- return index
76
+ def _build_skiprows(self):
77
+ return lambda idx: idx != self.header_row and idx < self.data_start_row
@@ -7,27 +7,11 @@ import jmespath
7
7
 
8
8
 
9
9
  class JSONTransformer(Transformer):
10
- def __init__(
11
- self,
12
- query_string: str,
13
- datastream_ids: Dict[str, str],
14
- timestamp_format: Optional[str] = "ISO8601",
15
- ):
16
- """
17
- Initializes the JSONTransformer.
18
-
19
- Parameters:
20
- query_string (str): JMESPath to the data array containing time series data.
21
- Since JMESPath can natively rename column names, the assumption is the timestamp column
22
- is always named 'timestamp' or converted to 'timestamp' in the JMESPath query.
23
- datastream_ids (dict): Mapping from JSON field names to datastream IDs.
24
- timestamp_format (str, optional): The format of the timestamp, if it needs special parsing.
25
- """
26
- self.query_string = query_string
27
- self.datastream_ids = datastream_ids
28
- self.timestamp_format = timestamp_format
10
+ def __init__(self, settings: object):
11
+ super().__init__(settings)
12
+ self.JMESPath = settings["JMESPath"]
29
13
 
30
- def transform(self, data_file):
14
+ def transform(self, data_file, mappings):
31
15
  """
32
16
  Transforms a JSON file-like object into the standard Pandas dataframe format.
33
17
  Since JMESPath can natively rename column names, the assumption is the timestamp column
@@ -47,15 +31,11 @@ class JSONTransformer(Transformer):
47
31
 
48
32
  df = pd.DataFrame(data_points)
49
33
 
50
- return self.standardize_dataframe(
51
- df,
52
- self.datastream_ids,
53
- timestamp_format=self.timestamp_format,
54
- )
34
+ return self.standardize_dataframe(df, mappings)
55
35
 
56
36
  def extract_data_points(self, json_data: Any) -> Optional[List[dict]]:
57
37
  """Extracts data points from the JSON data using the data_path."""
58
- data_points = jmespath.search(self.query_string, json_data)
38
+ data_points = jmespath.search(self.JMESPath, json_data)
59
39
 
60
40
  if isinstance(data_points, dict):
61
41
  data_points = [data_points]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hydroserverpy
3
- Version: 1.1.0b1
3
+ Version: 1.1.2
4
4
  Requires-Python: <4,>=3.9
5
5
  License-File: LICENSE
6
6
  Requires-Dist: requests>=2
@@ -30,9 +30,9 @@ hydroserverpy/api/services/etl/data_archive.py,sha256=hlNJOHJSZ1kV2n2xivWIBtT1Eo
30
30
  hydroserverpy/api/services/etl/data_source.py,sha256=DCgTyh8lF2iwh4uszePFg9UupXxJCN7Ww9Ut1MQKHis,6491
31
31
  hydroserverpy/api/services/etl/orchestration_system.py,sha256=JFuSJJUq4JJUt8KlZ-Ga0ktyQIe2U0Sa7ogd4oLjex4,2166
32
32
  hydroserverpy/api/services/iam/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- hydroserverpy/api/services/iam/workspace.py,sha256=KQ1zH7EnEcBrpeF-2APAo3aMiiSGCaUgOy_GwRQtyRI,8395
33
+ hydroserverpy/api/services/iam/workspace.py,sha256=Y6IituULcr1jrXMJvLOb4czS2U_AmF-132jEUDN4_4Y,8401
34
34
  hydroserverpy/api/services/sta/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
- hydroserverpy/api/services/sta/datastream.py,sha256=_m-xFom3z8wo5-1_q8NjWUpcw36wYv1brIG7xeGGadk,12402
35
+ hydroserverpy/api/services/sta/datastream.py,sha256=i5JtjkktEjkdvuWif7gNY7aNqIQwskUD1vSkybfilfs,12458
36
36
  hydroserverpy/api/services/sta/observed_property.py,sha256=nRlqBldJpXlj8VOZ4EwNOs4ZgmBw5w-EqAChfM3Z0Z0,2908
37
37
  hydroserverpy/api/services/sta/processing_level.py,sha256=Oupfeww2XgT83AwR5Spt91VjZK6MG0XIl11Et9fRjA0,2255
38
38
  hydroserverpy/api/services/sta/result_qualifier.py,sha256=XG5Ng3xdFT-l3Ktkuq23Cty1RfmepBO7EQ9gPzidZuA,2069
@@ -45,23 +45,23 @@ hydroserverpy/etl/types.py,sha256=4PY3CM-uoXIsf2lhcqtLC6HaRGXe7HKGDU22R8-H35c,13
45
45
  hydroserverpy/etl/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
46
  hydroserverpy/etl/extractors/base.py,sha256=GZKJfAhfJedRcNagnoqUiDZn286r-JzM7dW_F1dWsfY,275
47
47
  hydroserverpy/etl/extractors/ftp_extractor.py,sha256=5LwvHuvLk6LwRSVyE9EkV3DPgVlAvRrOBpl1a8B7dLg,1387
48
- hydroserverpy/etl/extractors/http_extractor.py,sha256=-duQwnsFBk4NQS2qhO55evcCUOnrBe3JX_LU9RyysX4,2709
49
- hydroserverpy/etl/extractors/local_file_extractor.py,sha256=T_Y9NTO0cC5L9mDPbIG6wYlXDQoatg8MobP97liFl4U,692
48
+ hydroserverpy/etl/extractors/http_extractor.py,sha256=FYC0nzb8Yt0UvLqikQaBD4KjkWkMLS1EYpzwiS1u3JE,3807
49
+ hydroserverpy/etl/extractors/local_file_extractor.py,sha256=UQ37t_NLqkZQ7biPoOrShU4NsV1OJEtg-U1mYIQGrPk,883
50
50
  hydroserverpy/etl/loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  hydroserverpy/etl/loaders/base.py,sha256=DrA9u7SNBxkPKqaszlP368yNbxihdqIGzP8rA6NAp6U,295
52
- hydroserverpy/etl/loaders/hydroserver_loader.py,sha256=bl4Z5TkXgJyKEHuPdY7LSrKMKOoZW_EDFl8dIRwDkv4,2549
52
+ hydroserverpy/etl/loaders/hydroserver_loader.py,sha256=-v45laSDp6S895xrmDgI9Dt6c7-7amwh2wCfDG4Ua3k,3200
53
53
  hydroserverpy/etl/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
- hydroserverpy/etl/transformers/base.py,sha256=237oVBhS3HQ3fcE4bZT5U1437WzV2x5kOFC229DY53M,1741
55
- hydroserverpy/etl/transformers/csv_transformer.py,sha256=9DKSO4NfUUDlr_c6UnH4AU3-7LxwSSeuQdou0iiCjdM,3238
56
- hydroserverpy/etl/transformers/json_transformer.py,sha256=ity0MXcYjEnlun4Y6cVSrnjrglKrK4JOXXHxWHIHN2A,2323
54
+ hydroserverpy/etl/transformers/base.py,sha256=OsLyjtLwPOCGdeoH2Ui0d0GqD2n55-9ch17UjZRFU7I,4615
55
+ hydroserverpy/etl/transformers/csv_transformer.py,sha256=0kWfRKPwiGxCNZ87Q4SiBlfM3PuKL6upc1ljphBY89o,2891
56
+ hydroserverpy/etl/transformers/json_transformer.py,sha256=R7tSyDB4Wn1snP75ctbEDMaMCdjyhPnMzN_W2VV3Mv4,1506
57
57
  hydroserverpy/etl_csv/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
58
  hydroserverpy/etl_csv/exceptions.py,sha256=0UY8YUlNepG0y6FfH36hJyR1bOhwYHSZIdUSSMTg7GA,314
59
59
  hydroserverpy/etl_csv/hydroserver_etl_csv.py,sha256=0ueBphEaAAlsb0cn71Ihgd5zOD8Zdu4Ts_yGwvXW53M,14544
60
60
  hydroserverpy/quality/__init__.py,sha256=GGBMkFSXciJLYrbV-NraFrj_mXWCy_GTcy9KKrKXU4c,84
61
61
  hydroserverpy/quality/service.py,sha256=U02UfLKVmFvr5ySiH0n0JYzUIabq5uprrHIiwcqBlqY,13879
62
- hydroserverpy-1.1.0b1.dist-info/licenses/LICENSE,sha256=xVqFxDw3QOEJukakL7gQCqIMTQ1dlSCTo6Oc1otNW80,1508
63
- hydroserverpy-1.1.0b1.dist-info/METADATA,sha256=SF2m2AtxqgHEz0XO7ozpwAimryavdn5Hap9rdYEdSIA,532
64
- hydroserverpy-1.1.0b1.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
65
- hydroserverpy-1.1.0b1.dist-info/top_level.txt,sha256=Zf37hrncXLOYvXhgCrf5mZdeq81G9fShdE2LfYbtb7w,14
66
- hydroserverpy-1.1.0b1.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
67
- hydroserverpy-1.1.0b1.dist-info/RECORD,,
62
+ hydroserverpy-1.1.2.dist-info/licenses/LICENSE,sha256=xVqFxDw3QOEJukakL7gQCqIMTQ1dlSCTo6Oc1otNW80,1508
63
+ hydroserverpy-1.1.2.dist-info/METADATA,sha256=9X-SRNkSpacV98Esy_m2Fg0DJb0AZkqtayTo3NFebb8,530
64
+ hydroserverpy-1.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
65
+ hydroserverpy-1.1.2.dist-info/top_level.txt,sha256=Zf37hrncXLOYvXhgCrf5mZdeq81G9fShdE2LfYbtb7w,14
66
+ hydroserverpy-1.1.2.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
67
+ hydroserverpy-1.1.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.4.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5