hydroserverpy 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydroserverpy/__init__.py +7 -0
- hydroserverpy/api/__init__.py +0 -0
- hydroserverpy/api/client.py +203 -0
- hydroserverpy/api/models/__init__.py +22 -0
- hydroserverpy/api/models/base.py +207 -0
- hydroserverpy/api/models/etl/__init__.py +26 -0
- hydroserverpy/api/models/etl/data_archive.py +77 -0
- hydroserverpy/api/models/etl/data_source.py +146 -0
- hydroserverpy/api/models/etl/etl_configuration.py +224 -0
- hydroserverpy/api/models/etl/extractors/__init__.py +6 -0
- hydroserverpy/api/models/etl/extractors/base.py +52 -0
- hydroserverpy/api/models/etl/extractors/ftp_extractor.py +50 -0
- hydroserverpy/api/models/etl/extractors/http_extractor.py +28 -0
- hydroserverpy/api/models/etl/extractors/local_file_extractor.py +20 -0
- hydroserverpy/api/models/etl/factories.py +23 -0
- hydroserverpy/api/models/etl/loaders/__init__.py +4 -0
- hydroserverpy/api/models/etl/loaders/base.py +11 -0
- hydroserverpy/api/models/etl/loaders/hydroserver_loader.py +98 -0
- hydroserverpy/api/models/etl/orchestration_configuration.py +35 -0
- hydroserverpy/api/models/etl/orchestration_system.py +63 -0
- hydroserverpy/api/models/etl/schedule.py +16 -0
- hydroserverpy/api/models/etl/status.py +14 -0
- hydroserverpy/api/models/etl/timestamp_parser.py +112 -0
- hydroserverpy/api/models/etl/transformers/__init__.py +5 -0
- hydroserverpy/api/models/etl/transformers/base.py +135 -0
- hydroserverpy/api/models/etl/transformers/csv_transformer.py +88 -0
- hydroserverpy/api/models/etl/transformers/json_transformer.py +48 -0
- hydroserverpy/api/models/etl/types.py +7 -0
- hydroserverpy/api/models/iam/__init__.py +0 -0
- hydroserverpy/api/models/iam/account.py +12 -0
- hydroserverpy/api/models/iam/apikey.py +96 -0
- hydroserverpy/api/models/iam/collaborator.py +70 -0
- hydroserverpy/api/models/iam/role.py +38 -0
- hydroserverpy/api/models/iam/workspace.py +297 -0
- hydroserverpy/api/models/sta/__init__.py +0 -0
- hydroserverpy/api/models/sta/datastream.py +254 -0
- hydroserverpy/api/models/sta/observation.py +103 -0
- hydroserverpy/api/models/sta/observed_property.py +37 -0
- hydroserverpy/api/models/sta/processing_level.py +35 -0
- hydroserverpy/api/models/sta/result_qualifier.py +34 -0
- hydroserverpy/api/models/sta/sensor.py +44 -0
- hydroserverpy/api/models/sta/thing.py +113 -0
- hydroserverpy/api/models/sta/unit.py +36 -0
- hydroserverpy/api/services/__init__.py +12 -0
- hydroserverpy/api/services/base.py +118 -0
- hydroserverpy/api/services/etl/__init__.py +0 -0
- hydroserverpy/api/services/etl/data_archive.py +166 -0
- hydroserverpy/api/services/etl/data_source.py +163 -0
- hydroserverpy/api/services/etl/orchestration_system.py +66 -0
- hydroserverpy/api/services/iam/__init__.py +0 -0
- hydroserverpy/api/services/iam/role.py +38 -0
- hydroserverpy/api/services/iam/workspace.py +232 -0
- hydroserverpy/api/services/sta/__init__.py +0 -0
- hydroserverpy/api/services/sta/datastream.py +296 -0
- hydroserverpy/api/services/sta/observed_property.py +82 -0
- hydroserverpy/api/services/sta/processing_level.py +72 -0
- hydroserverpy/api/services/sta/result_qualifier.py +64 -0
- hydroserverpy/api/services/sta/sensor.py +102 -0
- hydroserverpy/api/services/sta/thing.py +195 -0
- hydroserverpy/api/services/sta/unit.py +78 -0
- hydroserverpy/api/utils.py +22 -0
- hydroserverpy/quality/__init__.py +1 -0
- hydroserverpy/quality/service.py +405 -0
- hydroserverpy-1.5.1.dist-info/METADATA +66 -0
- hydroserverpy-1.5.1.dist-info/RECORD +69 -0
- hydroserverpy-1.5.1.dist-info/WHEEL +5 -0
- hydroserverpy-1.5.1.dist-info/licenses/LICENSE +28 -0
- hydroserverpy-1.5.1.dist-info/top_level.txt +1 -0
- hydroserverpy-1.5.1.dist-info/zip-safe +1 -0

hydroserverpy/api/models/etl/timestamp_parser.py
@@ -0,0 +1,112 @@
+from functools import cached_property
+import logging
+from datetime import datetime, timedelta, timezone
+import re
+from typing import Literal, Optional, Union, get_args
+from zoneinfo import ZoneInfo
+import pandas as pd
+from pydantic import BaseModel, Field
+
+TimestampFormat = Literal["ISO8601", "naive", "custom"]
+ALLOWED_TIMESTAMP_FORMATS = {m.lower() for m in get_args(TimestampFormat)}
+TimezoneMode = Literal["utc", "daylightSavings", "fixedOffset", "embeddedOffset"]
+ALLOWED_TIMEZONE_MODES = {m.lower() for m in get_args(TimezoneMode)}
+
+
+class Timestamp(BaseModel):
+    format: TimestampFormat
+    timezone_mode: TimezoneMode = Field("embeddedOffset", alias="timezoneMode")
+    custom_format: Optional[str] = Field(None, alias="customFormat")
+    timezone: Optional[str] = None
+    key: Optional[str] = None
+
+
+class TimestampParser:
+    def __init__(self, raw: Union[Timestamp, dict]):
+        if isinstance(raw, dict):
+            self.timestamp = Timestamp.model_validate(raw)
+        else:
+            self.timestamp = raw
+
+        if self.timestamp.format.lower() not in ALLOWED_TIMESTAMP_FORMATS:
+            raise ValueError(
+                f"timestamp format {self.timestamp.format!r} must be one of {ALLOWED_TIMESTAMP_FORMATS}"
+            )
+
+        self.tz_mode = self.timestamp.timezone_mode.lower()
+        if self.tz_mode not in ALLOWED_TIMEZONE_MODES and "%" not in self.tz_mode:
+            raise ValueError(
+                f"timezone mode {self.tz_mode} must be one of {', '.join(ALLOWED_TIMEZONE_MODES)}"
+            )
+
+    @cached_property
+    def tz(self):
+        if self.tz_mode == "fixedoffset":
+            offset = self.timestamp.timezone.strip()
+            if len(offset) != 5 or offset[0] not in "+-":
+                raise ValueError(f"Invalid timezone: {offset}")
+            sign = 1 if offset[0] == "+" else -1
+            hrs, mins = int(offset[1:3]), int(offset[3:5])
+            return timezone(timedelta(minutes=sign * (hrs * 60 + mins)))
+        if self.tz_mode == "daylightsavings":
+            return ZoneInfo(self.timestamp.timezone)
+        if self.tz_mode == "utc":
+            return timezone.utc
+        return None  # embeddedOffset: no static zone; offsets live in the data
+
+    def _convert_series_to_UTC(self, s: pd.Series):
+        timestamp_fmt = self.timestamp.format.lower()
+
+        if timestamp_fmt == "iso8601":
+            return pd.to_datetime(s, utc=True, errors="coerce")
+
+        if timestamp_fmt == "custom":
+            pattern = self.timestamp.custom_format or ""
+            naive = pd.to_datetime(s, format=pattern, errors="coerce")
+        else:
+            naive = pd.to_datetime(s, errors="coerce")
+
+        tz_mode = self.timestamp.timezone_mode.lower()
+        if tz_mode == "utc":
+            return pd.to_datetime(naive, utc=True, errors="coerce")
+
+        if tz_mode == "embeddedoffset":
+            # Values carry their own offsets; parsing already honored them, so
+            # convert straight to UTC instead of localizing a second time.
+            return pd.to_datetime(naive, utc=True, errors="coerce")
+
+        localized = naive.dt.tz_localize(self.tz)
+        return localized.dt.tz_convert(timezone.utc)
+
+    def parse_series(self, raw_series: pd.Series) -> pd.Series:
+        if pd.api.types.is_datetime64_any_dtype(raw_series):
+            s = raw_series  # already datetimes
+        else:
+            s = raw_series.astype("string", copy=False).str.strip()
+        parsed = self._convert_series_to_UTC(s)
+
+        if parsed.isna().any():
+            bad_rows = s[parsed.isna()].head(2).tolist()
+            logging.warning(
+                f"{parsed.isna().sum()} timestamps failed to parse. "
+                f"Sample bad values: {bad_rows}"
+            )
+
+        return parsed
+
+    def utc_to_string(self, dt: Union[datetime, pd.Timestamp]) -> str:
+        """
+        Convert a UTC datetime or pd.Timestamp to a custom string format.
+
+        Some external APIs are picky about their timestamp formats, so we need the ability to pull a
+        UTC timestamp from HydroServer and format it into a custom string.
+        """
+        if isinstance(dt, pd.Timestamp):
+            dt = dt.to_pydatetime()
+
+        tz_format = self.timestamp.format.lower()
+        if tz_format == "iso8601":
+            return dt.astimezone(timezone.utc).isoformat()
+
+        if tz_format == "naive":
+            return dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")
+
+        if tz_format == "custom":
+            logging.info(f"custom timestamp: ... {self.timestamp}")
+            return dt.astimezone(self.tz).strftime(self.timestamp.custom_format)
+
+        raise ValueError(f"Unknown timestamp.format: {self.timestamp.format!r}")
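
As a quick orientation to the parser above, a minimal sketch of round-tripping a fixed-offset feed; the config dict is invented for illustration and mirrors the Timestamp model's wire aliases:

import pandas as pd

parser = TimestampParser({
    "format": "custom",
    "customFormat": "%m/%d/%Y %H:%M",
    "timezoneMode": "fixedOffset",
    "timezone": "-0700",
    "key": "DateTime",
})

raw = pd.Series(["06/01/2024 00:00", "06/01/2024 00:15"])
utc = parser.parse_series(raw)             # tz-aware UTC values (07:00, 07:15 UTC)
print(parser.utc_to_string(utc.iloc[0]))   # "06/01/2024 00:00", back in source form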

hydroserverpy/api/models/etl/transformers/base.py
@@ -0,0 +1,135 @@
+from abc import ABC, abstractmethod
+import ast
+from functools import lru_cache
+import logging
+import re
+from typing import List, Union
+import pandas as pd
+
+from ..timestamp_parser import TimestampParser
+from ..etl_configuration import MappingPath, TransformerConfig, SourceTargetMapping
+
+ALLOWED_AST = (
+    ast.Expression,
+    ast.BinOp,
+    ast.UnaryOp,
+    ast.Add,
+    ast.Sub,
+    ast.Mult,
+    ast.Div,
+    ast.UAdd,
+    ast.USub,
+    ast.Name,
+    ast.Load,
+    ast.Constant,
+)
+
+
+def _canonicalize_expr(expr: str) -> str:
+    # normalize whitespace for cache hits; parentheses remain intact
+    return re.sub(r"\s+", "", expr)
+
+
+@lru_cache(maxsize=256)
+def _compile_arithmetic_expr_canon(expr_no_ws: str):
+    tree = ast.parse(expr_no_ws, mode="eval")
+    for node in ast.walk(tree):
+        if not isinstance(node, ALLOWED_AST):
+            raise ValueError(
+                "Only +, -, *, / with 'x' and numeric literals are allowed."
+            )
+        if isinstance(node, ast.Name) and node.id != "x":
+            raise ValueError("Only the variable 'x' is allowed.")
+        if isinstance(node, ast.Constant):
+            val = node.value
+            if isinstance(val, bool) or not isinstance(val, (int, float)):
+                raise ValueError("Only numeric literals are allowed.")
+    return compile(tree, "<expr>", "eval")
+
+
+def _compile_arithmetic_expr(expr: str):
+    return _compile_arithmetic_expr_canon(_canonicalize_expr(expr))
+
+
+class Transformer(ABC):
+    def __init__(self, transformer_config: TransformerConfig):
+        self.cfg = transformer_config
+        self.timestamp = transformer_config.timestamp
+        self.timestamp_parser = TimestampParser(self.timestamp)
+
+    @abstractmethod
+    def transform(self, *args, **kwargs) -> None:
+        pass
+
+    @property
+    def needs_datastreams(self) -> bool:
+        return False
+
+    def standardize_dataframe(
+        self, df: pd.DataFrame, mappings: List[SourceTargetMapping]
+    ):
+        if not df.empty:
+            logging.info(f"Read payload into dataframe: {df.iloc[0].to_dict()}")
+        else:
+            logging.info("Read payload into dataframe: [empty dataframe]")
+
+        # 1) Normalize the timestamp column
+        df.rename(columns={self.timestamp.key: "timestamp"}, inplace=True)
+        if "timestamp" not in df.columns:
+            msg = f"Timestamp column '{self.timestamp.key}' not found in data."
+            logging.error(msg)
+            raise ValueError(msg)
+        logging.info("Renamed timestamp column to 'timestamp'")
+
+        df["timestamp"] = self.timestamp_parser.parse_series(df["timestamp"])
+        df = df.drop_duplicates(subset=["timestamp"], keep="last")
+
+        def _resolve_source_col(s_id: Union[str, int]) -> str:
+            if isinstance(s_id, int) and s_id not in df.columns:
+                try:
+                    return df.columns[s_id]
+                except IndexError:
+                    raise ValueError(
+                        f"Source index {s_id} is out of range for extracted data."
+                    )
+            if s_id not in df.columns:
+                raise ValueError(f"Source column '{s_id}' not found in extracted data.")
+            return s_id
+
+        def _apply_transformations(series: pd.Series, path: MappingPath) -> pd.Series:
+            out = series  # accumulator for sequential transforms
+            if out.dtype == "object":
+                out = pd.to_numeric(out, errors="coerce")
+
+            for transformation in path.data_transformations:
+                if transformation.type == "expression":
+                    code = _compile_arithmetic_expr(transformation.expression)
+                    try:
+                        out = eval(code, {"__builtins__": {}}, {"x": out})
+                    except Exception:
+                        logging.exception(
+                            "Data transformation failed for expression=%r",
+                            transformation.expression,
+                        )
+                        raise
+                else:
+                    msg = f"Unsupported transformation type: {transformation.type}"
+                    logging.error(msg)
+                    raise ValueError(msg)
+            return out
+
+        # 2) Source-target mappings may be one-to-many, so create a new column
+        #    for each target and apply that path's transformations.
+        transformed_df = pd.DataFrame(index=df.index)
+        for m in mappings:
+            src_col = _resolve_source_col(m.source_identifier)
+            base = df[src_col]
+            for path in m.paths:
+                target_col = str(path.target_identifier)
+                transformed_df[target_col] = _apply_transformations(base, path)
+
+        # 3) Keep only the timestamp + target columns
+        df = pd.concat([df[["timestamp"]], transformed_df], axis=1)
+
+        logging.info(f"standardized dataframe created: {df.shape}")
+
+        return df
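
The allowlist-based compiler above is the entire sandbox for user-supplied calibration expressions, so it is worth seeing what it accepts and rejects; a small self-contained check:

import pandas as pd

code = _compile_arithmetic_expr("(x * 9 / 5) + 32")            # Celsius -> Fahrenheit
out = eval(code, {"__builtins__": {}}, {"x": pd.Series([0.0, 100.0])})
print(out.tolist())                                            # [32.0, 212.0]

_compile_arithmetic_expr("__import__('os')")  # raises ValueError: ast.Call is not in ALLOWED_AST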

hydroserverpy/api/models/etl/transformers/csv_transformer.py
@@ -0,0 +1,88 @@
+from io import StringIO
+import logging
+import pandas as pd
+from typing import Iterable, List, Union
+from .base import Transformer
+from ..etl_configuration import TransformerConfig, SourceTargetMapping
+
+
+class CSVTransformer(Transformer):
+    def __init__(self, transformer_config: TransformerConfig):
+        super().__init__(transformer_config)
+
+        # Pandas is zero-based while CSV is one-based, so convert.
+        self.header_row = (
+            None if self.cfg.header_row is None else self.cfg.header_row - 1
+        )
+        self.data_start_row = (
+            self.cfg.data_start_row - 1 if self.cfg.data_start_row else 0
+        )
+        self.delimiter = self.cfg.delimiter or ","
+        self.identifier_type = self.cfg.identifier_type or "name"
+
+    def transform(
+        self, data_file, mappings: List[SourceTargetMapping]
+    ) -> Union[pd.DataFrame, None]:
+        """
+        Transforms a CSV file-like object into a Pandas DataFrame whose columns
+        are relabeled with their target datastream IDs.
+
+        Parameters:
+            data_file: File-like object containing CSV data.
+            mappings: Source-to-target column mappings.
+        Returns:
+            pd.DataFrame or None: the standardized frame ('timestamp' plus one
+            column per target datastream), or None if the CSV could not be read.
+        """
+
+        clean_file = self._strip_comments(data_file)
+        use_index = self.identifier_type == "index"
+
+        if use_index:
+            # Users always interact in 1-based terms, so if the key is a column
+            # index, convert to 0-based to work with Pandas.
+            timestamp_pos = int(self.timestamp.key) - 1
+            usecols = [timestamp_pos] + [int(m.source_identifier) - 1 for m in mappings]
+        else:
+            usecols = [self.timestamp.key] + [m.source_identifier for m in mappings]
+
+        try:
+            # Pandas' heuristics strip offsets and silently coerce failures to strings.
+            # Reading as pure text guarantees we always start with exactly what was
+            # in the file. Timestamps will be parsed at df standardization time.
+            df = pd.read_csv(
+                clean_file,
+                sep=self.delimiter,
+                header=0,
+                skiprows=self._build_skiprows(),
+                usecols=usecols,
+                dtype={self.timestamp.key: "string"},
+            )
+            logging.info(f"CSV file read into dataframe: {df.shape}")
+        except Exception as e:
+            logging.error(f"Error reading CSV data: {e}")
+            return None
+
+        # In index mode, relabel columns back to their original 1-based indices so
+        # the base transformer can use integer labels directly. Pandas returns
+        # usecols columns in file order, so sort before relabeling.
+        if use_index:
+            df.columns = [(c + 1) if isinstance(c, int) else c for c in sorted(usecols)]
+
+        return self.standardize_dataframe(df, mappings)
+
+    def _strip_comments(self, stream: Iterable[Union[str, bytes]]) -> StringIO:
+        """
+        Remove lines whose first non-blank char is '#'.
+        Works for both text and binary iterables.
+        """
+        clean: list[str] = []
+
+        for raw in stream:
+            # normalize to bytes for the comment check
+            b = raw if isinstance(raw, bytes) else raw.encode("utf-8", "ignore")
+            if b.lstrip().startswith(b"#"):
+                continue
+            clean.append(
+                raw.decode("utf-8", "ignore") if isinstance(raw, bytes) else raw
+            )
+
+        return StringIO("".join(clean))
+
+    def _build_skiprows(self):
+        return lambda idx: idx != self.header_row and idx < self.data_start_row
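
The row bookkeeping above is the part most worth double-checking; a standalone sketch of what the _build_skiprows callable keeps, assuming a config with header_row=2 and data_start_row=4 (both 1-based, so header index 1 and data start index 3 after the constructor's conversion):

# pandas calls the skiprows callable with 0-based row indices
header, start = 1, 3
skiprows = lambda idx: idx != header and idx < start
print([i for i in range(6) if not skiprows(i)])  # [1, 3, 4, 5]: the header row, then all data rows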

hydroserverpy/api/models/etl/transformers/json_transformer.py
@@ -0,0 +1,48 @@
+import logging
+import pandas as pd
+from typing import Optional, Any, List
+from .base import Transformer
+import json
+import jmespath
+from ..etl_configuration import TransformerConfig, SourceTargetMapping
+
+
+class JSONTransformer(Transformer):
+    def __init__(self, transformer_config: TransformerConfig):
+        super().__init__(transformer_config)
+        self.jmespath = transformer_config.jmespath
+
+    def transform(self, data_file, mappings: List[SourceTargetMapping]):
+        """
+        Transforms a JSON file-like object into the standard Pandas DataFrame format.
+        Since JMESPath can natively rename keys, the assumption is that the timestamp
+        column is either already named 'timestamp' in the JSON data or renamed to
+        'timestamp' by the JMESPath query.
+
+        Parameters:
+            data_file: File-like object (or raw str/bytes) containing JSON data.
+
+        Returns:
+            pd.DataFrame or None: standardized frame with a 'timestamp' column
+            followed by one column per target datastream ID.
+        """
+        if data_file is None:
+            raise TypeError(
+                "JSONTransformer received None; expected file-like, bytes, or str"
+            )
+
+        # Accept raw str/bytes as well as file-like objects, as promised above.
+        if isinstance(data_file, (str, bytes, bytearray)):
+            json_data = json.loads(data_file)
+        else:
+            json_data = json.load(data_file)
+        data_points = self.extract_data_points(json_data)
+        if not data_points:
+            logging.warning("No data points found in the JSON data.")
+            return None
+
+        df = pd.DataFrame(data_points)
+
+        return self.standardize_dataframe(df, mappings)
+
+    def extract_data_points(self, json_data: Any) -> Optional[List[dict]]:
+        """Extracts data points from the JSON data using the configured JMESPath query."""
+        data_points = jmespath.search(self.jmespath, json_data)
+
+        if isinstance(data_points, dict):
+            data_points = [data_points]
+        return data_points
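
For reference, a sketch of the renaming convention the docstring describes: the JMESPath query both selects the record list and renames source keys so the timestamp lands in a 'timestamp' column. The payload shape and query here are invented for illustration:

import jmespath

payload = {"results": [{"t": "2024-06-01T00:00:00Z", "stage": 1.2}]}
query = "results[*].{timestamp: t, stage: stage}"
print(jmespath.search(query, payload))
# [{'timestamp': '2024-06-01T00:00:00Z', 'stage': 1.2}]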
File without changes (one of the empty __init__.py modules listed above)

hydroserverpy/api/models/iam/account.py
@@ -0,0 +1,12 @@
+from typing import Optional
+from pydantic import BaseModel, Field, EmailStr
+
+
+class Account(BaseModel):
+    name: str = Field(..., max_length=255)
+    email: EmailStr
+    organization_name: Optional[str] = None
+    phone: Optional[str] = Field(None, max_length=15)
+    address: Optional[str] = Field(None, max_length=255)
+    link: Optional[str] = Field(None, max_length=2000)
+    user_type: str = Field(..., max_length=255, alias="type")
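
Note the alias on the last field: accounts arrive from the API with a "type" key, which pydantic maps onto user_type. A minimal sketch (the field values are invented):

acct = Account.model_validate({
    "name": "Jane Doe",
    "email": "jane@example.com",
    "type": "standard",
})
print(acct.user_type)  # "standard"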

hydroserverpy/api/models/iam/apikey.py
@@ -0,0 +1,96 @@
+from typing import Optional, Union, ClassVar, TYPE_CHECKING
+from uuid import UUID
+from datetime import datetime
+from hydroserverpy.api.models.iam.role import Role
+from hydroserverpy.api.utils import normalize_uuid
+from ..base import HydroServerBaseModel
+
+if TYPE_CHECKING:
+    from hydroserverpy import HydroServer
+    from hydroserverpy.api.models import Workspace
+
+
+class APIKey(HydroServerBaseModel):
+    name: str
+    role_id: Union[UUID, str]
+    workspace_id: Union[UUID, str]
+    description: Optional[str] = None
+    is_active: bool
+    expires_at: Optional[datetime] = None
+
+    _editable_fields: ClassVar[set[str]] = {"name", "description", "role_id", "is_active", "expires_at"}
+
+    def __init__(self, client: "HydroServer", **data):
+        super().__init__(client=client, service=None, **data)
+
+        self._workspace = None
+        self._role = None
+
+    @property
+    def workspace(self) -> "Workspace":
+        """The workspace this API key belongs to."""
+
+        if self._workspace is None:
+            self._workspace = self.client.workspaces.get(uid=self.workspace_id)
+
+        return self._workspace
+
+    @property
+    def role(self) -> "Role":
+        """The role this API key is assigned."""
+
+        if self._role is None:
+            self._role = self.client.roles.get(uid=self.role_id)
+
+        return self._role
+
+    @role.setter
+    def role(self, role: Union["Role", UUID, str]):
+        if not role:
+            raise ValueError("Role of API key cannot be None.")
+        if normalize_uuid(role) != str(self.role_id):
+            self.role_id = normalize_uuid(role)
+            self._role = None
+
+    def save(self):
+        """Saves changes to this resource to HydroServer."""
+
+        if not self.uid:
+            raise AttributeError("Data cannot be saved: UID is not set.")
+
+        if self.unsaved_changes:
+            saved_resource = self.client.workspaces.update_api_key(
+                uid=self.workspace_id, api_key_id=self.uid, **self.unsaved_changes
+            )
+            self._server_data = saved_resource.dict(by_alias=False).copy()
+            self.__dict__.update(saved_resource.__dict__)
+
+    def refresh(self):
+        """Refreshes this resource from HydroServer."""
+
+        if self.uid is None:
+            raise ValueError("Cannot refresh data without a valid ID.")
+
+        refreshed_resource = self.client.workspaces.get_api_key(
+            uid=self.workspace_id, api_key_id=self.uid
+        )
+        self._server_data = refreshed_resource.dict(by_alias=False).copy()
+        self.__dict__.update(refreshed_resource.__dict__)
+
+    def delete(self):
+        """Deletes this resource from HydroServer."""
+
+        if self.uid is None:
+            raise AttributeError("Cannot delete data without a valid ID.")
+
+        self.client.workspaces.delete_api_key(
+            uid=self.workspace_id, api_key_id=self.uid
+        )
+        self.uid = None
+
+    def regenerate(self):
+        """Regenerates this API key. WARNING: The previous key will be invalidated."""
+
+        _, key = self.client.workspaces.regenerate_api_key(
+            uid=self.workspace_id, api_key_id=self.uid
+        )
+
+        return key
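
A sketch of the key lifecycle this model supports. It assumes an authenticated client and real identifiers in place of workspace_uid and key_uid, and that the shared base model records edits to fields in _editable_fields as unsaved_changes (which save() reads); the service calls are the same ones the methods above use:

key = client.workspaces.get_api_key(uid=workspace_uid, api_key_id=key_uid)
key.name = "nightly-etl"       # staged locally as an unsaved change
key.save()                     # pushes only the changed fields
secret = key.regenerate()      # returns the new secret; the old one stops working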

hydroserverpy/api/models/iam/collaborator.py
@@ -0,0 +1,70 @@
+from typing import Union, ClassVar, TYPE_CHECKING
+from uuid import UUID
+from pydantic import Field, AliasPath
+from hydroserverpy.api.models.iam.role import Role
+from hydroserverpy.api.utils import normalize_uuid
+from ..base import HydroServerBaseModel
+
+if TYPE_CHECKING:
+    from hydroserverpy import HydroServer
+    from hydroserverpy.api.models.iam.workspace import Workspace
+    from hydroserverpy.api.models.iam.account import Account
+
+
+class Collaborator(HydroServerBaseModel):
+    user: "Account"
+    role_id: Union[UUID, str] = Field(..., validation_alias=AliasPath("role", "id"))
+    workspace_id: Union[UUID, str]
+
+    _editable_fields: ClassVar[set[str]] = {"role_id"}
+
+    def __init__(self, client: "HydroServer", **data):
+        super().__init__(client=client, service=None, **data)
+
+        self._workspace = None
+        self._role = Role(client=client, **data.get("role"))
+
+    @property
+    def workspace(self) -> "Workspace":
+        """The workspace this collaborator belongs to."""
+
+        if self._workspace is None:
+            self._workspace = self.client.workspaces.get(uid=self.workspace_id)
+
+        return self._workspace
+
+    @property
+    def role(self) -> "Role":
+        """The role this collaborator is assigned."""
+
+        if self._role is None:
+            self._role = self.client.roles.get(uid=self.role_id)
+
+        return self._role
+
+    @role.setter
+    def role(self, role: Union["Role", UUID, str]):
+        if not role:
+            raise ValueError("Role of collaborator cannot be None.")
+        if normalize_uuid(role) != str(self.role_id):
+            self.role_id = normalize_uuid(role)
+            self._role = None
+
+    def save(self):
+        """Saves changes to this resource to HydroServer."""
+
+        if self.unsaved_changes:
+            self.client.workspaces.edit_collaborator_role(
+                uid=str(self.workspace_id), email=self.user.email, role=self.role
+            )
+            self._role = None
+            self._server_data["role_id"] = self.role_id
+            self.__dict__.update({"role_id": self.role_id})
+
+    def delete(self):
+        """Deletes this resource from HydroServer."""
+
+        self.client.workspaces.remove_collaborator(
+            uid=str(self.workspace_id), email=self.user.email
+        )
+        self.uid = None
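
Because the setter above treats an unchanged role as a no-op, a role change only issues a request when the id actually differs. A sketch, where collaborator and new_role stand in for a Collaborator fetched from the workspace service and a Role (or its UUID/string id), and which assumes unsaved_changes diffs against _server_data as save() suggests:

collaborator.role = new_role   # accepts a Role, UUID, or str; rejects None
collaborator.save()            # calls edit_collaborator_role with the new role
collaborator.save()            # no-op: nothing left in unsaved_changes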

hydroserverpy/api/models/iam/role.py
@@ -0,0 +1,38 @@
+from typing import Optional, Union, TYPE_CHECKING
+from uuid import UUID
+from pydantic import Field
+from ..base import HydroServerBaseModel
+
+if TYPE_CHECKING:
+    from hydroserverpy import HydroServer
+    from hydroserverpy.api.models import Workspace
+
+
+class Role(HydroServerBaseModel):
+    name: str = Field(..., max_length=255)
+    description: str
+    workspace_id: Optional[Union[UUID, str]] = None
+
+    def __init__(self, client: "HydroServer", **data):
+        super().__init__(client=client, service=client.workspaces, **data)
+
+        self._workspace = None
+
+    @classmethod
+    def get_route(cls):
+        return "roles"
+
+    @property
+    def workspace(self) -> "Workspace":
+        """The workspace this role belongs to."""
+
+        if self._workspace is None and self.workspace_id is not None:
+            self._workspace = self.client.workspaces.get(uid=self.workspace_id)
+
+        return self._workspace
+
+    def save(self):
+        raise NotImplementedError("Editing roles not enabled.")
+
+    def delete(self):
+        raise NotImplementedError("Deleting roles not enabled.")