hydroserverpy 1.3.1__py3-none-any.whl → 1.4.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)
  1. hydroserverpy/__init__.py +0 -2
  2. hydroserverpy/api/models/etl/__init__.py +26 -0
  3. hydroserverpy/api/models/etl/data_source.py +107 -72
  4. hydroserverpy/api/models/etl/etl_configuration.py +224 -0
  5. hydroserverpy/api/models/etl/extractors/__init__.py +6 -0
  6. hydroserverpy/{etl → api/models/etl}/extractors/base.py +16 -19
  7. hydroserverpy/{etl → api/models/etl}/extractors/http_extractor.py +7 -8
  8. hydroserverpy/api/models/etl/extractors/local_file_extractor.py +20 -0
  9. hydroserverpy/api/models/etl/factories.py +23 -0
  10. hydroserverpy/api/models/etl/loaders/__init__.py +4 -0
  11. hydroserverpy/{etl → api/models/etl}/loaders/base.py +0 -2
  12. hydroserverpy/api/models/etl/loaders/hydroserver_loader.py +100 -0
  13. hydroserverpy/api/models/etl/schedule.py +16 -0
  14. hydroserverpy/api/models/etl/status.py +14 -0
  15. hydroserverpy/{etl → api/models/etl}/timestamp_parser.py +4 -1
  16. hydroserverpy/api/models/etl/transformers/__init__.py +5 -0
  17. hydroserverpy/api/models/etl/transformers/base.py +137 -0
  18. hydroserverpy/{etl → api/models/etl}/transformers/csv_transformer.py +24 -13
  19. hydroserverpy/{etl → api/models/etl}/transformers/json_transformer.py +21 -6
  20. hydroserverpy/api/services/etl/data_source.py +1 -4
  21. {hydroserverpy-1.3.1.dist-info → hydroserverpy-1.4.0b4.dist-info}/METADATA +1 -1
  22. {hydroserverpy-1.3.1.dist-info → hydroserverpy-1.4.0b4.dist-info}/RECORD +28 -29
  23. hydroserverpy/etl/__init__.py +0 -21
  24. hydroserverpy/etl/extractors/__init__.py +0 -0
  25. hydroserverpy/etl/extractors/local_file_extractor.py +0 -19
  26. hydroserverpy/etl/hydroserver_etl.py +0 -40
  27. hydroserverpy/etl/loaders/__init__.py +0 -0
  28. hydroserverpy/etl/loaders/hydroserver_loader.py +0 -71
  29. hydroserverpy/etl/transformers/__init__.py +0 -0
  30. hydroserverpy/etl/transformers/base.py +0 -64
  31. hydroserverpy/etl_csv/__init__.py +0 -0
  32. hydroserverpy/etl_csv/exceptions.py +0 -14
  33. hydroserverpy/etl_csv/hydroserver_etl_csv.py +0 -342
  34. /hydroserverpy/{etl → api/models/etl}/extractors/ftp_extractor.py +0 -0
  35. /hydroserverpy/{etl → api/models/etl}/types.py +0 -0
  36. {hydroserverpy-1.3.1.dist-info → hydroserverpy-1.4.0b4.dist-info}/WHEEL +0 -0
  37. {hydroserverpy-1.3.1.dist-info → hydroserverpy-1.4.0b4.dist-info}/licenses/LICENSE +0 -0
  38. {hydroserverpy-1.3.1.dist-info → hydroserverpy-1.4.0b4.dist-info}/top_level.txt +0 -0
  39. {hydroserverpy-1.3.1.dist-info → hydroserverpy-1.4.0b4.dist-info}/zip-safe +0 -0
hydroserverpy/api/models/etl/loaders/hydroserver_loader.py
@@ -0,0 +1,100 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING
+
+from .base import Loader
+import logging
+import pandas as pd
+from ..etl_configuration import Payload, SourceTargetMapping
+
+if TYPE_CHECKING:
+    from hydroserverpy.api.client import HydroServer
+
+
+class HydroServerLoader(Loader):
+    """
+    A class that extends the HydroServer client with ETL-specific functionalities.
+    """
+
+    def __init__(self, client: HydroServer, data_source_id):
+        self.client = client
+        self._begin_cache: dict[str, pd.Timestamp] = {}
+        self.data_source_id = data_source_id
+
+    def load(self, data: pd.DataFrame, payload: Payload) -> None:
+        """
+        Load observations from a DataFrame to the HydroServer.
+        :param data: A Pandas DataFrame where each column corresponds to a datastream.
+        """
+        begin_date = self.earliest_begin_date(payload)
+        new_data = data[data["timestamp"] > begin_date]
+        for col in new_data.columns.difference(["timestamp"]):
+            df = (
+                new_data[["timestamp", col]]
+                .rename(columns={col: "value"})
+                .dropna(subset=["value"])
+            )
+            if df.empty:
+                logging.warning(f"No new data for {col}, skipping.")
+                continue
+            logging.info(f"loading dataframe {df}")
+            logging.info(f"dtypes: {df.dtypes}")
+
+            df = df.rename(columns={"timestamp": "phenomenon_time", "value": "result"})
+
+            # Chunked upload
+            CHUNK_SIZE = 5000
+            total = len(df)
+            for start in range(0, total, CHUNK_SIZE):
+                end = min(start + CHUNK_SIZE, total)
+                chunk = df.iloc[start:end]
+                logging.info(
+                    "Uploading %s rows (%s-%s) to datastream %s",
+                    len(chunk),
+                    start,
+                    end - 1,
+                    col,
+                )
+                try:
+                    self.client.datastreams.load_observations(
+                        uid=str(col), observations=chunk
+                    )
+                except Exception as e:
+                    status = getattr(e, "status_code", None) or getattr(
+                        getattr(e, "response", None), "status_code", None
+                    )
+                    if status == 409 or "409" in str(e) or "Conflict" in str(e):
+                        logging.info(
+                            "409 Conflict for datastream %s on rows %s-%s; skipping remainder for this stream.",
+                            col,
+                            start,
+                            end - 1,
+                        )
+                        break
+                    raise
+
+    def _fetch_earliest_begin(
+        self, mappings: list[SourceTargetMapping]
+    ) -> pd.Timestamp:
+        logging.info("Querying HydroServer for earliest begin date for payload...")
+        timestamps = []
+        datastreams = self.client.datastreams.list(
+            data_source=self.data_source_id
+        ).items
+        ds_by_uid = {str(ds.uid): ds for ds in datastreams}
+        for m in mappings:
+            for p in m.paths:
+                datastream = ds_by_uid[str(p.target_identifier)]
+                raw = datastream.phenomenon_end_time or "1970-01-01"
+                ts = pd.to_datetime(raw, utc=True)
+                timestamps.append(ts)
+        logging.info(f"Found earliest begin date: {min(timestamps)}")
+        return min(timestamps)
+
+    def earliest_begin_date(self, payload: Payload) -> pd.Timestamp:
+        """
+        Return earliest begin date for a payload, or compute+cache it on first call.
+        """
+        key = payload.name
+        if key not in self._begin_cache:
+            self._begin_cache[key] = self._fetch_earliest_begin(payload.mappings)
+        return self._begin_cache[key]
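The chunked-upload loop above reduces to a simple slicing pattern. A minimal standalone sketch with a stubbed upload callback (not hydroserverpy's API), shown only to make the chunk-boundary arithmetic concrete:

```python
import pandas as pd

CHUNK_SIZE = 5000  # mirrors the loader's chunk size

def upload_in_chunks(df: pd.DataFrame, load_observations) -> None:
    # iloc slicing is end-exclusive, so each chunk covers rows [start, start + CHUNK_SIZE)
    for start in range(0, len(df), CHUNK_SIZE):
        load_observations(df.iloc[start : start + CHUNK_SIZE])

# 12,000 rows split into 5000 + 5000 + 2000
uploaded = []
df = pd.DataFrame({
    "phenomenon_time": pd.date_range("2024-01-01", periods=12_000, freq="min"),
    "result": range(12_000),
})
upload_in_chunks(df, uploaded.append)
print([len(c) for c in uploaded])  # [5000, 5000, 2000]
```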
hydroserverpy/api/models/etl/schedule.py
@@ -0,0 +1,16 @@
+from datetime import datetime
+from typing import Literal, Optional
+from pydantic import BaseModel, Field
+
+
+class Schedule(BaseModel):
+    interval: int = Field(..., gt=0)
+    interval_units: Optional[Literal["minutes", "hours", "days"]] = Field(
+        None, alias="intervalUnits"
+    )
+    crontab: Optional[str]
+    start_time: Optional[datetime] = Field(None, alias="startTime")
+    end_time: Optional[datetime] = Field(None, alias="endTime")
+
+    class Config:
+        allow_population_by_field_name = True
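Because Schedule populates by alias, camelCase API payloads and snake_case keyword arguments construct the same model. A small sketch assuming pydantic v1 semantics (which `allow_population_by_field_name` implies) and the module path introduced by this diff:

```python
from hydroserverpy.api.models.etl.schedule import Schedule

# camelCase keys, as an API payload would arrive
api_payload = Schedule.parse_obj({"interval": 15, "intervalUnits": "minutes"})
# snake_case field names, accepted via allow_population_by_field_name
local = Schedule(interval=15, interval_units="minutes")

assert api_payload.interval_units == local.interval_units == "minutes"
```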
hydroserverpy/api/models/etl/status.py
@@ -0,0 +1,14 @@
+from datetime import datetime
+from typing import Optional
+from pydantic import BaseModel, Field
+
+
+class Status(BaseModel):
+    paused: bool = Field(False)
+    last_run_successful: Optional[bool] = Field(None, alias="lastRunSuccessful")
+    last_run_message: Optional[str] = Field(None, alias="lastRunMessage")
+    last_run: Optional[datetime] = Field(None, alias="lastRun")
+    next_run: Optional[datetime] = Field(None, alias="nextRun")
+
+    class Config:
+        allow_population_by_field_name = True
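Status mirrors the same alias setup; the detail worth noting is the reverse direction, serializing back to camelCase for the API (again a sketch assuming pydantic v1):

```python
from hydroserverpy.api.models.etl.status import Status

status = Status.parse_obj({"lastRunSuccessful": True, "lastRunMessage": "ok"})
print(status.dict(by_alias=True, exclude_none=True))
# {'paused': False, 'lastRunSuccessful': True, 'lastRunMessage': 'ok'}
```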
hydroserverpy/{etl → api/models/etl}/timestamp_parser.py
@@ -73,7 +73,10 @@ class TimestampParser:
         return localized.dt.tz_convert(timezone.utc)
 
     def parse_series(self, raw_series: pd.Series) -> pd.Series:
-        s = raw_series.str.strip()
+        if pd.api.types.is_datetime64_any_dtype(raw_series):
+            s = raw_series  # already datetimes
+        else:
+            s = raw_series.astype("string", copy=False).str.strip()
         parsed = self._convert_series_to_UTC(s)
 
         if parsed.isna().any():
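The new dtype guard matters because pandas' `.str` accessor raises on datetime64 columns, so a source that already yields parsed timestamps would previously have crashed here. A standalone sketch of the branch (plain pandas, not the parser itself):

```python
import pandas as pd

def to_utc(raw: pd.Series) -> pd.Series:
    if pd.api.types.is_datetime64_any_dtype(raw):
        s = raw  # already datetimes; raw.str.strip() would raise AttributeError
    else:
        s = raw.astype("string", copy=False).str.strip()
    return pd.to_datetime(s, utc=True)

print(to_utc(pd.Series([" 2024-05-01T00:00:00Z "])))      # string path
print(to_utc(pd.Series(pd.to_datetime(["2024-05-01"]))))  # datetime path
```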
hydroserverpy/api/models/etl/transformers/__init__.py
@@ -0,0 +1,5 @@
+from .base import Transformer
+from .json_transformer import JSONTransformer
+from .csv_transformer import CSVTransformer
+
+__all__ = ["Transformer", "JSONTransformer", "CSVTransformer"]
hydroserverpy/api/models/etl/transformers/base.py
@@ -0,0 +1,137 @@
+from abc import ABC, abstractmethod
+import ast
+from functools import lru_cache
+import logging
+import re
+from typing import List, Union
+import pandas as pd
+
+from ..timestamp_parser import TimestampParser
+from ..etl_configuration import MappingPath, TransformerConfig, SourceTargetMapping
+
+ALLOWED_AST = (
+    ast.Expression,
+    ast.BinOp,
+    ast.UnaryOp,
+    ast.Add,
+    ast.Sub,
+    ast.Mult,
+    ast.Div,
+    ast.UAdd,
+    ast.USub,
+    ast.Name,
+    ast.Load,
+    ast.Constant,
+)
+
+
+def _canonicalize_expr(expr: str) -> str:
+    # normalize whitespace for cache hits; parentheses remain intact
+    return re.sub(r"\s+", "", expr)
+
+
+@lru_cache(maxsize=256)
+def _compile_arithmetic_expr_canon(expr_no_ws: str):
+    tree = ast.parse(expr_no_ws, mode="eval")
+    for node in ast.walk(tree):
+        if not isinstance(node, ALLOWED_AST):
+            raise ValueError(
+                "Only +, -, *, / with 'x' and numeric literals are allowed."
+            )
+        if isinstance(node, ast.Name) and node.id != "x":
+            raise ValueError("Only the variable 'x' is allowed.")
+        if isinstance(node, ast.Constant):
+            val = node.value
+            if isinstance(val, bool) or not isinstance(val, (int, float)):
+                raise ValueError("Only numeric literals are allowed.")
+    return compile(tree, "<expr>", "eval")
+
+
+def _compile_arithmetic_expr(expr: str):
+    return _compile_arithmetic_expr_canon(_canonicalize_expr(expr))
+
+
+class Transformer(ABC):
+    def __init__(self, transformer_config: TransformerConfig):
+        self.cfg = transformer_config
+        self.timestamp = transformer_config.timestamp
+        self.timestamp_parser = TimestampParser(self.timestamp)
+
+    @abstractmethod
+    def transform(self, *args, **kwargs) -> None:
+        pass
+
+    @property
+    def needs_datastreams(self) -> bool:
+        return False
+
+    def standardize_dataframe(
+        self, df: pd.DataFrame, mappings: List[SourceTargetMapping]
+    ):
+        logging.info(f"Successfully read payload into dataframe:\n {df}")
+
+        # 1) Normalize timestamp column
+        df.rename(columns={self.timestamp.key: "timestamp"}, inplace=True)
+        if "timestamp" not in df.columns:
+            msg = f"Timestamp column '{self.timestamp.key}' not found in data."
+            logging.error(msg)
+            raise ValueError(msg)
+        logging.info(f"Renamed timestamp column to 'timestamp'")
+
+        df["timestamp"] = self.timestamp_parser.parse_series(df["timestamp"])
+        logging.info(f"Normalized timestamp column \n {df}")
+
+        df = df.drop_duplicates(subset=["timestamp"], keep="last")
+        logging.info(f"Removed duplicates\n")
+
+        def _resolve_source_col(s_id: Union[str, int]) -> str:
+            if isinstance(s_id, int) and s_id not in df.columns:
+                try:
+                    return df.columns[s_id]
+                except IndexError:
+                    raise ValueError(
+                        f"Source index {s_id} is out of range for extracted data."
+                    )
+            if s_id not in df.columns:
+                raise ValueError(f"Source column '{s_id}' not found in extracted data.")
+            return s_id
+
+        def _apply_transformations(series: pd.Series, path: MappingPath) -> pd.Series:
+            out = series  # accumulator for sequential transforms
+            if out.dtype == "object":
+                out = pd.to_numeric(out, errors="coerce")
+
+            for transformation in path.data_transformations:
+                if transformation.type == "expression":
+                    code = _compile_arithmetic_expr(transformation.expression)
+                    try:
+                        out = eval(code, {"__builtins__": {}}, {"x": out})
+                    except Exception as ee:
+                        logging.exception(
+                            "Data transformation failed for expression=%r",
+                            transformation.expression,
+                        )
+                        raise
+                else:
+                    msg = f"Unsupported transformation type: {transformation.type}"
+                    logging.error(msg)
+                    raise ValueError(msg)
+            return out
+
+        # source target mappings may be one to many. Therefore, create a new column for each target and apply transformations
+        transformed_df = pd.DataFrame(index=df.index)
+        for m in mappings:
+            src_col = _resolve_source_col(m.source_identifier)
+            base = df[src_col]
+            for path in m.paths:
+                target_col = str(path.target_identifier)
+                transformed_df[target_col] = _apply_transformations(base, path)
+
+        logging.info(f"Mapped payload sources to targets")
+
+        # 6) Keep only timestamp + target columns
+        df = pd.concat([df[["timestamp"]], pd.DataFrame(transformed_df)], axis=1)
+
+        logging.info(f"standardized dataframe created: {df.shape}")
+
+        return df
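The `_compile_arithmetic_expr` helper above is a whitelist-based alternative to raw `eval`: the parsed AST may only contain `+`, `-`, `*`, `/`, the name `x`, and numeric literals, so calls, attribute access, and imports are rejected before anything executes. A quick sketch of it in use (module path per this diff; `_compile_arithmetic_expr` is a private helper, imported here purely for illustration):

```python
import pandas as pd
from hydroserverpy.api.models.etl.transformers.base import _compile_arithmetic_expr

code = _compile_arithmetic_expr("x * 1.8 + 32")  # e.g. Celsius -> Fahrenheit
celsius = pd.Series([0.0, 20.0, 100.0])
print(eval(code, {"__builtins__": {}}, {"x": celsius}).tolist())  # [32.0, 68.0, 212.0]

try:
    _compile_arithmetic_expr("__import__('os').getcwd()")
except ValueError as err:
    print(err)  # rejected: contains a call/attribute, not just 'x' and numbers
```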
hydroserverpy/{etl → api/models/etl}/transformers/csv_transformer.py
@@ -1,25 +1,28 @@
 from io import StringIO
 import logging
 import pandas as pd
-from typing import Iterable, Union
+from typing import Iterable, List, Union
 from .base import Transformer
+from ..etl_configuration import TransformerConfig, SourceTargetMapping
 
 
 class CSVTransformer(Transformer):
-    def __init__(self, settings: object):
-        super().__init__(settings)
+    def __init__(self, transformer_config: TransformerConfig):
+        super().__init__(transformer_config)
 
         # Pandas is zero-based while CSV is one-based so convert
         self.header_row = (
-            None if settings.get("headerRow") is None else settings["headerRow"] - 1
+            None if self.cfg.header_row is None else self.cfg.header_row - 1
         )
         self.data_start_row = (
-            settings["dataStartRow"] - 1 if "dataStartRow" in settings else 0
+            self.cfg.data_start_row - 1 if self.cfg.data_start_row else 0
         )
-        self.delimiter = settings.get("delimiter", ",")
-        self.identifier_type = settings.get("identifierType", "name")
+        self.delimiter = self.cfg.delimiter or ","
+        self.identifier_type = self.cfg.identifier_type or "name"
 
-    def transform(self, data_file, mappings) -> Union[pd.DataFrame, None]:
+    def transform(
+        self, data_file, mappings: List[SourceTargetMapping]
+    ) -> Union[pd.DataFrame, None]:
         """
         Transforms a CSV file-like object into a Pandas DataFrame where the column
         names are replaced with their target datastream ids.
@@ -31,7 +34,14 @@ class CSVTransformer(Transformer):
         """
 
         clean_file = self._strip_comments(data_file)
-        source_identifiers = [mapping["sourceIdentifier"] for mapping in mappings]
+        use_index = self.identifier_type == "index"
+
+        if use_index:
+            # Users will always interact in 1-based, so if the key is a column index, convert to 0-based to work with Pandas
+            timestamp_pos = int(self.timestamp.key) - 1
+            usecols = [timestamp_pos] + [int(m.source_identifier) - 1 for m in mappings]
+        else:
+            usecols = [self.timestamp.key] + [m.source_identifier for m in mappings]
 
         try:
             # Pandas’ heuristics strip offsets and silently coerce failures to strings.
@@ -42,16 +52,17 @@
                 sep=self.delimiter,
                 header=self.header_row,
                 skiprows=self._build_skiprows(),
-                usecols=[self.timestamp_key] + source_identifiers,
-                dtype={self.timestamp_key: "string"},
+                usecols=usecols,
+                dtype={self.timestamp.key: "string"},
             )
             logging.info(f"CSV file read into dataframe: {df.shape}")
         except Exception as e:
             logging.error(f"Error reading CSV data: {e}")
             return None
 
-        if self.header_row is None:
-            df.columns = list(range(1, len(df.columns) + 1))
+        # In index mode, relabel columns back to original 1-based indices so base transformer can use integer labels directly
+        if use_index:
+            df.columns = [(c + 1) if isinstance(c, int) else c for c in usecols]
 
         return self.standardize_dataframe(df, mappings)
 
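The index-mode bookkeeping above is easy to get off by one: users supply 1-based CSV positions, `pd.read_csv(usecols=...)` wants 0-based, and the columns are then relabeled back to 1-based so later integer lookups line up. A toy round trip (column layout invented for illustration):

```python
from io import StringIO
import pandas as pd

timestamp_key, source_ids = 1, [3]                           # user-facing, 1-based
usecols = [timestamp_key - 1] + [i - 1 for i in source_ids]  # pandas, 0-based

csv = "2024-05-01T00:00:00Z,ignored,4.2\n2024-05-01T00:15:00Z,ignored,4.5\n"
df = pd.read_csv(StringIO(csv), header=None, usecols=usecols)
df.columns = [c + 1 for c in usecols]                        # relabel back to 1-based
print(df[3].tolist())  # [4.2, 4.5]
```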
hydroserverpy/{etl → api/models/etl}/transformers/json_transformer.py
@@ -1,17 +1,18 @@
 import logging
 import pandas as pd
-from typing import Dict, Optional, Any, List
+from typing import Optional, Any, List
 from .base import Transformer
 import json
 import jmespath
+from ..etl_configuration import TransformerConfig, SourceTargetMapping
 
 
 class JSONTransformer(Transformer):
-    def __init__(self, settings: object):
-        super().__init__(settings)
-        self.JMESPath = settings["JMESPath"]
+    def __init__(self, transformer_config: TransformerConfig):
+        super().__init__(transformer_config)
+        self.jmespath = transformer_config.jmespath
 
-    def transform(self, data_file, mappings):
+    def transform(self, data_file, mappings: List[SourceTargetMapping]):
         """
         Transforms a JSON file-like object into the standard Pandas dataframe format.
         Since JMESPath can natively rename column names, the assumption is the timestamp column
@@ -23,7 +24,21 @@ class JSONTransformer(Transformer):
         Returns:
             pd.DataFrame: pandas DataFrames in the format pd.Timestamp, datastream_id_1, datastream_id_2, ...
         """
+        if data_file is None:
+            raise TypeError(
+                "JSONTransformer received None; expected file-like, bytes, or str"
+            )
+
         json_data = json.load(data_file)
+        logging.info(f"Read in json data: \n{data_file}")
+        logging.info(
+            "JSONTransformer cfg:\n jmespath=%r\n ts.key=%r\n ts.format=%r\n ts.custom=%r",
+            self.cfg.jmespath,
+            self.timestamp.key,
+            self.timestamp.format,
+            self.timestamp.custom_format,
+        )
+
         data_points = self.extract_data_points(json_data)
         if not data_points:
             logging.warning("No data points found in the JSON data.")
@@ -35,7 +50,7 @@
 
     def extract_data_points(self, json_data: Any) -> Optional[List[dict]]:
         """Extracts data points from the JSON data using the data_path."""
-        data_points = jmespath.search(self.JMESPath, json_data)
+        data_points = jmespath.search(self.jmespath, json_data)
 
         if isinstance(data_points, dict):
             data_points = [data_points]
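For reference, `jmespath.search` is what drives `extract_data_points`; a JMESPath multiselect hash can rename fields during extraction, and the dict-to-list normalization above covers single-object results. A sketch with an invented document shape:

```python
import jmespath

doc = {"result": {"series": [
    {"time": "2024-05-01T00:00:00Z", "value": 4.2},
    {"time": "2024-05-01T00:15:00Z", "value": 4.5},
]}}

points = jmespath.search("result.series[*].{timestamp: time, value: value}", doc)
if isinstance(points, dict):  # single object -> one-element list, as in the code above
    points = [points]
print(points[0])  # {'timestamp': '2024-05-01T00:00:00Z', 'value': 4.2}
```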
hydroserverpy/api/services/etl/data_source.py
@@ -77,10 +77,7 @@ class DataSourceService(HydroServerBaseService):
                 "paused": paused,
             },
             "datastreamIds": (
-                [
-                    normalize_uuid(datastream)
-                    for datastream in datastreams
-                ]
+                [normalize_uuid(datastream) for datastream in datastreams]
                 if datastreams
                 else []
             ),
{hydroserverpy-1.3.1.dist-info → hydroserverpy-1.4.0b4.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydroserverpy
-Version: 1.3.1
+Version: 1.4.0b4
 Requires-Python: <4,>=3.9
 License-File: LICENSE
 Requires-Dist: requests>=2
{hydroserverpy-1.3.1.dist-info → hydroserverpy-1.4.0b4.dist-info}/RECORD
@@ -1,14 +1,32 @@
-hydroserverpy/__init__.py,sha256=gn3x_C6Pe1Dn90uXn7yIwEhaQm5DE76MhamdMOqF2yM,220
+hydroserverpy/__init__.py,sha256=xnuWIehUYshy05GptyIvHiD52FOjoWOloXAfT1LgP3U,150
 hydroserverpy/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hydroserverpy/api/client.py,sha256=jduKZV2cOkPVRjIjAiVYnTncMfEtW6IaCb895Y_PfiI,5697
 hydroserverpy/api/utils.py,sha256=1RUglpvegBZOcu9BEExxsAzaGOyu4tdUk2JyiBEbzxI,496
 hydroserverpy/api/models/__init__.py,sha256=NLq95t1oC2co5aqVYSw9Pq0RAsLHnLjNq1tsgbMepTg,773
 hydroserverpy/api/models/base.py,sha256=mQZbanDg9t2GN9mOR_XOOtAfYF7AkY0fBZ6fHat6NRs,6944
-hydroserverpy/api/models/etl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hydroserverpy/api/models/etl/__init__.py,sha256=_D8_Nbs06-47wwsHOAF9tOohQYQ52gOhz_NUyfloMUw,699
 hydroserverpy/api/models/etl/data_archive.py,sha256=rnmD_FQ1yjJ0KPBigylAQ3uQ6QBppJtBopJK4oCPLSo,2613
-hydroserverpy/api/models/etl/data_source.py,sha256=4s5JfpF00Heir9T1oc_KAUdI3z5Jj8ce8R56KqQqm5A,3959
+hydroserverpy/api/models/etl/data_source.py,sha256=YWSdudFONe1eniGBkruLRAP8BDyt0tGmZD8hzzByjKU,5123
+hydroserverpy/api/models/etl/etl_configuration.py,sha256=anD_0zlldJKogie15j9SIabJvNqnY5fz5xjuFsFUFwU,6034
+hydroserverpy/api/models/etl/factories.py,sha256=-inTw_C694YieDU4vbrm1qyeZMEYZqVhHSyEQJLMueo,802
 hydroserverpy/api/models/etl/orchestration_configuration.py,sha256=ElSrgi7ioFZJFJg6aGogW5ZZk7fA17y4p--yWwiOhZ0,1367
 hydroserverpy/api/models/etl/orchestration_system.py,sha256=5wdGsXCMqHfE3--zG-3WAPAVPNMPIx99y-7UUhdCink,2060
+hydroserverpy/api/models/etl/schedule.py,sha256=-TxRpYSFbyYkzAPBWOh5udx1s6v1SvLl3_LE2j_b1uE,512
+hydroserverpy/api/models/etl/status.py,sha256=vYT7go7DMcOgy29w0yhHpKz6AdprLmOxWZE9G_DHVdw,503
+hydroserverpy/api/models/etl/timestamp_parser.py,sha256=lDnParK2j2M9TF7qspJDeKFGGpO4d1F2KJEKZ4xH5Yw,4374
+hydroserverpy/api/models/etl/types.py,sha256=4PY3CM-uoXIsf2lhcqtLC6HaRGXe7HKGDU22R8-H35c,135
+hydroserverpy/api/models/etl/extractors/__init__.py,sha256=Z0viw2vk96Ytpz3n7ODtkYz9Zx0I0NsZUbna2ZWvhkw,243
+hydroserverpy/api/models/etl/extractors/base.py,sha256=uLAdi1PrOVMtuCU1ZN_liBW_ElD2mklrBrQ_AZZQtNw,1949
+hydroserverpy/api/models/etl/extractors/ftp_extractor.py,sha256=5LwvHuvLk6LwRSVyE9EkV3DPgVlAvRrOBpl1a8B7dLg,1387
+hydroserverpy/api/models/etl/extractors/http_extractor.py,sha256=AgS0vDmHhN3do1FII-hNEvkK40lDjlS1iftHplWd1No,805
+hydroserverpy/api/models/etl/extractors/local_file_extractor.py,sha256=AwC0T-F8D0S7zR0MUIQXKLfv9b0uU60YoUW615lgNl0,648
+hydroserverpy/api/models/etl/loaders/__init__.py,sha256=rEqYo1Tim7Fzrp1jPhV_yn3ll90dUGMAjcieEqh_4Pk,118
+hydroserverpy/api/models/etl/loaders/base.py,sha256=J3dqm_b6BmEsF7VR3sUxBVQpLJsRx7fTIir5v5TORE0,229
+hydroserverpy/api/models/etl/loaders/hydroserver_loader.py,sha256=0qdsSu4bLD4R86eSyQY2Qwg1HmMsruVjFhBKCrErsBU,3868
+hydroserverpy/api/models/etl/transformers/__init__.py,sha256=YQhjdoRdq4xikLWETnsRIaMvCae5flRpPrfw9lj9pOA,184
+hydroserverpy/api/models/etl/transformers/base.py,sha256=Qt9U6rRFS8aq142n0Cig1wMkVC9-4IBiXIE1nAjEPDY,4971
+hydroserverpy/api/models/etl/transformers/csv_transformer.py,sha256=06AWV9S9we4LRQLpn5WMVl7sX6ylDKPb2KHNC0Jiu7o,3478
+hydroserverpy/api/models/etl/transformers/json_transformer.py,sha256=puKQI8abWJEQTcw34lEHgHjuPOuzcSBv95_txErzchk,2116
 hydroserverpy/api/models/iam/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hydroserverpy/api/models/iam/account.py,sha256=7COk_CPYFlthg1uFWTBlJESfnuqMW90TSjZoIcBb-_8,439
 hydroserverpy/api/models/iam/apikey.py,sha256=Z4iXg_K056naT3ogwc5wzyNnRpxHkOCz0lk-Gim4eL8,3146
@@ -28,7 +46,7 @@ hydroserverpy/api/services/__init__.py,sha256=Nb7rc1Zt8kpRElgFdWPdcyUDrtm7XdJDgz
 hydroserverpy/api/services/base.py,sha256=f7CoQ1m-pdgVwqJsdvE7vcannw-3i7yJgBMI4eHZxAQ,3725
 hydroserverpy/api/services/etl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hydroserverpy/api/services/etl/data_archive.py,sha256=-Pmv9EqNJncVX3gPDIeNM4TsR6fHgOIjmMGt9fGOeYg,5842
-hydroserverpy/api/services/etl/data_source.py,sha256=xR_GQA7IRi-2bMrF6m_kWSmx2xi3knH-GWbGTA3LoQs,5831
+hydroserverpy/api/services/etl/data_source.py,sha256=XWWgbVyhyZxRt4s6wBc9-lnv_O86Bte1Vk3_Aza4HGY,5773
 hydroserverpy/api/services/etl/orchestration_system.py,sha256=Otj_DiFpFBQzSc4Ei7LxneBf3VPnodI0pqoQM2BldcM,1935
 hydroserverpy/api/services/iam/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hydroserverpy/api/services/iam/role.py,sha256=PV0odC_lL9kV3ggrTjAUEMTo0WNUzv4AeMHNAXlkbN8,1137
@@ -41,30 +59,11 @@ hydroserverpy/api/services/sta/result_qualifier.py,sha256=gkgofUqzGXgdkyAvK9RW_d
 hydroserverpy/api/services/sta/sensor.py,sha256=SmrIFNHD_vrlnbZvzsv0Wf0Pexk2oDWQ28LtWdj2kao,3274
 hydroserverpy/api/services/sta/thing.py,sha256=Hyo3zTghSs7IIdsOGRu35i9w-aGOYlK9bl2AnmU4bBs,6666
 hydroserverpy/api/services/sta/unit.py,sha256=NFToSAIGTwDfwYWe8Q-I_f5xsw_GYzFEkMnhSJ-ChvE,2178
-hydroserverpy/etl/__init__.py,sha256=qK2m4LZl8czR3VE8SxrlipSy5tLGLNB60lxD7dD0GjU,659
-hydroserverpy/etl/hydroserver_etl.py,sha256=FSdvM3T7QHEWWulWRT8t-FMHSxAGB4GvleUXtSk5IWc,1507
-hydroserverpy/etl/timestamp_parser.py,sha256=MA_a0qPExbIQGt-ju7w6WflVDMzigW1LKUFCJ_jhkp4,4218
-hydroserverpy/etl/types.py,sha256=4PY3CM-uoXIsf2lhcqtLC6HaRGXe7HKGDU22R8-H35c,135
-hydroserverpy/etl/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hydroserverpy/etl/extractors/base.py,sha256=mK8WotEcG-4cHIW3ExS03wxyKtXPzcDhmo8S_5CGnek,1989
-hydroserverpy/etl/extractors/ftp_extractor.py,sha256=5LwvHuvLk6LwRSVyE9EkV3DPgVlAvRrOBpl1a8B7dLg,1387
-hydroserverpy/etl/extractors/http_extractor.py,sha256=WxWyg-GLyr6Rb-2uCFniWe6Nmk71x-frmxgEYTr9juU,814
-hydroserverpy/etl/extractors/local_file_extractor.py,sha256=WZ4xIg5FiJ5GbVuR71Uj9tw_vVyzGYeweWctKscUSW0,563
-hydroserverpy/etl/loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hydroserverpy/etl/loaders/base.py,sha256=q3pTp8NqZUYF1IxwKp7TOA5b4HuJkhz3FD9tIqpL7iM,273
-hydroserverpy/etl/loaders/hydroserver_loader.py,sha256=N4zu_PefOwMr-NoFvq0g57VumYpNtD6o76oqhmF35ts,2545
-hydroserverpy/etl/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hydroserverpy/etl/transformers/base.py,sha256=BtRNQItt6VY9r1TBMHByOTzOB1rY1QdY8ijqCgl0riI,2259
-hydroserverpy/etl/transformers/csv_transformer.py,sha256=0kWfRKPwiGxCNZ87Q4SiBlfM3PuKL6upc1ljphBY89o,2891
-hydroserverpy/etl/transformers/json_transformer.py,sha256=R7tSyDB4Wn1snP75ctbEDMaMCdjyhPnMzN_W2VV3Mv4,1506
-hydroserverpy/etl_csv/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hydroserverpy/etl_csv/exceptions.py,sha256=0UY8YUlNepG0y6FfH36hJyR1bOhwYHSZIdUSSMTg7GA,314
-hydroserverpy/etl_csv/hydroserver_etl_csv.py,sha256=zZDIpbBTUdm4-9G3gJ8F_IqsLvP5wtGvr4Xy6_5K3tQ,14181
 hydroserverpy/quality/__init__.py,sha256=GGBMkFSXciJLYrbV-NraFrj_mXWCy_GTcy9KKrKXU4c,84
 hydroserverpy/quality/service.py,sha256=U02UfLKVmFvr5ySiH0n0JYzUIabq5uprrHIiwcqBlqY,13879
-hydroserverpy-1.3.1.dist-info/licenses/LICENSE,sha256=xVqFxDw3QOEJukakL7gQCqIMTQ1dlSCTo6Oc1otNW80,1508
-hydroserverpy-1.3.1.dist-info/METADATA,sha256=7p10qFkxjJqbUJglmqEF300RygoDzFsvpHFU0f9STEw,530
-hydroserverpy-1.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-hydroserverpy-1.3.1.dist-info/top_level.txt,sha256=Zf37hrncXLOYvXhgCrf5mZdeq81G9fShdE2LfYbtb7w,14
-hydroserverpy-1.3.1.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-hydroserverpy-1.3.1.dist-info/RECORD,,
+hydroserverpy-1.4.0b4.dist-info/licenses/LICENSE,sha256=xVqFxDw3QOEJukakL7gQCqIMTQ1dlSCTo6Oc1otNW80,1508
+hydroserverpy-1.4.0b4.dist-info/METADATA,sha256=GCgulq3Im1uhFlRJtRg54dwOIqHK3wFfxAXpT6hlDlA,532
+hydroserverpy-1.4.0b4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hydroserverpy-1.4.0b4.dist-info/top_level.txt,sha256=Zf37hrncXLOYvXhgCrf5mZdeq81G9fShdE2LfYbtb7w,14
+hydroserverpy-1.4.0b4.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+hydroserverpy-1.4.0b4.dist-info/RECORD,,
hydroserverpy/etl/__init__.py (deleted)
@@ -1,21 +0,0 @@
-from .extractors.local_file_extractor import LocalFileExtractor
-from .extractors.ftp_extractor import FTPExtractor
-from .extractors.http_extractor import HTTPExtractor
-from .transformers.csv_transformer import CSVTransformer
-from .transformers.json_transformer import JSONTransformer
-from .transformers.base import Transformer
-from .extractors.base import Extractor
-from .loaders.base import Loader
-from .loaders.hydroserver_loader import HydroServerLoader
-
-__all__ = [
-    "CSVTransformer",
-    "JSONTransformer",
-    "LocalFileExtractor",
-    "FTPExtractor",
-    "HTTPExtractor",
-    "Extractor",
-    "Transformer",
-    "Loader",
-    "HydroServerLoader",
-]
File without changes
hydroserverpy/etl/extractors/local_file_extractor.py (deleted)
@@ -1,19 +0,0 @@
-import logging
-from .base import Extractor
-
-
-class LocalFileExtractor(Extractor):
-    def __init__(self, settings: object):
-        super().__init__(settings)
-
-    def extract(self):
-        """
-        Opens the file and returns a file-like object.
-        """
-        try:
-            file_handle = open(self.source_uri, "r")
-            logging.info(f"Successfully opened file '{self.source_uri}'.")
-            return file_handle
-        except Exception as e:
-            logging.error(f"Error opening file '{self.source_uri}': {e}")
-            return None
hydroserverpy/etl/hydroserver_etl.py (deleted)
@@ -1,40 +0,0 @@
-import logging
-import pandas as pd
-
-
-class HydroServerETL:
-    def __init__(self, extractor, transformer, loader, source_target_map):
-        self.extractor = extractor
-        self.transformer = transformer
-        self.loader = loader
-        self.source_target_map = source_target_map
-
-    def run(self):
-        """
-        Extracts, transforms, and loads data as defined by the class parameters.
-        """
-
-        # Step 1: Get Target System data requirements from the Loader & prepare parameters for the Extractor
-        data_requirements = self.loader.get_data_requirements(self.source_target_map)
-        self.extractor.prepare_params(data_requirements)
-
-        # Step 2: Extract
-        data = self.extractor.extract()
-        if data is None or (isinstance(data, pd.DataFrame) and data.empty):
-            logging.warning(f"No data was returned from the extractor. Ending ETL run.")
-            return
-        else:
-            logging.info(f"Successfully extracted data.")
-
-        # Step 3: Transform
-        if self.transformer:
-            data = self.transformer.transform(data)
-            if data is None or (isinstance(data, pd.DataFrame) and data.empty):
-                logging.warning(f"No data returned from the transformer. Ending run.")
-                return
-            else:
-                logging.info(f"Successfully transformed data. {data}")
-
-        # Step 4: Load
-        self.loader.load(data, self.source_target_map)
-        logging.info("Successfully loaded data.")
File without changes