hydroserverpy 1.3.0b3__tar.gz → 1.4.0b3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. {hydroserverpy-1.3.0b3/src/hydroserverpy.egg-info → hydroserverpy-1.4.0b3}/PKG-INFO +1 -1
  2. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/README.md +2 -1
  3. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/setup.cfg +1 -1
  4. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/__init__.py +0 -2
  5. hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/etl/__init__.py +26 -0
  6. hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/etl/data_source.py +146 -0
  7. hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/etl/etl_configuration.py +224 -0
  8. hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/etl/extractors/__init__.py +6 -0
  9. {hydroserverpy-1.3.0b3/src/hydroserverpy → hydroserverpy-1.4.0b3/src/hydroserverpy/api/models}/etl/extractors/base.py +16 -19
  10. {hydroserverpy-1.3.0b3/src/hydroserverpy → hydroserverpy-1.4.0b3/src/hydroserverpy/api/models}/etl/extractors/http_extractor.py +5 -3
  11. hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/etl/extractors/local_file_extractor.py +20 -0
  12. hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/etl/factories.py +23 -0
  13. hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/etl/loaders/__init__.py +4 -0
  14. {hydroserverpy-1.3.0b3/src/hydroserverpy → hydroserverpy-1.4.0b3/src/hydroserverpy/api/models}/etl/loaders/base.py +0 -2
  15. hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/etl/loaders/hydroserver_loader.py +100 -0
  16. hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/etl/schedule.py +16 -0
  17. hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/etl/status.py +14 -0
  18. hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/etl/transformers/__init__.py +5 -0
  19. hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/etl/transformers/base.py +128 -0
  20. {hydroserverpy-1.3.0b3/src/hydroserverpy → hydroserverpy-1.4.0b3/src/hydroserverpy/api/models}/etl/transformers/csv_transformer.py +24 -13
  21. {hydroserverpy-1.3.0b3/src/hydroserverpy → hydroserverpy-1.4.0b3/src/hydroserverpy/api/models}/etl/transformers/json_transformer.py +7 -6
  22. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/etl/data_source.py +1 -4
  23. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3/src/hydroserverpy.egg-info}/PKG-INFO +1 -1
  24. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy.egg-info/SOURCES.txt +18 -19
  25. hydroserverpy-1.3.0b3/src/hydroserverpy/api/models/etl/data_source.py +0 -111
  26. hydroserverpy-1.3.0b3/src/hydroserverpy/api/services/sta/__init__.py +0 -0
  27. hydroserverpy-1.3.0b3/src/hydroserverpy/etl/__init__.py +0 -21
  28. hydroserverpy-1.3.0b3/src/hydroserverpy/etl/extractors/__init__.py +0 -0
  29. hydroserverpy-1.3.0b3/src/hydroserverpy/etl/extractors/local_file_extractor.py +0 -19
  30. hydroserverpy-1.3.0b3/src/hydroserverpy/etl/hydroserver_etl.py +0 -40
  31. hydroserverpy-1.3.0b3/src/hydroserverpy/etl/loaders/__init__.py +0 -0
  32. hydroserverpy-1.3.0b3/src/hydroserverpy/etl/loaders/hydroserver_loader.py +0 -71
  33. hydroserverpy-1.3.0b3/src/hydroserverpy/etl/transformers/__init__.py +0 -0
  34. hydroserverpy-1.3.0b3/src/hydroserverpy/etl/transformers/base.py +0 -64
  35. hydroserverpy-1.3.0b3/src/hydroserverpy/etl_csv/__init__.py +0 -0
  36. hydroserverpy-1.3.0b3/src/hydroserverpy/etl_csv/exceptions.py +0 -14
  37. hydroserverpy-1.3.0b3/src/hydroserverpy/etl_csv/hydroserver_etl_csv.py +0 -346
  38. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/LICENSE +0 -0
  39. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/pyproject.toml +0 -0
  40. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/setup.py +0 -0
  41. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/__init__.py +0 -0
  42. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/client.py +0 -0
  43. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/__init__.py +0 -0
  44. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/base.py +0 -0
  45. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/etl/data_archive.py +0 -0
  46. {hydroserverpy-1.3.0b3/src/hydroserverpy → hydroserverpy-1.4.0b3/src/hydroserverpy/api/models}/etl/extractors/ftp_extractor.py +0 -0
  47. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/etl/orchestration_configuration.py +0 -0
  48. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/etl/orchestration_system.py +0 -0
  49. {hydroserverpy-1.3.0b3/src/hydroserverpy → hydroserverpy-1.4.0b3/src/hydroserverpy/api/models}/etl/timestamp_parser.py +0 -0
  50. {hydroserverpy-1.3.0b3/src/hydroserverpy → hydroserverpy-1.4.0b3/src/hydroserverpy/api/models}/etl/types.py +0 -0
  51. {hydroserverpy-1.3.0b3/src/hydroserverpy/api/models/etl → hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/iam}/__init__.py +0 -0
  52. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/iam/account.py +0 -0
  53. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/iam/apikey.py +0 -0
  54. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/iam/collaborator.py +0 -0
  55. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/iam/role.py +0 -0
  56. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/iam/workspace.py +0 -0
  57. {hydroserverpy-1.3.0b3/src/hydroserverpy/api/models/iam → hydroserverpy-1.4.0b3/src/hydroserverpy/api/models/sta}/__init__.py +0 -0
  58. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/sta/datastream.py +0 -0
  59. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/sta/observation.py +0 -0
  60. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/sta/observed_property.py +0 -0
  61. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/sta/processing_level.py +0 -0
  62. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/sta/result_qualifier.py +0 -0
  63. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/sta/sensor.py +0 -0
  64. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/sta/thing.py +0 -0
  65. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/models/sta/unit.py +0 -0
  66. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/__init__.py +0 -0
  67. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/base.py +0 -0
  68. {hydroserverpy-1.3.0b3/src/hydroserverpy/api/models/sta → hydroserverpy-1.4.0b3/src/hydroserverpy/api/services/etl}/__init__.py +0 -0
  69. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/etl/data_archive.py +0 -0
  70. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/etl/orchestration_system.py +0 -0
  71. {hydroserverpy-1.3.0b3/src/hydroserverpy/api/services/etl → hydroserverpy-1.4.0b3/src/hydroserverpy/api/services/iam}/__init__.py +0 -0
  72. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/iam/role.py +0 -0
  73. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/iam/workspace.py +0 -0
  74. {hydroserverpy-1.3.0b3/src/hydroserverpy/api/services/iam → hydroserverpy-1.4.0b3/src/hydroserverpy/api/services/sta}/__init__.py +0 -0
  75. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/sta/datastream.py +0 -0
  76. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/sta/observed_property.py +0 -0
  77. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/sta/processing_level.py +0 -0
  78. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/sta/result_qualifier.py +0 -0
  79. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/sta/sensor.py +0 -0
  80. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/sta/thing.py +0 -0
  81. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/services/sta/unit.py +0 -0
  82. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/api/utils.py +0 -0
  83. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/quality/__init__.py +0 -0
  84. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy/quality/service.py +0 -0
  85. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy.egg-info/dependency_links.txt +0 -0
  86. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy.egg-info/requires.txt +0 -0
  87. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy.egg-info/top_level.txt +0 -0
  88. {hydroserverpy-1.3.0b3 → hydroserverpy-1.4.0b3}/src/hydroserverpy.egg-info/zip-safe +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hydroserverpy
3
- Version: 1.3.0b3
3
+ Version: 1.4.0b3
4
4
  Requires-Python: <4,>=3.9
5
5
  License-File: LICENSE
6
6
  Requires-Dist: requests>=2
@@ -40,4 +40,5 @@ hs_api = HydroServer(
40
40
 
41
41
  ## Funding and Acknowledgements
42
42
 
43
- Funding for this project was provided by the National Oceanic & Atmospheric Administration (NOAA), awarded to the Cooperative Institute for Research to Operations in Hydrology (CIROH) through the NOAA Cooperative Agreement with The University of Alabama (NA22NWS4320003).
43
+ Funding for this project was provided by the National Oceanic & Atmospheric Administration (NOAA), awarded to the Cooperative Institute for Research to Operations in Hydrology (CIROH) through the NOAA Cooperative Agreement with The University of Alabama (NA22NWS4320003). Utah State University is a founding member of CIROH and receives funding under subaward from the University of Alabama. Additional funding and support have been provided by the State of Utah Division of Water Rights, the World Meteorological Organization, and the Utah Water Research Laboratory at Utah State University.
44
+
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = hydroserverpy
3
- version = 1.3.0b3
3
+ version = 1.4.0b3
4
4
 
5
5
  [options]
6
6
  package_dir =
@@ -1,9 +1,7 @@
1
1
  from .api.client import HydroServer
2
- from .etl.hydroserver_etl import HydroServerETL
3
2
  from .quality import HydroServerQualityControl
4
3
 
5
4
  __all__ = [
6
5
  "HydroServer",
7
6
  "HydroServerQualityControl",
8
- "HydroServerETL",
9
7
  ]
@@ -0,0 +1,26 @@
1
+ from .extractors import Extractor, HTTPExtractor, LocalFileExtractor, FTPExtractor
2
+ from .transformers import JSONTransformer, CSVTransformer, Transformer
3
+ from .loaders import HydroServerLoader, Loader
4
+
5
+ from .etl_configuration import EtlConfiguration
6
+ from .schedule import Schedule
7
+ from .status import Status
8
+ from .orchestration_system import OrchestrationSystem
9
+ from .data_source import DataSource
10
+
11
+ __all__ = [
12
+ "CSVTransformer",
13
+ "JSONTransformer",
14
+ "LocalFileExtractor",
15
+ "FTPExtractor",
16
+ "HTTPExtractor",
17
+ "Extractor",
18
+ "Transformer",
19
+ "Loader",
20
+ "HydroServerLoader",
21
+ "EtlConfiguration",
22
+ "Schedule",
23
+ "Status",
24
+ "OrchestrationSystem",
25
+ "DataSource",
26
+ ]
@@ -0,0 +1,146 @@
1
+ from __future__ import annotations
2
+ from datetime import datetime, timedelta, timezone
3
+ from functools import cached_property
4
+ import logging
5
+ import uuid
6
+ from typing import ClassVar, TYPE_CHECKING, List, Optional, Union
7
+ import croniter
8
+ import pandas as pd
9
+ from pydantic import Field
10
+
11
+ from ..base import HydroServerBaseModel
12
+ from ..sta.datastream import Datastream
13
+ from .orchestration_system import OrchestrationSystem
14
+ from .etl_configuration import EtlConfiguration
15
+ from .schedule import Schedule
16
+ from .status import Status
17
+ from .factories import extractor_factory, transformer_factory, loader_factory
18
+ from .loaders import HydroServerLoader
19
+
20
+ if TYPE_CHECKING:
21
+ from hydroserverpy import HydroServer
22
+ from hydroserverpy.api.models import Workspace
23
+
24
+
25
+ class DataSource(HydroServerBaseModel):
26
+ name: str = Field(..., max_length=255)
27
+ settings: EtlConfiguration
28
+ orchestration_system_id: uuid.UUID
29
+ schedule: Schedule
30
+ status: Status
31
+ workspace_id: uuid.UUID
32
+
33
+ _editable_fields: ClassVar[set[str]] = {
34
+ "name",
35
+ "settings",
36
+ "status",
37
+ "schedule",
38
+ "interval",
39
+ "interval_units",
40
+ "crontab",
41
+ "start_time",
42
+ "end_time",
43
+ "last_run_successful",
44
+ "last_run_message",
45
+ "last_run",
46
+ "next_run",
47
+ "paused",
48
+ }
49
+
50
+ def __init__(self, client: HydroServer, **data):
51
+ super().__init__(client=client, service=client.datasources, **data)
52
+
53
+ @classmethod
54
+ def get_route(cls):
55
+ return "data-sources"
56
+
57
+ @cached_property
58
+ def workspace(self) -> Workspace:
59
+ return self.client.workspaces.get(uid=self.workspace_id)
60
+
61
+ @cached_property
62
+ def orchestration_system(self) -> OrchestrationSystem:
63
+ return self.client.orchestrationsystems.get(uid=self.orchestration_system_id)
64
+
65
+ @cached_property
66
+ def datastreams(self) -> List[Datastream]:
67
+ return self.client.datastreams.list(data_source=self.uid, fetch_all=True).items
68
+
69
+ # TODO: Add functions like add_payload, add_mapping, etc. and don't allow the user to manually
70
+ # link or unlink datastreams - handle that automatically.
71
+ def add_datastream(self, datastream: Union["Datastream", uuid.UUID, str]):
72
+ """Add a datastream to this data source."""
73
+
74
+ self.client.datasources.add_datastream(uid=self.uid, datastream=datastream)
75
+
76
+ def remove_datastream(self, datastream: Union["Datastream", uuid.UUID, str]):
77
+ """Remove a datastream from this data source."""
78
+
79
+ self.client.datasources.remove_datastream(uid=self.uid, datastream=datastream)
80
+
81
+ def _next_run(self) -> Optional[str]:
82
+ now = datetime.now(timezone.utc)
83
+ if cron := self.schedule.crontab:
84
+ return croniter.croniter(cron, now).get_next(datetime).isoformat()
85
+ if iv := self.schedule.interval:
86
+ unit = self.schedule.interval_units or "minutes"
87
+ return (now + timedelta(**{unit: iv})).isoformat()
88
+ return None
89
+
90
+ def _update_status(self, loader: HydroServerLoader, success: bool, msg: str):
91
+ short_msg = msg if len(msg) <= 255 else msg[:252] + "…"
92
+ loader.client.datasources.update(
93
+ uid=self.uid,
94
+ last_run=datetime.now(timezone.utc).isoformat(),
95
+ last_run_successful=success,
96
+ last_run_message=short_msg,
97
+ next_run=self._next_run(),
98
+ )
99
+
100
+ def is_empty(self, data):
101
+ if data is None:
102
+ return True
103
+ if isinstance(data, pd.DataFrame) and data.empty:
104
+ return True
105
+ return False
106
+
107
+ def load_data(self, payload_name: str = None):
108
+ """Load data for this data source."""
109
+ if self.status.paused is True:
110
+ return
111
+
112
+ if payload_name:
113
+ self.load_data_for_payload(payload_name)
114
+ else:
115
+ for p in self.settings.payloads:
116
+ self.load_data_for_payload(p.name)
117
+
118
+ def load_data_for_payload(self, payload_name: str):
119
+ payload = next(p for p in self.settings.payloads if p.name == payload_name)
120
+
121
+ extractor_cls = extractor_factory(self.settings.extractor)
122
+ transformer_cls = transformer_factory(self.settings.transformer)
123
+ loader_cls = loader_factory(self.settings.loader, self.client, self.uid)
124
+
125
+ try:
126
+ logging.info("Starting extract")
127
+ data = extractor_cls.extract(payload, loader_cls)
128
+ if self.is_empty(data):
129
+ self._update_status(
130
+ loader_cls, True, "No data returned from the extractor"
131
+ )
132
+ return
133
+
134
+ logging.info("Starting transform")
135
+ data = transformer_cls.transform(data, payload.mappings)
136
+ if self.is_empty(data):
137
+ self._update_status(
138
+ loader_cls, True, "No data returned from the transformer"
139
+ )
140
+ return
141
+
142
+ logging.info("Starting load")
143
+ loader_cls.load(data, payload)
144
+ self._update_status(loader_cls, True, "OK")
145
+ except Exception as e:
146
+ self._update_status(loader_cls, False, str(e))
@@ -0,0 +1,224 @@
1
+ from typing import Annotated, Dict, List, Literal, Optional, Union
2
+ from pydantic import BaseModel, Field, field_validator
3
+ from enum import Enum
4
+
5
+ WorkflowType = Literal["ETL", "Aggregation", "Virtual", "SDL"]
6
+ CSVDelimiterType = Literal[",", "|", "\t", ";", " "]
7
+ ExtractorType = Literal["HTTP", "local"]
8
+ TransformerType = Literal["JSON", "CSV"]
9
+ LoaderType = Literal["HydroServer"]
10
+ IdentifierType = Literal["name", "index"]
11
+ RunTimeValue = Literal["jobExecutionTime", "latestObservationTimestamp"]
12
+
13
+
14
+ class FixedOffsetTimezone(str, Enum):
15
+ UTC_MINUS_1200 = "-1200"
16
+ UTC_MINUS_1100 = "-1100"
17
+ UTC_MINUS_1000 = "-1000"
18
+ UTC_MINUS_0900 = "-0900"
19
+ UTC_MINUS_0800 = "-0800"
20
+ UTC_MINUS_0700 = "-0700"
21
+ UTC_MINUS_0600 = "-0600"
22
+ UTC_MINUS_0500 = "-0500"
23
+ UTC_MINUS_0430 = "-0430"
24
+ UTC_MINUS_0400 = "-0400"
25
+ UTC_MINUS_0330 = "-0330"
26
+ UTC_MINUS_0300 = "-0300"
27
+ UTC_MINUS_0200 = "-0200"
28
+ UTC_MINUS_0100 = "-0100"
29
+ UTC_PLUS_0000 = "+0000"
30
+ UTC_PLUS_0100 = "+0100"
31
+ UTC_PLUS_0200 = "+0200"
32
+ UTC_PLUS_0300 = "+0300"
33
+ UTC_PLUS_0330 = "+0330"
34
+ UTC_PLUS_0400 = "+0400"
35
+ UTC_PLUS_0430 = "+0430"
36
+ UTC_PLUS_0500 = "+0500"
37
+ UTC_PLUS_0530 = "+0530"
38
+ UTC_PLUS_0545 = "+0545"
39
+ UTC_PLUS_0600 = "+0600"
40
+ UTC_PLUS_0630 = "+0630"
41
+ UTC_PLUS_0700 = "+0700"
42
+ UTC_PLUS_0800 = "+0800"
43
+ UTC_PLUS_0845 = "+0845"
44
+ UTC_PLUS_0900 = "+0900"
45
+ UTC_PLUS_0930 = "+0930"
46
+ UTC_PLUS_1000 = "+1000"
47
+ UTC_PLUS_1030 = "+1030"
48
+ UTC_PLUS_1100 = "+1100"
49
+ UTC_PLUS_1130 = "+1130"
50
+ UTC_PLUS_1200 = "+1200"
51
+ UTC_PLUS_1245 = "+1245"
52
+ UTC_PLUS_1300 = "+1300"
53
+ UTC_PLUS_1400 = "+1400"
54
+
55
+
56
+ class TimestampFormat(str, Enum):
57
+ ISO8601 = "ISO8601"
58
+ naive = "naive"
59
+ custom = "custom"
60
+
61
+
62
+ class TimezoneMode(str, Enum):
63
+ utc = "utc" # always UTC
64
+ daylightSavings = "daylightSavings" # IANA / DST-aware
65
+ fixedOffset = "fixedOffset" # constant offset
66
+ embeddedOffset = "embeddedOffset" # offset in ISO string
67
+
68
+
69
+ class Timestamp(BaseModel):
70
+ key: Optional[str] = None
71
+ format: TimestampFormat
72
+ custom_format: Optional[str] = Field(None, alias="customFormat")
73
+ timezone_mode: TimezoneMode = Field(..., alias="timezoneMode")
74
+ timezone: Optional[Union[FixedOffsetTimezone, str]] = Field(None, alias="timezone")
75
+
76
+ class Config:
77
+ allow_population_by_field_name = True
78
+
79
+ @field_validator("timezone")
80
+ def check_timezone(cls, timezone_value, info):
81
+ mode = info.data.get("timezone_mode")
82
+ if mode == TimezoneMode.fixedOffset and timezone_value is None:
83
+ raise ValueError("`timezone` must be set when timezoneMode is fixedOffset")
84
+ return timezone_value
85
+
86
+
87
+ class PerPayloadPlaceholder(BaseModel):
88
+ name: str
89
+ type: Literal["perPayload"]
90
+
91
+
92
+ class RunTimePlaceholder(BaseModel):
93
+ name: str
94
+ type: Literal["runTime"]
95
+ run_time_value: RunTimeValue = Field(..., alias="runTimeValue")
96
+ timestamp: Timestamp
97
+
98
+ class Config:
99
+ allow_population_by_field_name = True
100
+
101
+
102
+ PlaceholderVariable = Annotated[
103
+ Union[PerPayloadPlaceholder, RunTimePlaceholder],
104
+ Field(discriminator="type"),
105
+ ]
106
+
107
+
108
+ class BaseExtractor(BaseModel):
109
+ type: ExtractorType
110
+ source_uri: str = Field(..., alias="sourceUri")
111
+ placeholder_variables: Optional[List[PlaceholderVariable]] = Field(
112
+ default_factory=list,
113
+ alias="placeholderVariables",
114
+ )
115
+
116
+ class Config:
117
+ allow_population_by_field_name = True
118
+
119
+
120
+ class HTTPExtractor(BaseExtractor):
121
+ type: Literal["HTTP"]
122
+
123
+
124
+ class LocalFileExtractor(BaseExtractor):
125
+ type: Literal["local"]
126
+
127
+
128
+ ExtractorConfig = Annotated[
129
+ Union[HTTPExtractor, LocalFileExtractor], Field(discriminator="type")
130
+ ]
131
+
132
+
133
+ class BaseTransformer(BaseModel):
134
+ type: TransformerType
135
+ timestamp: Timestamp
136
+
137
+
138
+ class JSONTransformer(BaseTransformer):
139
+ type: Literal["JSON"]
140
+ jmespath: str = Field(..., alias="JMESPath")
141
+
142
+ class Config:
143
+ allow_population_by_field_name = True
144
+
145
+
146
+ class CSVTransformer(BaseTransformer):
147
+ type: Literal["CSV"]
148
+ header_row: Optional[int] = Field(..., alias="headerRow")
149
+ data_start_row: int = Field(..., alias="dataStartRow")
150
+ delimiter: CSVDelimiterType
151
+ identifier_type: IdentifierType = Field(..., alias="identifierType")
152
+
153
+ class Config:
154
+ allow_population_by_field_name = True
155
+
156
+
157
+ TransformerConfig = Union[JSONTransformer, CSVTransformer]
158
+
159
+
160
+ class BaseLoaderConfig(BaseModel):
161
+ type: LoaderType
162
+
163
+
164
+ class HydroServerLoaderConfig(BaseLoaderConfig):
165
+ type: Literal["HydroServer"]
166
+
167
+
168
+ LoaderConfig = HydroServerLoaderConfig
169
+
170
+
171
+ class ExpressionDataTransformation(BaseModel):
172
+ type: Literal["expression"]
173
+ expression: str
174
+
175
+ class Config:
176
+ allow_population_by_field_name = True
177
+
178
+
179
+ class LookupTableDataTransformation(BaseModel):
180
+ type: Literal["lookup"]
181
+ lookup_table_id: str = Field(..., alias="lookupTableId")
182
+
183
+ class Config:
184
+ allow_population_by_field_name = True
185
+
186
+
187
+ DataTransformation = Union[ExpressionDataTransformation, LookupTableDataTransformation]
188
+
189
+
190
+ class MappingPath(BaseModel):
191
+ target_identifier: Union[str, int] = Field(..., alias="targetIdentifier")
192
+ data_transformations: List[DataTransformation] = Field(
193
+ default_factory=list, alias="dataTransformations"
194
+ )
195
+
196
+ class Config:
197
+ allow_population_by_field_name = True
198
+
199
+
200
+ class SourceTargetMapping(BaseModel):
201
+ source_identifier: Union[str, int] = Field(..., alias="sourceIdentifier")
202
+ paths: List[MappingPath] = Field(default_factory=list)
203
+
204
+ class Config:
205
+ allow_population_by_field_name = True
206
+
207
+
208
+ class Payload(BaseModel):
209
+ name: str = ""
210
+ mappings: List[SourceTargetMapping] = Field(default_factory=list)
211
+ extractor_variables: Dict[str, str] = Field(
212
+ default_factory=dict, alias="extractorVariables"
213
+ )
214
+
215
+ class Config:
216
+ allow_population_by_field_name = True
217
+
218
+
219
+ class EtlConfiguration(BaseModel):
220
+ type: WorkflowType
221
+ extractor: ExtractorConfig
222
+ transformer: TransformerConfig
223
+ loader: LoaderConfig
224
+ payloads: List[Payload]
@@ -0,0 +1,6 @@
1
+ from .base import Extractor
2
+ from .ftp_extractor import FTPExtractor
3
+ from .http_extractor import HTTPExtractor
4
+ from .local_file_extractor import LocalFileExtractor
5
+
6
+ __all__ = ["Extractor", "HTTPExtractor", "LocalFileExtractor", "FTPExtractor"]
@@ -2,49 +2,46 @@ from abc import abstractmethod
2
2
  import logging
3
3
  import pandas as pd
4
4
  from datetime import datetime
5
-
6
- from hydroserverpy.etl.timestamp_parser import TimestampParser
5
+ from ..etl_configuration import ExtractorConfig, Payload
6
+ from ..timestamp_parser import TimestampParser
7
7
 
8
8
 
9
9
  class Extractor:
10
- def __init__(self, settings: dict):
11
- self.settings = settings
12
- self.source_uri = settings["sourceUri"]
10
+ def __init__(self, extractor_config: ExtractorConfig):
11
+ self.cfg = extractor_config
13
12
 
14
- def resolve_placeholder_variables(self, payload, loader):
13
+ def resolve_placeholder_variables(self, payload: Payload, loader):
15
14
  logging.info(f"Creating runtime variables...")
16
15
  filled = {}
17
- for var in self.settings.get("placeholderVariables", []):
18
- name = var["name"]
19
- var_type = var.get("type", None)
16
+ for placeholder in self.cfg.placeholder_variables:
17
+ name = placeholder.name
20
18
 
21
- if var_type == "runTime":
19
+ if placeholder.type == "runTime":
22
20
  logging.info(f"Resolving runtime var: {name}")
23
- if var.get("runTimeValue", None) == "latestObservationTimestamp":
21
+ if placeholder.run_time_value == "latestObservationTimestamp":
24
22
  value = loader.earliest_begin_date(payload)
25
- elif var.get("runTimeValue", None) == "jobExecutionTime":
23
+ elif placeholder.run_time_value == "jobExecutionTime":
26
24
  value = pd.Timestamp.now(tz="UTC")
27
- elif var_type == "perPayload":
25
+ elif placeholder.type == "perPayload":
28
26
  logging.info(f"Resolving payload var: {name}")
29
- payload_vars = payload.get("extractorVariables", {})
30
- if name not in payload_vars:
27
+ if name not in payload.extractor_variables:
31
28
  raise KeyError(f"Missing per-payload variable '{name}'")
32
- value = payload_vars[name]
29
+ value = payload.extractor_variables[name]
33
30
  else:
34
31
  continue
35
32
 
36
33
  if isinstance(value, (datetime, pd.Timestamp)):
37
- parser = TimestampParser(var["timestamp"])
34
+ parser = TimestampParser(placeholder.timestamp)
38
35
  value = parser.utc_to_string(value)
39
36
 
40
37
  filled[name] = value
41
38
  if not filled:
42
- return self.source_uri
39
+ return self.cfg.source_uri
43
40
  return self.format_uri(filled)
44
41
 
45
42
  def format_uri(self, placeholder_variables):
46
43
  try:
47
- uri = self.source_uri.format(**placeholder_variables)
44
+ uri = self.cfg.source_uri.format(**placeholder_variables)
48
45
  except KeyError as e:
49
46
  missing_key = e.args[0]
50
47
  raise KeyError(f"Missing placeholder variable: {missing_key}")
@@ -1,14 +1,16 @@
1
1
  import logging
2
2
  import requests
3
3
  from io import BytesIO
4
- from .base import Extractor
4
+
5
+ from ..etl_configuration import Payload
6
+ from .base import Extractor, ExtractorConfig
5
7
 
6
8
 
7
9
  class HTTPExtractor(Extractor):
8
- def __init__(self, settings: dict):
10
+ def __init__(self, settings: ExtractorConfig):
9
11
  super().__init__(settings)
10
12
 
11
- def extract(self, payload, loader=None):
13
+ def extract(self, payload: Payload, loader=None):
12
14
  """
13
15
  Downloads the file from the HTTP/HTTPS server and returns a file-like object.
14
16
  """
@@ -0,0 +1,20 @@
1
+ import logging
2
+ from .base import Extractor
3
+ from ..etl_configuration import ExtractorConfig
4
+
5
+
6
+ class LocalFileExtractor(Extractor):
7
+ def __init__(self, extractor_config: ExtractorConfig):
8
+ super().__init__(extractor_config)
9
+
10
+ def extract(self):
11
+ """
12
+ Opens the file and returns a file-like object.
13
+ """
14
+ try:
15
+ file_handle = open(self.cfg.source_uri, "r")
16
+ logging.info(f"Successfully opened file '{self.cfg.source_uri}'.")
17
+ return file_handle
18
+ except Exception as e:
19
+ logging.error(f"Error opening file '{self.cfg.source_uri}': {e}")
20
+ return None
@@ -0,0 +1,23 @@
1
+ from .extractors import HTTPExtractor, LocalFileExtractor
2
+ from .transformers import JSONTransformer, CSVTransformer
3
+ from .loaders import HydroServerLoader
4
+ from .etl_configuration import ExtractorConfig, TransformerConfig, LoaderConfig
5
+
6
+ EXTRACTORS = {"HTTP": HTTPExtractor, "local": LocalFileExtractor}
7
+ TRANSFORMERS = {"JSON": JSONTransformer, "CSV": CSVTransformer}
8
+ LOADERS = {"HydroServer": HydroServerLoader}
9
+
10
+
11
+ def extractor_factory(settings: ExtractorConfig):
12
+ cls = EXTRACTORS[settings.type]
13
+ return cls(settings)
14
+
15
+
16
+ def transformer_factory(settings: TransformerConfig):
17
+ cls = TRANSFORMERS[settings.type]
18
+ return cls(settings)
19
+
20
+
21
+ def loader_factory(settings: LoaderConfig, auth_context, data_source_id: str):
22
+ cls = LOADERS[settings.type]
23
+ return cls(auth_context, data_source_id)
@@ -0,0 +1,4 @@
1
+ from .base import Loader
2
+ from .hydroserver_loader import HydroServerLoader
3
+
4
+ __all__ = ["Loader", "HydroServerLoader"]
@@ -1,6 +1,4 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Dict
3
- import pandas as pd
4
2
 
5
3
 
6
4
  class Loader(ABC):