hydroserverpy 0.3.0__py3-none-any.whl → 0.5.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hydroserverpy might be problematic. Click here for more details.

Files changed (83) hide show
  1. hydroserverpy/__init__.py +3 -4
  2. hydroserverpy/api/http.py +24 -0
  3. hydroserverpy/api/main.py +152 -0
  4. hydroserverpy/api/models/__init__.py +18 -0
  5. hydroserverpy/api/models/base.py +74 -0
  6. hydroserverpy/api/models/etl/__init__.py +0 -0
  7. hydroserverpy/api/models/iam/__init__.py +0 -0
  8. hydroserverpy/api/models/iam/account.py +12 -0
  9. hydroserverpy/api/models/iam/collaborator.py +34 -0
  10. hydroserverpy/api/models/iam/role.py +10 -0
  11. hydroserverpy/api/models/iam/workspace.py +203 -0
  12. hydroserverpy/api/models/sta/__init__.py +0 -0
  13. hydroserverpy/api/models/sta/datastream.py +336 -0
  14. hydroserverpy/api/models/sta/observed_property.py +72 -0
  15. hydroserverpy/api/models/sta/processing_level.py +50 -0
  16. hydroserverpy/api/models/sta/result_qualifier.py +49 -0
  17. hydroserverpy/api/models/sta/sensor.py +105 -0
  18. hydroserverpy/api/models/sta/thing.py +217 -0
  19. hydroserverpy/api/models/sta/unit.py +49 -0
  20. hydroserverpy/api/services/__init__.py +8 -0
  21. hydroserverpy/api/services/base.py +92 -0
  22. hydroserverpy/api/services/etl/__init__.py +0 -0
  23. hydroserverpy/api/services/iam/__init__.py +0 -0
  24. hydroserverpy/api/services/iam/workspace.py +126 -0
  25. hydroserverpy/api/services/sta/__init__.py +0 -0
  26. hydroserverpy/api/services/sta/datastream.py +354 -0
  27. hydroserverpy/api/services/sta/observed_property.py +98 -0
  28. hydroserverpy/api/services/sta/processing_level.py +78 -0
  29. hydroserverpy/api/services/sta/result_qualifier.py +74 -0
  30. hydroserverpy/api/services/sta/sensor.py +116 -0
  31. hydroserverpy/api/services/sta/thing.py +188 -0
  32. hydroserverpy/api/services/sta/unit.py +82 -0
  33. hydroserverpy/etl/__init__.py +21 -0
  34. hydroserverpy/etl/extractors/__init__.py +0 -0
  35. hydroserverpy/etl/extractors/base.py +13 -0
  36. hydroserverpy/etl/extractors/ftp_extractor.py +50 -0
  37. hydroserverpy/etl/extractors/http_extractor.py +84 -0
  38. hydroserverpy/etl/extractors/local_file_extractor.py +25 -0
  39. hydroserverpy/etl/hydroserver_etl.py +40 -0
  40. hydroserverpy/etl/loaders/__init__.py +0 -0
  41. hydroserverpy/etl/loaders/base.py +13 -0
  42. hydroserverpy/etl/loaders/hydroserver_loader.py +68 -0
  43. hydroserverpy/etl/transformers/__init__.py +0 -0
  44. hydroserverpy/etl/transformers/base.py +52 -0
  45. hydroserverpy/etl/transformers/csv_transformer.py +88 -0
  46. hydroserverpy/etl/transformers/json_transformer.py +62 -0
  47. hydroserverpy/etl/types.py +7 -0
  48. hydroserverpy/etl_csv/__init__.py +0 -0
  49. hydroserverpy/{etl/service.py → etl_csv/hydroserver_etl_csv.py} +93 -55
  50. hydroserverpy/quality/service.py +84 -70
  51. hydroserverpy-0.5.0b1.dist-info/METADATA +19 -0
  52. hydroserverpy-0.5.0b1.dist-info/RECORD +59 -0
  53. {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.5.0b1.dist-info}/WHEEL +1 -1
  54. hydroserverpy/core/endpoints/__init__.py +0 -9
  55. hydroserverpy/core/endpoints/base.py +0 -133
  56. hydroserverpy/core/endpoints/data_loaders.py +0 -92
  57. hydroserverpy/core/endpoints/data_sources.py +0 -92
  58. hydroserverpy/core/endpoints/datastreams.py +0 -188
  59. hydroserverpy/core/endpoints/observed_properties.py +0 -93
  60. hydroserverpy/core/endpoints/processing_levels.py +0 -93
  61. hydroserverpy/core/endpoints/result_qualifiers.py +0 -93
  62. hydroserverpy/core/endpoints/sensors.py +0 -93
  63. hydroserverpy/core/endpoints/things.py +0 -240
  64. hydroserverpy/core/endpoints/units.py +0 -93
  65. hydroserverpy/core/schemas/__init__.py +0 -9
  66. hydroserverpy/core/schemas/base.py +0 -117
  67. hydroserverpy/core/schemas/data_loaders.py +0 -71
  68. hydroserverpy/core/schemas/data_sources.py +0 -206
  69. hydroserverpy/core/schemas/datastreams.py +0 -299
  70. hydroserverpy/core/schemas/observed_properties.py +0 -35
  71. hydroserverpy/core/schemas/processing_levels.py +0 -27
  72. hydroserverpy/core/schemas/result_qualifiers.py +0 -23
  73. hydroserverpy/core/schemas/sensors.py +0 -53
  74. hydroserverpy/core/schemas/things.py +0 -309
  75. hydroserverpy/core/schemas/units.py +0 -30
  76. hydroserverpy/core/service.py +0 -186
  77. hydroserverpy-0.3.0.dist-info/METADATA +0 -18
  78. hydroserverpy-0.3.0.dist-info/RECORD +0 -36
  79. /hydroserverpy/{core → api}/__init__.py +0 -0
  80. /hydroserverpy/{etl → etl_csv}/exceptions.py +0 -0
  81. {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.5.0b1.dist-info/licenses}/LICENSE +0 -0
  82. {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.5.0b1.dist-info}/top_level.txt +0 -0
  83. {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.5.0b1.dist-info}/zip-safe +0 -0
@@ -0,0 +1,116 @@
1
+ from typing import Optional, Union, List, TYPE_CHECKING
2
+ from uuid import UUID
3
+ from ..base import SensorThingsService
4
+ from hydroserverpy.api.models import Sensor
5
+
6
+
7
+ if TYPE_CHECKING:
8
+ from hydroserverpy import HydroServer
9
+ from hydroserverpy.api.models import Workspace
10
+
11
+
12
class SensorService(SensorThingsService):
    """Service wrapping list/get/create/update/delete operations for sensors."""

    def __init__(self, connection: "HydroServer"):
        """Set the model and route attributes, then run the base initializer."""

        self._model = Sensor
        self._api_route = "api/data"
        self._endpoint_route = "sensors"
        self._sta_route = "api/sensorthings/v1.1/Sensors"

        super().__init__(connection)

    def list(
        self,
        workspace: Optional[Union["Workspace", UUID, str]] = None,
        page: int = 1,
        page_size: int = 100,
    ) -> List["Sensor"]:
        """Fetch one page of sensors, optionally restricted to a workspace.

        ``workspace`` may be an object exposing ``uid`` or a bare identifier;
        when it is truthy a ``$filter`` clause on the workspace id is added.
        """

        query = {"$top": page_size, "$skip": page_size * (page - 1)}

        if workspace:
            workspace_uid = str(getattr(workspace, "uid", workspace))
            query["$filter"] = f"properties/workspace/id eq '{workspace_uid}'"

        return super()._list(params=query)

    def get(
        self, uid: Union[UUID, str], fetch_by_datastream_uid: bool = False
    ) -> "Sensor":
        """Get a sensor by its ID, or through a datastream ID when requested.

        With ``fetch_by_datastream_uid`` set, ``uid`` is placed in a
        Datastreams path and that path is handed to ``_get``; otherwise the
        path is ``None``.
        """

        sensor_uid = str(uid)
        lookup_path = None
        if fetch_by_datastream_uid:
            lookup_path = f"api/sensorthings/v1.1/Datastreams('{sensor_uid}')/Sensor"

        return self._get(uid=sensor_uid, path=lookup_path)

    def create(
        self,
        workspace: Union["Workspace", UUID, str],
        name: str,
        description: str,
        encoding_type: str,
        method_type: str,
        manufacturer: Optional[str] = None,
        sensor_model: Optional[str] = None,
        sensor_model_link: Optional[str] = None,
        method_link: Optional[str] = None,
        method_code: Optional[str] = None,
    ) -> "Sensor":
        """Create a new sensor from the given fields.

        The workspace id is taken from ``workspace.uid`` when that attribute
        exists, otherwise from ``workspace`` itself.
        """

        payload = {
            "name": name,
            "description": description,
            "encodingType": encoding_type,
            "methodType": method_type,
            "manufacturer": manufacturer,
            "model": sensor_model,
            "modelLink": sensor_model_link,
            "methodLink": method_link,
            "methodCode": method_code,
            "workspaceId": str(getattr(workspace, "uid", workspace)),
        }

        return super()._create(**payload)

    def update(
        self,
        uid: Union[UUID, str],
        name: str = ...,
        description: str = ...,
        encoding_type: str = ...,
        method_type: str = ...,
        manufacturer: Optional[str] = ...,
        sensor_model: Optional[str] = ...,
        sensor_model_link: Optional[str] = ...,
        method_link: Optional[str] = ...,
        method_code: Optional[str] = ...,
    ) -> "Sensor":
        """Update a sensor, sending only the fields the caller supplied.

        ``...`` is the "not provided" sentinel, so an explicit ``None`` is
        still forwarded.
        """

        candidates = {
            "name": name,
            "description": description,
            "encodingType": encoding_type,
            "methodType": method_type,
            "manufacturer": manufacturer,
            "model": sensor_model,
            "modelLink": sensor_model_link,
            "methodLink": method_link,
            "methodCode": method_code,
        }

        changes = {}
        for field, value in candidates.items():
            if value is not ...:
                changes[field] = value

        return super()._update(uid=str(uid), **changes)

    def delete(self, uid: Union[UUID, str]) -> None:
        """Delete the sensor identified by ``uid``."""

        super()._delete(uid=str(uid))
@@ -0,0 +1,188 @@
1
+ import json
2
+ from typing import TYPE_CHECKING, Union, IO, List, Dict, Optional
3
+ from uuid import UUID
4
+ from ..base import SensorThingsService
5
+ from hydroserverpy.api.models import Thing
6
+
7
+
8
+ if TYPE_CHECKING:
9
+ from hydroserverpy import HydroServer
10
+ from hydroserverpy.api.models import Workspace
11
+
12
+
13
class ThingService(SensorThingsService):
    """Service for managing things, including their tags and photos."""

    def __init__(self, connection: "HydroServer"):
        """Set the model and route attributes, then run the base initializer."""

        self._model = Thing
        self._api_route = "api/data"
        self._endpoint_route = "things"
        self._sta_route = "api/sensorthings/v1.1/Things"

        super().__init__(connection)

    def list(
        self,
        workspace: Optional[Union["Workspace", UUID, str]] = None,
        page: int = 1,
        page_size: int = 100,
    ) -> List["Thing"]:
        """Fetch one page of things with their Locations expanded.

        ``workspace`` may be an object exposing ``uid`` or a bare identifier;
        when it is truthy a ``$filter`` clause on the workspace id is added.
        """

        params = {
            "$top": page_size,
            "$skip": page_size * (page - 1),
            "$expand": "Locations",
        }

        if workspace:
            params["$filter"] = (
                f"properties/workspace/id eq '{str(getattr(workspace, 'uid', workspace))}'"
            )

        return super()._list(params=params)

    def get(
        self, uid: Union[UUID, str], fetch_by_datastream_uid: bool = False
    ) -> "Thing":
        """Get a thing by its ID, or through a datastream ID when requested.

        With ``fetch_by_datastream_uid`` set, ``uid`` is placed in a
        Datastreams path and that path is handed to ``_get``; otherwise the
        path is ``None``. Locations are always expanded.
        """

        params = {"$expand": "Locations"}
        return self._get(
            uid=str(uid),
            path=(
                f"api/sensorthings/v1.1/Datastreams('{str(uid)}')/Thing"
                if fetch_by_datastream_uid
                else None
            ),
            params=params,
        )

    def create(
        self,
        workspace: Union["Workspace", UUID, str],
        name: str,
        description: str,
        sampling_feature_type: str,
        sampling_feature_code: str,
        site_type: str,
        is_private: bool,
        latitude: float,
        longitude: float,
        elevation_m: Optional[float] = None,
        elevation_datum: Optional[str] = None,
        state: Optional[str] = None,
        county: Optional[str] = None,
        country: Optional[str] = None,
        data_disclaimer: Optional[str] = None,
    ) -> "Thing":
        """Create a new thing from the given fields.

        The workspace id is taken from ``workspace.uid`` when that attribute
        exists, otherwise from ``workspace`` itself.
        """

        kwargs = {
            "name": name,
            "description": description,
            "samplingFeatureType": sampling_feature_type,
            "samplingFeatureCode": sampling_feature_code,
            "siteType": site_type,
            "isPrivate": is_private,
            "latitude": latitude,
            "longitude": longitude,
            "elevation_m": elevation_m,
            "elevationDatum": elevation_datum,
            "state": state,
            "county": county,
            "country": country,
            "dataDisclaimer": data_disclaimer,
            "workspaceId": str(getattr(workspace, "uid", workspace)),
        }

        return super()._create(**kwargs)

    def update(
        self,
        uid: Union[UUID, str],
        name: str = ...,
        description: str = ...,
        sampling_feature_type: str = ...,
        sampling_feature_code: str = ...,
        site_type: str = ...,
        is_private: bool = ...,
        latitude: float = ...,
        longitude: float = ...,
        elevation_m: Optional[float] = ...,
        elevation_datum: Optional[str] = ...,
        state: Optional[str] = ...,
        county: Optional[str] = ...,
        country: Optional[str] = ...,
        data_disclaimer: Optional[str] = ...,
    ) -> "Thing":
        """Update a thing, sending only the fields the caller supplied.

        ``...`` is the "not provided" sentinel, so an explicit ``None`` is
        still forwarded.
        """

        kwargs = {
            "name": name,
            "description": description,
            "samplingFeatureType": sampling_feature_type,
            "samplingFeatureCode": sampling_feature_code,
            "siteType": site_type,
            "isPrivate": is_private,
            "latitude": latitude,
            "longitude": longitude,
            "elevation_m": elevation_m,
            "elevationDatum": elevation_datum,
            "state": state,
            "county": county,
            "country": country,
            "dataDisclaimer": data_disclaimer,
        }

        return super()._update(
            uid=str(uid), **{k: v for k, v in kwargs.items() if v is not ...}
        )

    def delete(self, uid: Union[UUID, str]) -> None:
        """Delete the thing identified by ``uid``."""

        super()._delete(uid=str(uid))

    def add_tag(self, uid: Union[UUID, str], key: str, value: str) -> Dict[str, str]:
        """Tag a HydroServer thing.

        POSTs a JSON body holding ``key`` and ``value`` to the thing's tags
        route and returns the decoded JSON response.
        """

        return self._connection.request(
            "post",
            f"{self._api_route}/{self._endpoint_route}/{str(uid)}/tags",
            data=json.dumps({"key": key, "value": value}),
        ).json()

    def update_tag(self, uid: Union[UUID, str], key: str, value: str) -> Dict[str, str]:
        """Update the tag of a HydroServer thing.

        PUTs a JSON body holding ``key`` and ``value`` to the thing's tags
        route and returns the decoded JSON response.
        """

        return self._connection.request(
            "put",
            f"{self._api_route}/{self._endpoint_route}/{str(uid)}/tags",
            data=json.dumps({"key": key, "value": value}),
        ).json()

    def delete_tag(self, uid: Union[UUID, str], key: str) -> None:
        """Remove a tag from a HydroServer thing.

        Sends a DELETE request to the thing's tags route with ``key`` in the
        JSON body.
        """

        self._connection.request(
            "delete",
            f"{self._api_route}/{self._endpoint_route}/{str(uid)}/tags",
            data=json.dumps({"key": key}),
        )

    def add_photo(self, uid: Union[UUID, str], file: IO[bytes]) -> Dict[str, str]:
        """Add a photo of a HydroServer thing.

        POSTs ``file`` under the ``"file"`` form field to the thing's photos
        route and returns the decoded JSON response.
        """

        return self._connection.request(
            "post",
            f"{self._api_route}/{self._endpoint_route}/{str(uid)}/photos",
            files={"file": file},
        ).json()

    def delete_photo(self, uid: Union[UUID, str], name: str) -> None:
        """Delete a photo of a HydroServer thing.

        Sends a DELETE request to the thing's photos route with ``name`` in
        the JSON body.
        """

        self._connection.request(
            "delete",
            f"{self._api_route}/{self._endpoint_route}/{str(uid)}/photos",
            data=json.dumps({"name": name}),
        )
@@ -0,0 +1,82 @@
1
+ from typing import Optional, Union, List, TYPE_CHECKING
2
+ from uuid import UUID
3
+ from ..base import EndpointService
4
+ from hydroserverpy.api.models import Unit
5
+
6
+
7
+ if TYPE_CHECKING:
8
+ from hydroserverpy import HydroServer
9
+ from hydroserverpy.api.models import Workspace
10
+
11
+
12
class UnitService(EndpointService):
    """Service wrapping list/get/create/update/delete operations for units."""

    def __init__(self, connection: "HydroServer"):
        """Set the model and route attributes, then run the base initializer."""

        self._model = Unit
        self._api_route = "api/data"
        self._endpoint_route = "units"

        super().__init__(connection)

    def list(
        self,
        workspace: Optional[Union["Workspace", UUID, str]] = None,
    ) -> List["Unit"]:
        """Fetch units, optionally restricted to a workspace.

        ``workspace`` may be an object exposing ``uid`` or a bare identifier.
        The ``workspace_id`` query parameter is only sent when the resolved
        identifier is truthy.
        """

        query = {}
        raw_id = getattr(workspace, "uid", workspace)
        if raw_id:
            id_text = str(raw_id)
            if id_text:
                query["workspace_id"] = id_text

        return super()._list(params=query)

    def get(self, uid: Union[UUID, str]) -> "Unit":
        """Get the unit identified by ``uid``."""

        return super()._get(uid=str(uid))

    def create(
        self,
        workspace: Union["Workspace", UUID, str],
        name: str,
        symbol: str,
        definition: str,
        unit_type: str,
    ) -> "Unit":
        """Create a new unit from the given fields.

        The workspace id is taken from ``workspace.uid`` when that attribute
        exists, otherwise from ``workspace`` itself.
        """

        payload = {
            "name": name,
            "symbol": symbol,
            "definition": definition,
            "type": unit_type,
            "workspaceId": str(getattr(workspace, "uid", workspace)),
        }

        return super()._create(**payload)

    def update(
        self,
        uid: Union[UUID, str],
        name: str = ...,
        symbol: str = ...,
        definition: str = ...,
        unit_type: str = ...,
    ) -> "Unit":
        """Update a unit, sending only the fields the caller supplied.

        ``...`` is the "not provided" sentinel for every optional argument.
        """

        candidates = {
            "name": name,
            "symbol": symbol,
            "definition": definition,
            "type": unit_type,
        }

        changes = {}
        for field, value in candidates.items():
            if value is not ...:
                changes[field] = value

        return super()._update(uid=str(uid), **changes)

    def delete(self, uid: Union[UUID, str]) -> None:
        """Delete the unit identified by ``uid``."""

        super()._delete(uid=str(uid))
@@ -0,0 +1,21 @@
1
+ from .extractors.local_file_extractor import LocalFileExtractor
2
+ from .extractors.ftp_extractor import FTPExtractor
3
+ from .extractors.http_extractor import HTTPExtractor
4
+ from .transformers.csv_transformer import CSVTransformer
5
+ from .transformers.json_transformer import JSONTransformer
6
+ from .transformers.base import Transformer
7
+ from .extractors.base import Extractor
8
+ from .loaders.base import Loader
9
+ from .loaders.hydroserver_loader import HydroServerLoader
10
+
11
+ __all__ = [
12
+ "CSVTransformer",
13
+ "JSONTransformer",
14
+ "LocalFileExtractor",
15
+ "FTPExtractor",
16
+ "HTTPExtractor",
17
+ "Extractor",
18
+ "Transformer",
19
+ "Loader",
20
+ "HydroServerLoader",
21
+ ]
File without changes
@@ -0,0 +1,13 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict
3
+ from ..types import TimeRange
4
+
5
+
6
class Extractor(ABC):
    """Abstract interface every extractor must implement."""

    @abstractmethod
    def prepare_params(self, data_requirements: Dict[str, TimeRange]):
        """Receive the data requirements before ``extract`` is called."""

    @abstractmethod
    def extract(self):
        """Fetch the source data and return it."""
@@ -0,0 +1,50 @@
1
+ import logging
2
+ from ftplib import FTP
3
+ from io import BytesIO
4
+ from typing import Dict
5
+
6
+ from .base import Extractor
7
+ from ..types import TimeRange
8
+
9
+
10
class FTPExtractor(Extractor):
    """Extractor that downloads a single file from an FTP server."""

    def __init__(
        self,
        host: str,
        filepath: str,
        username: str = None,
        password: str = None,
        port: int = 21,
    ):
        """Store the connection settings; no connection is opened here.

        ``port`` is coerced with ``int()`` so string values are accepted.
        """
        self.host = host
        self.port = int(port)
        self.username = username
        self.password = password
        self.filepath = filepath

    def prepare_params(self, data_requirements: Dict[str, TimeRange]):
        """Do nothing: this extractor ignores data requirements."""
        pass

    def extract(self):
        """
        Downloads the file from the FTP server and returns a file-like object.

        Returns a ``BytesIO`` rewound to the start on success. Any error is
        logged and ``None`` is returned instead of raising.
        """
        ftp = FTP()
        try:
            ftp.connect(self.host, self.port)
            ftp.login(user=self.username, passwd=self.password)
            logging.info(f"Connected to FTP server: {self.host}:{self.port}")

            data = BytesIO()
            ftp.retrbinary(f"RETR {self.filepath}", data.write)
            logging.info(
                f"Successfully downloaded file '{self.filepath}' from FTP server."
            )
            data.seek(0)
            return data
        except Exception as e:
            logging.error(f"Error retrieving file from FTP server: {e}")
            return None
        finally:
            # An FTP object is always truthy, so the old `if ftp:` guard did
            # nothing. Calling quit() without a socket (connect() failed)
            # raises, and an exception in `finally` replaces `return None`.
            if ftp.sock is not None:
                try:
                    ftp.quit()
                except Exception:
                    # The server may already be gone; close our side only.
                    ftp.close()
@@ -0,0 +1,84 @@
1
+ import logging
2
+ from hydroserverpy.etl.types import TimeRange
3
+ import requests
4
+ from io import BytesIO
5
+ from typing import Dict
6
+ from .base import Extractor
7
+
8
+
9
class HTTPExtractor(Extractor):
    """Extractor that downloads a payload over HTTP/HTTPS."""

    def __init__(
        self,
        url: str,
        url_variables: dict = None,
        params: dict = None,
        headers: dict = None,
        auth: tuple = None,
    ):
        """Store the request settings and fill in the URL template.

        ``url`` is formatted with ``url_variables`` (an empty mapping when
        omitted). A placeholder with no matching variable raises ``KeyError``.
        """
        self.url = self.format_url(url, url_variables or {})
        self.params = params
        self.headers = headers
        self.auth = auth
        self.start_date = None

    def prepare_params(self, data_requirements: Dict[str, TimeRange]):
        """Turn the ``start_time_key`` / ``end_time_key`` params into time bounds.

        The earliest non-empty ``start_time`` and the latest non-empty
        ``end_time`` across ``data_requirements`` are ISO-formatted. Each is
        stored under the query-parameter name given by ``start_time_key`` /
        ``end_time_key``, and that key entry is removed from the params.
        """
        # `params` defaults to None; without this the pops below would raise
        # AttributeError whenever a time bound is present.
        if self.params is None:
            self.params = {}

        start_times = [
            req["start_time"] for req in data_requirements.values() if req["start_time"]
        ]
        if start_times:
            self._apply_time_bound("start_time", min(start_times).isoformat())

        end_times = [
            req["end_time"] for req in data_requirements.values() if req["end_time"]
        ]
        if end_times:
            self._apply_time_bound("end_time", max(end_times).isoformat())

    def _apply_time_bound(self, bound: str, value: str):
        """Move ``value`` under the param name stored at ``'<bound>_key'``."""
        key_name = f"{bound}_key"
        target_param = self.params.pop(key_name, None)
        if target_param:
            self.params[target_param] = value
            logging.info(f"Set {bound} to {value} and removed '{key_name}'")
        else:
            logging.warning(f"'{key_name}' not found in params.")

    def extract(self):
        """
        Downloads the file from the HTTP/HTTPS server and returns a file-like object.

        Returns a ``BytesIO`` rewound to the start. Raises whatever
        ``raise_for_status`` raises for an error response.
        """
        data = BytesIO()
        # The context manager closes the streamed response even when
        # raise_for_status() or the chunk loop fails.
        with requests.get(
            url=self.url,
            params=self.params,
            headers=self.headers,
            auth=self.auth,
            stream=True,
        ) as response:
            response.raise_for_status()
            logging.info(f"Successfully downloaded file from {response.url}")

            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    data.write(chunk)

        data.seek(0)
        return data

    @staticmethod
    def format_url(url_template, url_variables):
        """Fill ``url_template`` with ``url_variables`` via ``str.format``.

        Raises ``KeyError`` naming the missing variable when the template uses
        a placeholder that ``url_variables`` does not provide.
        """
        try:
            url = url_template.format(**url_variables)
        except KeyError as e:
            missing_key = e.args[0]
            raise KeyError(
                f"Missing configuration url_variable: {missing_key}"
            ) from e

        return url
@@ -0,0 +1,25 @@
1
+ import logging
2
+ from typing import Dict
3
+
4
+ from .base import Extractor
5
+ from ..types import TimeRange
6
+
7
+
8
class LocalFileExtractor(Extractor):
    """Extractor that reads from a file on the local filesystem."""

    def __init__(self, filepath: str):
        """Remember the path of the file to open."""
        self.filepath = filepath

    def prepare_params(self, data_requirements: Dict[str, TimeRange]):
        """Do nothing: this extractor ignores data requirements."""
        pass

    def extract(self):
        """
        Opens the file in text read mode and returns the open handle.

        Any failure is logged and ``None`` is returned. Closing the returned
        handle is left to the caller.
        """
        try:
            opened = open(self.filepath, "r")
        except Exception as e:
            logging.error(f"Error opening file '{self.filepath}': {e}")
            return None
        else:
            logging.info(f"Successfully opened file '{self.filepath}'.")
            return opened
@@ -0,0 +1,40 @@
1
+ import logging
2
+ import pandas as pd
3
+
4
+
5
class HydroServerETL:
    """Runs an extract -> transform -> load pipeline from pluggable parts."""

    def __init__(self, extractor, transformer, loader, source_target_map):
        """Store the pipeline parts; ``transformer`` may be falsy to skip it."""
        self.extractor = extractor
        self.transformer = transformer
        self.loader = loader
        self.source_target_map = source_target_map

    @staticmethod
    def _has_no_data(payload):
        """Return True when ``payload`` is None or an empty DataFrame."""
        if payload is None:
            return True
        return isinstance(payload, pd.DataFrame) and payload.empty

    def run(self):
        """
        Extracts, transforms, and loads data as defined by the class parameters.

        Stops early, with a warning, when the extractor or the transformer
        yields no data.
        """

        # Step 1: ask the loader what it needs and hand that to the extractor.
        requirements = self.loader.get_data_requirements(self.source_target_map)
        self.extractor.prepare_params(requirements)

        # Step 2: extract.
        payload = self.extractor.extract()
        if self._has_no_data(payload):
            logging.warning("No data was returned from the extractor. Ending ETL run.")
            return
        logging.info("Successfully extracted data.")

        # Step 3: transform, only when a transformer was configured.
        if self.transformer:
            payload = self.transformer.transform(payload)
            if self._has_no_data(payload):
                logging.warning("No data returned from the transformer. Ending run.")
                return
            logging.info(f"Successfully transformed data. {payload}")

        # Step 4: load.
        self.loader.load(payload, self.source_target_map)
        logging.info("Successfully loaded data.")
File without changes
@@ -0,0 +1,13 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict
3
+ import pandas as pd
4
+
5
+
6
class Loader(ABC):
    """Abstract interface every loader must implement."""

    @abstractmethod
    def load(self, *args, **kwargs) -> None:
        """Write the supplied data to the target system."""

    @abstractmethod
    def get_data_requirements(self, df: pd.DataFrame) -> Dict[str, pd.Timestamp]:
        """Report what data the target system still needs."""