hydroserverpy 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hydroserverpy might be problematic. Click here for more details.
- hydroserverpy/__init__.py +1 -1
- hydroserverpy/core/endpoints/base.py +44 -31
- hydroserverpy/core/endpoints/data_loaders.py +6 -5
- hydroserverpy/core/endpoints/data_sources.py +6 -5
- hydroserverpy/core/endpoints/datastreams.py +89 -52
- hydroserverpy/core/endpoints/observed_properties.py +36 -18
- hydroserverpy/core/endpoints/processing_levels.py +36 -18
- hydroserverpy/core/endpoints/result_qualifiers.py +37 -19
- hydroserverpy/core/endpoints/sensors.py +37 -19
- hydroserverpy/core/endpoints/things.py +58 -37
- hydroserverpy/core/endpoints/units.py +37 -19
- hydroserverpy/core/schemas/base.py +13 -6
- hydroserverpy/core/schemas/data_loaders.py +6 -4
- hydroserverpy/core/schemas/data_sources.py +73 -56
- hydroserverpy/core/schemas/datastreams.py +101 -70
- hydroserverpy/core/schemas/observed_properties.py +18 -10
- hydroserverpy/core/schemas/processing_levels.py +10 -6
- hydroserverpy/core/schemas/result_qualifiers.py +7 -4
- hydroserverpy/core/schemas/sensors.py +33 -18
- hydroserverpy/core/schemas/things.py +97 -60
- hydroserverpy/core/schemas/units.py +7 -8
- hydroserverpy/core/service.py +31 -17
- hydroserverpy/etl/__init__.py +21 -0
- hydroserverpy/etl/extractors/__init__.py +0 -0
- hydroserverpy/etl/extractors/base.py +13 -0
- hydroserverpy/etl/extractors/ftp_extractor.py +50 -0
- hydroserverpy/etl/extractors/http_extractor.py +84 -0
- hydroserverpy/etl/extractors/local_file_extractor.py +25 -0
- hydroserverpy/etl/hydroserver_etl.py +40 -0
- hydroserverpy/etl/loaders/__init__.py +0 -0
- hydroserverpy/etl/loaders/base.py +13 -0
- hydroserverpy/etl/loaders/hydroserver_loader.py +68 -0
- hydroserverpy/etl/transformers/__init__.py +0 -0
- hydroserverpy/etl/transformers/base.py +52 -0
- hydroserverpy/etl/transformers/csv_transformer.py +88 -0
- hydroserverpy/etl/transformers/json_transformer.py +62 -0
- hydroserverpy/etl/types.py +7 -0
- hydroserverpy/etl_csv/__init__.py +0 -0
- hydroserverpy/{etl/service.py → etl_csv/hydroserver_etl_csv.py} +92 -54
- hydroserverpy/quality/service.py +84 -70
- hydroserverpy-0.4.0.dist-info/METADATA +18 -0
- hydroserverpy-0.4.0.dist-info/RECORD +51 -0
- {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.4.0.dist-info}/WHEEL +1 -1
- hydroserverpy-0.3.0.dist-info/METADATA +0 -18
- hydroserverpy-0.3.0.dist-info/RECORD +0 -36
- /hydroserverpy/{etl → etl_csv}/exceptions.py +0 -0
- {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.4.0.dist-info}/LICENSE +0 -0
- {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.4.0.dist-info}/top_level.txt +0 -0
- {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.4.0.dist-info}/zip-safe +0 -0
|
@@ -5,40 +5,55 @@ from hydroserverpy.core.schemas.base import HydroServerCoreModel
|
|
|
5
5
|
|
|
6
6
|
class SensorFields(BaseModel):
|
|
7
7
|
name: str = Field(
|
|
8
|
-
...,
|
|
9
|
-
|
|
8
|
+
...,
|
|
9
|
+
strip_whitespace=True,
|
|
10
|
+
max_length=255,
|
|
11
|
+
description="The name of the sensor.",
|
|
10
12
|
)
|
|
11
13
|
description: str = Field(
|
|
12
|
-
strip_whitespace=True,
|
|
13
|
-
description='A description of the sensor.'
|
|
14
|
+
strip_whitespace=True, description="A description of the sensor."
|
|
14
15
|
)
|
|
15
16
|
encoding_type: str = Field(
|
|
16
|
-
...,
|
|
17
|
-
|
|
17
|
+
...,
|
|
18
|
+
strip_whitespace=True,
|
|
19
|
+
max_length=255,
|
|
20
|
+
description="The encoding type of the sensor.",
|
|
18
21
|
)
|
|
19
22
|
manufacturer: Optional[str] = Field(
|
|
20
|
-
None,
|
|
21
|
-
|
|
23
|
+
None,
|
|
24
|
+
strip_whitespace=True,
|
|
25
|
+
max_length=255,
|
|
26
|
+
description="The manufacturer of the sensor.",
|
|
22
27
|
)
|
|
23
28
|
model: Optional[str] = Field(
|
|
24
|
-
None,
|
|
25
|
-
|
|
29
|
+
None,
|
|
30
|
+
strip_whitespace=True,
|
|
31
|
+
max_length=255,
|
|
32
|
+
description="The model of the sensor.",
|
|
26
33
|
)
|
|
27
34
|
model_link: Optional[str] = Field(
|
|
28
|
-
None,
|
|
29
|
-
|
|
35
|
+
None,
|
|
36
|
+
strip_whitespace=True,
|
|
37
|
+
max_length=500,
|
|
38
|
+
description="A link to a website or file that describes the sensor model.",
|
|
30
39
|
)
|
|
31
40
|
method_type: str = Field(
|
|
32
|
-
...,
|
|
33
|
-
|
|
41
|
+
...,
|
|
42
|
+
strip_whitespace=True,
|
|
43
|
+
max_length=100,
|
|
44
|
+
description="The type of method used by this sensor to collect observations.",
|
|
34
45
|
)
|
|
35
46
|
method_link: Optional[str] = Field(
|
|
36
|
-
None,
|
|
37
|
-
|
|
47
|
+
None,
|
|
48
|
+
strip_whitespace=True,
|
|
49
|
+
max_length=500,
|
|
50
|
+
description="A link to a website or file that describes the sensor method.",
|
|
38
51
|
)
|
|
39
52
|
method_code: Optional[str] = Field(
|
|
40
|
-
None,
|
|
41
|
-
|
|
53
|
+
None,
|
|
54
|
+
strip_whitespace=True,
|
|
55
|
+
max_length=50,
|
|
56
|
+
description="A code representing the sensor method.",
|
|
42
57
|
)
|
|
43
58
|
|
|
44
59
|
model_config = ConfigDict(protected_namespaces=())
|
|
@@ -10,73 +10,106 @@ if TYPE_CHECKING:
|
|
|
10
10
|
|
|
11
11
|
class ThingFields(BaseModel):
|
|
12
12
|
name: str = Field(
|
|
13
|
-
...,
|
|
14
|
-
|
|
13
|
+
...,
|
|
14
|
+
strip_whitespace=True,
|
|
15
|
+
max_length=200,
|
|
16
|
+
description="The name of the site/thing.",
|
|
15
17
|
)
|
|
16
18
|
description: str = Field(
|
|
17
|
-
..., strip_whitespace=True,
|
|
18
|
-
description='A description of the site/thing.'
|
|
19
|
+
..., strip_whitespace=True, description="A description of the site/thing."
|
|
19
20
|
)
|
|
20
21
|
sampling_feature_type: str = Field(
|
|
21
|
-
...,
|
|
22
|
-
|
|
22
|
+
...,
|
|
23
|
+
strip_whitespace=True,
|
|
24
|
+
max_length=200,
|
|
25
|
+
description="The sampling feature type of the site/thing.",
|
|
23
26
|
)
|
|
24
27
|
sampling_feature_code: str = Field(
|
|
25
|
-
...,
|
|
26
|
-
|
|
28
|
+
...,
|
|
29
|
+
strip_whitespace=True,
|
|
30
|
+
max_length=200,
|
|
31
|
+
description="A code representing the sampling feature of the site/thing.",
|
|
27
32
|
)
|
|
28
33
|
site_type: str = Field(
|
|
29
|
-
...,
|
|
30
|
-
|
|
34
|
+
...,
|
|
35
|
+
strip_whitespace=True,
|
|
36
|
+
max_length=200,
|
|
37
|
+
description="The type of the site/thing.",
|
|
31
38
|
)
|
|
32
39
|
data_disclaimer: Optional[str] = Field(
|
|
33
|
-
None,
|
|
34
|
-
|
|
40
|
+
None,
|
|
41
|
+
strip_whitespace=True,
|
|
42
|
+
description="An optional data disclaimer to attach to observations collected at this site/thing.",
|
|
35
43
|
)
|
|
36
44
|
|
|
37
45
|
|
|
38
46
|
# Get a list of all ISO 3166-1 alpha-2 country codes
|
|
39
|
-
valid_country_codes = [code for code, _ in countries_for_language(
|
|
47
|
+
valid_country_codes = [code for code, _ in countries_for_language("en")]
|
|
40
48
|
|
|
41
49
|
|
|
42
50
|
class LocationFields(BaseModel):
|
|
43
51
|
latitude: float = Field(
|
|
44
|
-
...,
|
|
45
|
-
|
|
46
|
-
|
|
52
|
+
...,
|
|
53
|
+
ge=-90,
|
|
54
|
+
le=90,
|
|
55
|
+
serialization_alias="latitude",
|
|
56
|
+
validation_alias=AliasChoices("latitude", AliasPath("location", "latitude")),
|
|
57
|
+
description="The WGS84 latitude of the location.",
|
|
47
58
|
)
|
|
48
59
|
longitude: float = Field(
|
|
49
|
-
...,
|
|
50
|
-
|
|
51
|
-
|
|
60
|
+
...,
|
|
61
|
+
ge=-180,
|
|
62
|
+
le=180,
|
|
63
|
+
serialization_alias="longitude",
|
|
64
|
+
validation_alias=AliasChoices("longitude", AliasPath("location", "longitude")),
|
|
65
|
+
description="The WGS84 longitude of the location.",
|
|
52
66
|
)
|
|
53
67
|
elevation_m: Optional[float] = Field(
|
|
54
|
-
None,
|
|
55
|
-
|
|
56
|
-
|
|
68
|
+
None,
|
|
69
|
+
ge=-99999,
|
|
70
|
+
le=99999,
|
|
71
|
+
serialization_alias="elevation_m",
|
|
72
|
+
validation_alias=AliasChoices(
|
|
73
|
+
"elevation_m", AliasPath("location", "elevation_m")
|
|
74
|
+
),
|
|
75
|
+
description="The elevation in meters of the location.",
|
|
57
76
|
)
|
|
58
77
|
elevation_datum: Optional[str] = Field(
|
|
59
|
-
None,
|
|
60
|
-
|
|
61
|
-
|
|
78
|
+
None,
|
|
79
|
+
strip_whitespace=True,
|
|
80
|
+
max_length=255,
|
|
81
|
+
serialization_alias="elevationDatum",
|
|
82
|
+
validation_alias=AliasChoices(
|
|
83
|
+
"elevationDatum", AliasPath("location", "elevationDatum")
|
|
84
|
+
),
|
|
85
|
+
description="The datum used to represent the elevation of the location.",
|
|
62
86
|
)
|
|
63
87
|
state: Optional[str] = Field(
|
|
64
|
-
None,
|
|
65
|
-
|
|
66
|
-
|
|
88
|
+
None,
|
|
89
|
+
strip_whitespace=True,
|
|
90
|
+
max_length=200,
|
|
91
|
+
serialization_alias="state",
|
|
92
|
+
validation_alias=AliasChoices("state", AliasPath("location", "state")),
|
|
93
|
+
description="The state/province of the location.",
|
|
67
94
|
)
|
|
68
95
|
county: Optional[str] = Field(
|
|
69
|
-
None,
|
|
70
|
-
|
|
71
|
-
|
|
96
|
+
None,
|
|
97
|
+
strip_whitespace=True,
|
|
98
|
+
max_length=200,
|
|
99
|
+
serialization_alias="county",
|
|
100
|
+
validation_alias=AliasChoices("county", AliasPath("location", "county")),
|
|
101
|
+
description="The county/district of the location.",
|
|
72
102
|
)
|
|
73
103
|
country: Optional[str] = Field(
|
|
74
|
-
None,
|
|
75
|
-
|
|
76
|
-
|
|
104
|
+
None,
|
|
105
|
+
strip_whitespace=True,
|
|
106
|
+
max_length=2,
|
|
107
|
+
serialization_alias="country",
|
|
108
|
+
validation_alias=AliasChoices("country", AliasPath("location", "country")),
|
|
109
|
+
description="The ISO 3166-1 alpha-2 country code of the location.",
|
|
77
110
|
)
|
|
78
111
|
|
|
79
|
-
@field_validator(
|
|
112
|
+
@field_validator("country", mode="after")
|
|
80
113
|
def check_country_code(cls, value: str) -> str:
|
|
81
114
|
"""
|
|
82
115
|
Validate the country code to ensure it is an ISO 3166-1 alpha-2 country code.
|
|
@@ -89,7 +122,9 @@ class LocationFields(BaseModel):
|
|
|
89
122
|
"""
|
|
90
123
|
|
|
91
124
|
if value and value.upper() not in valid_country_codes:
|
|
92
|
-
raise ValueError(
|
|
125
|
+
raise ValueError(
|
|
126
|
+
f"Invalid country code: {value}. Must be an ISO 3166-1 alpha-2 country code."
|
|
127
|
+
)
|
|
93
128
|
|
|
94
129
|
return value
|
|
95
130
|
|
|
@@ -123,7 +158,7 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
|
|
|
123
158
|
self._archive = None
|
|
124
159
|
|
|
125
160
|
@property
|
|
126
|
-
def datastreams(self) -> List[
|
|
161
|
+
def datastreams(self) -> List["Datastream"]:
|
|
127
162
|
"""
|
|
128
163
|
The datastreams associated with the thing. If not already cached, fetch the datastreams from the
|
|
129
164
|
server.
|
|
@@ -138,7 +173,7 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
|
|
|
138
173
|
return self._datastreams
|
|
139
174
|
|
|
140
175
|
@property
|
|
141
|
-
def tags(self) -> List[
|
|
176
|
+
def tags(self) -> List["Tag"]:
|
|
142
177
|
"""
|
|
143
178
|
The tags associated with the thing. If not already cached, fetch the tags from the server.
|
|
144
179
|
|
|
@@ -152,7 +187,7 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
|
|
|
152
187
|
return self._tags
|
|
153
188
|
|
|
154
189
|
@property
|
|
155
|
-
def photos(self) -> List[
|
|
190
|
+
def photos(self) -> List["Photo"]:
|
|
156
191
|
"""
|
|
157
192
|
The photos associated with the thing. If not already cached, fetch the photos from the server.
|
|
158
193
|
|
|
@@ -166,7 +201,7 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
|
|
|
166
201
|
return self._photos
|
|
167
202
|
|
|
168
203
|
@property
|
|
169
|
-
def archive(self) ->
|
|
204
|
+
def archive(self) -> "Archive":
|
|
170
205
|
"""
|
|
171
206
|
The archive associated with the thing. If not already cached, fetch the archive from the server.
|
|
172
207
|
|
|
@@ -185,7 +220,7 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
|
|
|
185
220
|
if they were previously loaded.
|
|
186
221
|
"""
|
|
187
222
|
|
|
188
|
-
entity = self._endpoint.get(uid=self.uid).model_dump(exclude=[
|
|
223
|
+
entity = self._endpoint.get(uid=self.uid).model_dump(exclude=["uid"])
|
|
189
224
|
self._original_data = entity
|
|
190
225
|
self.__dict__.update(entity)
|
|
191
226
|
if self._datastreams is not None:
|
|
@@ -218,7 +253,9 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
|
|
|
218
253
|
"""
|
|
219
254
|
|
|
220
255
|
selected_tag = next((tag for tag in self._tags if tag.key == key))
|
|
221
|
-
updated_tag = self._endpoint.update_tag(
|
|
256
|
+
updated_tag = self._endpoint.update_tag(
|
|
257
|
+
uid=self.uid, tag_uid=selected_tag.uid, value=value
|
|
258
|
+
)
|
|
222
259
|
self._tags = [tag if tag.key != key else updated_tag for tag in self._tags]
|
|
223
260
|
|
|
224
261
|
def delete_tag(self, key: str) -> None:
|
|
@@ -253,7 +290,9 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
|
|
|
253
290
|
|
|
254
291
|
selected_photo = next((photo for photo in self._photos if photo.link == link))
|
|
255
292
|
self._endpoint.delete_photo(uid=self.uid, photo_uid=selected_photo.uid)
|
|
256
|
-
self._photos = [
|
|
293
|
+
self._photos = [
|
|
294
|
+
photo for photo in self._photos if photo.link != selected_photo.link
|
|
295
|
+
]
|
|
257
296
|
|
|
258
297
|
|
|
259
298
|
class Archive(HydroServerBaseModel):
|
|
@@ -262,20 +301,24 @@ class Archive(HydroServerBaseModel):
|
|
|
262
301
|
"""
|
|
263
302
|
|
|
264
303
|
link: Optional[str] = Field(
|
|
265
|
-
None,
|
|
266
|
-
|
|
304
|
+
None,
|
|
305
|
+
strip_whitespace=True,
|
|
306
|
+
max_length=255,
|
|
307
|
+
description="A link to the HydroShare resource containing the archived site/thing.",
|
|
267
308
|
)
|
|
268
|
-
frequency: Optional[Literal[
|
|
309
|
+
frequency: Optional[Literal["daily", "weekly", "monthly"]] = Field(
|
|
269
310
|
...,
|
|
270
|
-
description=
|
|
311
|
+
description="The frequency at which the site/thing should be archived.",
|
|
271
312
|
)
|
|
272
313
|
path: str = Field(
|
|
273
|
-
...,
|
|
274
|
-
|
|
314
|
+
...,
|
|
315
|
+
strip_whitespace=True,
|
|
316
|
+
max_length=255,
|
|
317
|
+
description="The path within the HydroShare resource containing the archived data.",
|
|
275
318
|
)
|
|
276
319
|
datastream_ids: List[UUID] = Field(
|
|
277
320
|
...,
|
|
278
|
-
description=
|
|
321
|
+
description="The list of datastreams that are included in the archived data.",
|
|
279
322
|
)
|
|
280
323
|
|
|
281
324
|
|
|
@@ -285,12 +328,10 @@ class Tag(HydroServerBaseModel):
|
|
|
285
328
|
"""
|
|
286
329
|
|
|
287
330
|
key: str = Field(
|
|
288
|
-
..., strip_whitespace=True, max_length=255,
|
|
289
|
-
description='The key of the tag.'
|
|
331
|
+
..., strip_whitespace=True, max_length=255, description="The key of the tag."
|
|
290
332
|
)
|
|
291
333
|
value: str = Field(
|
|
292
|
-
..., strip_whitespace=True, max_length=255,
|
|
293
|
-
description='The value of the tag.'
|
|
334
|
+
..., strip_whitespace=True, max_length=255, description="The value of the tag."
|
|
294
335
|
)
|
|
295
336
|
|
|
296
337
|
|
|
@@ -300,10 +341,6 @@ class Photo(HydroServerBaseModel):
|
|
|
300
341
|
"""
|
|
301
342
|
|
|
302
343
|
file_path: str = Field(
|
|
303
|
-
..., strip_whitespace=True,
|
|
304
|
-
description='The file path of the photo.'
|
|
305
|
-
)
|
|
306
|
-
link: str = Field(
|
|
307
|
-
..., strip_whitespace=True,
|
|
308
|
-
description='The link to the photo.'
|
|
344
|
+
..., strip_whitespace=True, description="The file path of the photo."
|
|
309
345
|
)
|
|
346
|
+
link: str = Field(..., strip_whitespace=True, description="The link to the photo.")
|
|
@@ -4,20 +4,19 @@ from hydroserverpy.core.schemas.base import HydroServerCoreModel
|
|
|
4
4
|
|
|
5
5
|
class UnitFields:
|
|
6
6
|
name: str = Field(
|
|
7
|
-
..., strip_whitespace=True, max_length=255,
|
|
8
|
-
description='The name of the unit.'
|
|
7
|
+
..., strip_whitespace=True, max_length=255, description="The name of the unit."
|
|
9
8
|
)
|
|
10
9
|
symbol: str = Field(
|
|
11
|
-
...,
|
|
12
|
-
|
|
10
|
+
...,
|
|
11
|
+
strip_whitespace=True,
|
|
12
|
+
max_length=255,
|
|
13
|
+
description="The symbol of the unit.",
|
|
13
14
|
)
|
|
14
15
|
definition: str = Field(
|
|
15
|
-
..., strip_whitespace=True,
|
|
16
|
-
description='The definition of the unit.'
|
|
16
|
+
..., strip_whitespace=True, description="The definition of the unit."
|
|
17
17
|
)
|
|
18
18
|
type: str = Field(
|
|
19
|
-
..., strip_whitespace=True, max_length=255,
|
|
20
|
-
description='The type of the unit.'
|
|
19
|
+
..., strip_whitespace=True, max_length=255, description="The type of the unit."
|
|
21
20
|
)
|
|
22
21
|
|
|
23
22
|
|
hydroserverpy/core/service.py
CHANGED
|
@@ -1,8 +1,16 @@
|
|
|
1
1
|
import requests
|
|
2
2
|
from typing import Optional
|
|
3
|
-
from hydroserverpy.core.endpoints import (
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
from hydroserverpy.core.endpoints import (
|
|
4
|
+
DataLoaderEndpoint,
|
|
5
|
+
DataSourceEndpoint,
|
|
6
|
+
DatastreamEndpoint,
|
|
7
|
+
ThingEndpoint,
|
|
8
|
+
SensorEndpoint,
|
|
9
|
+
UnitEndpoint,
|
|
10
|
+
ProcessingLevelEndpoint,
|
|
11
|
+
ObservedPropertyEndpoint,
|
|
12
|
+
ResultQualifierEndpoint,
|
|
13
|
+
)
|
|
6
14
|
|
|
7
15
|
|
|
8
16
|
class HydroServer:
|
|
@@ -22,16 +30,23 @@ class HydroServer:
|
|
|
22
30
|
"""
|
|
23
31
|
|
|
24
32
|
def __init__(
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
33
|
+
self,
|
|
34
|
+
host: str,
|
|
35
|
+
username: Optional[str] = None,
|
|
36
|
+
password: Optional[str] = None,
|
|
37
|
+
apikey: Optional[str] = None,
|
|
38
|
+
api_route: str = "api",
|
|
31
39
|
):
|
|
32
|
-
self.host = host.strip(
|
|
33
|
-
self.auth = (
|
|
34
|
-
|
|
40
|
+
self.host = host.strip("/")
|
|
41
|
+
self.auth = (
|
|
42
|
+
(
|
|
43
|
+
username or "__key__",
|
|
44
|
+
password or apikey,
|
|
45
|
+
)
|
|
46
|
+
if (username and password) or apikey
|
|
47
|
+
else None
|
|
48
|
+
)
|
|
49
|
+
self.api_route = api_route.strip("/")
|
|
35
50
|
self._session = None
|
|
36
51
|
self._timeout = 60
|
|
37
52
|
self._initialize_session()
|
|
@@ -49,10 +64,8 @@ class HydroServer:
|
|
|
49
64
|
|
|
50
65
|
self._session = requests.Session()
|
|
51
66
|
|
|
52
|
-
if self.auth and self.auth[0] ==
|
|
53
|
-
self._session.headers.update(
|
|
54
|
-
{'key': self.auth[1]}
|
|
55
|
-
)
|
|
67
|
+
if self.auth and self.auth[0] == "__key__":
|
|
68
|
+
self._session.headers.update({"key": self.auth[1]})
|
|
56
69
|
elif self.auth:
|
|
57
70
|
self._session.auth = self.auth
|
|
58
71
|
|
|
@@ -75,7 +88,8 @@ class HydroServer:
|
|
|
75
88
|
response = getattr(self._session, method)(
|
|
76
89
|
f'{self.host}/{path.strip("/")}',
|
|
77
90
|
timeout=self._timeout,
|
|
78
|
-
*args,
|
|
91
|
+
*args,
|
|
92
|
+
**kwargs,
|
|
79
93
|
)
|
|
80
94
|
response.raise_for_status()
|
|
81
95
|
return response
|
hydroserverpy/etl/__init__.py
CHANGED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from .extractors.local_file_extractor import LocalFileExtractor
|
|
2
|
+
from .extractors.ftp_extractor import FTPExtractor
|
|
3
|
+
from .extractors.http_extractor import HTTPExtractor
|
|
4
|
+
from .transformers.csv_transformer import CSVTransformer
|
|
5
|
+
from .transformers.json_transformer import JSONTransformer
|
|
6
|
+
from .transformers.base import Transformer
|
|
7
|
+
from .extractors.base import Extractor
|
|
8
|
+
from .loaders.base import Loader
|
|
9
|
+
from .loaders.hydroserver_loader import HydroServerLoader
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"CSVTransformer",
|
|
13
|
+
"JSONTransformer",
|
|
14
|
+
"LocalFileExtractor",
|
|
15
|
+
"FTPExtractor",
|
|
16
|
+
"HTTPExtractor",
|
|
17
|
+
"Extractor",
|
|
18
|
+
"Transformer",
|
|
19
|
+
"Loader",
|
|
20
|
+
"HydroServerLoader",
|
|
21
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Dict
|
|
3
|
+
from ..types import TimeRange
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Extractor(ABC):
    """
    Abstract interface for the extract stage of an ETL pipeline.

    Concrete extractors must implement both ``prepare_params`` and ``extract``.
    """

    @abstractmethod
    def prepare_params(self, data_requirements: Dict[str, TimeRange]):
        """
        Receive the data requirements, a mapping of string keys to ``TimeRange`` values.

        What an extractor does with them is up to the subclass.
        """

    @abstractmethod
    def extract(self):
        """
        Retrieve the source data. The return value is defined by each subclass.
        """
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from ftplib import FTP
|
|
3
|
+
from io import BytesIO
|
|
4
|
+
from typing import Dict
|
|
5
|
+
|
|
6
|
+
from .base import Extractor
|
|
7
|
+
from ..types import TimeRange
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FTPExtractor(Extractor):
    """
    Extractor that downloads a single file from an FTP server.
    """

    def __init__(
        self,
        host: str,
        filepath: str,
        username: str = None,
        password: str = None,
        port: int = 21,
    ):
        """
        :param host: Host name or address of the FTP server.
        :param filepath: Path of the file to retrieve, passed verbatim to ``RETR``.
        :param username: Login user name; passed to ``FTP.login`` as-is.
        :param password: Login password; passed to ``FTP.login`` as-is.
        :param port: Port of the FTP server; coerced with ``int()``.
        """

        self.host = host
        self.port = int(port)
        self.username = username
        self.password = password
        self.filepath = filepath

    def prepare_params(self, data_requirements: Dict[str, TimeRange]):
        """
        No-op: this extractor does not use the data requirements.
        """

        pass

    def extract(self):
        """
        Downloads the file from the FTP server and returns a file-like object.

        :return: A ``BytesIO`` positioned at the start of the downloaded content, or
            ``None`` if connecting, logging in, or retrieving the file failed (the
            error is logged).
        """

        ftp = FTP()
        try:
            ftp.connect(self.host, self.port)
            ftp.login(user=self.username, passwd=self.password)
            logging.info(f"Connected to FTP server: {self.host}:{self.port}")

            data = BytesIO()
            ftp.retrbinary(f"RETR {self.filepath}", data.write)
            logging.info(
                f"Successfully downloaded file '{self.filepath}' from FTP server."
            )
            data.seek(0)
            return data
        except Exception as e:
            logging.error(f"Error retrieving file from FTP server: {e}")
            return None
        finally:
            # quit() sends QUIT over the control connection, so it raises when
            # connect() never succeeded or the connection has dropped. An exception
            # escaping this clause would replace the value being returned above, so
            # fall back to close(), which only releases local resources.
            try:
                ftp.quit()
            except Exception:
                ftp.close()
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from hydroserverpy.etl.types import TimeRange
|
|
3
|
+
import requests
|
|
4
|
+
from io import BytesIO
|
|
5
|
+
from typing import Dict
|
|
6
|
+
from .base import Extractor
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class HTTPExtractor(Extractor):
    """
    Extractor that downloads data from an HTTP/HTTPS endpoint with a GET request.
    """

    def __init__(
        self,
        url: str,
        url_variables: dict = None,
        params: dict = None,
        headers: dict = None,
        auth: tuple = None,
    ):
        """
        :param url: URL template; ``{name}`` placeholders are filled from ``url_variables``.
        :param url_variables: Values for the placeholders in ``url``.
        :param params: Query parameters. May contain ``start_time_key`` and
            ``end_time_key`` entries naming the query parameters that
            ``prepare_params`` should fill in.
        :param headers: Request headers, passed to ``requests.get``.
        :param auth: Authentication value, passed to ``requests.get``.
        :raises KeyError: If ``url`` references a placeholder missing from ``url_variables``.
        """

        self.url = self.format_url(url, url_variables or {})
        # Work on a private copy: prepare_params() pops and adds keys, which must not
        # leak into the caller's dict. Defaulting to an empty dict also lets
        # prepare_params() run when no params were supplied.
        self.params = dict(params) if params else {}
        self.headers = headers
        self.auth = auth
        self.start_date = None

    def prepare_params(self, data_requirements: Dict[str, TimeRange]):
        """
        Set the start/end query parameters from the given data requirements.

        The earliest truthy ``start_time`` and the latest truthy ``end_time`` across all
        requirements are written, in ISO 8601 form, under the parameter names given by
        the ``start_time_key`` and ``end_time_key`` entries of ``self.params``. Those
        two entries are removed in the process.

        :param data_requirements: Mapping whose values expose ``start_time`` and
            ``end_time`` entries.
        """

        start_times = [
            req["start_time"] for req in data_requirements.values() if req["start_time"]
        ]
        if start_times:
            self._set_time_param("start_time", min(start_times))

        end_times = [
            req["end_time"] for req in data_requirements.values() if req["end_time"]
        ]
        if end_times:
            self._set_time_param("end_time", max(end_times))

    def _set_time_param(self, name, value):
        """
        Store ``value`` under the query parameter named by ``self.params["<name>_key"]``.
        """

        formatted = value.isoformat()
        target_key = self.params.pop(f"{name}_key", None)
        if target_key:
            self.params[target_key] = formatted
            logging.info(f"Set {name} to {formatted} and removed '{name}_key'")
        else:
            logging.warning(f"'{name}_key' not found in params.")

    def extract(self):
        """
        Downloads the file from the HTTP/HTTPS server and returns a file-like object.

        :return: A ``BytesIO`` positioned at the start of the response body.
        :raises requests.HTTPError: If the server answers with an error status.
        """

        # A streamed response holds its connection until the body is consumed or the
        # response is closed; the context manager closes it on every path, including
        # when raise_for_status() raises.
        with requests.get(
            url=self.url,
            params=self.params,
            headers=self.headers,
            auth=self.auth,
            stream=True,
        ) as response:
            response.raise_for_status()
            logging.info(f"Successfully downloaded file from {response.url}")

            data = BytesIO()
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    data.write(chunk)

        data.seek(0)
        return data

    @staticmethod
    def format_url(url_template, url_variables):
        """
        Fill the ``{name}`` placeholders of ``url_template`` from ``url_variables``.

        :raises KeyError: If a placeholder has no matching entry in ``url_variables``.
        """

        try:
            url = url_template.format(**url_variables)
        except KeyError as e:
            missing_key = e.args[0]
            raise KeyError(f"Missing configuration url_variable: {missing_key}") from e

        return url
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict
|
|
3
|
+
|
|
4
|
+
from .base import Extractor
|
|
5
|
+
from ..types import TimeRange
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class LocalFileExtractor(Extractor):
    """
    Extractor that reads from a file at a given path.
    """

    def __init__(self, filepath: str):
        self.filepath = filepath

    def prepare_params(self, data_requirements: Dict[str, TimeRange]):
        """
        No-op: this extractor does not use the data requirements.
        """

        return None

    def extract(self):
        """
        Open ``self.filepath`` in text read mode and hand back the file object.

        :return: The open file object, or ``None`` if anything went wrong while
            opening it (the error is logged). Closing the returned file is left to
            the caller.
        """

        try:
            source = open(self.filepath, "r")
            logging.info(f"Successfully opened file '{self.filepath}'.")
        except Exception as e:
            logging.error(f"Error opening file '{self.filepath}': {e}")
            source = None

        return source
|