hydroserverpy-0.3.0-py3-none-any.whl → hydroserverpy-0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hydroserverpy might be problematic. Click here for more details.

Files changed (49):
  1. hydroserverpy/__init__.py +1 -1
  2. hydroserverpy/core/endpoints/base.py +44 -31
  3. hydroserverpy/core/endpoints/data_loaders.py +6 -5
  4. hydroserverpy/core/endpoints/data_sources.py +6 -5
  5. hydroserverpy/core/endpoints/datastreams.py +89 -52
  6. hydroserverpy/core/endpoints/observed_properties.py +36 -18
  7. hydroserverpy/core/endpoints/processing_levels.py +36 -18
  8. hydroserverpy/core/endpoints/result_qualifiers.py +37 -19
  9. hydroserverpy/core/endpoints/sensors.py +37 -19
  10. hydroserverpy/core/endpoints/things.py +58 -37
  11. hydroserverpy/core/endpoints/units.py +37 -19
  12. hydroserverpy/core/schemas/base.py +13 -6
  13. hydroserverpy/core/schemas/data_loaders.py +6 -4
  14. hydroserverpy/core/schemas/data_sources.py +73 -56
  15. hydroserverpy/core/schemas/datastreams.py +101 -70
  16. hydroserverpy/core/schemas/observed_properties.py +18 -10
  17. hydroserverpy/core/schemas/processing_levels.py +10 -6
  18. hydroserverpy/core/schemas/result_qualifiers.py +7 -4
  19. hydroserverpy/core/schemas/sensors.py +33 -18
  20. hydroserverpy/core/schemas/things.py +97 -60
  21. hydroserverpy/core/schemas/units.py +7 -8
  22. hydroserverpy/core/service.py +31 -17
  23. hydroserverpy/etl/__init__.py +21 -0
  24. hydroserverpy/etl/extractors/__init__.py +0 -0
  25. hydroserverpy/etl/extractors/base.py +13 -0
  26. hydroserverpy/etl/extractors/ftp_extractor.py +50 -0
  27. hydroserverpy/etl/extractors/http_extractor.py +84 -0
  28. hydroserverpy/etl/extractors/local_file_extractor.py +25 -0
  29. hydroserverpy/etl/hydroserver_etl.py +40 -0
  30. hydroserverpy/etl/loaders/__init__.py +0 -0
  31. hydroserverpy/etl/loaders/base.py +13 -0
  32. hydroserverpy/etl/loaders/hydroserver_loader.py +68 -0
  33. hydroserverpy/etl/transformers/__init__.py +0 -0
  34. hydroserverpy/etl/transformers/base.py +52 -0
  35. hydroserverpy/etl/transformers/csv_transformer.py +88 -0
  36. hydroserverpy/etl/transformers/json_transformer.py +62 -0
  37. hydroserverpy/etl/types.py +7 -0
  38. hydroserverpy/etl_csv/__init__.py +0 -0
  39. hydroserverpy/{etl/service.py → etl_csv/hydroserver_etl_csv.py} +92 -54
  40. hydroserverpy/quality/service.py +84 -70
  41. hydroserverpy-0.4.0.dist-info/METADATA +18 -0
  42. hydroserverpy-0.4.0.dist-info/RECORD +51 -0
  43. {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.4.0.dist-info}/WHEEL +1 -1
  44. hydroserverpy-0.3.0.dist-info/METADATA +0 -18
  45. hydroserverpy-0.3.0.dist-info/RECORD +0 -36
  46. /hydroserverpy/{etl → etl_csv}/exceptions.py +0 -0
  47. {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.4.0.dist-info}/LICENSE +0 -0
  48. {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.4.0.dist-info}/top_level.txt +0 -0
  49. {hydroserverpy-0.3.0.dist-info → hydroserverpy-0.4.0.dist-info}/zip-safe +0 -0
@@ -5,40 +5,55 @@ from hydroserverpy.core.schemas.base import HydroServerCoreModel
5
5
 
6
6
  class SensorFields(BaseModel):
7
7
  name: str = Field(
8
- ..., strip_whitespace=True, max_length=255,
9
- description='The name of the sensor.'
8
+ ...,
9
+ strip_whitespace=True,
10
+ max_length=255,
11
+ description="The name of the sensor.",
10
12
  )
11
13
  description: str = Field(
12
- strip_whitespace=True,
13
- description='A description of the sensor.'
14
+ strip_whitespace=True, description="A description of the sensor."
14
15
  )
15
16
  encoding_type: str = Field(
16
- ..., strip_whitespace=True, max_length=255,
17
- description='The encoding type of the sensor.'
17
+ ...,
18
+ strip_whitespace=True,
19
+ max_length=255,
20
+ description="The encoding type of the sensor.",
18
21
  )
19
22
  manufacturer: Optional[str] = Field(
20
- None, strip_whitespace=True, max_length=255,
21
- description='The manufacturer of the sensor.'
23
+ None,
24
+ strip_whitespace=True,
25
+ max_length=255,
26
+ description="The manufacturer of the sensor.",
22
27
  )
23
28
  model: Optional[str] = Field(
24
- None, strip_whitespace=True, max_length=255,
25
- description='The model of the sensor.'
29
+ None,
30
+ strip_whitespace=True,
31
+ max_length=255,
32
+ description="The model of the sensor.",
26
33
  )
27
34
  model_link: Optional[str] = Field(
28
- None, strip_whitespace=True, max_length=500,
29
- description='A link to a website or file that describes the sensor model.'
35
+ None,
36
+ strip_whitespace=True,
37
+ max_length=500,
38
+ description="A link to a website or file that describes the sensor model.",
30
39
  )
31
40
  method_type: str = Field(
32
- ..., strip_whitespace=True, max_length=100,
33
- description='The type of method used by this sensor to collect observations.'
41
+ ...,
42
+ strip_whitespace=True,
43
+ max_length=100,
44
+ description="The type of method used by this sensor to collect observations.",
34
45
  )
35
46
  method_link: Optional[str] = Field(
36
- None, strip_whitespace=True, max_length=500,
37
- description='A link to a website or file that describes the sensor method.'
47
+ None,
48
+ strip_whitespace=True,
49
+ max_length=500,
50
+ description="A link to a website or file that describes the sensor method.",
38
51
  )
39
52
  method_code: Optional[str] = Field(
40
- None, strip_whitespace=True, max_length=50,
41
- description='A code representing the sensor method.'
53
+ None,
54
+ strip_whitespace=True,
55
+ max_length=50,
56
+ description="A code representing the sensor method.",
42
57
  )
43
58
 
44
59
  model_config = ConfigDict(protected_namespaces=())
@@ -10,73 +10,106 @@ if TYPE_CHECKING:
10
10
 
11
11
  class ThingFields(BaseModel):
12
12
  name: str = Field(
13
- ..., strip_whitespace=True, max_length=200,
14
- description='The name of the site/thing.'
13
+ ...,
14
+ strip_whitespace=True,
15
+ max_length=200,
16
+ description="The name of the site/thing.",
15
17
  )
16
18
  description: str = Field(
17
- ..., strip_whitespace=True,
18
- description='A description of the site/thing.'
19
+ ..., strip_whitespace=True, description="A description of the site/thing."
19
20
  )
20
21
  sampling_feature_type: str = Field(
21
- ..., strip_whitespace=True, max_length=200,
22
- description='The sampling feature type of the site/thing.'
22
+ ...,
23
+ strip_whitespace=True,
24
+ max_length=200,
25
+ description="The sampling feature type of the site/thing.",
23
26
  )
24
27
  sampling_feature_code: str = Field(
25
- ..., strip_whitespace=True, max_length=200,
26
- description='A code representing the sampling feature of the site/thing.'
28
+ ...,
29
+ strip_whitespace=True,
30
+ max_length=200,
31
+ description="A code representing the sampling feature of the site/thing.",
27
32
  )
28
33
  site_type: str = Field(
29
- ..., strip_whitespace=True, max_length=200,
30
- description='The type of the site/thing.'
34
+ ...,
35
+ strip_whitespace=True,
36
+ max_length=200,
37
+ description="The type of the site/thing.",
31
38
  )
32
39
  data_disclaimer: Optional[str] = Field(
33
- None, strip_whitespace=True,
34
- description='An optional data disclaimer to attach to observations collected at this site/thing.'
40
+ None,
41
+ strip_whitespace=True,
42
+ description="An optional data disclaimer to attach to observations collected at this site/thing.",
35
43
  )
36
44
 
37
45
 
38
46
  # Get a list of all ISO 3166-1 alpha-2 country codes
39
- valid_country_codes = [code for code, _ in countries_for_language('en')]
47
+ valid_country_codes = [code for code, _ in countries_for_language("en")]
40
48
 
41
49
 
42
50
  class LocationFields(BaseModel):
43
51
  latitude: float = Field(
44
- ..., ge=-90, le=90, serialization_alias='latitude',
45
- validation_alias=AliasChoices('latitude', AliasPath('location', 'latitude')),
46
- description='The WGS84 latitude of the location.'
52
+ ...,
53
+ ge=-90,
54
+ le=90,
55
+ serialization_alias="latitude",
56
+ validation_alias=AliasChoices("latitude", AliasPath("location", "latitude")),
57
+ description="The WGS84 latitude of the location.",
47
58
  )
48
59
  longitude: float = Field(
49
- ..., ge=-180, le=180, serialization_alias='longitude',
50
- validation_alias=AliasChoices('longitude', AliasPath('location', 'longitude')),
51
- description='The WGS84 longitude of the location.'
60
+ ...,
61
+ ge=-180,
62
+ le=180,
63
+ serialization_alias="longitude",
64
+ validation_alias=AliasChoices("longitude", AliasPath("location", "longitude")),
65
+ description="The WGS84 longitude of the location.",
52
66
  )
53
67
  elevation_m: Optional[float] = Field(
54
- None, ge=-99999, le=99999, serialization_alias='elevation_m',
55
- validation_alias=AliasChoices('elevation_m', AliasPath('location', 'elevation_m')),
56
- description='The elevation in meters of the location.'
68
+ None,
69
+ ge=-99999,
70
+ le=99999,
71
+ serialization_alias="elevation_m",
72
+ validation_alias=AliasChoices(
73
+ "elevation_m", AliasPath("location", "elevation_m")
74
+ ),
75
+ description="The elevation in meters of the location.",
57
76
  )
58
77
  elevation_datum: Optional[str] = Field(
59
- None, strip_whitespace=True, max_length=255, serialization_alias='elevationDatum',
60
- validation_alias=AliasChoices('elevationDatum', AliasPath('location', 'elevationDatum')),
61
- description='The datum used to represent the elevation of the location.'
78
+ None,
79
+ strip_whitespace=True,
80
+ max_length=255,
81
+ serialization_alias="elevationDatum",
82
+ validation_alias=AliasChoices(
83
+ "elevationDatum", AliasPath("location", "elevationDatum")
84
+ ),
85
+ description="The datum used to represent the elevation of the location.",
62
86
  )
63
87
  state: Optional[str] = Field(
64
- None, strip_whitespace=True, max_length=200, serialization_alias='state',
65
- validation_alias=AliasChoices('state', AliasPath('location', 'state')),
66
- description='The state/province of the location.'
88
+ None,
89
+ strip_whitespace=True,
90
+ max_length=200,
91
+ serialization_alias="state",
92
+ validation_alias=AliasChoices("state", AliasPath("location", "state")),
93
+ description="The state/province of the location.",
67
94
  )
68
95
  county: Optional[str] = Field(
69
- None, strip_whitespace=True, max_length=200, serialization_alias='county',
70
- validation_alias=AliasChoices('county', AliasPath('location', 'county')),
71
- description='The county/district of the location.'
96
+ None,
97
+ strip_whitespace=True,
98
+ max_length=200,
99
+ serialization_alias="county",
100
+ validation_alias=AliasChoices("county", AliasPath("location", "county")),
101
+ description="The county/district of the location.",
72
102
  )
73
103
  country: Optional[str] = Field(
74
- None, strip_whitespace=True, max_length=2, serialization_alias='country',
75
- validation_alias=AliasChoices('country', AliasPath('location', 'country')),
76
- description='The ISO 3166-1 alpha-2 country code of the location.'
104
+ None,
105
+ strip_whitespace=True,
106
+ max_length=2,
107
+ serialization_alias="country",
108
+ validation_alias=AliasChoices("country", AliasPath("location", "country")),
109
+ description="The ISO 3166-1 alpha-2 country code of the location.",
77
110
  )
78
111
 
79
- @field_validator('country', mode='after')
112
+ @field_validator("country", mode="after")
80
113
  def check_country_code(cls, value: str) -> str:
81
114
  """
82
115
  Validate the country code to ensure it is an ISO 3166-1 alpha-2 country code.
@@ -89,7 +122,9 @@ class LocationFields(BaseModel):
89
122
  """
90
123
 
91
124
  if value and value.upper() not in valid_country_codes:
92
- raise ValueError(f'Invalid country code: {value}. Must be an ISO 3166-1 alpha-2 country code.')
125
+ raise ValueError(
126
+ f"Invalid country code: {value}. Must be an ISO 3166-1 alpha-2 country code."
127
+ )
93
128
 
94
129
  return value
95
130
 
@@ -123,7 +158,7 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
123
158
  self._archive = None
124
159
 
125
160
  @property
126
- def datastreams(self) -> List['Datastream']:
161
+ def datastreams(self) -> List["Datastream"]:
127
162
  """
128
163
  The datastreams associated with the thing. If not already cached, fetch the datastreams from the
129
164
  server.
@@ -138,7 +173,7 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
138
173
  return self._datastreams
139
174
 
140
175
  @property
141
- def tags(self) -> List['Tag']:
176
+ def tags(self) -> List["Tag"]:
142
177
  """
143
178
  The tags associated with the thing. If not already cached, fetch the tags from the server.
144
179
 
@@ -152,7 +187,7 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
152
187
  return self._tags
153
188
 
154
189
  @property
155
- def photos(self) -> List['Photo']:
190
+ def photos(self) -> List["Photo"]:
156
191
  """
157
192
  The photos associated with the thing. If not already cached, fetch the photos from the server.
158
193
 
@@ -166,7 +201,7 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
166
201
  return self._photos
167
202
 
168
203
  @property
169
- def archive(self) -> 'Archive':
204
+ def archive(self) -> "Archive":
170
205
  """
171
206
  The archive associated with the thing. If not already cached, fetch the archive from the server.
172
207
 
@@ -185,7 +220,7 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
185
220
  if they were previously loaded.
186
221
  """
187
222
 
188
- entity = self._endpoint.get(uid=self.uid).model_dump(exclude=['uid'])
223
+ entity = self._endpoint.get(uid=self.uid).model_dump(exclude=["uid"])
189
224
  self._original_data = entity
190
225
  self.__dict__.update(entity)
191
226
  if self._datastreams is not None:
@@ -218,7 +253,9 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
218
253
  """
219
254
 
220
255
  selected_tag = next((tag for tag in self._tags if tag.key == key))
221
- updated_tag = self._endpoint.update_tag(uid=self.uid, tag_uid=selected_tag.uid, value=value)
256
+ updated_tag = self._endpoint.update_tag(
257
+ uid=self.uid, tag_uid=selected_tag.uid, value=value
258
+ )
222
259
  self._tags = [tag if tag.key != key else updated_tag for tag in self._tags]
223
260
 
224
261
  def delete_tag(self, key: str) -> None:
@@ -253,7 +290,9 @@ class Thing(HydroServerCoreModel, ThingFields, LocationFields):
253
290
 
254
291
  selected_photo = next((photo for photo in self._photos if photo.link == link))
255
292
  self._endpoint.delete_photo(uid=self.uid, photo_uid=selected_photo.uid)
256
- self._photos = [photo for photo in self._photos if photo.link != selected_photo.link]
293
+ self._photos = [
294
+ photo for photo in self._photos if photo.link != selected_photo.link
295
+ ]
257
296
 
258
297
 
259
298
  class Archive(HydroServerBaseModel):
@@ -262,20 +301,24 @@ class Archive(HydroServerBaseModel):
262
301
  """
263
302
 
264
303
  link: Optional[str] = Field(
265
- None, strip_whitespace=True, max_length=255,
266
- description='A link to the HydroShare resource containing the archived site/thing.'
304
+ None,
305
+ strip_whitespace=True,
306
+ max_length=255,
307
+ description="A link to the HydroShare resource containing the archived site/thing.",
267
308
  )
268
- frequency: Optional[Literal['daily', 'weekly', 'monthly']] = Field(
309
+ frequency: Optional[Literal["daily", "weekly", "monthly"]] = Field(
269
310
  ...,
270
- description='The frequency at which the site/thing should be archived.',
311
+ description="The frequency at which the site/thing should be archived.",
271
312
  )
272
313
  path: str = Field(
273
- ..., strip_whitespace=True, max_length=255,
274
- description='The path within the HydroShare resource containing the archived data.'
314
+ ...,
315
+ strip_whitespace=True,
316
+ max_length=255,
317
+ description="The path within the HydroShare resource containing the archived data.",
275
318
  )
276
319
  datastream_ids: List[UUID] = Field(
277
320
  ...,
278
- description='The list of datastreams that are included in the archived data.',
321
+ description="The list of datastreams that are included in the archived data.",
279
322
  )
280
323
 
281
324
 
@@ -285,12 +328,10 @@ class Tag(HydroServerBaseModel):
285
328
  """
286
329
 
287
330
  key: str = Field(
288
- ..., strip_whitespace=True, max_length=255,
289
- description='The key of the tag.'
331
+ ..., strip_whitespace=True, max_length=255, description="The key of the tag."
290
332
  )
291
333
  value: str = Field(
292
- ..., strip_whitespace=True, max_length=255,
293
- description='The value of the tag.'
334
+ ..., strip_whitespace=True, max_length=255, description="The value of the tag."
294
335
  )
295
336
 
296
337
 
@@ -300,10 +341,6 @@ class Photo(HydroServerBaseModel):
300
341
  """
301
342
 
302
343
  file_path: str = Field(
303
- ..., strip_whitespace=True,
304
- description='The file path of the photo.'
305
- )
306
- link: str = Field(
307
- ..., strip_whitespace=True,
308
- description='The link to the photo.'
344
+ ..., strip_whitespace=True, description="The file path of the photo."
309
345
  )
346
+ link: str = Field(..., strip_whitespace=True, description="The link to the photo.")
@@ -4,20 +4,19 @@ from hydroserverpy.core.schemas.base import HydroServerCoreModel
4
4
 
5
5
  class UnitFields:
6
6
  name: str = Field(
7
- ..., strip_whitespace=True, max_length=255,
8
- description='The name of the unit.'
7
+ ..., strip_whitespace=True, max_length=255, description="The name of the unit."
9
8
  )
10
9
  symbol: str = Field(
11
- ..., strip_whitespace=True, max_length=255,
12
- description='The symbol of the unit.'
10
+ ...,
11
+ strip_whitespace=True,
12
+ max_length=255,
13
+ description="The symbol of the unit.",
13
14
  )
14
15
  definition: str = Field(
15
- ..., strip_whitespace=True,
16
- description='The definition of the unit.'
16
+ ..., strip_whitespace=True, description="The definition of the unit."
17
17
  )
18
18
  type: str = Field(
19
- ..., strip_whitespace=True, max_length=255,
20
- description='The type of the unit.'
19
+ ..., strip_whitespace=True, max_length=255, description="The type of the unit."
21
20
  )
22
21
 
23
22
 
@@ -1,8 +1,16 @@
1
1
  import requests
2
2
  from typing import Optional
3
- from hydroserverpy.core.endpoints import (DataLoaderEndpoint, DataSourceEndpoint, DatastreamEndpoint, ThingEndpoint,
4
- SensorEndpoint, UnitEndpoint, ProcessingLevelEndpoint,
5
- ObservedPropertyEndpoint, ResultQualifierEndpoint)
3
+ from hydroserverpy.core.endpoints import (
4
+ DataLoaderEndpoint,
5
+ DataSourceEndpoint,
6
+ DatastreamEndpoint,
7
+ ThingEndpoint,
8
+ SensorEndpoint,
9
+ UnitEndpoint,
10
+ ProcessingLevelEndpoint,
11
+ ObservedPropertyEndpoint,
12
+ ResultQualifierEndpoint,
13
+ )
6
14
 
7
15
 
8
16
  class HydroServer:
@@ -22,16 +30,23 @@ class HydroServer:
22
30
  """
23
31
 
24
32
  def __init__(
25
- self,
26
- host: str,
27
- username: Optional[str] = None,
28
- password: Optional[str] = None,
29
- apikey: Optional[str] = None,
30
- api_route: str = 'api'
33
+ self,
34
+ host: str,
35
+ username: Optional[str] = None,
36
+ password: Optional[str] = None,
37
+ apikey: Optional[str] = None,
38
+ api_route: str = "api",
31
39
  ):
32
- self.host = host.strip('/')
33
- self.auth = (username or '__key__', password or apikey,) if (username and password) or apikey else None
34
- self.api_route = api_route.strip('/')
40
+ self.host = host.strip("/")
41
+ self.auth = (
42
+ (
43
+ username or "__key__",
44
+ password or apikey,
45
+ )
46
+ if (username and password) or apikey
47
+ else None
48
+ )
49
+ self.api_route = api_route.strip("/")
35
50
  self._session = None
36
51
  self._timeout = 60
37
52
  self._initialize_session()
@@ -49,10 +64,8 @@ class HydroServer:
49
64
 
50
65
  self._session = requests.Session()
51
66
 
52
- if self.auth and self.auth[0] == '__key__':
53
- self._session.headers.update(
54
- {'key': self.auth[1]}
55
- )
67
+ if self.auth and self.auth[0] == "__key__":
68
+ self._session.headers.update({"key": self.auth[1]})
56
69
  elif self.auth:
57
70
  self._session.auth = self.auth
58
71
 
@@ -75,7 +88,8 @@ class HydroServer:
75
88
  response = getattr(self._session, method)(
76
89
  f'{self.host}/{path.strip("/")}',
77
90
  timeout=self._timeout,
78
- *args, **kwargs
91
+ *args,
92
+ **kwargs,
79
93
  )
80
94
  response.raise_for_status()
81
95
  return response
@@ -0,0 +1,21 @@
1
+ from .extractors.local_file_extractor import LocalFileExtractor
2
+ from .extractors.ftp_extractor import FTPExtractor
3
+ from .extractors.http_extractor import HTTPExtractor
4
+ from .transformers.csv_transformer import CSVTransformer
5
+ from .transformers.json_transformer import JSONTransformer
6
+ from .transformers.base import Transformer
7
+ from .extractors.base import Extractor
8
+ from .loaders.base import Loader
9
+ from .loaders.hydroserver_loader import HydroServerLoader
10
+
11
+ __all__ = [
12
+ "CSVTransformer",
13
+ "JSONTransformer",
14
+ "LocalFileExtractor",
15
+ "FTPExtractor",
16
+ "HTTPExtractor",
17
+ "Extractor",
18
+ "Transformer",
19
+ "Loader",
20
+ "HydroServerLoader",
21
+ ]
File without changes
@@ -0,0 +1,13 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict
3
+ from ..types import TimeRange
4
+
5
+
6
class Extractor(ABC):
    """
    Abstract interface for ETL extractors.

    Concrete subclasses must implement both methods below before they can be
    instantiated.
    """

    @abstractmethod
    def prepare_params(self, data_requirements: Dict[str, TimeRange]):
        """
        Adjust the extractor's request parameters before extraction.

        :param data_requirements: Mapping of string keys to ``TimeRange``
            entries. NOTE(review): the meaning of the keys is not visible
            here -- confirm against the caller.
        """

    @abstractmethod
    def extract(self):
        """
        Fetch the source data and return it to the caller.
        """
@@ -0,0 +1,50 @@
1
+ import logging
2
+ from ftplib import FTP
3
+ from io import BytesIO
4
+ from typing import Dict
5
+
6
+ from .base import Extractor
7
+ from ..types import TimeRange
8
+
9
+
10
class FTPExtractor(Extractor):
    """
    Extractor that downloads a single file from an FTP server into memory.
    """

    def __init__(
        self,
        host: str,
        filepath: str,
        username: str = None,
        password: str = None,
        port: int = 21,
    ):
        """
        :param host: Host name or address of the FTP server.
        :param filepath: Path of the file to retrieve, as passed to ``RETR``.
        :param username: Login user; passed through to ``FTP.login``.
        :param password: Login password; passed through to ``FTP.login``.
        :param port: Control port of the FTP server; coerced to ``int``.
        """
        self.host = host
        self.port = int(port)
        self.username = username
        self.password = password
        self.filepath = filepath

    def prepare_params(self, data_requirements: Dict[str, TimeRange]):
        """
        No-op: this extractor takes no parameters from the data requirements.
        """
        pass

    def extract(self):
        """
        Downloads the file from the FTP server and returns a file-like object.

        :return: A ``BytesIO`` positioned at offset 0 holding the file
            contents, or ``None`` if connecting, logging in or downloading
            failed (the error is logged).
        """
        ftp = FTP()
        try:
            ftp.connect(self.host, self.port)
            ftp.login(user=self.username, passwd=self.password)
            logging.info(f"Connected to FTP server: {self.host}:{self.port}")

            data = BytesIO()
            ftp.retrbinary(f"RETR {self.filepath}", data.write)
            logging.info(
                f"Successfully downloaded file '{self.filepath}' from FTP server."
            )
            data.seek(0)
            return data
        except Exception as e:
            logging.error(f"Error retrieving file from FTP server: {e}")
            return None
        finally:
            # quit() sends QUIT over the control socket, which only exists
            # after a successful connect(). Calling it unconditionally would
            # raise here and replace the `return None` above.
            if ftp.sock is not None:
                try:
                    ftp.quit()
                except Exception:
                    # Best-effort goodbye failed (e.g. the server already
                    # dropped the connection); just release the socket so the
                    # cleanup never masks the real result.
                    ftp.close()
@@ -0,0 +1,84 @@
1
+ import logging
2
+ from hydroserverpy.etl.types import TimeRange
3
+ import requests
4
+ from io import BytesIO
5
+ from typing import Dict
6
+ from .base import Extractor
7
+
8
+
9
class HTTPExtractor(Extractor):
    """
    Extractor that downloads a payload over HTTP/HTTPS into memory.
    """

    def __init__(
        self,
        url: str,
        url_variables: dict = None,
        params: dict = None,
        headers: dict = None,
        auth: tuple = None,
        timeout: float = None,
    ):
        """
        :param url: URL template; ``{name}`` placeholders are filled from
            ``url_variables`` via ``str.format``.
        :param url_variables: Values for the placeholders in ``url``.
        :param params: Query parameters sent with the request. May contain the
            special entries ``start_time_key`` / ``end_time_key`` consumed by
            ``prepare_params``.
        :param headers: Extra request headers.
        :param auth: Credentials passed to ``requests.get`` as ``auth``.
        :param timeout: Optional timeout passed to ``requests.get``. The
            default ``None`` keeps the previous behaviour (no timeout).
        :raises KeyError: If ``url`` references a placeholder that is missing
            from ``url_variables``.
        """
        self.url = self.format_url(url, url_variables or {})
        self.params = params
        self.headers = headers
        self.auth = auth
        self.timeout = timeout
        self.start_date = None

    def prepare_params(self, data_requirements: Dict[str, TimeRange]):
        """
        Translate the requested time ranges into query parameters.

        The earliest non-empty ``start_time`` and the latest non-empty
        ``end_time`` across all requirements are written (ISO 8601 formatted)
        into ``self.params`` under the parameter names given by the
        ``start_time_key`` / ``end_time_key`` entries, which are removed from
        ``self.params`` in the process.

        :param data_requirements: Mapping whose values expose ``start_time``
            and ``end_time`` items.
        """
        # `params` defaults to None in __init__; without this guard the
        # `.pop()` calls below raise AttributeError.
        if self.params is None:
            self.params = {}

        start_times = [
            req["start_time"] for req in data_requirements.values() if req["start_time"]
        ]
        if start_times:
            self._apply_time_param("start_time", min(start_times).isoformat())

        end_times = [
            req["end_time"] for req in data_requirements.values() if req["end_time"]
        ]
        if end_times:
            self._apply_time_param("end_time", max(end_times).isoformat())

    def _apply_time_param(self, label, value):
        """
        Move ``value`` into ``self.params`` under the name stored at
        ``self.params["<label>_key"]``, or warn if no such entry exists.
        """
        target_key = self.params.pop(f"{label}_key", None)
        if target_key:
            self.params[target_key] = value
            logging.info(f"Set {label} to {value} and removed '{label}_key'")
        else:
            logging.warning(f"'{label}_key' not found in params.")

    def extract(self):
        """
        Downloads the file from the HTTP/HTTPS server and returns a file-like object.

        :return: A ``BytesIO`` positioned at offset 0 holding the response body.
        :raises requests.HTTPError: If the server answers with an error status.
        """
        # The context manager releases the streamed connection even when
        # raise_for_status() or the download loop raises.
        with requests.get(
            url=self.url,
            params=self.params,
            headers=self.headers,
            auth=self.auth,
            stream=True,
            timeout=self.timeout,
        ) as response:
            response.raise_for_status()
            logging.info(f"Successfully downloaded file from {response.url}")

            data = BytesIO()
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    data.write(chunk)

        data.seek(0)
        return data

    @staticmethod
    def format_url(url_template, url_variables):
        """
        Fill the ``{name}`` placeholders of ``url_template`` from ``url_variables``.

        :raises KeyError: If a placeholder has no matching entry; the message
            names the missing variable.
        """
        try:
            url = url_template.format(**url_variables)
        except KeyError as e:
            missing_key = e.args[0]
            # Chain the original error so the traceback keeps its cause.
            raise KeyError(f"Missing configuration url_variable: {missing_key}") from e

        return url
@@ -0,0 +1,25 @@
1
+ import logging
2
+ from typing import Dict
3
+
4
+ from .base import Extractor
5
+ from ..types import TimeRange
6
+
7
+
8
class LocalFileExtractor(Extractor):
    """
    Extractor that reads its payload from a file on the local filesystem.
    """

    def __init__(self, filepath: str):
        """
        :param filepath: Path of the file to open when ``extract`` is called.
        """
        self.filepath = filepath

    def prepare_params(self, data_requirements: Dict[str, TimeRange]):
        """
        No-op: this extractor takes no parameters from the data requirements.
        """
        return None

    def extract(self):
        """
        Opens the file and returns a file-like object.

        :return: The file handle opened in text read mode, or ``None`` if the
            file could not be opened (the error is logged). Closing the handle
            is left to the caller.
        """
        path = self.filepath
        try:
            handle = open(path, "r")
            logging.info(f"Successfully opened file '{path}'.")
        except Exception as err:
            logging.error(f"Error opening file '{path}': {err}")
            return None
        return handle