meteostat 1.7.6__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meteostat/__init__.py +38 -19
- meteostat/api/config.py +158 -0
- meteostat/api/daily.py +76 -0
- meteostat/api/hourly.py +80 -0
- meteostat/api/interpolate.py +378 -0
- meteostat/api/inventory.py +59 -0
- meteostat/api/merge.py +103 -0
- meteostat/api/monthly.py +73 -0
- meteostat/api/normals.py +144 -0
- meteostat/api/point.py +30 -0
- meteostat/api/stations.py +234 -0
- meteostat/api/timeseries.py +334 -0
- meteostat/core/cache.py +212 -59
- meteostat/core/data.py +203 -0
- meteostat/core/logger.py +9 -0
- meteostat/core/network.py +82 -0
- meteostat/core/parameters.py +112 -0
- meteostat/core/providers.py +184 -0
- meteostat/core/schema.py +170 -0
- meteostat/core/validator.py +38 -0
- meteostat/enumerations.py +149 -0
- meteostat/interpolation/idw.py +120 -0
- meteostat/interpolation/lapserate.py +91 -0
- meteostat/interpolation/nearest.py +31 -0
- meteostat/parameters.py +354 -0
- meteostat/providers/dwd/climat.py +166 -0
- meteostat/providers/dwd/daily.py +144 -0
- meteostat/providers/dwd/hourly.py +218 -0
- meteostat/providers/dwd/monthly.py +138 -0
- meteostat/providers/dwd/mosmix.py +351 -0
- meteostat/providers/dwd/poi.py +117 -0
- meteostat/providers/dwd/shared.py +155 -0
- meteostat/providers/eccc/daily.py +87 -0
- meteostat/providers/eccc/hourly.py +104 -0
- meteostat/providers/eccc/monthly.py +66 -0
- meteostat/providers/eccc/shared.py +45 -0
- meteostat/providers/index.py +496 -0
- meteostat/providers/meteostat/daily.py +65 -0
- meteostat/providers/meteostat/daily_derived.py +110 -0
- meteostat/providers/meteostat/hourly.py +66 -0
- meteostat/providers/meteostat/monthly.py +45 -0
- meteostat/providers/meteostat/monthly_derived.py +106 -0
- meteostat/providers/meteostat/shared.py +93 -0
- meteostat/providers/metno/forecast.py +186 -0
- meteostat/providers/noaa/ghcnd.py +228 -0
- meteostat/providers/noaa/isd_lite.py +142 -0
- meteostat/providers/noaa/metar.py +163 -0
- meteostat/typing.py +113 -0
- meteostat/utils/conversions.py +231 -0
- meteostat/utils/data.py +194 -0
- meteostat/utils/geo.py +28 -0
- meteostat/utils/guards.py +51 -0
- meteostat/utils/parsers.py +161 -0
- meteostat/utils/types.py +113 -0
- meteostat/utils/validators.py +31 -0
- meteostat-2.0.1.dist-info/METADATA +130 -0
- meteostat-2.0.1.dist-info/RECORD +64 -0
- {meteostat-1.7.6.dist-info → meteostat-2.0.1.dist-info}/WHEEL +1 -2
- meteostat/core/loader.py +0 -103
- meteostat/core/warn.py +0 -34
- meteostat/enumerations/granularity.py +0 -22
- meteostat/interface/base.py +0 -39
- meteostat/interface/daily.py +0 -118
- meteostat/interface/hourly.py +0 -154
- meteostat/interface/meteodata.py +0 -210
- meteostat/interface/monthly.py +0 -109
- meteostat/interface/normals.py +0 -245
- meteostat/interface/point.py +0 -143
- meteostat/interface/stations.py +0 -252
- meteostat/interface/timeseries.py +0 -237
- meteostat/series/aggregate.py +0 -48
- meteostat/series/convert.py +0 -28
- meteostat/series/count.py +0 -17
- meteostat/series/coverage.py +0 -20
- meteostat/series/fetch.py +0 -28
- meteostat/series/interpolate.py +0 -47
- meteostat/series/normalize.py +0 -76
- meteostat/series/stations.py +0 -22
- meteostat/units.py +0 -149
- meteostat/utilities/__init__.py +0 -0
- meteostat/utilities/aggregations.py +0 -37
- meteostat/utilities/endpoint.py +0 -33
- meteostat/utilities/helpers.py +0 -70
- meteostat/utilities/mutations.py +0 -89
- meteostat/utilities/validations.py +0 -30
- meteostat-1.7.6.dist-info/METADATA +0 -112
- meteostat-1.7.6.dist-info/RECORD +0 -39
- meteostat-1.7.6.dist-info/top_level.txt +0 -1
- /meteostat/{core → api}/__init__.py +0 -0
- /meteostat/{enumerations → interpolation}/__init__.py +0 -0
- /meteostat/{interface → providers}/__init__.py +0 -0
- /meteostat/{interface/interpolate.py → py.typed} +0 -0
- /meteostat/{series → utils}/__init__.py +0 -0
- {meteostat-1.7.6.dist-info → meteostat-2.0.1.dist-info/licenses}/LICENSE +0 -0
meteostat/core/data.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data Service
|
|
3
|
+
|
|
4
|
+
The Data Service is responsible for fetching meteorological data from
|
|
5
|
+
different providers and merging it into a single time series.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from typing import List, Optional, Union, cast
|
|
10
|
+
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
13
|
+
from meteostat.api.timeseries import TimeSeries
|
|
14
|
+
from meteostat.core.logger import logger
|
|
15
|
+
from meteostat.core.parameters import parameter_service
|
|
16
|
+
from meteostat.core.providers import provider_service
|
|
17
|
+
from meteostat.core.schema import schema_service
|
|
18
|
+
from meteostat.enumerations import Parameter, Provider
|
|
19
|
+
from meteostat.typing import Station, Request
|
|
20
|
+
from meteostat.utils.data import stations_to_df
|
|
21
|
+
from meteostat.utils.guards import request_size_guard
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DataService:
    """
    Data Service

    Fetches data for the requested stations from the applicable providers
    and merges the resulting fragments into a single time series.
    """

    @staticmethod
    def _add_source(df: pd.DataFrame, provider_id: str) -> pd.DataFrame:
        """
        Add source index level to DataFrame

        If the index already has a "source" level, the DataFrame is returned
        as-is. Otherwise a new DataFrame is returned whose index carries an
        additional "source" level filled with `provider_id`. The DataFrame
        passed in is never modified.
        """
        if "source" in df.index.names:
            return df

        # assign() operates on a copy, so the caller's DataFrame does not
        # gain a stray "source" column as a side effect
        return df.assign(source=provider_id).set_index(["source"], append=True)

    @staticmethod
    def _time_mask(time: pd.Index, start, end):
        """
        Build a boolean mask for start <= time <= end

        A bound which is None is treated as open. Returns None if both
        bounds are None.
        """
        mask = None

        if start is not None:
            mask = time >= start

        if end is not None:
            upper = time <= end
            mask = upper if mask is None else mask & upper

        return mask

    @staticmethod
    def filter_time(
        df: pd.DataFrame,
        start: Union[datetime, None] = None,
        end: Union[datetime, None] = None,
    ) -> pd.DataFrame:
        """
        Filter time series data based on start and end date

        Both bounds are inclusive and optional. If only one bound is given,
        the other side of the range is left open. If neither is given (or the
        DataFrame is empty), the DataFrame is returned unchanged.
        """
        # Nothing to filter
        if df.empty or (start is None and end is None):
            return df

        # Get time index
        time = df.index.get_level_values("time")

        try:
            mask = DataService._time_mask(time, start, end)
        except TypeError:
            # The comparison with datetime objects failed; retry with plain
            # dates (presumably the index holds date objects in this case)
            mask = DataService._time_mask(
                time,
                start.date() if start is not None else None,
                end.date() if end is not None else None,
            )

        return df.loc[mask]

    @staticmethod
    def concat_fragments(
        fragments: List[pd.DataFrame],
        parameters: List[Parameter],
    ) -> pd.DataFrame:
        """
        Concatenate multiple fragments into a single DataFrame

        Empty fragments are skipped and all-NaN columns are dropped from the
        remaining ones before concatenation. The result is passed through
        the schema service's fill() and purge() for the given parameters.
        An empty DataFrame is returned if there is nothing to concatenate
        or if a ValueError is raised along the way.
        """
        try:
            non_empty = [
                df.dropna(how="all", axis=1) for df in fragments if not df.empty
            ]

            if not non_empty:
                return pd.DataFrame()

            df = pd.concat(non_empty)
            df = schema_service.fill(df, parameters)
            df = schema_service.purge(df, parameters)
            return df
        except ValueError:
            # Best effort: fall back to an empty result
            return pd.DataFrame()

    def _fetch_provider_data(
        self, req: Request, station: Station, provider: Provider
    ) -> Optional[pd.DataFrame]:
        """
        Fetch data for a single weather station and provider

        Returns None if the provider returned no data or if fetching or
        processing the data raised an exception. Exceptions are logged,
        not propagated, so one failing provider does not abort the request.
        """
        try:
            # Fetch DataFrame for current provider
            df = provider_service.fetch_data(provider, req, station)

            # Nothing to do if no data was returned
            if df is None:
                return None

            # Add current station ID to DataFrame
            df = pd.concat([df], keys=[station.id], names=["station"])

            # Add source index column to DataFrame
            df = self._add_source(df, provider)

            # Filter DataFrame for requested time range
            df = self.filter_time(df, req.start, req.end)

            # Drop empty rows
            df = df.dropna(how="all")

            return df

        except Exception:
            logger.error(
                'Could not fetch data for provider "%s"',
                provider,
                exc_info=True,
            )
            return None

    def _fetch_station_data(self, req: Request, station: Station) -> List[pd.DataFrame]:
        """
        Fetch data for a single weather station

        Returns one DataFrame per provider which delivered data.
        """
        fragments = []

        for provider in provider_service.filter_providers(req, station):
            df = self._fetch_provider_data(req, station, provider)

            # Skip providers which returned no data
            if df is not None:
                fragments.append(df)

        return fragments

    def fetch(
        self,
        req: Request,
    ) -> TimeSeries:
        """
        Load meteorological time series data from different providers

        Note that `req.parameters` is replaced by the list of parameters
        which are supported for the requested granularity.
        """
        # Guard request
        request_size_guard(req)

        # Convert stations to list if single Station
        stations: List[Station] = (
            cast(List[Station], req.station)
            if isinstance(req.station, list)
            else [req.station]
        )

        logger.debug(
            "%s time series requested for %s station(s)", req.granularity, len(stations)
        )

        # Filter parameters
        req.parameters = parameter_service.filter_parameters(
            req.granularity, req.parameters
        )

        fragments = []

        # Go through all weather stations
        for station in stations:
            fragments.extend(self._fetch_station_data(req, station))

        # Merge data in a single DataFrame
        if fragments:
            df = self.concat_fragments(fragments, req.parameters)
        else:
            df = pd.DataFrame()

        # Set data types
        df = schema_service.format(df, req.granularity)

        # Create time series
        ts = TimeSeries(
            req.granularity,
            stations_to_df(stations),
            df,
            req.start,
            req.end,
            req.timezone,
            multi_station=isinstance(req.station, list),
        )

        # Return time series
        return ts
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# Module-level DataService instance
data_service = DataService()
|
meteostat/core/network.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Network Service
|
|
3
|
+
|
|
4
|
+
The Network Service provides methods to send HTTP requests
|
|
5
|
+
considering the Meteostat configuration.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
|
|
12
|
+
from meteostat import __version__
|
|
13
|
+
from meteostat.core.logger import logger
|
|
14
|
+
from meteostat.api.config import config
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class NetworkService:
    """
    Network Service
    """

    @staticmethod
    def _process_headers(headers: dict) -> dict:
        """
        Process headers

        Returns a copy of `headers` with the "X-Meteostat-Version" header
        set to the package version. The dict passed in is left untouched,
        so callers can safely reuse it.
        """
        return {**headers, "X-Meteostat-Version": __version__}

    def get(
        self,
        url: str,
        params=None,
        headers: Optional[dict] = None,
        stream: Optional[bool] = None,
    ) -> requests.Response:
        """
        Send a GET request using the Meteostat configuration

        The request carries the Meteostat version header, uses the proxies
        from the configuration and times out after 30 seconds.
        """
        headers = self._process_headers({} if headers is None else headers)

        return requests.get(
            url,
            params,
            headers=headers,
            stream=stream,
            proxies=config.network_proxies,
            timeout=30,
        )

    def get_from_mirrors(
        self,
        mirrors: list[str],
        params=None,
        headers: Optional[dict] = None,
        stream: Optional[bool] = None,
    ) -> Optional[requests.Response]:
        """
        Send a GET request to multiple mirrors using the Meteostat configuration

        The mirrors are tried in order. The first response with status code
        200 is returned. Mirrors which fail with a request exception or
        respond with another status code are skipped. Returns None if no
        mirror delivered a 200 response.
        """
        for mirror in mirrors:
            try:
                response = self.get(
                    mirror,
                    params=params,
                    headers=headers,
                    stream=stream,
                )
            except requests.RequestException:
                logger.warning("Could not fetch data from '%s'", mirror)
                continue

            if response.status_code == 200:
                return response

            # The response is discarded: release its connection, which
            # would otherwise stay checked out for streamed requests
            response.close()
            logger.debug(
                "Mirror '%s' responded with status code %s",
                mirror,
                response.status_code,
            )

        return None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Module-level NetworkService instance
network_service = NetworkService()
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Parameter Service
|
|
3
|
+
|
|
4
|
+
The Parameter Service provides methods to manage and access
|
|
5
|
+
supported parameters for data requests.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
|
|
10
|
+
from meteostat.core.logger import logger
|
|
11
|
+
from meteostat.enumerations import Granularity, Parameter
|
|
12
|
+
from meteostat.parameters import DEFAULT_PARAMETERS
|
|
13
|
+
from meteostat.typing import ParameterSpec
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ParameterService:
    """
    Parameter Service

    Keeps a registry of parameter specs. A spec is identified by the
    combination of its ID and granularity.
    """

    _parameters: List[ParameterSpec]

    @staticmethod
    def _has_duplicates(parameter_specs: List[ParameterSpec]) -> bool:
        """
        Check if parameter list contains duplicates
        """
        seen = set()
        for spec in parameter_specs:
            key = (spec.id, spec.granularity)
            if key in seen:
                return True  # Duplicate found
            seen.add(key)
        return False  # No duplicates found

    def _parameter_exists(self, parameter: ParameterSpec) -> bool:
        """
        Check if a parameter already exists
        """
        key = (parameter.id, parameter.granularity)
        return any((spec.id, spec.granularity) == key for spec in self.parameters)

    def __init__(self, parameters: List[ParameterSpec]) -> None:
        """
        Create a service for the given parameter specs

        Raises a ValueError if the list contains two specs with the same
        ID and granularity.
        """
        if self._has_duplicates(parameters):
            raise ValueError("List of parameters contains duplicates")

        # Keep a private copy so that register() does not modify the list
        # which was passed in (e.g. a module-level default list)
        self._parameters = list(parameters)

    @property
    def parameters(self) -> List[ParameterSpec]:
        """
        Get supported parameters
        """
        return self._parameters

    def register(self, parameter: ParameterSpec) -> None:
        """
        Register a parameter

        Raises a ValueError if a parameter with the same ID and granularity
        is already registered.
        """
        if self._parameter_exists(parameter):
            raise ValueError("The parameter already exists")

        self._parameters.append(parameter)

    def get_parameter(
        self, parameter_id: Parameter, granularity: Granularity
    ) -> Optional[ParameterSpec]:
        """
        Get parameter by ID and granularity

        Returns None if no matching parameter is registered.
        """
        return next(
            (
                parameter
                for parameter in self.parameters
                if parameter.id == parameter_id and parameter.granularity == granularity
            ),
            None,
        )

    def filter_parameters(
        self, granularity: Granularity, parameters: List[Parameter]
    ) -> List[Parameter]:
        """
        Reduce the requested parameters to the supported ones

        Returns the IDs of all parameters registered for the given
        granularity which are part of `parameters`, in registration order.
        Requested parameters which are not supported are reported through
        an error log entry; no exception is raised.
        """
        supported_parameters = [
            spec.id for spec in self.parameters if spec.granularity == granularity
        ]
        requested = set(parameters)

        # Get difference between requested parameters and root schema
        diff = requested.difference(supported_parameters)

        # Log unsupported parameters (sorted for a stable message)
        if diff:
            logger.error(
                "Tried to request data for unsupported parameter(s): %s",
                ", ".join(sorted(diff)),
            )

        # Return intersection
        return [
            parameter for parameter in supported_parameters if parameter in requested
        ]
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# Module-level ParameterService instance, initialized with the default parameters
parameter_service = ParameterService(DEFAULT_PARAMETERS)
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Provider Service
|
|
3
|
+
|
|
4
|
+
The Provider Service provides methods to interact with data providers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from importlib import import_module
|
|
9
|
+
from statistics import fmean
|
|
10
|
+
from typing import List, Optional, TypeGuard, cast
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
from meteostat.core.logger import logger
|
|
15
|
+
from meteostat.enumerations import Granularity, Priority, Provider
|
|
16
|
+
from meteostat.providers.index import DEFAULT_PROVIDERS
|
|
17
|
+
from meteostat.typing import (
|
|
18
|
+
ProviderRequest,
|
|
19
|
+
ProviderSpec,
|
|
20
|
+
Station,
|
|
21
|
+
Request,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ProviderService:
|
|
26
|
+
"""
|
|
27
|
+
Provider Service
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
_providers: List[ProviderSpec]
|
|
31
|
+
|
|
32
|
+
def __init__(self, providers: List[ProviderSpec]) -> None:
|
|
33
|
+
self._providers = providers
|
|
34
|
+
|
|
35
|
+
@property
|
|
36
|
+
def providers(self) -> List[ProviderSpec]:
|
|
37
|
+
"""
|
|
38
|
+
Get supported providers
|
|
39
|
+
"""
|
|
40
|
+
return self._providers
|
|
41
|
+
|
|
42
|
+
def register(self, provider: ProviderSpec) -> None:
|
|
43
|
+
"""
|
|
44
|
+
Register a provider
|
|
45
|
+
"""
|
|
46
|
+
self._providers.append(provider)
|
|
47
|
+
|
|
48
|
+
def get_provider(self, provider_id: Provider | str) -> Optional[ProviderSpec]:
|
|
49
|
+
"""
|
|
50
|
+
Get provider by ID
|
|
51
|
+
"""
|
|
52
|
+
return next(
|
|
53
|
+
(provider for provider in self._providers if provider.id == provider_id),
|
|
54
|
+
None,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
def _get_provider_priority(self, provider_id: Provider | str) -> int:
|
|
58
|
+
"""
|
|
59
|
+
Get priority of a provider by its ID
|
|
60
|
+
"""
|
|
61
|
+
baselines = {
|
|
62
|
+
Granularity.HOURLY: 0,
|
|
63
|
+
Granularity.DAILY: 100,
|
|
64
|
+
Granularity.MONTHLY: 200,
|
|
65
|
+
Granularity.NORMALS: 300,
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
provider = self.get_provider(provider_id)
|
|
69
|
+
|
|
70
|
+
if not provider:
|
|
71
|
+
return Priority.NONE
|
|
72
|
+
|
|
73
|
+
baseline = baselines[provider.granularity]
|
|
74
|
+
|
|
75
|
+
return int(provider.priority + baseline)
|
|
76
|
+
|
|
77
|
+
def get_source_priority(self, source: str) -> float:
|
|
78
|
+
"""
|
|
79
|
+
Get priority of a source string
|
|
80
|
+
"""
|
|
81
|
+
provider_ids = source.split(" ")
|
|
82
|
+
|
|
83
|
+
if len(provider_ids) == 1:
|
|
84
|
+
return self._get_provider_priority(provider_ids[0])
|
|
85
|
+
|
|
86
|
+
priorities = [
|
|
87
|
+
self._get_provider_priority(provider) for provider in provider_ids
|
|
88
|
+
]
|
|
89
|
+
|
|
90
|
+
return fmean(priorities)
|
|
91
|
+
|
|
92
|
+
def filter_providers(self, query: Request, station: Station) -> List[Provider]:
|
|
93
|
+
"""
|
|
94
|
+
Get a filtered list of providers
|
|
95
|
+
"""
|
|
96
|
+
|
|
97
|
+
def _filter(provider_id: Provider) -> TypeGuard[Provider]:
|
|
98
|
+
provider = self.get_provider(provider_id)
|
|
99
|
+
|
|
100
|
+
if provider is None:
|
|
101
|
+
return False
|
|
102
|
+
|
|
103
|
+
# Filter out providers with diverging granularities
|
|
104
|
+
if provider.granularity is not query.granularity:
|
|
105
|
+
logger.error(
|
|
106
|
+
"Provider '%s' does not support granularity '%s'",
|
|
107
|
+
provider_id,
|
|
108
|
+
query.granularity,
|
|
109
|
+
)
|
|
110
|
+
return False
|
|
111
|
+
|
|
112
|
+
# Filter out providers with no overlap in parameters
|
|
113
|
+
if set(provider.parameters).isdisjoint(query.parameters):
|
|
114
|
+
logger.info(
|
|
115
|
+
"Provider '%s' does not support any requested parameter",
|
|
116
|
+
provider_id,
|
|
117
|
+
)
|
|
118
|
+
return False
|
|
119
|
+
|
|
120
|
+
# Filter out providers which do not serve the station's country
|
|
121
|
+
if provider.countries and station.country not in provider.countries:
|
|
122
|
+
logger.info(
|
|
123
|
+
"Skipping provider '%s' as it does not serve the station's country ('%s')",
|
|
124
|
+
provider_id,
|
|
125
|
+
station.country,
|
|
126
|
+
)
|
|
127
|
+
return False
|
|
128
|
+
|
|
129
|
+
# Filter out providers which stopped providing data before the request's start date
|
|
130
|
+
if query.end and query.end < datetime.combine(
|
|
131
|
+
provider.start, datetime.min.time()
|
|
132
|
+
):
|
|
133
|
+
logger.info(
|
|
134
|
+
"Skipping provider '%s' as it stopped providing data before request start",
|
|
135
|
+
provider_id,
|
|
136
|
+
)
|
|
137
|
+
return False
|
|
138
|
+
|
|
139
|
+
# Filter out providers which only started providing data after the request's end date
|
|
140
|
+
if (
|
|
141
|
+
provider.end is not None
|
|
142
|
+
and query.start is not None
|
|
143
|
+
and query.start > datetime.combine(provider.end, datetime.max.time())
|
|
144
|
+
):
|
|
145
|
+
logger.info(
|
|
146
|
+
"Skipping provider '%s' as it only started providing data after request end",
|
|
147
|
+
provider_id,
|
|
148
|
+
)
|
|
149
|
+
return False
|
|
150
|
+
|
|
151
|
+
return True
|
|
152
|
+
|
|
153
|
+
return list(filter(_filter, query.providers))
|
|
154
|
+
|
|
155
|
+
def fetch_data(
|
|
156
|
+
self, provider_id: Provider, req: Request, station: Station
|
|
157
|
+
) -> Optional[pd.DataFrame]:
|
|
158
|
+
"""
|
|
159
|
+
Fetch data from a given provider
|
|
160
|
+
"""
|
|
161
|
+
provider = self.get_provider(provider_id)
|
|
162
|
+
|
|
163
|
+
if not provider:
|
|
164
|
+
return None
|
|
165
|
+
|
|
166
|
+
query = ProviderRequest(
|
|
167
|
+
station=station,
|
|
168
|
+
start=req.start
|
|
169
|
+
or (
|
|
170
|
+
datetime.combine(provider.start, datetime.min.time())
|
|
171
|
+
if provider.start
|
|
172
|
+
else None
|
|
173
|
+
),
|
|
174
|
+
end=req.end or (provider.end or datetime.now()),
|
|
175
|
+
parameters=req.parameters,
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
module = import_module(cast(str, provider.module))
|
|
179
|
+
df = module.fetch(query)
|
|
180
|
+
|
|
181
|
+
return df
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# Module-level ProviderService instance, initialized with the default providers
provider_service = ProviderService(providers=DEFAULT_PROVIDERS)
|