meteora 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of meteora might be problematic. Click here for more details.

meteora/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Meteora."""
@@ -0,0 +1,7 @@
1
+ """Clients module."""
2
+
3
+ from meteora.clients.aemet import AemetClient
4
+ from meteora.clients.agrometeo import AgrometeoClient
5
+ from meteora.clients.iem import ASOSOneMinIEMClient, METARASOSIEMClient
6
+ from meteora.clients.meteocat import MeteocatClient
7
+ from meteora.clients.metoffice import MetOfficeClient
@@ -0,0 +1,148 @@
1
+ """AEMET client."""
2
+
3
+ from typing import Mapping, Union
4
+
5
+ import pandas as pd
6
+ import pyproj
7
+
8
+ from meteora import settings, utils
9
+ from meteora.clients.base import BaseJSONClient, RegionType, VariablesType
10
+ from meteora.mixins import (
11
+ AllStationsEndpointMixin,
12
+ APIKeyParamMixin,
13
+ VariablesEndpointMixin,
14
+ )
15
+
16
# AEMET OpenData API endpoints
BASE_URL = "https://opendata.aemet.es/opendata/api"
STATIONS_ENDPOINT = (
    f"{BASE_URL}/valores/climatologicos/inventarioestaciones/todasestaciones"
)
# the variables metadata and the time series data are served by the same endpoint
TS_ENDPOINT = f"{BASE_URL}/observacion/convencional/todas"
VARIABLES_ENDPOINT = TS_ENDPOINT

# data frame labels
# NOTE: in AEMET, the station id column is "indicativo" in the stations endpoint but
# "idema" in the data endpoint
STATIONS_ID_COL = "idema"
VARIABLES_ID_COL = "id"
TIME_COL = "fint"

# mapping from essential climate variable (ECV) names to AEMET variable codes
ECV_DICT = {
    "precipitation": "prec",
    "pressure": "pres",
    "surface_wind_speed": "vv",
    "surface_wind_direction": "dv",
    "temperature": "ta",
    "water_vapour": "hr",
}
37
+
38
+
39
class AemetClient(
    APIKeyParamMixin,
    AllStationsEndpointMixin,
    VariablesEndpointMixin,
    BaseJSONClient,
):
    """AEMET client."""

    # geom constants
    X_COL = "longitud"
    Y_COL = "latitud"
    CRS = pyproj.CRS("epsg:4326")

    # API endpoints
    _stations_endpoint = STATIONS_ENDPOINT
    _variables_endpoint = VARIABLES_ENDPOINT
    _ts_endpoint = TS_ENDPOINT

    # data frame labels constants
    # NOTE: in AEMET, the station id column is "indicativo" in the stations endpoint
    # but "idema" in the data endpoint, hence the two separate labels below
    _stations_id_col = STATIONS_ID_COL
    _stations_endpoint_id_col = "indicativo"
    _variables_id_col = VARIABLES_ID_COL
    _ecv_dict = ECV_DICT
    _time_col = TIME_COL

    # auth constants
    _api_key_param_name = "api_key"

    def __init__(
        self, region: RegionType, api_key: str, sjoin_kws: Union[Mapping, None] = None
    ) -> None:
        """Initialize AEMET client.

        Parameters
        ----------
        region : RegionType
            Region of interest, stored as the `region` attribute.
        api_key : str
            AEMET API key, stored as the `_api_key` attribute.
        sjoin_kws : mapping, optional
            Stored as the `SJOIN_KWS` attribute (presumably the keyword arguments
            of the spatial join between stations and region; verify against the
            base client). If None, a copy of `settings.SJOIN_KWS` is used.
        """
        self.region = region
        self._api_key = api_key
        if sjoin_kws is None:
            # copy so that changes to this instance do not leak into the settings
            sjoin_kws = settings.SJOIN_KWS.copy()
        self.SJOIN_KWS = sjoin_kws
        # need to call super().__init__() to set the cache
        super().__init__()

    @property
    def request_headers(self):
        """Request headers.

        The parent's request headers plus `"cache-control": "no-cache"`. The merged
        dict is built on first access and cached in `_request_headers`.
        """
        try:
            return self._request_headers
        except AttributeError:
            self._request_headers = super().request_headers | {
                "cache-control": "no-cache"
            }
            return self._request_headers

    def _stations_df_from_content(self, response_content: dict) -> pd.DataFrame:
        """Build the stations data frame from the stations endpoint response."""
        # response_content returns a dict with urls, where the one under the "datos" key
        # contains the JSON data
        stations_df = pd.read_json(response_content["datos"], encoding="latin1")
        # the coordinate columns are converted with `utils.dms_to_decimal`
        for col in [self.X_COL, self.Y_COL]:
            stations_df[col] = utils.dms_to_decimal(stations_df[col])
        return stations_df

    def _variables_df_from_content(self, response_json) -> pd.DataFrame:
        """Build the variables data frame from the variables endpoint response."""
        # the url under the "metadatos" key contains the metadata JSON, whose
        # "campos" field holds one record per variable
        return pd.json_normalize(
            pd.read_json(response_json["metadatos"], encoding="latin1")["campos"]
        )

    @property
    def variables_df(self) -> pd.DataFrame:
        """Variables dataframe.

        Requested (with the session cache disabled) on first access and then kept
        in `_variables_df`.
        """
        try:
            return self._variables_df
        except AttributeError:
            with self._session.cache_disabled():
                response_content = self._get_content_from_url(self._variables_endpoint)
            self._variables_df = self._variables_df_from_content(response_content)
            return self._variables_df

    def _ts_df_from_content(self, response_content: dict) -> pd.DataFrame:
        """Build the time series data frame from the data endpoint response.

        Only observations from the stations in `stations_gdf` are kept and the
        result is indexed by station id (first level) and time (second level).
        """
        # response_content returns a dict with urls, where the one under the "datos" key
        # contains the JSON data
        ts_df = pd.read_json(response_content["datos"], encoding="latin1")
        # filter only stations from the region; the observations identify stations
        # by `_stations_id_col` whereas the stations data frame does so by
        # `_stations_endpoint_id_col`
        region_station_ids = self.stations_gdf[self._stations_endpoint_id_col]
        in_region = ts_df[self._stations_id_col].isin(region_station_ids)
        return ts_df[in_region].set_index([self._stations_id_col, self._time_col])

    def get_ts_df(
        self,
        variables: VariablesType,
    ) -> pd.DataFrame:
        """Get time series data frame for the last 24h.

        Parameters
        ----------
        variables : str, int or list-like of str or int
            Target variables, which can be either an AEMET variable code (integer or
            string) or an essential climate variable (ECV) following the Meteora
            nomenclature (string).

        Returns
        -------
        ts_df : pandas.DataFrame
            Long form data frame with a time series of measurements (second-level
            index) at each station (first-level index) for each variable (column).
        """
        # disable cache since the endpoint returns the latest 24h of data
        with self._session.cache_disabled():
            return self._get_ts_df(variables)
@@ -0,0 +1,246 @@
1
+ """Agrometeo client."""
2
+
3
+ from typing import Any, Mapping, Union
4
+
5
+ import pandas as pd
6
+ import pyproj
7
+
8
+ from meteora import settings
9
+ from meteora.clients.base import BaseJSONClient, DateTimeType, RegionType, VariablesType
10
+ from meteora.mixins import AllStationsEndpointMixin, VariablesEndpointMixin
11
+
12
# Agrometeo API endpoints
BASE_URL = "https://agrometeo.ch/backend/api"
STATIONS_ENDPOINT = f"{BASE_URL}/stations"
VARIABLES_ENDPOINT = f"{BASE_URL}/sensors"
TS_ENDPOINT = f"{BASE_URL}/meteo/data"

# coordinate reference systems
LONLAT_CRS = pyproj.CRS("epsg:4326")
LV03_CRS = pyproj.CRS("epsg:21781")
# NOTE: for some reason, the API mixes up the longitude and latitude columns ONLY in
# the CH1903/LV03 projection, which is why the columns are swapped for that CRS in
# the mapping below.
GEOM_COL_DICT = {
    LONLAT_CRS: ["long_dec", "lat_dec"],
    LV03_CRS: ["lat_ch", "long_ch"],
}
DEFAULT_CRS = LV03_CRS

# stations id column used by the Agrometeo API (do not change)
STATIONS_API_ID_COL = "id"
# stations id column used by the client's class to index the data (e.g., the time
# series data frame). It can be any column that is unique to each station, e.g., the
# name ("name") or the id ("id"). If exposed as an argument, its docstring would read:
# stations_id_col : str, optional
#     Column of `stations_gdf` that will be used in the returned data frame to identify
#     the stations. If None, the value from `STATIONS_ID_COL` will be used.
STATIONS_ID_COL = "id"

# variables name column
VARIABLES_NAME_COL = "name.en"
# variables code column
VARIABLES_ID_COL = "id"

# agrometeo sensors (code and name, with an English gloss in parentheses where the
# listed name is in German)
#  42  Leaf moisture III
#  43  Voltage of internal lithium battery
#   1  Temperature 2m above ground
#   4  Relative humidity
#   6  Precipitation
#  15  Intensity of precipitation
#   7  Leaf moisture
#  11  Solar radiation
#  41  Solar Energie (solar energy)
#   9  Avg. wind speed
#  14  Max. wind speed
#   8  Wind direction
#  22  Temperature +10cm
#  12  Luxmeter after Lufft
#  10  ETP-Turc
#  24  ETo-PenMon
#  13  Dew point
#  18  Real air pressure
#   2  Soil temperature +5cm
#  19  Soil temperature -20cm
#   3  Soil temperature -10cm
#   5  Soil moisture -5cm
#  20  Pressure on sea level
#  17  Leaf moisture II
#  25  Soil moisture -30cm
#  26  Soil moisture -50cm
#  39  unused
#  33  Temperature in leafzone
#  32  battery voltage
#  21  min. wind speed
#  23  Temperatur +20cm (temperature +20cm)
#  27  Temperatur in Pflanze1 (temperature in plant 1)
#  28  Temperatur in Pflanze1 (temperature in plant 1)
#  29  UVAB
#  30  UVA
#  31  UAB
#  34  Air humidity in leafzone
#  35  Photosyth. active radiation
#  36  Soil temperature -10cm
#  37  Temperatur 2m unbelüftet (temperature 2m, unventilated)
#  38  elative Luftfeuchtigkeit +5cm (relative air humidity +5cm)
#  40  Precip. Radolan Day
# 100  Hour
# 101  Year
# 102  Day of year
# 103  Degree hours
# 104  Density of sporulation
# 105  Leaf surface

# mapping from essential climate variable (ECV) names to Agrometeo sensor codes
ECV_DICT = {
    "precipitation": 6,  # "Precipitation"
    "pressure": 18,  # "Real air pressure"
    "surface_radiation_shortwave": 11,  # "Solar radiation"
    "surface_wind_speed": 9,  # "Avg. wind speed"
    "surface_wind_direction": 8,  # "Wind direction"
    "temperature": 1,  # "Temperature 2m above ground"
    "water_vapour": 4,  # "Relative humidity"
}

# time series request/response constants
TIME_COL = "date"
API_DT_FMT = "%Y-%m-%d"
SCALE = "none"
MEASUREMENT = "avg"
101
+
102
+
103
class AgrometeoClient(AllStationsEndpointMixin, VariablesEndpointMixin, BaseJSONClient):
    """Agrometeo client."""

    # API endpoints
    _stations_endpoint = STATIONS_ENDPOINT
    _variables_endpoint = VARIABLES_ENDPOINT
    _ts_endpoint = TS_ENDPOINT

    # data frame labels constants
    _stations_id_col = STATIONS_ID_COL
    _variables_id_col = VARIABLES_ID_COL
    _ecv_dict = ECV_DICT
    _time_col = TIME_COL

    def __init__(
        self,
        region: RegionType,
        crs: Any = None,
        sjoin_kws: Union[Mapping, None] = None,
    ) -> None:
        """Initialize Agrometeo client.

        Parameters
        ----------
        region : RegionType
            Region of interest, stored as the `region` attribute.
        crs : any object accepted by `pyproj.CRS`, optional
            Coordinate reference system, which must be one of the keys of
            `GEOM_COL_DICT`, i.e., EPSG:4326 or EPSG:21781. If None, `DEFAULT_CRS`
            is used.
        sjoin_kws : mapping, optional
            Stored as the `SJOIN_KWS` attribute (presumably the keyword arguments
            of the spatial join between stations and region; verify against the
            base client). If None, a copy of `settings.SJOIN_KWS` is used.

        Raises
        ------
        ValueError
            If `crs` is not among the keys of `GEOM_COL_DICT`.
        """
        # NOTE: CRS must be either EPSG:4326 or EPSG:21781
        # NOTE: CRS must be set before region
        self.CRS = DEFAULT_CRS if crs is None else pyproj.CRS(crs)
        try:
            self.X_COL, self.Y_COL = GEOM_COL_DICT[self.CRS]
        except KeyError as err:
            # chain explicitly so that the traceback shows the failed lookup as
            # the cause rather than as an error raised while handling another one
            raise ValueError(
                f"CRS must be among {list(GEOM_COL_DICT.keys())}, got {self.CRS}"
            ) from err

        self.region = region
        if sjoin_kws is None:
            # copy so that changes to this instance do not leak into the settings
            sjoin_kws = settings.SJOIN_KWS.copy()
        self.SJOIN_KWS = sjoin_kws

        # need to call super().__init__() to set the cache
        super().__init__()

    def _stations_df_from_content(self, response_content: dict) -> pd.DataFrame:
        """Build the stations data frame (indexed by station id) from the response."""
        return pd.DataFrame(response_content["data"]).set_index(self._stations_id_col)

    def _variables_df_from_content(self, response_content: dict) -> pd.DataFrame:
        """Build the variables data frame from the response."""
        variables_df = pd.json_normalize(response_content["data"])
        # NOTE: need to strip strings, at least in variables name column. Note
        # that *it seems* that the integer type of variable code column is inferred
        # correctly
        variables_df[VARIABLES_NAME_COL] = variables_df[VARIABLES_NAME_COL].str.strip()
        return variables_df

    def _ts_params(self, variable_ids, start, end, scale=None, measurement=None):
        """Build the query parameters of a time series request.

        Returns a dict with the "from", "to", "scale", "sensors" and "stations" keys,
        where "sensors" is a comma-separated list of "{variable_id}:{measurement}"
        items and "stations" a comma-separated list of the ids in `stations_gdf`.
        """
        # process date args
        start_date = pd.Timestamp(start).strftime(API_DT_FMT)
        end_date = pd.Timestamp(end).strftime(API_DT_FMT)
        # process scale and measurement args, falling back to the module defaults
        # (the default scale is the lowercase "none" string, as the API needs it)
        if scale is None:
            scale = SCALE
        if measurement is None:
            measurement = MEASUREMENT

        station_ids = self.stations_gdf.index.astype(str)

        return {
            "from": start_date,
            "to": end_date,
            "scale": scale,
            "sensors": ",".join(
                f"{variable_id}:{measurement}" for variable_id in variable_ids
            ),
            "stations": ",".join(station_ids),
        }

    def _ts_df_from_content(self, response_content):
        """Build the long form time series data frame from the response."""
        # parse the response as a data frame
        ts_df = pd.json_normalize(response_content["data"]).set_index(self._time_col)
        ts_df.index = pd.to_datetime(ts_df.index)
        ts_df.index.name = self._time_col

        # NOTE: agrometeo returns the data indexed by keys of the form
        # "{station_id}_{variable_code}_{measurement}". We can ignore the latter and
        # convert to a two-level (station, variable) multi index
        ts_df.columns = (
            ts_df.columns.str.split("_")
            .str[:-1]
            .map(tuple)
            .rename([self._stations_id_col, "variable"])
        )
        # convert station and variable ids to integer
        for level_i, level_name in enumerate(ts_df.columns.names):
            ts_df.columns = ts_df.columns.set_levels(
                ts_df.columns.levels[level_i].astype(int), level=level_name
            )

        # convert to long form and return it
        return ts_df.stack(level=self._stations_id_col, future_stack=True).swaplevel()

    def get_ts_df(
        self,
        variables: VariablesType,
        start: DateTimeType,
        end: DateTimeType,
        *,
        scale: Union[str, None] = None,
        measurement: Union[str, None] = None,
    ) -> pd.DataFrame:
        """Get time series data frame.

        Parameters
        ----------
        variables : str, int or list-like of str or int
            Target variables, which can be either an Agrometeo variable code (integer or
            string) or an essential climate variable (ECV) following the Meteora
            nomenclature (string).
        start, end : datetime-like, str, int, float
            Values representing the start and end of the requested data period
            respectively. Accepts any datetime-like object that can be passed to
            pandas.Timestamp.
        scale : None or {"hour", "day", "month", "year"}, default None
            Temporal scale of the measurements. The default value of None returns the
            finest scale, i.e., 10 minutes.
        measurement : None or {"min", "avg", "max"}, default None
            Whether the measurement values correspond to the minimum, average or maximum
            value for the required temporal scale. Ignored if `scale` is None.

        Returns
        -------
        ts_df : pandas.DataFrame
            Long form data frame with a time series of measurements (second-level
            index) at each station (first-level index) for each variable (column).
        """
        return self._get_ts_df(
            variables, start, end, scale=scale, measurement=measurement
        )