meteora 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of meteora might be problematic. Click here for more details.
- meteora/__init__.py +1 -0
- meteora/clients/__init__.py +7 -0
- meteora/clients/aemet.py +148 -0
- meteora/clients/agrometeo.py +246 -0
- meteora/clients/base.py +393 -0
- meteora/clients/iem.py +250 -0
- meteora/clients/meteocat.py +213 -0
- meteora/clients/metoffice.py +221 -0
- meteora/mixins/__init__.py +10 -0
- meteora/mixins/auth.py +45 -0
- meteora/mixins/region.py +136 -0
- meteora/mixins/stations.py +83 -0
- meteora/mixins/variables.py +108 -0
- meteora/settings.py +29 -0
- meteora/utils.py +162 -0
- meteora-0.1.0.dist-info/LICENSE +674 -0
- meteora-0.1.0.dist-info/METADATA +177 -0
- meteora-0.1.0.dist-info/RECORD +20 -0
- meteora-0.1.0.dist-info/WHEEL +5 -0
- meteora-0.1.0.dist-info/top_level.txt +1 -0
meteora/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Meteora."""
|
meteora/clients/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Clients module."""
|
|
2
|
+
|
|
3
|
+
from meteora.clients.aemet import AemetClient
|
|
4
|
+
from meteora.clients.agrometeo import AgrometeoClient
|
|
5
|
+
from meteora.clients.iem import ASOSOneMinIEMClient, METARASOSIEMClient
|
|
6
|
+
from meteora.clients.meteocat import MeteocatClient
|
|
7
|
+
from meteora.clients.metoffice import MetOfficeClient
|
meteora/clients/aemet.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""AEMET client."""
|
|
2
|
+
|
|
3
|
+
from typing import Mapping, Union
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import pyproj
|
|
7
|
+
|
|
8
|
+
from meteora import settings, utils
|
|
9
|
+
from meteora.clients.base import BaseJSONClient, RegionType, VariablesType
|
|
10
|
+
from meteora.mixins import (
|
|
11
|
+
AllStationsEndpointMixin,
|
|
12
|
+
APIKeyParamMixin,
|
|
13
|
+
VariablesEndpointMixin,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# AEMET OpenData endpoints, all of them rooted at the same base URL
BASE_URL = "https://opendata.aemet.es/opendata/api"
STATIONS_ENDPOINT = "/".join(
    [BASE_URL, "valores", "climatologicos", "inventarioestaciones", "todasestaciones"]
)
# the variables and time series endpoints are the very same URL
TS_ENDPOINT = "/".join([BASE_URL, "observacion", "convencional", "todas"])
VARIABLES_ENDPOINT = TS_ENDPOINT

# data frame labels
# NOTE: AEMET names the station id column "indicativo" in the stations endpoint but
# "idema" in the data endpoint
STATIONS_ID_COL = "idema"
VARIABLES_ID_COL = "id"
TIME_COL = "fint"
# Meteora essential climate variable (ECV) names mapped to the AEMET variable codes
ECV_DICT = dict(
    precipitation="prec",
    pressure="pres",
    surface_wind_speed="vv",
    surface_wind_direction="dv",
    temperature="ta",
    water_vapour="hr",
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class AemetClient(
    APIKeyParamMixin,
    AllStationsEndpointMixin,
    VariablesEndpointMixin,
    BaseJSONClient,
):
    """AEMET client.

    Parameters
    ----------
    region : RegionType
        Region of interest, stored as `region`. How it is resolved into a set of
        stations is handled outside of this class (presumably by the base class
        and mixins - verify there).
    api_key : str
        AEMET API key, stored as `_api_key`.
    sjoin_kws : mapping, optional
        Stored as `SJOIN_KWS`. If None, a copy of `settings.SJOIN_KWS` is used.
    """

    # geom constants
    X_COL = "longitud"
    Y_COL = "latitud"
    CRS = pyproj.CRS("epsg:4326")

    # API endpoints
    _stations_endpoint = STATIONS_ENDPOINT
    _variables_endpoint = VARIABLES_ENDPOINT
    _ts_endpoint = TS_ENDPOINT

    # data frame labels constants
    # station id column of the observation (time series) data
    _stations_id_col = STATIONS_ID_COL
    # station id column of `stations_gdf`, which AEMET labels differently than in the
    # observation data
    _stations_gdf_id_col = "indicativo"
    # _variables_name_col = VARIABLES_NAME_COL
    _variables_id_col = VARIABLES_ID_COL
    _ecv_dict = ECV_DICT
    _time_col = TIME_COL

    # auth constants
    _api_key_param_name = "api_key"
    # request_headers = {"cache-control": "no-cache"}

    def __init__(
        self, region: RegionType, api_key: str, sjoin_kws: Union[Mapping, None] = None
    ) -> None:
        """Initialize AEMET client."""
        self.region = region
        self._api_key = api_key
        if sjoin_kws is None:
            # copy so that the module-level defaults are never shared with instances
            sjoin_kws = settings.SJOIN_KWS.copy()
        self.SJOIN_KWS = sjoin_kws
        # need to call super().__init__() to set the cache
        super().__init__()

    @property
    def request_headers(self):
        """Request headers.

        The headers provided by the parent classes plus a "cache-control: no-cache"
        entry. The merged mapping is built on first access and reused afterwards.
        """
        try:
            return self._request_headers
        except AttributeError:
            self._request_headers = super().request_headers | {
                "cache-control": "no-cache"
            }
            return self._request_headers

    def _stations_df_from_content(self, response_content: dict) -> pd.DataFrame:
        """Build the stations data frame from the stations endpoint response."""
        # response_content returns a dict with urls, where the one under the "datos" key
        # contains the JSON data
        stations_df = pd.read_json(response_content["datos"], encoding="latin1")
        # convert the coordinate columns with `utils.dms_to_decimal` (presumably from
        # degrees-minutes-seconds to decimal degrees - see `meteora.utils`)
        for col in [self.X_COL, self.Y_COL]:
            stations_df[col] = utils.dms_to_decimal(stations_df[col])
        return stations_df

    def _variables_df_from_content(self, response_json: dict) -> pd.DataFrame:
        """Build the variables data frame from the variables endpoint response."""
        # the url under the "metadatos" key points to a JSON whose "campos" entry holds
        # one record per variable
        return pd.json_normalize(
            pd.read_json(response_json["metadatos"], encoding="latin1")["campos"]
        )

    @property
    def variables_df(self) -> pd.DataFrame:
        """Variables dataframe."""
        try:
            return self._variables_df
        except AttributeError:
            # the request is performed with the session cache disabled and the result
            # is kept in the instance for subsequent accesses
            with self._session.cache_disabled():
                response_content = self._get_content_from_url(self._variables_endpoint)
            self._variables_df = self._variables_df_from_content(response_content)
            return self._variables_df

    def _ts_df_from_content(self, response_content: dict) -> pd.DataFrame:
        """Build the time series data frame from the observations endpoint response.

        Only the observations of stations listed in `stations_gdf` are kept, and the
        result is indexed by station id and time.
        """
        # response_content returns a dict with urls, where the one under the "datos" key
        # contains the JSON data
        ts_df = pd.read_json(response_content["datos"], encoding="latin1")
        # filter only stations from the region. The station ids are matched across two
        # differently-named columns: `_stations_id_col` in the observations and
        # `_stations_gdf_id_col` in the stations data frame.
        return ts_df[
            ts_df[self._stations_id_col].isin(
                self.stations_gdf[self._stations_gdf_id_col]
            )
        ].set_index([self._stations_id_col, self._time_col])

    def get_ts_df(
        self,
        variables: VariablesType,
    ) -> pd.DataFrame:
        """Get time series data frame for the last 24h.

        Parameters
        ----------
        variables : str, int or list-like of str or int
            Target variables, which can be either an AEMET variable code (integer or
            string) or an essential climate variable (ECV) following the Meteora
            nomenclature (string).

        Returns
        -------
        ts_df : pandas.DataFrame
            Long form data frame with a time series of measurements (second-level index)
            at each station (first-level index) for each variable (column).
        """
        # disable cache since the endpoint returns the latest 24h of data
        with self._session.cache_disabled():
            return self._get_ts_df(variables)
|
meteora/clients/agrometeo.py
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""Agrometeo client."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Mapping, Union
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import pyproj
|
|
7
|
+
|
|
8
|
+
from meteora import settings
|
|
9
|
+
from meteora.clients.base import BaseJSONClient, DateTimeType, RegionType, VariablesType
|
|
10
|
+
from meteora.mixins import AllStationsEndpointMixin, VariablesEndpointMixin
|
|
11
|
+
|
|
12
|
+
# Agrometeo backend endpoints, all of them rooted at the same base URL
BASE_URL = "https://agrometeo.ch/backend/api"
STATIONS_ENDPOINT = BASE_URL + "/stations"
VARIABLES_ENDPOINT = BASE_URL + "/sensors"
TS_ENDPOINT = BASE_URL + "/meteo/data"
|
|
17
|
+
|
|
18
|
+
# coordinate reference systems that the client can work with
LONLAT_CRS = pyproj.CRS("epsg:4326")
LV03_CRS = pyproj.CRS("epsg:21781")
DEFAULT_CRS = LV03_CRS
# names of the [x, y] columns of the stations data for each supported CRS.
# ACHTUNG: for some reason, the API mixes up the longitude and latitude columns ONLY in
# the CH1903/LV03 projection. This is why the columns are swapped for that CRS.
GEOM_COL_DICT = {
    LONLAT_CRS: ["long_dec", "lat_dec"],
    LV03_CRS: ["lat_ch", "long_ch"],
}
|
|
25
|
+
# station id column as named by the Agrometeo API (do not change)
STATIONS_API_ID_COL = "id"
# station column that the client class uses to index the data (e.g., the time series
# data frame). Any column that is unique to each station (e.g., name or id) is valid.
# If exposed as an argument, its docstring would read as:
# stations_id_col : str, optional
#     Column of `stations_gdf` that will be used in the returned data frame to identify
#     the stations. If None, the value from `STATIONS_ID_COL` will be used.
# STATIONS_ID_COL = "name"
STATIONS_ID_COL = "id"
# column with the variable names
VARIABLES_NAME_COL = "name.en"
# column with the variable codes
VARIABLES_ID_COL = "id"
# agrometeo sensors (code followed by name, kept verbatim)
# 42 Leaf moisture III
# 43 Voltage of internal lithium battery
# 1 Temperature 2m above ground
# 4 Relative humidity
# 6 Precipitation
# 15 Intensity of precipitation
# 7 Leaf moisture
# 11 Solar radiation
# 41 Solar Energie
# 9 Avg. wind speed
# 14 Max. wind speed
# 8 Wind direction
# 22 Temperature +10cm
# 12 Luxmeter after Lufft
# 10 ETP-Turc
# 24 ETo-PenMon
# 13 Dew point
# 18 Real air pressure
# 2 Soil temperature +5cm
# 19 Soil temperature -20cm
# 3 Soil temperature -10cm
# 5 Soil moisture -5cm
# 20 Pressure on sea level
# 17 Leaf moisture II
# 25 Soil moisture -30cm
# 26 Soil moisture -50cm
# 39 unused
# 33 Temperature in leafzone
# 32 battery voltage
# 21 min. wind speed
# 23 Temperatur +20cm
# 27 Temperatur in Pflanze1
# 28 Temperatur in Pflanze1
# 29 UVAB
# 30 UVA
# 31 UAB
# 34 Air humidity in leafzone
# 35 Photosyth. active radiation
# 36 Soil temperature -10cm
# 37 Temperatur 2m unbelüftet
# 38 elative Luftfeuchtigkeit +5cm
# 40 Precip. Radolan Day
# 100 Hour
# 101 Year
# 102 Day of year
# 103 Degree hours
# 104 Density of sporulation
# 105 Leaf surface
# Meteora essential climate variable (ECV) names mapped to the Agrometeo sensor codes
ECV_DICT = dict(
    precipitation=6,  # "Precipitation"
    pressure=18,  # "Real air pressure"
    surface_radiation_shortwave=11,  # "Solar radiation"
    surface_wind_speed=9,  # "Avg. wind speed"
    surface_wind_direction=8,  # "Wind direction"
    temperature=1,  # "Temperature 2m above ground"
    water_vapour=4,  # "Relative humidity"
)
# time column of the time series data
TIME_COL = "date"
# format in which the dates are sent to the API
API_DT_FMT = "%Y-%m-%d"
# request defaults used when `scale`/`measurement` are not provided
SCALE = "none"
MEASUREMENT = "avg"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class AgrometeoClient(AllStationsEndpointMixin, VariablesEndpointMixin, BaseJSONClient):
    """Agrometeo client.

    Parameters
    ----------
    region : RegionType
        Region of interest, stored as `region`. How it is resolved into a set of
        stations is handled outside of this class (presumably by the base class
        and mixins - verify there).
    crs : any, optional
        Value passed to `pyproj.CRS`, which must match one of the keys of
        `GEOM_COL_DICT` (EPSG:4326 or EPSG:21781). If None, `DEFAULT_CRS` is used.
    sjoin_kws : mapping, optional
        Stored as `SJOIN_KWS`. If None, a copy of `settings.SJOIN_KWS` is used.

    Raises
    ------
    ValueError
        If `crs` is not among the supported coordinate reference systems.
    """

    # API endpoints
    _stations_endpoint = STATIONS_ENDPOINT
    _variables_endpoint = VARIABLES_ENDPOINT
    _ts_endpoint = TS_ENDPOINT

    # data frame labels constants
    _stations_id_col = STATIONS_ID_COL
    _variables_id_col = VARIABLES_ID_COL
    # _variables_name_col = VARIABLES_NAME_COL
    _ecv_dict = ECV_DICT
    _time_col = TIME_COL

    def __init__(
        self,
        region: RegionType,
        crs: Any = None,
        sjoin_kws: Union[Mapping, None] = None,
    ) -> None:
        """Initialize Agrometeo client."""
        # ACHTUNG: CRS must be either EPSG:4326 or EPSG:21781
        # ACHTUNG: CRS must be set before region
        if crs is not None:
            crs = pyproj.CRS(crs)
        else:
            crs = DEFAULT_CRS
        self.CRS = crs
        # self._variables_name_col = variables_name_col or VARIABLES_NAME_COL
        try:
            # the coordinate column names depend on the CRS
            self.X_COL, self.Y_COL = GEOM_COL_DICT[self.CRS]
        except KeyError as err:
            # chain explicitly so that the failed lookup is reported as the cause
            raise ValueError(
                f"CRS must be among {list(GEOM_COL_DICT.keys())}, got {self.CRS}"
            ) from err

        self.region = region
        if sjoin_kws is None:
            # copy so that the module-level defaults are never shared with instances
            sjoin_kws = settings.SJOIN_KWS.copy()
        self.SJOIN_KWS = sjoin_kws

        # need to call super().__init__() to set the cache
        super().__init__()

    def _stations_df_from_content(self, response_content: dict) -> pd.DataFrame:
        """Build the stations data frame (indexed by station id) from the response."""
        return pd.DataFrame(response_content["data"]).set_index(self._stations_id_col)

    def _variables_df_from_content(self, response_content: dict) -> pd.DataFrame:
        """Build the variables data frame from the response."""
        variables_df = pd.json_normalize(response_content["data"])
        # ACHTUNG: need to strip strings, at least in variables name column. Note
        # that *it seems* that the integer type of variable code column is inferred
        # correctly
        variables_df[VARIABLES_NAME_COL] = variables_df[VARIABLES_NAME_COL].str.strip()
        return variables_df

    def _ts_params(self, variable_ids, start, end, scale=None, measurement=None):
        """Build the request parameters for the time series endpoint.

        Parameters
        ----------
        variable_ids : iterable
            Variable (sensor) codes to request.
        start, end : datetime-like, str, int, float
            Start and end of the requested period, anything accepted by
            `pandas.Timestamp`. Only the date part is sent (see `API_DT_FMT`).
        scale, measurement : str, optional
            If None, the `SCALE` and `MEASUREMENT` module defaults are used.

        Returns
        -------
        params : dict
            Dictionary with the "from", "to", "scale", "sensors" and "stations" keys.
        """
        # process date args
        start_date = pd.Timestamp(start).strftime(API_DT_FMT)
        end_date = pd.Timestamp(end).strftime(API_DT_FMT)
        # process scale and measurement args
        if scale is None:
            # the API needs it to be lowercase
            scale = SCALE
        if measurement is None:
            measurement = MEASUREMENT

        # the stations data frame is indexed by station id (cast to str to join them)
        _stations_ids = self.stations_gdf.index.astype(str)

        return {
            "from": start_date,
            "to": end_date,
            "scale": scale,
            # each sensor is requested as "{variable_id}:{measurement}"
            "sensors": ",".join(
                f"{variable_id}:{measurement}" for variable_id in variable_ids
            ),
            "stations": ",".join(_stations_ids),
        }

    def _ts_df_from_content(self, response_content: dict) -> pd.DataFrame:
        """Build the long form time series data frame from the response."""
        # parse the response as a data frame
        ts_df = pd.json_normalize(response_content["data"]).set_index(self._time_col)
        ts_df.index = pd.to_datetime(ts_df.index)
        ts_df.index.name = self._time_col

        # ts_df.columns = self.stations_gdf[STATIONS_ID_COL]
        # ACHTUNG: note that agrometeo returns the data indexed by keys of the form
        # "{station_id}_{variable_code}_{measurement}". We can ignore the latter and
        # convert to a two-level (station, variable) multi index
        ts_df.columns = (
            ts_df.columns.str.split("_")
            .str[:-1]
            .map(tuple)
            .rename([self._stations_id_col, "variable"])
        )
        # convert station and variable ids to integer
        # ts_df.columns = ts_df.columns.set_levels(
        #     ts_df.columns.levels["station"].astype(int), level="station"
        # )
        for level_i, level_name in enumerate(ts_df.columns.names):
            ts_df.columns = ts_df.columns.set_levels(
                ts_df.columns.levels[level_i].astype(int), level=level_name
            )

        # convert to long form and return it
        return ts_df.stack(level=self._stations_id_col, future_stack=True).swaplevel()

    def get_ts_df(
        self,
        variables: VariablesType,
        start: DateTimeType,
        end: DateTimeType,
        *,
        scale: Union[str, None] = None,
        measurement: Union[str, None] = None,
    ) -> pd.DataFrame:
        """Get time series data frame.

        Parameters
        ----------
        variables : str, int or list-like of str or int
            Target variables, which can be either an Agrometeo variable code (integer or
            string) or an essential climate variable (ECV) following the Meteora
            nomenclature (string).
        start, end : datetime-like, str, int, float
            Values representing the start and end of the requested data period
            respectively. Accepts any datetime-like object that can be passed to
            pandas.Timestamp.
        scale : None or {"hour", "day", "month", "year"}, default None
            Temporal scale of the measurements. The default value of None returns the
            finest scale, i.e., 10 minutes.
        measurement : None or {"min", "avg", "max"}, default None
            Whether the measurement values correspond to the minimum, average or maximum
            value for the required temporal scale. Ignored if `scale` is None.

        Returns
        -------
        ts_df : pandas.DataFrame
            Long form data frame with a time series of measurements (second-level index)
            at each station (first-level index) for each variable (column).
        """
        return self._get_ts_df(
            variables, start, end, scale=scale, measurement=measurement
        )
|