meteostat 1.7.6__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meteostat/__init__.py +38 -19
- meteostat/api/config.py +158 -0
- meteostat/api/daily.py +76 -0
- meteostat/api/hourly.py +80 -0
- meteostat/api/interpolate.py +378 -0
- meteostat/api/inventory.py +59 -0
- meteostat/api/merge.py +103 -0
- meteostat/api/monthly.py +73 -0
- meteostat/api/normals.py +144 -0
- meteostat/api/point.py +30 -0
- meteostat/api/stations.py +234 -0
- meteostat/api/timeseries.py +334 -0
- meteostat/core/cache.py +212 -59
- meteostat/core/data.py +203 -0
- meteostat/core/logger.py +9 -0
- meteostat/core/network.py +82 -0
- meteostat/core/parameters.py +112 -0
- meteostat/core/providers.py +184 -0
- meteostat/core/schema.py +170 -0
- meteostat/core/validator.py +38 -0
- meteostat/enumerations.py +149 -0
- meteostat/interpolation/idw.py +120 -0
- meteostat/interpolation/lapserate.py +91 -0
- meteostat/interpolation/nearest.py +31 -0
- meteostat/parameters.py +354 -0
- meteostat/providers/dwd/climat.py +166 -0
- meteostat/providers/dwd/daily.py +144 -0
- meteostat/providers/dwd/hourly.py +218 -0
- meteostat/providers/dwd/monthly.py +138 -0
- meteostat/providers/dwd/mosmix.py +351 -0
- meteostat/providers/dwd/poi.py +117 -0
- meteostat/providers/dwd/shared.py +155 -0
- meteostat/providers/eccc/daily.py +87 -0
- meteostat/providers/eccc/hourly.py +104 -0
- meteostat/providers/eccc/monthly.py +66 -0
- meteostat/providers/eccc/shared.py +45 -0
- meteostat/providers/index.py +496 -0
- meteostat/providers/meteostat/daily.py +65 -0
- meteostat/providers/meteostat/daily_derived.py +110 -0
- meteostat/providers/meteostat/hourly.py +66 -0
- meteostat/providers/meteostat/monthly.py +45 -0
- meteostat/providers/meteostat/monthly_derived.py +106 -0
- meteostat/providers/meteostat/shared.py +93 -0
- meteostat/providers/metno/forecast.py +186 -0
- meteostat/providers/noaa/ghcnd.py +228 -0
- meteostat/providers/noaa/isd_lite.py +142 -0
- meteostat/providers/noaa/metar.py +163 -0
- meteostat/typing.py +113 -0
- meteostat/utils/conversions.py +231 -0
- meteostat/utils/data.py +194 -0
- meteostat/utils/geo.py +28 -0
- meteostat/utils/guards.py +51 -0
- meteostat/utils/parsers.py +161 -0
- meteostat/utils/types.py +113 -0
- meteostat/utils/validators.py +31 -0
- meteostat-2.0.1.dist-info/METADATA +130 -0
- meteostat-2.0.1.dist-info/RECORD +64 -0
- {meteostat-1.7.6.dist-info → meteostat-2.0.1.dist-info}/WHEEL +1 -2
- meteostat/core/loader.py +0 -103
- meteostat/core/warn.py +0 -34
- meteostat/enumerations/granularity.py +0 -22
- meteostat/interface/base.py +0 -39
- meteostat/interface/daily.py +0 -118
- meteostat/interface/hourly.py +0 -154
- meteostat/interface/meteodata.py +0 -210
- meteostat/interface/monthly.py +0 -109
- meteostat/interface/normals.py +0 -245
- meteostat/interface/point.py +0 -143
- meteostat/interface/stations.py +0 -252
- meteostat/interface/timeseries.py +0 -237
- meteostat/series/aggregate.py +0 -48
- meteostat/series/convert.py +0 -28
- meteostat/series/count.py +0 -17
- meteostat/series/coverage.py +0 -20
- meteostat/series/fetch.py +0 -28
- meteostat/series/interpolate.py +0 -47
- meteostat/series/normalize.py +0 -76
- meteostat/series/stations.py +0 -22
- meteostat/units.py +0 -149
- meteostat/utilities/__init__.py +0 -0
- meteostat/utilities/aggregations.py +0 -37
- meteostat/utilities/endpoint.py +0 -33
- meteostat/utilities/helpers.py +0 -70
- meteostat/utilities/mutations.py +0 -89
- meteostat/utilities/validations.py +0 -30
- meteostat-1.7.6.dist-info/METADATA +0 -112
- meteostat-1.7.6.dist-info/RECORD +0 -39
- meteostat-1.7.6.dist-info/top_level.txt +0 -1
- /meteostat/{core → api}/__init__.py +0 -0
- /meteostat/{enumerations → interpolation}/__init__.py +0 -0
- /meteostat/{interface → providers}/__init__.py +0 -0
- /meteostat/{interface/interpolate.py → py.typed} +0 -0
- /meteostat/{series → utils}/__init__.py +0 -0
- {meteostat-1.7.6.dist-info → meteostat-2.0.1.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DWD national hourly data import routine
|
|
3
|
+
|
|
4
|
+
Get hourly data for weather stations in Germany.
|
|
5
|
+
|
|
6
|
+
The code is licensed under the MIT license.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from ftplib import FTP
|
|
11
|
+
from io import BytesIO
|
|
12
|
+
from typing import Callable, Dict, List, NotRequired, Optional, TypedDict
|
|
13
|
+
from zipfile import ZipFile
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from meteostat.enumerations import TTL, Parameter
|
|
18
|
+
from meteostat.core.logger import logger
|
|
19
|
+
from meteostat.typing import ProviderRequest, Station
|
|
20
|
+
from meteostat.core.cache import cache_service
|
|
21
|
+
from meteostat.api.config import config
|
|
22
|
+
from meteostat.utils.conversions import ms_to_kmh
|
|
23
|
+
from meteostat.providers.dwd.shared import get_condicode
|
|
24
|
+
from meteostat.providers.dwd.shared import get_ftp_connection
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ParameterDefinition(TypedDict):
    """Describes how to read one parameter dataset from the DWD FTP server."""

    dir: str  # sub-directory of BASE_DIR that holds the dataset
    usecols: List[int]  # CSV column indices to read
    names: Dict[str, str]  # maps raw DWD column names to Meteostat parameters
    convert: NotRequired[Dict[str, Callable]]  # optional per-column converter functions
    encoding: NotRequired[str]  # optional file encoding passed to pd.read_csv
    historical_only: NotRequired[bool]  # dataset only exists in "historical" mode
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Root directory of DWD's hourly climate observations on the FTP server
BASE_DIR = "/climate_environment/CDC/observations_germany/climate/hourly/"
# One entry per DWD dataset directory; "names" maps the raw DWD column
# names (e.g. "TT_TU") to Meteostat parameters
PARAMETERS: List[ParameterDefinition] = [
    {
        "dir": "precipitation",
        "usecols": [1, 3],
        "names": {"R1": Parameter.PRCP},
    },
    {
        "dir": "air_temperature",
        "usecols": [1, 3, 4],
        "names": {"TT_TU": Parameter.TEMP, "RF_TU": Parameter.RHUM},
    },
    {
        "dir": "wind",
        "usecols": [1, 3, 4],
        "names": {"F": Parameter.WSPD, "D": Parameter.WDIR},
        # DWD reports wind speed in m/s; Meteostat uses km/h
        "convert": {"wspd": ms_to_kmh},
    },
    {
        "dir": "pressure",
        "usecols": [1, 3],
        "names": {"P": Parameter.PRES},
    },
    {
        "dir": "sun",
        "usecols": [1, 3],
        "names": {"SD_SO": Parameter.TSUN},
    },
    {
        "dir": "cloudiness",
        "usecols": [1, 4],
        "names": {"V_N": Parameter.CLDC},
    },
    {
        "dir": "visibility",
        "usecols": [1, 4],
        "names": {"V_VV": Parameter.VSBY},
    },
    {
        "dir": "weather_phenomena",
        "usecols": [1, 3],
        "names": {"WW": Parameter.COCO},
        # Raw "WW" codes are mapped to Meteostat condicodes
        "convert": {"coco": get_condicode},
        "encoding": "latin-1",
    },
    # TODO: Implement solar radiation
    # {
    #     "dir": "solar",
    #     "usecols": [1, 5],
    #     "names": {"FG_LBERG": "srad"},
    #     "convert": {"srad": jcm2_to_wm2},
    #     "historical_only": True,
    # },
]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def find_file(ftp: FTP, path: str, needle: str):
    """
    Return the first file in BASE_DIR/path whose name contains *needle*.

    Returns None if the directory holds no matching file.
    """
    match = None

    try:
        # Change into the dataset directory and scan its file listing
        ftp.cwd(BASE_DIR + path)
        candidates = [name for name in ftp.nlst() if needle in name]
        match = candidates[0]
        logger.debug(f"Found file '{match}' in '{path}' directory")
    except IndexError:
        # No candidate matched the station identifier
        logger.info(f"File '{needle}' not found in '{path}' directory")

    return match
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@cache_service.cache(TTL.DAY, "pickle")
def get_df(parameter_dir: str, mode: str, station_id: str) -> Optional[pd.DataFrame]:
    """
    Get a file from DWD FTP server and convert to a pandas DataFrame

    Downloads the zipped dataset for one parameter directory and mode
    ("historical"/"recent"), extracts the "produkt*" data file and parses it.

    Returns None if no matching file exists or the archive holds no data file.
    """
    logger.debug(
        f"Fetching {parameter_dir} data ({mode}) for DWD station '{station_id}'"
    )

    parameter = next(param for param in PARAMETERS if param["dir"] == parameter_dir)

    ftp = get_ftp_connection()
    try:
        remote_file = find_file(ftp, f"{parameter['dir']}/{mode}", station_id)

        if remote_file is None:
            return None

        buffer = BytesIO()
        ftp.retrbinary("RETR " + remote_file, buffer.write)
    finally:
        # Always release the FTP connection (the monthly provider does the same)
        ftp.close()

    # Unzip file
    with ZipFile(buffer, "r") as zipped:
        filelist = zipped.namelist()
        raw = None
        for file in filelist:
            # The actual data file inside the archive is prefixed with "produkt"
            if file[:7] == "produkt":
                with zipped.open(file, "r") as reader:
                    raw = BytesIO(reader.read())

    # Guard against archives that contain no data file; without this,
    # pd.read_csv(None) would fail with a confusing error
    if raw is None:
        return None

    # Convert raw data to DataFrame
    df: pd.DataFrame = pd.read_csv(  # type: ignore
        raw,
        sep=";",
        skipinitialspace=True,
        date_format="%Y%m%d%H",
        na_values=[-999, "-999"],
        usecols=parameter["usecols"],
        encoding=parameter["encoding"] if "encoding" in parameter else None,
    )

    # Parse the timestamp column (e.g. 2024010106 -> 2024-01-01 06:00)
    df["time"] = pd.to_datetime(df.pop("MESS_DATUM"), format="%Y%m%d%H")

    logger.debug(f"Found {len(df)} rows in {remote_file}")

    # Rename columns (strip padding, then map DWD names to Meteostat parameters)
    df = df.rename(columns=lambda x: x.strip())
    df = df.rename(columns=parameter["names"])

    # Convert column data (e.g. wind speed m/s -> km/h)
    if "convert" in parameter:
        for col, func in parameter["convert"].items():
            df[col] = df[col].apply(func)

    # Set index
    df = df.set_index("time")

    # Round decimals
    df = df.round(1)

    return df
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def get_parameter(
    parameter_dir: str, modes: list[str], station: Station
) -> Optional[pd.DataFrame]:
    """
    Fetch and merge one parameter's data across all requested modes.

    Returns None if no mode yielded data or an error occurred.
    """
    logger.debug(f"Fetching {parameter_dir} data ({modes}) for station '{station.id}'")
    try:
        frames = []
        for mode in modes:
            frames.append(
                get_df(parameter_dir, mode, station.identifiers["national"])
            )
        if all(frame is None for frame in frames):
            return None
        merged = pd.concat(frames)
        # Keep the first occurrence of each timestamp, i.e. prefer
        # "historical" (quality-checked) rows over "recent" duplicates
        return merged.loc[~merged.index.duplicated(keep="first")]
    except Exception as error:
        logger.warning(error, exc_info=True)
        return None
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def fetch(req: ProviderRequest):
    """
    Fetch hourly DWD observation data for a provider request.

    Returns a DataFrame with one column group per available parameter,
    or None if the station has no national identifier or no data exists.
    """
    if "national" not in req.station.identifiers:
        return None

    # Check which modes to consider for data fetching
    #
    # The dataset is divided into a versioned part with completed quality check ("historical"),
    # and a part for which the quality check has not yet been completed ("recent").
    #
    # There is no definite answer as to when the quality check is completed. We're assuming a
    # period of 3 years here. If the end date of the query is within this period, we will also
    # consider the "recent" mode.
    modes = ["historical"]
    if abs((req.end - datetime.now()).days) < 3 * 365:
        modes.append("recent")

    # Only fetch parameter groups that overlap with the requested parameters
    columns = [
        get_parameter(parameter["dir"], config.dwd_hourly_modes or modes, req.station)
        for parameter in PARAMETERS
        if not set(req.parameters).isdisjoint(parameter["names"].values())
    ]

    # Drop parameters without data; pd.concat raises if it receives no
    # (or only None) objects, so guard against an empty result
    columns = [column for column in columns if column is not None]
    if not columns:
        return None

    return pd.concat(columns, axis=1)
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DWD national daily data import routine
|
|
3
|
+
|
|
4
|
+
Get daily data for weather stations in Germany.
|
|
5
|
+
|
|
6
|
+
The code is licensed under the MIT license.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from ftplib import FTP
|
|
11
|
+
from io import BytesIO
|
|
12
|
+
from typing import Optional
|
|
13
|
+
from zipfile import ZipFile
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from meteostat.enumerations import TTL, Parameter
|
|
18
|
+
from meteostat.typing import ProviderRequest
|
|
19
|
+
from meteostat.core.cache import cache_service
|
|
20
|
+
from meteostat.utils.conversions import ms_to_kmh
|
|
21
|
+
from meteostat.providers.dwd.shared import get_ftp_connection
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Root directory of DWD's monthly climate ("kl") observations on the FTP server
BASE_DIR = "/climate_environment/CDC/observations_germany/climate/monthly/kl/"
USECOLS = [1, 4, 5, 6, 7, 9, 10, 11, 12, 14]  # CSV cols which should be read
PARSE_DATES = {"time": [0]}  # Which columns should be parsed as dates?
# Maps DWD monthly column names ("MO_*" monthly means, "MX_*" monthly
# extremes) to Meteostat parameters
NAMES = {
    "MO_N": Parameter.CLDC,
    "MO_TT": Parameter.TEMP,
    "MO_TX": Parameter.TMAX,
    "MO_TN": Parameter.TMIN,
    "MX_TX": Parameter.TXMX,
    "MX_TN": Parameter.TXMN,
    "MX_FX": Parameter.WPGT,
    "MO_SD_S": Parameter.TSUN,
    "MO_RR": Parameter.PRCP,
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def find_file(ftp: FTP, mode: str, needle: str):
    """
    Find file in directory

    Returns the first file name in BASE_DIR/mode containing *needle*,
    or None if the directory could not be listed or nothing matched.
    """
    match = None

    try:
        ftp.cwd(BASE_DIR + mode)
        files = ftp.nlst()
        matching = [f for f in files if needle in f]
        match = matching[0]
    except Exception:
        # A missing directory or an empty match list is expected for some
        # stations; treat both as "not found". Never catch BaseException
        # here — that would also swallow KeyboardInterrupt/SystemExit.
        pass

    return match
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@cache_service.cache(TTL.WEEK, "pickle")
def get_df(station: str, mode: str) -> Optional[pd.DataFrame]:
    """
    Get a file from DWD FTP server and convert to a pandas DataFrame

    Returns None if no matching file exists or the archive holds no data file.
    """
    ftp = get_ftp_connection()
    try:
        remote_file = find_file(ftp, mode, f"_{station}_")

        if remote_file is None:
            return None

        buffer = BytesIO()
        ftp.retrbinary("RETR " + remote_file, buffer.write)
    finally:
        # Close the connection on every path, not only after a successful
        # download (the original leaked it when no file was found)
        ftp.close()

    # Unzip file
    with ZipFile(buffer, "r") as zipped:
        filelist = zipped.namelist()
        raw = None
        for file in filelist:
            # The actual data file inside the archive is prefixed with "produkt"
            if file[:7] == "produkt":
                with zipped.open(file, "r") as reader:
                    raw = BytesIO(reader.read())

    # Guard against archives that contain no data file; without this,
    # pd.read_csv(None) would fail with a confusing error
    if raw is None:
        return None

    # Convert raw data to DataFrame
    df: pd.DataFrame = pd.read_csv(  # type: ignore
        raw,
        sep=r"\s*;\s*",
        date_format="%Y%m%d",
        na_values=["-999", -999],
        usecols=USECOLS,
        parse_dates=PARSE_DATES,
        engine="python",
    )

    # Rename columns (strip padding, then map DWD names to Meteostat parameters)
    df = df.rename(columns=lambda x: x.strip())
    df = df.rename(columns=NAMES)

    # Convert data
    df["wpgt"] = df["wpgt"].apply(ms_to_kmh)  # m/s -> km/h
    df["tsun"] = df["tsun"] * 60  # hours -> minutes
    df["tsun"] = df["tsun"].round()
    df["cldc"] = df["cldc"].round()

    # Set index
    df = df.set_index("time")

    # Round decimals
    df = df.round(1)

    return df
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def fetch(req: ProviderRequest):
    """
    Fetch monthly DWD observation data for a provider request.

    Returns an empty DataFrame if the station has no national identifier
    or no data could be retrieved.
    """
    if "national" not in req.station.identifiers:
        return pd.DataFrame()

    # Check which modes to consider for data fetching
    #
    # The dataset is divided into a versioned part with completed quality check ("historical"),
    # and a part for which the quality check has not yet been completed ("recent").
    #
    # There is no definite answer as to when the quality check is completed. We're assuming a
    # period of 3 years here. If the end date of the query is within this period, we will also
    # consider the "recent" mode.
    modes = ["historical"]
    if abs((req.end - datetime.now()).days) < 3 * 365:
        modes.append("recent")

    data = [
        get_df(
            req.station.identifiers["national"],
            mode,
        )
        for mode in modes
    ]

    # get_df returns None when no file was found; pd.concat raises when all
    # objects are None, so drop them and guard against an empty list
    data = [d for d in data if d is not None]
    if not data:
        return pd.DataFrame()

    df = pd.concat(data)

    # Keep the first occurrence of each timestamp, i.e. prefer
    # "historical" (quality-checked) rows over "recent" duplicates
    return df.loc[~df.index.duplicated(keep="first")]
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DWD MOSMIX data provider
|
|
3
|
+
|
|
4
|
+
Parameters: https://www.dwd.de/DE/leistungen/met_verfahren_mosmix/mosmix_parameteruebersicht.pdf?__blob=publicationFile&v=3
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from io import BytesIO
|
|
9
|
+
from typing import Optional
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from zipfile import ZipFile
|
|
12
|
+
from lxml import etree # type: ignore
|
|
13
|
+
|
|
14
|
+
import pandas as pd
|
|
15
|
+
|
|
16
|
+
from meteostat.core.cache import cache_service
|
|
17
|
+
from meteostat.enumerations import TTL, Parameter
|
|
18
|
+
from meteostat.typing import ProviderRequest
|
|
19
|
+
from meteostat.utils.conversions import (
|
|
20
|
+
kelvin_to_celsius,
|
|
21
|
+
ms_to_kmh,
|
|
22
|
+
percentage_to_okta,
|
|
23
|
+
temp_dwpt_to_rhum,
|
|
24
|
+
)
|
|
25
|
+
from meteostat.core.network import network_service
|
|
26
|
+
|
|
27
|
+
# URL template for the latest MOSMIX_L forecast of a single station (KMZ archive)
ENDPOINT = "https://opendata.dwd.de/weather/local_forecasts/mos/MOSMIX_L/single_stations/{station}/kml/MOSMIX_L_LATEST_{station}.kmz"
# Maps DWD "ww" weather codes (as strings) to Meteostat condition codes;
# codes absent from this map yield None
COCO_MAP = {
    "0": 1,
    "1": 2,
    "2": 3,
    "3": 4,
    "45": 5,
    "49": 5,
    "61": 7,
    "63": 8,
    "65": 9,
    "51": 7,
    "53": 8,
    "55": 9,
    "68": 12,
    "69": 13,
    "71": 14,
    "73": 15,
    "75": 16,
    "80": 17,
    "81": 18,
    "82": 18,
    "83": 19,
    "84": 20,
    "85": 21,
    "86": 22,
    "66": 10,
    "67": 11,
    "56": 10,
    "57": 11,
    "95": 25,
}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_coco(code: str | int) -> Optional[int]:
    """
    Map DWD MOSMIX weather condition codes to Meteostat condicodes
    """
    # Normalize to string so int and str inputs hit the same map keys
    key = str(code)
    if key in COCO_MAP:
        return COCO_MAP[key]
    return None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _is_numeric(value: str) -> bool:
    """Check whether a raw MOSMIX value string is a (possibly negative) decimal."""
    return value.lstrip("-").replace(".", "", 1).isdigit()


def _forecast_values(placemark, nsmap, element_name: str) -> list:
    """
    Extract the raw forecast value strings for one MOSMIX element.

    Each element's forecast is a single whitespace-separated string; the
    result is the list of its raw tokens (missing values are "-" placeholders).
    """
    text = placemark.xpath(
        f'kml:ExtendedData/dwd:Forecast[@dwd:elementName="{element_name}"]/dwd:value',
        namespaces=nsmap,
    )[0].text
    return re.sub(r"/\s+/", " ", text).strip().split()


@cache_service.cache(TTL.HOUR, "pickle")
def get_df(station: str) -> Optional[pd.DataFrame]:
    """
    Download & parse the latest MOSMIX_L forecast for a station.

    Returns a time-indexed DataFrame with one column per parameter,
    or None if the latest forecast is older than seven hours.
    """
    # Fetch the KMZ file data in memory
    response = network_service.get(ENDPOINT.format(station=station))
    kmz_data = BytesIO(response.content)

    # KMZ -> KML in memory
    with ZipFile(kmz_data, "r") as kmz:
        with kmz.open(kmz.infolist()[0].filename, "r") as raw:
            kml = raw.read()

    # Parse KML
    tree = etree.fromstring(kml)

    # Skip stale forecasts (issue time older than 25200 s = 7 h)
    issue_time = datetime.strptime(
        tree.xpath(
            "//kml:kml/kml:Document/kml:ExtendedData/"
            + "dwd:ProductDefinition/dwd:IssueTime",
            namespaces=tree.nsmap,
        )[0].text,
        "%Y-%m-%dT%H:%M:%S.%fZ",
    )
    if (datetime.now() - issue_time).total_seconds() > 25200:
        return None

    # Collect all time steps
    timesteps = [
        step.text
        for step in tree.xpath(
            "//kml:kml/kml:Document/kml:ExtendedData/dwd:ProductDefinition/"
            + "dwd:ForecastTimeSteps/dwd:TimeStep",
            namespaces=tree.nsmap,
        )
    ]

    placemark = tree.xpath(
        "//kml:kml/kml:Document/kml:Placemark", namespaces=tree.nsmap
    )[0]

    # COLLECT WEATHER DATA
    # (parameter, MOSMIX element name, converter) — one entry per column;
    # the order below fixes the resulting column order
    elements = (
        (Parameter.TEMP, "TTT", lambda v: kelvin_to_celsius(float(v))),
        (Parameter.DWPT, "Td", lambda v: kelvin_to_celsius(float(v))),
        (Parameter.PRCP, "RR1c", float),
        (Parameter.WDIR, "DD", lambda v: int(float(v))),
        (Parameter.WSPD, "FF", lambda v: ms_to_kmh(float(v))),
        (Parameter.WPGT, "FX1", lambda v: ms_to_kmh(float(v))),
        (Parameter.TSUN, "SunD1", lambda v: float(v) / 60),
        (Parameter.PRES, "PPPP", lambda v: float(v) / 100),
        (Parameter.CLDC, "N", lambda v: percentage_to_okta(float(v))),
        (Parameter.VSBY, "VV", float),
        (Parameter.COCO, "ww", lambda v: get_coco(int(float(v)))),
    )
    data = {"time": timesteps}
    for parameter, element_name, convert in elements:
        data[parameter] = [
            convert(value) if _is_numeric(value) else None
            for value in _forecast_values(placemark, tree.nsmap, element_name)
        ]

    # Convert data dict to DataFrame
    df = pd.DataFrame.from_dict(data)

    # Convert time strings to datetime
    df["time"] = pd.to_datetime(df["time"])

    # Calculate humidity data
    df[Parameter.RHUM] = df.apply(temp_dwpt_to_rhum, axis=1)

    # Set index
    df = df.set_index(["time"])

    # Round decimals
    df = df.round(1)

    # Remove tz awareness
    df = df.tz_convert(None, level="time")

    return df
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def fetch(req: ProviderRequest) -> Optional[pd.DataFrame]:
    """Fetch MOSMIX forecast data if the station has a MOSMIX identifier."""
    identifiers = req.station.identifiers
    # Stations without a MOSMIX identifier yield no data (None)
    if "mosmix" not in identifiers:
        return None
    return get_df(identifiers["mosmix"])
|