meteostat 1.7.6__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meteostat/__init__.py +32 -19
- meteostat/api/daily.py +76 -0
- meteostat/api/hourly.py +80 -0
- meteostat/api/interpolate.py +240 -0
- meteostat/api/inventory.py +59 -0
- meteostat/api/merge.py +103 -0
- meteostat/api/monthly.py +73 -0
- meteostat/api/normals.py +144 -0
- meteostat/api/point.py +30 -0
- meteostat/api/stations.py +234 -0
- meteostat/api/timeseries.py +334 -0
- meteostat/core/cache.py +212 -59
- meteostat/core/config.py +158 -0
- meteostat/core/data.py +199 -0
- meteostat/core/logger.py +9 -0
- meteostat/core/network.py +82 -0
- meteostat/core/parameters.py +112 -0
- meteostat/core/providers.py +184 -0
- meteostat/core/schema.py +170 -0
- meteostat/core/validator.py +38 -0
- meteostat/enumerations.py +149 -0
- meteostat/interpolation/idw.py +120 -0
- meteostat/interpolation/lapserate.py +91 -0
- meteostat/interpolation/nearest.py +31 -0
- meteostat/parameters.py +354 -0
- meteostat/providers/dwd/climat.py +166 -0
- meteostat/providers/dwd/daily.py +144 -0
- meteostat/providers/dwd/hourly.py +218 -0
- meteostat/providers/dwd/monthly.py +138 -0
- meteostat/providers/dwd/mosmix.py +351 -0
- meteostat/providers/dwd/poi.py +117 -0
- meteostat/providers/dwd/shared.py +155 -0
- meteostat/providers/eccc/daily.py +87 -0
- meteostat/providers/eccc/hourly.py +104 -0
- meteostat/providers/eccc/monthly.py +66 -0
- meteostat/providers/eccc/shared.py +45 -0
- meteostat/providers/index.py +496 -0
- meteostat/providers/meteostat/daily.py +65 -0
- meteostat/providers/meteostat/daily_derived.py +110 -0
- meteostat/providers/meteostat/hourly.py +66 -0
- meteostat/providers/meteostat/monthly.py +45 -0
- meteostat/providers/meteostat/monthly_derived.py +106 -0
- meteostat/providers/meteostat/shared.py +93 -0
- meteostat/providers/metno/forecast.py +186 -0
- meteostat/providers/noaa/ghcnd.py +228 -0
- meteostat/providers/noaa/isd_lite.py +142 -0
- meteostat/providers/noaa/metar.py +163 -0
- meteostat/typing.py +113 -0
- meteostat/utils/conversions.py +231 -0
- meteostat/utils/data.py +194 -0
- meteostat/utils/geo.py +28 -0
- meteostat/utils/parsers.py +168 -0
- meteostat/utils/types.py +113 -0
- meteostat/utils/validators.py +31 -0
- meteostat-2.0.0.dist-info/METADATA +134 -0
- meteostat-2.0.0.dist-info/RECORD +63 -0
- {meteostat-1.7.6.dist-info → meteostat-2.0.0.dist-info}/WHEEL +1 -2
- meteostat/core/loader.py +0 -103
- meteostat/core/warn.py +0 -34
- meteostat/enumerations/granularity.py +0 -22
- meteostat/interface/base.py +0 -39
- meteostat/interface/daily.py +0 -118
- meteostat/interface/hourly.py +0 -154
- meteostat/interface/meteodata.py +0 -210
- meteostat/interface/monthly.py +0 -109
- meteostat/interface/normals.py +0 -245
- meteostat/interface/point.py +0 -143
- meteostat/interface/stations.py +0 -252
- meteostat/interface/timeseries.py +0 -237
- meteostat/series/aggregate.py +0 -48
- meteostat/series/convert.py +0 -28
- meteostat/series/count.py +0 -17
- meteostat/series/coverage.py +0 -20
- meteostat/series/fetch.py +0 -28
- meteostat/series/interpolate.py +0 -47
- meteostat/series/normalize.py +0 -76
- meteostat/series/stations.py +0 -22
- meteostat/units.py +0 -149
- meteostat/utilities/__init__.py +0 -0
- meteostat/utilities/aggregations.py +0 -37
- meteostat/utilities/endpoint.py +0 -33
- meteostat/utilities/helpers.py +0 -70
- meteostat/utilities/mutations.py +0 -89
- meteostat/utilities/validations.py +0 -30
- meteostat-1.7.6.dist-info/METADATA +0 -112
- meteostat-1.7.6.dist-info/RECORD +0 -39
- meteostat-1.7.6.dist-info/top_level.txt +0 -1
- /meteostat/{core → api}/__init__.py +0 -0
- /meteostat/{enumerations → interpolation}/__init__.py +0 -0
- /meteostat/{interface → providers}/__init__.py +0 -0
- /meteostat/{interface/interpolate.py → py.typed} +0 -0
- /meteostat/{series → utils}/__init__.py +0 -0
- {meteostat-1.7.6.dist-info → meteostat-2.0.0.dist-info/licenses}/LICENSE +0 -0
meteostat/providers/dwd/daily.py (+144)
@@ -0,0 +1,144 @@
+"""
+DWD national daily data import routine
+
+Get daily data for weather stations in Germany.
+
+The code is licensed under the MIT license.
+"""
+
+from datetime import datetime
+from ftplib import FTP
+from io import BytesIO
+from typing import Optional
+from zipfile import ZipFile
+
+import pandas as pd
+
+from meteostat.core.config import config
+from meteostat.enumerations import TTL, Parameter
+from meteostat.typing import ProviderRequest
+from meteostat.core.cache import cache_service
+from meteostat.utils.conversions import ms_to_kmh, pres_to_msl
+from meteostat.providers.dwd.shared import get_ftp_connection
+
+BASE_DIR = "/climate_environment/CDC/observations_germany/climate/daily/kl/"
+USECOLS = [1, 3, 4, 6, 8, 9, 10, 12, 13, 14, 15, 16]  # CSV cols which should be read
+NAMES = {
+    "FX": Parameter.WPGT,
+    "FM": Parameter.WSPD,
+    "RSK": Parameter.PRCP,
+    "SDK": Parameter.TSUN,
+    "SHK_TAG": Parameter.SNWD,
+    "NM": Parameter.CLDC,
+    "PM": Parameter.PRES,
+    "TMK": Parameter.TEMP,
+    "UPM": Parameter.RHUM,
+    "TXK": Parameter.TMAX,
+    "TNK": Parameter.TMIN,
+}
+
+
+def find_file(ftp: FTP, mode: str, needle: str):
+    """
+    Find file in directory
+    """
+    match = None
+
+    try:
+        ftp.cwd(BASE_DIR + mode)
+        files = ftp.nlst()
+        matching = [f for f in files if needle in f]
+        match = matching[0]
+    except BaseException:
+        pass
+
+    return match
+
+
+@cache_service.cache(TTL.DAY, "pickle")
+def get_df(station: str, elevation: int, mode: str) -> Optional[pd.DataFrame]:
+    """
+    Get a file from DWD FTP server and convert to DataFrame
+    """
+    ftp = get_ftp_connection()
+    remote_file = find_file(ftp, mode, f"_{station}_")
+
+    if remote_file is None:
+        return None
+
+    buffer = BytesIO()
+    ftp.retrbinary("RETR " + remote_file, buffer.write)
+
+    ftp.close()
+
+    # Unzip file
+    with ZipFile(buffer, "r") as zipped:
+        filelist = zipped.namelist()
+        raw = None
+        for file in filelist:
+            if file[:7] == "produkt":
+                with zipped.open(file, "r") as reader:
+                    raw = BytesIO(reader.read())
+
+    # Convert raw data to DataFrame
+    df: pd.DataFrame = pd.read_csv(  # type: ignore
+        raw,
+        sep=r"\s*;\s*",
+        date_format="%Y%m%d",
+        na_values=["-999", -999],
+        usecols=USECOLS,
+        engine="python",
+    )
+
+    # Rename columns
+    df = df.rename(columns=lambda x: x.strip())
+    df = df.rename(columns=NAMES)
+
+    # Parse date column (first column contains the date)
+    df["time"] = pd.to_datetime(df.iloc[:, 0], format="%Y%m%d")
+    df = df.drop(df.columns[0], axis=1)
+
+    # Convert data
+    df[Parameter.SNWD] = df[Parameter.SNWD] * 10
+    df[Parameter.WPGT] = df[Parameter.WPGT].apply(ms_to_kmh)
+    df[Parameter.WSPD] = df[Parameter.WSPD].apply(ms_to_kmh)
+    df[Parameter.TSUN] = df[Parameter.TSUN] * 60
+    df[Parameter.PRES] = df.apply(lambda row: pres_to_msl(row, elevation), axis=1)
+
+    # Set index
+    df = df.set_index("time")
+
+    # Round decimals
+    df = df.round(1)
+
+    return df
+
+
+def fetch(req: ProviderRequest):
+    if "national" not in req.station.identifiers:
+        return pd.DataFrame()
+
+    # Check which modes to consider for data fetching
+    #
+    # The dataset is divided into a versioned part with completed quality check ("historical"),
+    # and a part for which the quality check has not yet been completed ("recent").
+    #
+    # There is no definite answer as to when the quality check is completed. We're assuming a
+    # period of 3 years here. If the end date of the query is within this period, we will also
+    # consider the "recent" mode.
+    modes = ["historical"]
+    if abs((req.end - datetime.now()).days) < 3 * 365:
+        modes.append("recent")
+
+    data = [
+        get_df(
+            req.station.identifiers["national"],
+            req.station.elevation,
+            mode,
+        )
+        for mode in config.dwd_daily_modes or modes
+    ]
+
+    df = pd.concat(data)
+
+    return df.loc[~df.index.duplicated(keep="first")]
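Note on the final two lines of `fetch`: concatenating the per-mode frames and then dropping duplicated index entries means the quality-checked "historical" rows win wherever both modes report the same day, because "historical" comes first in the modes list. A minimal, self-contained pandas sketch of that pattern (the frames and values below are invented for illustration and are not part of the package):

import pandas as pd

# Invented daily frames standing in for the "historical" and "recent" modes
historical = pd.DataFrame(
    {"temp": [4.2, 5.1]},
    index=pd.to_datetime(["2024-01-01", "2024-01-02"]),
)
recent = pd.DataFrame(
    {"temp": [5.0, 6.3]},
    index=pd.to_datetime(["2024-01-02", "2024-01-03"]),
)

df = pd.concat([historical, recent])
# keep="first" retains the earlier (quality-checked) row for 2024-01-02
df = df.loc[~df.index.duplicated(keep="first")]
# df now holds 4.2, 5.1 and 6.3; the overlapping "recent" value 5.0 is dropped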
meteostat/providers/dwd/hourly.py (+218)
@@ -0,0 +1,218 @@
+"""
+DWD national hourly data import routine
+
+Get hourly data for weather stations in Germany.
+
+The code is licensed under the MIT license.
+"""
+
+from datetime import datetime
+from ftplib import FTP
+from io import BytesIO
+from typing import Callable, Dict, List, NotRequired, Optional, TypedDict
+from zipfile import ZipFile
+
+import pandas as pd
+
+from meteostat.enumerations import TTL, Parameter
+from meteostat.core.logger import logger
+from meteostat.typing import ProviderRequest, Station
+from meteostat.core.cache import cache_service
+from meteostat.core.config import config
+from meteostat.utils.conversions import ms_to_kmh
+from meteostat.providers.dwd.shared import get_condicode
+from meteostat.providers.dwd.shared import get_ftp_connection
+
+
+class ParameterDefinition(TypedDict):
+    dir: str
+    usecols: List[int]
+    names: Dict[str, str]
+    convert: NotRequired[Dict[str, Callable]]
+    encoding: NotRequired[str]
+    historical_only: NotRequired[bool]
+
+
+BASE_DIR = "/climate_environment/CDC/observations_germany/climate/hourly/"
+PARAMETERS: List[ParameterDefinition] = [
+    {
+        "dir": "precipitation",
+        "usecols": [1, 3],
+        "names": {"R1": Parameter.PRCP},
+    },
+    {
+        "dir": "air_temperature",
+        "usecols": [1, 3, 4],
+        "names": {"TT_TU": Parameter.TEMP, "RF_TU": Parameter.RHUM},
+    },
+    {
+        "dir": "wind",
+        "usecols": [1, 3, 4],
+        "names": {"F": Parameter.WSPD, "D": Parameter.WDIR},
+        "convert": {"wspd": ms_to_kmh},
+    },
+    {
+        "dir": "pressure",
+        "usecols": [1, 3],
+        "names": {"P": Parameter.PRES},
+    },
+    {
+        "dir": "sun",
+        "usecols": [1, 3],
+        "names": {"SD_SO": Parameter.TSUN},
+    },
+    {
+        "dir": "cloudiness",
+        "usecols": [1, 4],
+        "names": {"V_N": Parameter.CLDC},
+    },
+    {
+        "dir": "visibility",
+        "usecols": [1, 4],
+        "names": {"V_VV": Parameter.VSBY},
+    },
+    {
+        "dir": "weather_phenomena",
+        "usecols": [1, 3],
+        "names": {"WW": Parameter.COCO},
+        "convert": {"coco": get_condicode},
+        "encoding": "latin-1",
+    },
+    # TODO: Implement solar radiation
+    # {
+    #     "dir": "solar",
+    #     "usecols": [1, 5],
+    #     "names": {"FG_LBERG": "srad"},
+    #     "convert": {"srad": jcm2_to_wm2},
+    #     "historical_only": True,
+    # },
+]
+
+
+def find_file(ftp: FTP, path: str, needle: str):
+    """
+    Find file in directory
+    """
+    match = None
+
+    try:
+        ftp.cwd(BASE_DIR + path)
+        files = ftp.nlst()
+        matching = [f for f in files if needle in f]
+        match = matching[0]
+        logger.debug(f"Found file '{match}' in '{path}' directory")
+    except IndexError:
+        logger.info(f"File '{needle}' not found in '{path}' directory")
+
+    return match
+
+
+@cache_service.cache(TTL.DAY, "pickle")
+def get_df(parameter_dir: str, mode: str, station_id: str) -> Optional[pd.DataFrame]:
+    """
+    Get a file from DWD FTP server and convert to DataFrame
+    """
+    logger.debug(
+        f"Fetching {parameter_dir} data ({mode}) for DWD station '{station_id}'"
+    )
+
+    parameter = next(param for param in PARAMETERS if param["dir"] == parameter_dir)
+
+    ftp = get_ftp_connection()
+    remote_file = find_file(ftp, f"{parameter['dir']}/{mode}", station_id)
+
+    if remote_file is None:
+        return None
+
+    buffer = BytesIO()
+    ftp.retrbinary("RETR " + remote_file, buffer.write)
+
+    # Unzip file
+    with ZipFile(buffer, "r") as zipped:
+        filelist = zipped.namelist()
+        raw = None
+        for file in filelist:
+            if file[:7] == "produkt":
+                with zipped.open(file, "r") as reader:
+                    raw = BytesIO(reader.read())
+
+    # Convert raw data to DataFrame
+    df: pd.DataFrame = pd.read_csv(  # type: ignore
+        raw,
+        sep=";",
+        skipinitialspace=True,
+        date_format="%Y%m%d%H",
+        na_values=[-999, "-999"],
+        usecols=parameter["usecols"],
+        encoding=parameter["encoding"] if "encoding" in parameter else None,
+    )
+
+    df["time"] = pd.to_datetime(df.pop("MESS_DATUM"), format="%Y%m%d%H")
+
+    logger.debug(f"Found {len(df)} rows in {remote_file}")
+
+    # Rename columns
+    df = df.rename(columns=lambda x: x.strip())
+    df = df.rename(columns=parameter["names"])
+
+    # Convert column data
+    if "convert" in parameter:
+        for col, func in parameter["convert"].items():
+            df[col] = df[col].apply(func)
+
+    # Set index
+    df = df.set_index("time")
+
+    # Round decimals
+    df = df.round(1)
+
+    return df
+
+
+def get_parameter(
+    parameter_dir: str, modes: list[str], station: Station
+) -> Optional[pd.DataFrame]:
+    logger.debug(f"Fetching {parameter_dir} data ({modes}) for station '{station.id}'")
+    try:
+        data = [
+            get_df(parameter_dir, mode, station.identifiers["national"])
+            for mode in modes
+        ]
+        if all(d is None for d in data):
+            return None
+        df = pd.concat(data)
+        return df.loc[~df.index.duplicated(keep="first")]
+    except Exception as error:
+        logger.warning(error, exc_info=True)
+        return None
+
+
+def fetch(req: ProviderRequest):
+    if "national" not in req.station.identifiers:
+        return None
+
+    # Check which modes to consider for data fetching
+    #
+    # The dataset is divided into a versioned part with completed quality check ("historical"),
+    # and a part for which the quality check has not yet been completed ("recent").
+    #
+    # There is no definite answer as to when the quality check is completed. We're assuming a
+    # period of 3 years here. If the end date of the query is within this period, we will also
+    # consider the "recent" mode.
+    modes = ["historical"]
+    if abs((req.end - datetime.now()).days) < 3 * 365:
+        modes.append("recent")
+
+    columns = map(
+        lambda args: get_parameter(*args),
+        (
+            (parameter["dir"], config.dwd_hourly_modes or modes, req.station)
+            for parameter in [
+                param
+                for param in PARAMETERS
+                if not set(req.parameters).isdisjoint(param["names"].values())
+            ]
+        ),
+    )
+
+    return pd.concat(columns, axis=1)
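Note on the hourly `fetch` above: unlike the daily routine, it retrieves each parameter group whose renamed columns intersect `req.parameters` separately, then joins the resulting frames column-wise with `pd.concat(columns, axis=1)`, which aligns them on their shared time index. A small sketch of that join (invented sample frames, not part of the package):

import pandas as pd

# Invented per-parameter frames standing in for two parameter groups
idx = pd.to_datetime(["2024-01-01 00:00", "2024-01-01 01:00"])
temp = pd.DataFrame({"temp": [2.1, 2.4]}, index=idx)  # e.g. "air_temperature"
prcp = pd.DataFrame({"prcp": [0.0, 0.3]}, index=idx)  # e.g. "precipitation"

# axis=1 aligns the frames on their shared time index: one row per hour,
# one column per parameter; hours missing from either frame become NaN
hourly = pd.concat([temp, prcp], axis=1)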
meteostat/providers/dwd/monthly.py (+138)
@@ -0,0 +1,138 @@
+"""
+DWD national monthly data import routine
+
+Get monthly data for weather stations in Germany.
+
+The code is licensed under the MIT license.
+"""
+
+from datetime import datetime
+from ftplib import FTP
+from io import BytesIO
+from typing import Optional
+from zipfile import ZipFile
+
+import pandas as pd
+
+from meteostat.enumerations import TTL, Parameter
+from meteostat.typing import ProviderRequest
+from meteostat.core.cache import cache_service
+from meteostat.utils.conversions import ms_to_kmh
+from meteostat.providers.dwd.shared import get_ftp_connection
+
+
+BASE_DIR = "/climate_environment/CDC/observations_germany/climate/monthly/kl/"
+USECOLS = [1, 4, 5, 6, 7, 9, 10, 11, 12, 14]  # CSV cols which should be read
+PARSE_DATES = {"time": [0]}  # Which columns should be parsed as dates?
+NAMES = {
+    "MO_N": Parameter.CLDC,
+    "MO_TT": Parameter.TEMP,
+    "MO_TX": Parameter.TMAX,
+    "MO_TN": Parameter.TMIN,
+    "MX_TX": Parameter.TXMX,
+    "MX_TN": Parameter.TXMN,
+    "MX_FX": Parameter.WPGT,
+    "MO_SD_S": Parameter.TSUN,
+    "MO_RR": Parameter.PRCP,
+}
+
+
+def find_file(ftp: FTP, mode: str, needle: str):
+    """
+    Find file in directory
+    """
+    match = None
+
+    try:
+        ftp.cwd(BASE_DIR + mode)
+        files = ftp.nlst()
+        matching = [f for f in files if needle in f]
+        match = matching[0]
+    except BaseException:
+        pass
+
+    return match
+
+
+@cache_service.cache(TTL.WEEK, "pickle")
+def get_df(station: str, mode: str) -> Optional[pd.DataFrame]:
+    """
+    Get a file from DWD FTP server and convert to DataFrame
+    """
+    ftp = get_ftp_connection()
+    remote_file = find_file(ftp, mode, f"_{station}_")
+
+    if remote_file is None:
+        return None
+
+    buffer = BytesIO()
+    ftp.retrbinary("RETR " + remote_file, buffer.write)
+
+    ftp.close()
+
+    # Unzip file
+    with ZipFile(buffer, "r") as zipped:
+        filelist = zipped.namelist()
+        raw = None
+        for file in filelist:
+            if file[:7] == "produkt":
+                with zipped.open(file, "r") as reader:
+                    raw = BytesIO(reader.read())
+
+    # Convert raw data to DataFrame
+    df: pd.DataFrame = pd.read_csv(  # type: ignore
+        raw,
+        sep=r"\s*;\s*",
+        date_format="%Y%m%d",
+        na_values=["-999", -999],
+        usecols=USECOLS,
+        parse_dates=PARSE_DATES,
+        engine="python",
+    )
+
+    # Rename columns
+    df = df.rename(columns=lambda x: x.strip())
+    df = df.rename(columns=NAMES)
+
+    # Convert data
+    df["wpgt"] = df["wpgt"].apply(ms_to_kmh)
+    df["tsun"] = df["tsun"] * 60
+    df["tsun"] = df["tsun"].round()
+    df["cldc"] = df["cldc"].round()
+
+    # Set index
+    df = df.set_index("time")
+
+    # Round decimals
+    df = df.round(1)
+
+    return df
+
+
+def fetch(req: ProviderRequest):
+    if "national" not in req.station.identifiers:
+        return pd.DataFrame()
+
+    # Check which modes to consider for data fetching
+    #
+    # The dataset is divided into a versioned part with completed quality check ("historical"),
+    # and a part for which the quality check has not yet been completed ("recent").
+    #
+    # There is no definite answer as to when the quality check is completed. We're assuming a
+    # period of 3 years here. If the end date of the query is within this period, we will also
+    # consider the "recent" mode.
+    modes = ["historical"]
+    if abs((req.end - datetime.now()).days) < 3 * 365:
+        modes.append("recent")
+
+    data = [
+        get_df(
+            req.station.identifiers["national"],
+            mode,
+        )
+        for mode in modes
+    ]
+
+    df = pd.concat(data)
+
+    return df.loc[~df.index.duplicated(keep="first")]
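Note on the `pd.read_csv` call shared by these routines: DWD's "produkt_*" files are semicolon-separated with space padding around the values, use -999 as the missing-value sentinel, and end each record with an "eor" marker, which is why the daily and monthly routines use a regex separator with the python engine. A rough, self-contained sketch of that parsing step (the sample rows below only approximate the real column layout and are not taken from an actual DWD file):

from io import StringIO

import pandas as pd

# Rows loosely shaped like a DWD "produkt_*" file; column names are illustrative
sample = StringIO(
    "STATIONS_ID;MESS_DATUM;MO_TT;MO_RR;eor\n"
    "  44;20240101;  3.2; 61.5;eor\n"
    "  44;20240201; -999; 44.0;eor\n"
)

df = pd.read_csv(
    sample,
    sep=r"\s*;\s*",            # consume the space padding around each ';'
    na_values=["-999", -999],  # DWD encodes missing values as -999
    engine="python",           # a regex separator requires the python engine
)
# df.loc[1, "MO_TT"] is now NaN; the trailing "eor" column marks end of record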