meteostat 1.7.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. meteostat/__init__.py +32 -19
  2. meteostat/api/daily.py +76 -0
  3. meteostat/api/hourly.py +80 -0
  4. meteostat/api/interpolate.py +240 -0
  5. meteostat/api/inventory.py +59 -0
  6. meteostat/api/merge.py +103 -0
  7. meteostat/api/monthly.py +73 -0
  8. meteostat/api/normals.py +144 -0
  9. meteostat/api/point.py +30 -0
  10. meteostat/api/stations.py +234 -0
  11. meteostat/api/timeseries.py +334 -0
  12. meteostat/core/cache.py +212 -59
  13. meteostat/core/config.py +158 -0
  14. meteostat/core/data.py +199 -0
  15. meteostat/core/logger.py +9 -0
  16. meteostat/core/network.py +82 -0
  17. meteostat/core/parameters.py +112 -0
  18. meteostat/core/providers.py +184 -0
  19. meteostat/core/schema.py +170 -0
  20. meteostat/core/validator.py +38 -0
  21. meteostat/enumerations.py +149 -0
  22. meteostat/interpolation/idw.py +120 -0
  23. meteostat/interpolation/lapserate.py +91 -0
  24. meteostat/interpolation/nearest.py +31 -0
  25. meteostat/parameters.py +354 -0
  26. meteostat/providers/dwd/climat.py +166 -0
  27. meteostat/providers/dwd/daily.py +144 -0
  28. meteostat/providers/dwd/hourly.py +218 -0
  29. meteostat/providers/dwd/monthly.py +138 -0
  30. meteostat/providers/dwd/mosmix.py +351 -0
  31. meteostat/providers/dwd/poi.py +117 -0
  32. meteostat/providers/dwd/shared.py +155 -0
  33. meteostat/providers/eccc/daily.py +87 -0
  34. meteostat/providers/eccc/hourly.py +104 -0
  35. meteostat/providers/eccc/monthly.py +66 -0
  36. meteostat/providers/eccc/shared.py +45 -0
  37. meteostat/providers/index.py +496 -0
  38. meteostat/providers/meteostat/daily.py +65 -0
  39. meteostat/providers/meteostat/daily_derived.py +110 -0
  40. meteostat/providers/meteostat/hourly.py +66 -0
  41. meteostat/providers/meteostat/monthly.py +45 -0
  42. meteostat/providers/meteostat/monthly_derived.py +106 -0
  43. meteostat/providers/meteostat/shared.py +93 -0
  44. meteostat/providers/metno/forecast.py +186 -0
  45. meteostat/providers/noaa/ghcnd.py +228 -0
  46. meteostat/providers/noaa/isd_lite.py +142 -0
  47. meteostat/providers/noaa/metar.py +163 -0
  48. meteostat/typing.py +113 -0
  49. meteostat/utils/conversions.py +231 -0
  50. meteostat/utils/data.py +194 -0
  51. meteostat/utils/geo.py +28 -0
  52. meteostat/utils/parsers.py +168 -0
  53. meteostat/utils/types.py +113 -0
  54. meteostat/utils/validators.py +31 -0
  55. meteostat-2.0.0.dist-info/METADATA +134 -0
  56. meteostat-2.0.0.dist-info/RECORD +63 -0
  57. {meteostat-1.7.5.dist-info → meteostat-2.0.0.dist-info}/WHEEL +1 -2
  58. meteostat/core/loader.py +0 -103
  59. meteostat/core/warn.py +0 -34
  60. meteostat/enumerations/granularity.py +0 -22
  61. meteostat/interface/base.py +0 -39
  62. meteostat/interface/daily.py +0 -118
  63. meteostat/interface/hourly.py +0 -154
  64. meteostat/interface/meteodata.py +0 -210
  65. meteostat/interface/monthly.py +0 -109
  66. meteostat/interface/normals.py +0 -245
  67. meteostat/interface/point.py +0 -143
  68. meteostat/interface/stations.py +0 -252
  69. meteostat/interface/timeseries.py +0 -237
  70. meteostat/series/aggregate.py +0 -48
  71. meteostat/series/convert.py +0 -28
  72. meteostat/series/count.py +0 -17
  73. meteostat/series/coverage.py +0 -20
  74. meteostat/series/fetch.py +0 -28
  75. meteostat/series/interpolate.py +0 -47
  76. meteostat/series/normalize.py +0 -76
  77. meteostat/series/stations.py +0 -22
  78. meteostat/units.py +0 -149
  79. meteostat/utilities/__init__.py +0 -0
  80. meteostat/utilities/aggregations.py +0 -37
  81. meteostat/utilities/endpoint.py +0 -33
  82. meteostat/utilities/helpers.py +0 -70
  83. meteostat/utilities/mutations.py +0 -85
  84. meteostat/utilities/validations.py +0 -30
  85. meteostat-1.7.5.dist-info/METADATA +0 -112
  86. meteostat-1.7.5.dist-info/RECORD +0 -39
  87. meteostat-1.7.5.dist-info/top_level.txt +0 -1
  88. /meteostat/{core → api}/__init__.py +0 -0
  89. /meteostat/{enumerations → interpolation}/__init__.py +0 -0
  90. /meteostat/{interface → providers}/__init__.py +0 -0
  91. /meteostat/{interface/interpolate.py → py.typed} +0 -0
  92. /meteostat/{series → utils}/__init__.py +0 -0
  93. {meteostat-1.7.5.dist-info → meteostat-2.0.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,144 @@
1
"""
DWD national daily data import routine

Get daily data for weather stations in Germany.

The code is licensed under the MIT license.
"""

from datetime import datetime
from ftplib import FTP
from io import BytesIO
from typing import Optional
from zipfile import ZipFile

import pandas as pd

from meteostat.core.config import config
from meteostat.enumerations import TTL, Parameter
from meteostat.typing import ProviderRequest
from meteostat.core.cache import cache_service
from meteostat.utils.conversions import ms_to_kmh, pres_to_msl
from meteostat.providers.dwd.shared import get_ftp_connection

# Root directory of the DWD daily "kl" climate observations on the FTP server
BASE_DIR = "/climate_environment/CDC/observations_germany/climate/daily/kl/"
# CSV column indices which should be read
USECOLS = [1, 3, 4, 6, 8, 9, 10, 12, 13, 14, 15, 16]
# Mapping of raw DWD column names to Meteostat parameters
NAMES = {
    "FX": Parameter.WPGT,
    "FM": Parameter.WSPD,
    "RSK": Parameter.PRCP,
    "SDK": Parameter.TSUN,
    "SHK_TAG": Parameter.SNWD,
    "NM": Parameter.CLDC,
    "PM": Parameter.PRES,
    "TMK": Parameter.TEMP,
    "UPM": Parameter.RHUM,
    "TXK": Parameter.TMAX,
    "TNK": Parameter.TMIN,
}
39
+
40
+
41
def find_file(ftp: FTP, mode: str, needle: str):
    """
    Find the first file in the BASE_DIR sub-directory `mode` whose name
    contains `needle`.

    Returns the file name, or None when the directory cannot be listed
    or no file matches.
    """
    match = None

    try:
        ftp.cwd(BASE_DIR + mode)
        files = ftp.nlst()
        matching = [f for f in files if needle in f]
        match = matching[0]
    except Exception:
        # Previously `except BaseException`, which also swallowed
        # KeyboardInterrupt/SystemExit. Narrowed to Exception so those
        # still propagate; FTP errors or an empty match list (IndexError)
        # simply yield None.
        pass

    return match
56
+
57
+
58
@cache_service.cache(TTL.DAY, "pickle")
def get_df(station: str, elevation: int, mode: str) -> Optional[pd.DataFrame]:
    """
    Download the daily "kl" ZIP archive for a DWD station and convert it
    to a pandas DataFrame indexed by time.

    :param station: DWD ("national") station identifier
    :param elevation: Station elevation in meters, used for pressure
        reduction to mean sea level
    :param mode: Dataset mode ("historical" or "recent")
    :return: DataFrame of daily observations, or None when no file
        exists for the station or the archive contains no data file
    """
    ftp = get_ftp_connection()
    try:
        remote_file = find_file(ftp, mode, f"_{station}_")

        if remote_file is None:
            return None

        buffer = BytesIO()
        ftp.retrbinary("RETR " + remote_file, buffer.write)
    finally:
        # Always release the connection — it was previously leaked when
        # no matching file was found (early return before ftp.close())
        ftp.close()

    # Extract the "produkt*" data file from the ZIP archive
    raw = None
    with ZipFile(buffer, "r") as zipped:
        for file in zipped.namelist():
            if file[:7] == "produkt":
                with zipped.open(file, "r") as reader:
                    raw = BytesIO(reader.read())

    if raw is None:
        # Archive did not contain a data file; previously this fell
        # through to pd.read_csv(None) and raised
        return None

    # Convert raw data to DataFrame
    df: pd.DataFrame = pd.read_csv(  # type: ignore
        raw,
        sep=r"\s*;\s*",
        date_format="%Y%m%d",
        na_values=["-999", -999],
        usecols=USECOLS,
        engine="python",
    )

    # Strip whitespace from raw headers, then map to Meteostat parameters
    df = df.rename(columns=lambda x: x.strip())
    df = df.rename(columns=NAMES)

    # Parse date column (first column contains the date)
    df["time"] = pd.to_datetime(df.iloc[:, 0], format="%Y%m%d")
    df = df.drop(df.columns[0], axis=1)

    # Convert units: snow depth cm -> mm, wind m/s -> km/h,
    # sunshine hours -> minutes, station pressure -> MSL pressure
    df[Parameter.SNWD] = df[Parameter.SNWD] * 10
    df[Parameter.WPGT] = df[Parameter.WPGT].apply(ms_to_kmh)
    df[Parameter.WSPD] = df[Parameter.WSPD].apply(ms_to_kmh)
    df[Parameter.TSUN] = df[Parameter.TSUN] * 60
    df[Parameter.PRES] = df.apply(lambda row: pres_to_msl(row, elevation), axis=1)

    # Set index
    df = df.set_index("time")

    # Round decimals
    df = df.round(1)

    return df
115
+
116
+
117
def fetch(req: ProviderRequest):
    """
    Fetch daily DWD data for the station referenced in the request.

    Returns an empty DataFrame when the station has no DWD ("national")
    identifier or no data could be retrieved.
    """
    if "national" not in req.station.identifiers:
        return pd.DataFrame()

    # Check which modes to consider for data fetching
    #
    # The dataset is divided into a versioned part with completed quality check ("historical"),
    # and a part for which the quality check has not yet been completed ("recent").
    #
    # There is no definite answer as to when the quality check is completed. We're assuming a
    # period of 3 years here. If the end date of the query is within this period, we will also
    # consider the "recent" mode.
    modes = ["historical"]
    if abs((req.end - datetime.now()).days) < 3 * 365:
        modes.append("recent")

    data = [
        get_df(
            req.station.identifiers["national"],
            req.station.elevation,
            mode,
        )
        for mode in config.dwd_daily_modes or modes
    ]

    # get_df returns None on failure; pd.concat raises a ValueError when
    # every element is None, so drop Nones and guard against "no data"
    frames = [df for df in data if df is not None]
    if not frames:
        return pd.DataFrame()

    df = pd.concat(frames)

    # Keep the first (quality-checked "historical") row for duplicate dates
    return df.loc[~df.index.duplicated(keep="first")]
@@ -0,0 +1,218 @@
1
+ """
2
+ DWD national hourly data import routine
3
+
4
+ Get hourly data for weather stations in Germany.
5
+
6
+ The code is licensed under the MIT license.
7
+ """
8
+
9
+ from datetime import datetime
10
+ from ftplib import FTP
11
+ from io import BytesIO
12
+ from typing import Callable, Dict, List, NotRequired, Optional, TypedDict
13
+ from zipfile import ZipFile
14
+
15
+ import pandas as pd
16
+
17
+ from meteostat.enumerations import TTL, Parameter
18
+ from meteostat.core.logger import logger
19
+ from meteostat.typing import ProviderRequest, Station
20
+ from meteostat.core.cache import cache_service
21
+ from meteostat.core.config import config
22
+ from meteostat.utils.conversions import ms_to_kmh
23
+ from meteostat.providers.dwd.shared import get_condicode
24
+ from meteostat.providers.dwd.shared import get_ftp_connection
25
+
26
+
27
class ParameterDefinition(TypedDict):
    """
    Describes how one DWD hourly sub-dataset is located and parsed.
    """

    # Sub-directory below BASE_DIR (e.g. "precipitation")
    dir: str
    # CSV column indices to read from the "produkt*" file
    usecols: List[int]
    # Mapping of raw DWD column names to Meteostat parameters
    names: Dict[str, str]
    # Optional per-column conversion callables applied after renaming
    convert: NotRequired[Dict[str, Callable]]
    # Optional file encoding passed to pd.read_csv
    encoding: NotRequired[str]
    # Dataset is only published in "historical" mode
    historical_only: NotRequired[bool]
34
+
35
+
36
# Root directory of the DWD hourly climate observations on the FTP server
BASE_DIR = "/climate_environment/CDC/observations_germany/climate/hourly/"
# One entry per DWD hourly sub-dataset consumed by this provider.
# NOTE(review): "convert" keys use the parameter string value (e.g. "wspd")
# while "names" maps to Parameter members — assumes Parameter is a str enum
# whose values equal these strings; confirm in meteostat.enumerations.
PARAMETERS: List[ParameterDefinition] = [
    {
        "dir": "precipitation",
        "usecols": [1, 3],
        "names": {"R1": Parameter.PRCP},
    },
    {
        "dir": "air_temperature",
        "usecols": [1, 3, 4],
        "names": {"TT_TU": Parameter.TEMP, "RF_TU": Parameter.RHUM},
    },
    {
        "dir": "wind",
        "usecols": [1, 3, 4],
        "names": {"F": Parameter.WSPD, "D": Parameter.WDIR},
        # Wind speed is reported in m/s; Meteostat uses km/h
        "convert": {"wspd": ms_to_kmh},
    },
    {
        "dir": "pressure",
        "usecols": [1, 3],
        "names": {"P": Parameter.PRES},
    },
    {
        "dir": "sun",
        "usecols": [1, 3],
        "names": {"SD_SO": Parameter.TSUN},
    },
    {
        "dir": "cloudiness",
        "usecols": [1, 4],
        "names": {"V_N": Parameter.CLDC},
    },
    {
        "dir": "visibility",
        "usecols": [1, 4],
        "names": {"V_VV": Parameter.VSBY},
    },
    {
        "dir": "weather_phenomena",
        "usecols": [1, 3],
        "names": {"WW": Parameter.COCO},
        # Raw WW codes are mapped to Meteostat condition codes
        "convert": {"coco": get_condicode},
        # This dataset is not UTF-8 encoded
        "encoding": "latin-1",
    },
    # TODO: Implement solar radiation
    # {
    #     "dir": "solar",
    #     "usecols": [1, 5],
    #     "names": {"FG_LBERG": "srad"},
    #     "convert": {"srad": jcm2_to_wm2},
    #     "historical_only": True,
    # },
]
90
+
91
+
92
def find_file(ftp: FTP, path: str, needle: str):
    """
    Locate a file whose name contains `needle` inside the given
    sub-directory of BASE_DIR.

    Returns the file name, or None when no entry matches. FTP errors
    (e.g. a missing directory) propagate to the caller.
    """
    ftp.cwd(BASE_DIR + path)

    try:
        match = next(entry for entry in ftp.nlst() if needle in entry)
    except StopIteration:
        logger.info(f"File '{needle}' not found in '{path}' directory")
        return None

    logger.debug(f"Found file '{match}' in '{path}' directory")
    return match
108
+
109
+
110
@cache_service.cache(TTL.DAY, "pickle")
def get_df(parameter_dir: str, mode: str, station_id: str) -> Optional[pd.DataFrame]:
    """
    Download one hourly DWD sub-dataset ZIP archive for a station and
    convert it to a pandas DataFrame indexed by time.

    :param parameter_dir: Sub-dataset directory name (must exist in PARAMETERS)
    :param mode: Dataset mode ("historical" or "recent")
    :param station_id: DWD ("national") station identifier
    :return: DataFrame of hourly observations, or None when no file
        exists on the server or the archive contains no data file
    """
    logger.debug(
        f"Fetching {parameter_dir} data ({mode}) for DWD station '{station_id}'"
    )

    parameter = next(param for param in PARAMETERS if param["dir"] == parameter_dir)

    ftp = get_ftp_connection()
    try:
        remote_file = find_file(ftp, f"{parameter['dir']}/{mode}", station_id)

        if remote_file is None:
            return None

        buffer = BytesIO()
        ftp.retrbinary("RETR " + remote_file, buffer.write)
    finally:
        # Close the connection on every path — it was previously never
        # closed here, unlike the daily/monthly providers
        ftp.close()

    # Extract the "produkt*" data file from the ZIP archive
    raw = None
    with ZipFile(buffer, "r") as zipped:
        for file in zipped.namelist():
            if file[:7] == "produkt":
                with zipped.open(file, "r") as reader:
                    raw = BytesIO(reader.read())

    if raw is None:
        # Archive did not contain a data file; previously this fell
        # through to pd.read_csv(None) and raised
        return None

    # Convert raw data to DataFrame
    df: pd.DataFrame = pd.read_csv(  # type: ignore
        raw,
        sep=";",
        skipinitialspace=True,
        date_format="%Y%m%d%H",
        na_values=[-999, "-999"],
        usecols=parameter["usecols"],
        encoding=parameter.get("encoding"),
    )

    df["time"] = pd.to_datetime(df.pop("MESS_DATUM"), format="%Y%m%d%H")

    logger.debug(f"Found {len(df)} rows in {remote_file}")

    # Strip whitespace from raw headers, then map to Meteostat parameters
    df = df.rename(columns=lambda x: x.strip())
    df = df.rename(columns=parameter["names"])

    # Apply per-column unit conversions (e.g. m/s -> km/h)
    if "convert" in parameter:
        for col, func in parameter["convert"].items():
            df[col] = df[col].apply(func)

    # Set index
    df = df.set_index("time")

    # Round decimals
    df = df.round(1)

    return df
170
+
171
+
172
def get_parameter(
    parameter_dir: str, modes: list[str], station: Station
) -> Optional[pd.DataFrame]:
    """
    Fetch one hourly sub-dataset for a station across the given modes
    and merge the results, keeping the first occurrence of duplicate
    timestamps.

    Returns None when no data is available or an error occurs; errors
    are logged with a warning instead of being raised.
    """
    logger.debug(f"Fetching {parameter_dir} data ({modes}) for station '{station.id}'")
    try:
        frames = []
        for mode in modes:
            frames.append(get_df(parameter_dir, mode, station.identifiers["national"]))

        if not any(frame is not None for frame in frames):
            return None

        merged = pd.concat(frames)
        return merged.loc[~merged.index.duplicated(keep="first")]
    except Exception as error:
        logger.warning(error, exc_info=True)
        return None
188
+
189
+
190
def fetch(req: ProviderRequest):
    """
    Fetch hourly DWD data for the station referenced in the request.

    Returns None when the station has no DWD ("national") identifier or
    no data could be retrieved for any requested parameter.
    """
    if "national" not in req.station.identifiers:
        return None

    # Check which modes to consider for data fetching
    #
    # The dataset is divided into a versioned part with completed quality check ("historical"),
    # and a part for which the quality check has not yet been completed ("recent").
    #
    # There is no definite answer as to when the quality check is completed. We're assuming a
    # period of 3 years here. If the end date of the query is within this period, we will also
    # consider the "recent" mode.
    modes = ["historical"]
    if abs((req.end - datetime.now()).days) < 3 * 365:
        modes.append("recent")

    # Only fetch sub-datasets that provide at least one requested parameter
    relevant = [
        param
        for param in PARAMETERS
        if not set(req.parameters).isdisjoint(param["names"].values())
    ]

    columns = [
        get_parameter(param["dir"], config.dwd_hourly_modes or modes, req.station)
        for param in relevant
    ]

    # get_parameter returns None on failure; pd.concat raises a ValueError
    # when it receives no objects (or only None), so guard against that
    frames = [col for col in columns if col is not None]
    if not frames:
        return None

    return pd.concat(frames, axis=1)
@@ -0,0 +1,138 @@
1
"""
DWD national monthly data import routine

Get monthly data for weather stations in Germany.

The code is licensed under the MIT license.
"""

from datetime import datetime
from ftplib import FTP
from io import BytesIO
from typing import Optional
from zipfile import ZipFile

import pandas as pd

from meteostat.enumerations import TTL, Parameter
from meteostat.typing import ProviderRequest
from meteostat.core.cache import cache_service
from meteostat.utils.conversions import ms_to_kmh
from meteostat.providers.dwd.shared import get_ftp_connection


# Root directory of the DWD monthly "kl" climate observations on the FTP server
BASE_DIR = "/climate_environment/CDC/observations_germany/climate/monthly/kl/"
# CSV column indices which should be read
USECOLS = [1, 4, 5, 6, 7, 9, 10, 11, 12, 14]
# Parse the first CSV column as the "time" column
# NOTE(review): the dict-of-lists form of parse_dates is deprecated in
# recent pandas releases — confirm the pinned pandas version supports it
PARSE_DATES = {"time": [0]}
# Mapping of raw DWD column names to Meteostat parameters
NAMES = {
    "MO_N": Parameter.CLDC,
    "MO_TT": Parameter.TEMP,
    "MO_TX": Parameter.TMAX,
    "MO_TN": Parameter.TMIN,
    "MX_TX": Parameter.TXMX,
    "MX_TN": Parameter.TXMN,
    "MX_FX": Parameter.WPGT,
    "MO_SD_S": Parameter.TSUN,
    "MO_RR": Parameter.PRCP,
}
38
+
39
+
40
def find_file(ftp: FTP, mode: str, needle: str):
    """
    Find the first file in the BASE_DIR sub-directory `mode` whose name
    contains `needle`.

    Returns the file name, or None when the directory cannot be listed
    or no file matches.
    """
    match = None

    try:
        ftp.cwd(BASE_DIR + mode)
        files = ftp.nlst()
        matching = [f for f in files if needle in f]
        match = matching[0]
    except Exception:
        # Previously `except BaseException`, which also swallowed
        # KeyboardInterrupt/SystemExit. Narrowed to Exception so those
        # still propagate; FTP errors or an empty match list (IndexError)
        # simply yield None.
        pass

    return match
55
+
56
+
57
@cache_service.cache(TTL.WEEK, "pickle")
def get_df(station: str, mode: str) -> Optional[pd.DataFrame]:
    """
    Download the monthly "kl" ZIP archive for a DWD station and convert
    it to a pandas DataFrame indexed by time.

    :param station: DWD ("national") station identifier
    :param mode: Dataset mode ("historical" or "recent")
    :return: DataFrame of monthly observations, or None when no file
        exists for the station or the archive contains no data file
    """
    ftp = get_ftp_connection()
    try:
        remote_file = find_file(ftp, mode, f"_{station}_")

        if remote_file is None:
            return None

        buffer = BytesIO()
        ftp.retrbinary("RETR " + remote_file, buffer.write)
    finally:
        # Always release the connection — it was previously leaked when
        # no matching file was found (early return before ftp.close())
        ftp.close()

    # Extract the "produkt*" data file from the ZIP archive
    raw = None
    with ZipFile(buffer, "r") as zipped:
        for file in zipped.namelist():
            if file[:7] == "produkt":
                with zipped.open(file, "r") as reader:
                    raw = BytesIO(reader.read())

    if raw is None:
        # Archive did not contain a data file; previously this fell
        # through to pd.read_csv(None) and raised
        return None

    # Convert raw data to DataFrame
    df: pd.DataFrame = pd.read_csv(  # type: ignore
        raw,
        sep=r"\s*;\s*",
        date_format="%Y%m%d",
        na_values=["-999", -999],
        usecols=USECOLS,
        parse_dates=PARSE_DATES,
        engine="python",
    )

    # Strip whitespace from raw headers, then map to Meteostat parameters
    df = df.rename(columns=lambda x: x.strip())
    df = df.rename(columns=NAMES)

    # Convert units (Parameter keys used for consistency with the daily
    # provider): peak gust m/s -> km/h, sunshine * 60, cloud cover rounded
    df[Parameter.WPGT] = df[Parameter.WPGT].apply(ms_to_kmh)
    df[Parameter.TSUN] = df[Parameter.TSUN] * 60
    df[Parameter.TSUN] = df[Parameter.TSUN].round()
    df[Parameter.CLDC] = df[Parameter.CLDC].round()

    # Set index
    df = df.set_index("time")

    # Round decimals
    df = df.round(1)

    return df
110
+
111
+
112
def fetch(req: ProviderRequest):
    """
    Fetch monthly DWD data for the station referenced in the request.

    Returns an empty DataFrame when the station has no DWD ("national")
    identifier or no data could be retrieved.
    """
    if "national" not in req.station.identifiers:
        return pd.DataFrame()

    # Check which modes to consider for data fetching
    #
    # The dataset is divided into a versioned part with completed quality check ("historical"),
    # and a part for which the quality check has not yet been completed ("recent").
    #
    # There is no definite answer as to when the quality check is completed. We're assuming a
    # period of 3 years here. If the end date of the query is within this period, we will also
    # consider the "recent" mode.
    modes = ["historical"]
    if abs((req.end - datetime.now()).days) < 3 * 365:
        modes.append("recent")

    data = [
        get_df(
            req.station.identifiers["national"],
            mode,
        )
        for mode in modes
    ]

    # get_df returns None on failure; pd.concat raises a ValueError when
    # every element is None, so drop Nones and guard against "no data"
    frames = [df for df in data if df is not None]
    if not frames:
        return pd.DataFrame()

    df = pd.concat(frames)

    # Keep the first (quality-checked "historical") row for duplicate dates
    return df.loc[~df.index.duplicated(keep="first")]