meteostat 1.6.8__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,29 +17,46 @@ from meteostat.interface.point import Point
17
17
 
18
18
 
19
19
  class Monthly(TimeSeries):
20
-
21
20
  """
22
21
  Retrieve monthly weather data for one or multiple weather stations or
23
22
  a single geographical point
24
23
  """
25
24
 
26
25
  # The cache subdirectory
27
- cache_subdir: str = "monthly"
26
+ cache_subdir = "monthly"
28
27
 
29
28
  # Granularity
30
29
  granularity = Granularity.MONTHLY
31
30
 
32
31
  # Default frequency
33
- _freq: str = "1MS"
32
+ _freq = "1MS"
33
+
34
+ # Source mappings
35
+ _source_mappings = {
36
+ "dwd_monthly": "A",
37
+ "eccc_monthly": "A",
38
+ "dwd_daily": "C",
39
+ "eccc_daily": "C",
40
+ "ghcnd": "D",
41
+ "dwd_hourly": "E",
42
+ "eccc_hourly": "E",
43
+ "isd_lite": "F",
44
+ "synop": "G",
45
+ "dwd_poi": "G",
46
+ "metar": "H",
47
+ "model": "I",
48
+ "dwd_mosmix": "I",
49
+ "metno_forecast": "I",
50
+ }
34
51
 
35
52
  # Flag which represents model data
36
53
  _model_flag = "I"
37
54
 
38
55
  # Columns
39
- _columns: list = [
56
+ _columns = [
40
57
  "year",
41
58
  "month",
42
- "tavg",
59
+ {"tavg": "temp"},
43
60
  "tmin",
44
61
  "tmax",
45
62
  "prcp",
@@ -51,22 +68,11 @@ class Monthly(TimeSeries):
51
68
  # Index of first meteorological column
52
69
  _first_met_col = 2
53
70
 
54
- # Data types
55
- _types: dict = {
56
- "tavg": "float64",
57
- "tmin": "float64",
58
- "tmax": "float64",
59
- "prcp": "float64",
60
- "wspd": "float64",
61
- "pres": "float64",
62
- "tsun": "float64",
63
- }
64
-
65
71
  # Columns for date parsing
66
- _parse_dates: dict = {"time": [0, 1]}
72
+ _parse_dates = ["year", "month"]
67
73
 
68
74
  # Default aggregation functions
69
- aggregations: dict = {
75
+ aggregations = {
70
76
  "tavg": "mean",
71
77
  "tmin": "mean",
72
78
  "tmax": "mean",
@@ -84,7 +90,6 @@ class Monthly(TimeSeries):
84
90
  model: bool = True, # Include model data?
85
91
  flags: bool = False, # Load source flags?
86
92
  ) -> None:
87
-
88
93
  # Set start date
89
94
  if start is not None:
90
95
  start = start.replace(day=1)
@@ -9,10 +9,13 @@ The code is licensed under the MIT license.
9
9
  """
10
10
 
11
11
  from copy import copy
12
- from typing import Union
12
+ from typing import Optional, Union
13
13
  from datetime import datetime
14
14
  import numpy as np
15
15
  import pandas as pd
16
+ from meteostat.core.cache import file_in_cache, get_local_file_path
17
+ from meteostat.core.loader import load_handler
18
+ from meteostat.utilities.endpoint import generate_endpoint_path
16
19
  from meteostat.enumerations.granularity import Granularity
17
20
  from meteostat.core.warn import warn
18
21
  from meteostat.interface.meteodata import MeteoData
@@ -20,32 +23,31 @@ from meteostat.interface.point import Point
20
23
 
21
24
 
22
25
  class Normals(MeteoData):
23
-
24
26
  """
25
27
  Retrieve climate normals for one or multiple weather stations or
26
28
  a single geographical point
27
29
  """
28
30
 
29
31
  # The cache subdirectory
30
- cache_subdir: str = "normals"
32
+ cache_subdir = "normals"
31
33
 
32
34
  # Granularity
33
35
  granularity = Granularity.NORMALS
34
36
 
35
37
  # The list of weather Stations
36
- _stations: pd.Index = None
38
+ _stations: Optional[pd.Index] = None
37
39
 
38
40
  # The first year of the period
39
- _start: int = None
41
+ _start: Optional[int] = None
40
42
 
41
43
  # The last year of the period
42
- _end: int = None
44
+ _end: Optional[int] = None
43
45
 
44
46
  # The data frame
45
47
  _data: pd.DataFrame = pd.DataFrame()
46
48
 
47
49
  # Columns
48
- _columns: list = [
50
+ _columns = [
49
51
  "start",
50
52
  "end",
51
53
  "month",
@@ -60,26 +62,62 @@ class Normals(MeteoData):
60
62
  # Index of first meteorological column
61
63
  _first_met_col = 3
62
64
 
63
- # Data types
64
- _types: dict = {
65
- "tmin": "float64",
66
- "tmax": "float64",
67
- "prcp": "float64",
68
- "wspd": "float64",
69
- "pres": "float64",
70
- "tsun": "float64",
71
- }
72
-
73
65
  # Which columns should be parsed as dates?
74
66
  _parse_dates = None
75
67
 
68
+ def _load_data(self, station: str, year: Optional[int] = None) -> None:
69
+ """
70
+ Load file for a single station from Meteostat
71
+ """
72
+
73
+ # File name
74
+ file = generate_endpoint_path(self.granularity, station, year)
75
+
76
+ # Get local file path
77
+ path = get_local_file_path(self.cache_dir, self.cache_subdir, file)
78
+
79
+ # Check if file in cache
80
+ if self.max_age > 0 and file_in_cache(path, self.max_age):
81
+ # Read cached data
82
+ df = pd.read_pickle(path)
83
+
84
+ else:
85
+ # Get data from Meteostat
86
+ df = load_handler(
87
+ self.endpoint,
88
+ file,
89
+ self.proxy,
90
+ self._columns,
91
+ )
92
+
93
+ # Validate and prepare data for further processing
94
+ if not df.empty:
95
+ # Add weather station ID
96
+ df["station"] = station
97
+
98
+ # Set index
99
+ df = df.set_index(["station", "start", "end", "month"])
100
+
101
+ # Save as Pickle
102
+ if self.max_age > 0:
103
+ df.to_pickle(path)
104
+
105
+ # Filter time period and append to DataFrame
106
+ if self.granularity == Granularity.NORMALS and not df.empty and self._end:
107
+ # Get time index
108
+ end = df.index.get_level_values("end")
109
+ # Filter & return
110
+ return df.loc[end == self._end]
111
+
112
+ # Return
113
+ return df
114
+
76
115
  def __init__(
77
116
  self,
78
117
  loc: Union[pd.DataFrame, Point, list, str],
79
118
  start: int = None,
80
119
  end: int = None,
81
120
  ) -> None:
82
-
83
121
  # Set list of weather stations
84
122
  if isinstance(loc, pd.DataFrame):
85
123
  self._stations = loc.index
@@ -146,7 +184,9 @@ class Normals(MeteoData):
146
184
  # Go through all periods
147
185
  for period in periods:
148
186
  # Create DataFrame
149
- df = pd.DataFrame(columns=temp._columns[temp._first_met_col :])
187
+ df = pd.DataFrame(
188
+ columns=temp._columns[temp._first_met_col :], dtype="float64"
189
+ )
150
190
  # Populate index columns
151
191
  df["month"] = range(1, 13)
152
192
  df["station"] = station
@@ -14,7 +14,6 @@ from meteostat.interface.stations import Stations
14
14
 
15
15
 
16
16
  class Point:
17
-
18
17
  """
19
18
  Automatically select weather stations by geographic location
20
19
  """
@@ -53,7 +52,6 @@ class Point:
53
52
  _alt: int = None
54
53
 
55
54
  def __init__(self, lat: float, lon: float, alt: int = None) -> None:
56
-
57
55
  self._lat = lat
58
56
  self._lon = lon
59
57
  self._alt = alt
@@ -90,7 +88,7 @@ class Point:
90
88
  # Apply inventory filter
91
89
  if freq and start and end:
92
90
  age = (datetime.now() - end).days
93
- if model == False or age > 180:
91
+ if model is False or age > 180:
94
92
  stations = stations.inventory(freq, (start, end))
95
93
 
96
94
  # Apply altitude filter
@@ -110,7 +108,6 @@ class Point:
110
108
 
111
109
  # Score values
112
110
  if self.radius:
113
-
114
111
  # Calculate score values
115
112
  stations["score"] = (
116
113
  (1 - (stations["distance"] / self.radius)) * self.weight_dist
@@ -19,7 +19,6 @@ from meteostat.utilities.helpers import get_distance
19
19
 
20
20
 
21
21
  class Stations(Base):
22
-
23
22
  """
24
23
  Select weather stations from the full list of stations
25
24
  """
@@ -80,15 +79,18 @@ class Stations(Base):
80
79
 
81
80
  # Check if file in cache
82
81
  if self.max_age > 0 and file_in_cache(path, self.max_age):
83
-
84
82
  # Read cached data
85
83
  df = pd.read_pickle(path)
86
84
 
87
85
  else:
88
-
89
86
  # Get data from Meteostat
90
87
  df = load_handler(
91
- self.endpoint, file, self._columns, self._types, self._parse_dates, True
88
+ self.endpoint,
89
+ file,
90
+ self.proxy,
91
+ self._columns,
92
+ self._types,
93
+ self._parse_dates,
92
94
  )
93
95
 
94
96
  # Add index
@@ -102,7 +104,6 @@ class Stations(Base):
102
104
  self._data = df
103
105
 
104
106
  def __init__(self) -> None:
105
-
106
107
  # Get all weather stations
107
108
  self._load()
108
109
 
@@ -179,12 +180,12 @@ class Stations(Base):
179
180
 
180
181
  if required is True:
181
182
  # Make sure data exists at all
182
- temp._data = temp._data[(pd.isna(temp._data[freq + "_start"]) == False)]
183
+ temp._data = temp._data[~pd.isna(temp._data[f"{freq}_start"])]
183
184
 
184
185
  elif isinstance(required, tuple):
185
186
  # Make sure data exists across period
186
187
  temp._data = temp._data[
187
- (pd.isna(temp._data[freq + "_start"]) == False)
188
+ (~pd.isna(temp._data[f"{freq}_start"]))
188
189
  & (temp._data[freq + "_start"] <= required[0])
189
190
  & (
190
191
  temp._data[freq + "_end"] + timedelta(seconds=temp.max_age)
@@ -195,7 +196,7 @@ class Stations(Base):
195
196
  else:
196
197
  # Make sure data exists on a certain day
197
198
  temp._data = temp._data[
198
- (pd.isna(temp._data[freq + "_start"]) == False)
199
+ (~pd.isna(temp._data[f"{freq}_start"]))
199
200
  & (temp._data[freq + "_start"] <= required)
200
201
  & (
201
202
  temp._data[freq + "_end"] + timedelta(seconds=temp.max_age)
@@ -9,72 +9,118 @@ The code is licensed under the MIT license.
9
9
  """
10
10
 
11
11
  from datetime import datetime
12
- from typing import Union
13
- import numpy as np
12
+ from typing import Optional, Union
14
13
  import pandas as pd
14
+ from meteostat.core.cache import file_in_cache, get_local_file_path
15
+ from meteostat.core.loader import load_handler
15
16
  from meteostat.enumerations.granularity import Granularity
16
- from meteostat.core.cache import get_local_file_path, file_in_cache
17
- from meteostat.core.loader import processing_handler, load_handler
18
- from meteostat.utilities.mutations import localize, filter_time
19
- from meteostat.utilities.validations import validate_series
20
17
  from meteostat.utilities.endpoint import generate_endpoint_path
18
+ from meteostat.utilities.mutations import filter_time, localize
19
+ from meteostat.utilities.validations import validate_series
20
+ from meteostat.utilities.helpers import get_flag_from_source_factory, with_suffix
21
21
  from meteostat.interface.point import Point
22
22
  from meteostat.interface.meteodata import MeteoData
23
23
 
24
24
 
25
25
  class TimeSeries(MeteoData):
26
-
27
26
  """
28
27
  TimeSeries class which provides features which are
29
28
  used across all time series classes
30
29
  """
31
30
 
31
+ # Base URL of the Meteostat bulk data interface
32
+ endpoint = "https://data.meteostat.net/"
33
+
32
34
  # The list of origin weather Stations
33
- _origin_stations: Union[pd.Index, None] = None
35
+ _origin_stations: Optional[pd.Index] = None
34
36
 
35
37
  # The start date
36
- _start: Union[datetime, None] = None
38
+ _start: Optional[datetime] = None
37
39
 
38
40
  # The end date
39
- _end: Union[datetime, None] = None
41
+ _end: Optional[datetime] = None
40
42
 
41
43
  # Include model data?
42
- _model: bool = True
44
+ _model = True
43
45
 
44
46
  # Fetch source flags?
45
- _flags = bool = False
47
+ _flags = False
46
48
 
47
- def _load_flags(self, station: str, year: Union[int, None] = None) -> None:
49
+ def _load_data(self, station: str, year: Optional[int] = None) -> None:
48
50
  """
49
- Load flag file for a single station from Meteostat
51
+ Load file for a single station from Meteostat
50
52
  """
51
-
52
53
  # File name
53
- file = generate_endpoint_path(self.granularity, station, year, True)
54
+ file = generate_endpoint_path(self.granularity, station, year)
54
55
 
55
56
  # Get local file path
56
57
  path = get_local_file_path(self.cache_dir, self.cache_subdir, file)
57
58
 
58
59
  # Check if file in cache
59
60
  if self.max_age > 0 and file_in_cache(path, self.max_age):
60
-
61
61
  # Read cached data
62
62
  df = pd.read_pickle(path)
63
63
 
64
64
  else:
65
-
66
65
  # Get data from Meteostat
67
66
  df = load_handler(
68
67
  self.endpoint,
69
68
  file,
70
- self._columns,
71
- {key: "string" for key in self._columns[self._first_met_col :]},
72
- self._parse_dates,
69
+ self.proxy,
70
+ default_df=pd.DataFrame(
71
+ columns=self._raw_columns
72
+ + with_suffix(self._raw_columns, "_source")
73
+ ),
74
+ )
75
+
76
+ # Add time column and drop original columns
77
+ if len(self._parse_dates) < 3:
78
+ df["day"] = 1
79
+
80
+ df["time"] = pd.to_datetime(
81
+ df[
82
+ (
83
+ self._parse_dates
84
+ if len(self._parse_dates) > 2
85
+ else self._parse_dates + ["day"]
86
+ )
87
+ ]
73
88
  )
89
+ df = df.drop(self._parse_dates, axis=1)
74
90
 
75
- # Validate Series
91
+ # Validate and prepare data for further processing
76
92
  df = validate_series(df, station)
77
93
 
94
+ # Rename columns
95
+ df = df.rename(columns=self._renamed_columns, errors="ignore")
96
+
97
+ # Convert sources to flags
98
+ for col in df.columns:
99
+ basecol = col[:-7] if col.endswith("_source") else col
100
+
101
+ if basecol not in self._processed_columns:
102
+ df.drop(col, axis=1, inplace=True)
103
+ continue
104
+
105
+ if basecol == col:
106
+ df[col] = df[col].astype("Float64")
107
+
108
+ if col.endswith("_source"):
109
+ flagcol = f"{basecol}_flag"
110
+ df[flagcol] = pd.NA
111
+ df[flagcol] = df[flagcol].astype("string")
112
+ mask = df[col].notna()
113
+ df.loc[mask, flagcol] = df.loc[mask, col].apply(
114
+ get_flag_from_source_factory(
115
+ self._source_mappings, self._model_flag
116
+ )
117
+ )
118
+ df.drop(col, axis=1, inplace=True)
119
+
120
+ # Process virtual columns
121
+ for key, value in self._virtual_columns.items():
122
+ df = value(df, key)
123
+
78
124
  # Save as Pickle
79
125
  if self.max_age > 0:
80
126
  df.to_pickle(path)
@@ -88,59 +134,33 @@ class TimeSeries(MeteoData):
88
134
  df = localize(df, self._timezone)
89
135
 
90
136
  # Filter time period and append to DataFrame
91
- if self._start and self._end:
92
- df = filter_time(df, self._start, self._end)
137
+ df = filter_time(df, self._start, self._end)
93
138
 
139
+ # Return
94
140
  return df
95
141
 
96
- def _get_flags(self) -> None:
97
- """
98
- Get all source flags
99
- """
100
-
101
- if len(self._stations) > 0:
102
-
103
- # Get list of datasets
104
- datasets = self._get_datasets()
105
-
106
- # Data Processings
107
- return processing_handler(
108
- datasets, self._load_flags, self.processes, self.threads
109
- )
110
-
111
- # Empty DataFrame
112
- return pd.DataFrame(columns=[*self._types])
113
-
114
142
  def _filter_model(self) -> None:
115
143
  """
116
144
  Remove model data from time series
117
145
  """
118
146
 
119
- columns = self._columns[self._first_met_col :]
120
-
121
- for col_name in columns:
147
+ for col_name in self._processed_columns:
122
148
  self._data.loc[
123
149
  (pd.isna(self._data[f"{col_name}_flag"]))
124
150
  | (self._data[f"{col_name}_flag"].str.contains(self._model_flag)),
125
151
  col_name,
126
- ] = np.nan
127
-
128
- # Conditionally, remove flags from DataFrame
129
- if not self._flags:
130
- self._data.drop(
131
- map(lambda col_name: f"{col_name}_flag", columns), axis=1, inplace=True
132
- )
152
+ ] = pd.NA
133
153
 
134
154
  # Drop nan-only rows
135
- self._data.dropna(how="all", subset=columns, inplace=True)
155
+ self._data.dropna(how="all", subset=self._processed_columns, inplace=True)
136
156
 
137
157
  def _init_time_series(
138
158
  self,
139
159
  loc: Union[pd.DataFrame, Point, list, str], # Station(s) or geo point
140
160
  start: datetime = None,
141
161
  end: datetime = None,
142
- model: bool = True, # Include model data?
143
- flags: bool = False, # Load source flags?
162
+ model=True, # Include model data?
163
+ flags=False, # Load source flags?
144
164
  ) -> None:
145
165
  """
146
166
  Common initialization for all time series, regardless
@@ -169,20 +189,32 @@ class TimeSeries(MeteoData):
169
189
  # Get data for all weather stations
170
190
  self._data = self._get_data()
171
191
 
172
- # Load source flags through map file
173
- # if flags are explicitly requested or
174
- # model data is excluded
175
- if flags or not model:
176
- flags = self._get_flags()
177
- self._data = self._data.merge(
178
- flags, on=["station", "time"], how="left", suffixes=[None, "_flag"]
179
- )
192
+ # Fill columns if they don't exist
193
+ for col in self._processed_columns:
194
+ if col not in self._data.columns:
195
+ self._data[col] = pd.NA
196
+ self._data[col] = self._data[col].astype("Float64")
197
+ self._data[f"{col}_flag"] = pd.NA
198
+ self._data[f"{col}_flag"] = self._data[f"{col}_flag"].astype("string")
180
199
 
181
- # Remove model data from DataFrame and
182
- # drop flags if not specified otherwise
200
+ # Reorder the DataFrame
201
+ self._data = self._data[
202
+ self._processed_columns + with_suffix(self._processed_columns, "_flag")
203
+ ]
204
+
205
+ # Remove model data from DataFrame
183
206
  if not model:
184
207
  self._filter_model()
185
208
 
209
+ # Conditionally, remove flags from DataFrame
210
+ if not self._flags:
211
+ self._data.drop(
212
+ with_suffix(self._processed_columns, "_flag"),
213
+ axis=1,
214
+ errors="ignore",
215
+ inplace=True,
216
+ )
217
+
186
218
  # Interpolate data spatially if requested
187
219
  # location is a geographical point
188
220
  if isinstance(loc, Point):
@@ -19,7 +19,6 @@ def aggregate(self, freq: str = None, spatial: bool = False):
19
19
  """
20
20
 
21
21
  if self.count() > 0 and not self._data.isnull().values.all():
22
-
23
22
  # Create temporal instance
24
23
  temp = copy(self)
25
24
 
@@ -21,7 +21,7 @@ def convert(self, units: dict):
21
21
 
22
22
  # Change data units
23
23
  for parameter, unit in units.items():
24
- if parameter in temp._columns:
24
+ if parameter in temp._processed_columns:
25
25
  temp._data[parameter] = temp._data[parameter].apply(unit)
26
26
 
27
27
  # Return class instance
@@ -9,6 +9,7 @@ The code is licensed under the MIT license.
9
9
  """
10
10
 
11
11
  from copy import copy
12
+ import numpy as np
12
13
  from meteostat.core.warn import warn
13
14
 
14
15
 
@@ -18,17 +19,26 @@ def interpolate(self, limit: int = 3):
18
19
  """
19
20
 
20
21
  if self.count() > 0 and not self._data.isnull().values.all():
21
-
22
22
  # Create temporal instance
23
23
  temp = copy(self)
24
24
 
25
+ # Convert to float64
26
+ temp._data = temp._data.astype("float64")
27
+
25
28
  # Apply interpolation
26
29
  temp._data = temp._data.groupby("station", group_keys=False).apply(
27
30
  lambda group: group.interpolate(
28
- method="linear", limit=limit, limit_direction="both", axis=0
31
+ method="linear",
32
+ limit=limit,
33
+ limit_direction="both",
34
+ axis=0,
35
+ fill_value=np.nan,
29
36
  )
30
37
  )
31
38
 
39
+ # Convert to original type
40
+ temp._data = temp._data.astype("Float64")
41
+
32
42
  # Return class instance
33
43
  return temp
34
44
 
@@ -27,9 +27,8 @@ def normalize(self):
27
27
  temp = copy(self)
28
28
 
29
29
  if temp._start and temp._end and temp.coverage() < 1:
30
-
31
30
  # Create result DataFrame
32
- result = pd.DataFrame(columns=temp._columns[temp._first_met_col :])
31
+ result = pd.DataFrame(columns=temp._processed_columns, dtype="Float64")
33
32
 
34
33
  # Handle tz-aware date ranges
35
34
  if hasattr(temp, "_timezone") and temp._timezone is not None:
@@ -43,7 +42,7 @@ def normalize(self):
43
42
  # Go through list of weather stations
44
43
  for station in temp._stations:
45
44
  # Create data frame
46
- df = pd.DataFrame(columns=temp._columns[temp._first_met_col :])
45
+ df = pd.DataFrame(columns=temp._processed_columns, dtype="Float64")
47
46
  # Add time series
48
47
  df["time"] = pd.date_range(
49
48
  start,
@@ -54,7 +53,7 @@ def normalize(self):
54
53
  # Add station ID
55
54
  df["station"] = station
56
55
  # Add columns
57
- for column in temp._columns[temp._first_met_col :]:
56
+ for column in temp._processed_columns:
58
57
  # Add column to DataFrame
59
58
  df[column] = nan
60
59
 
@@ -71,7 +70,7 @@ def normalize(self):
71
70
  )
72
71
 
73
72
  # None -> nan
74
- temp._data = temp._data.fillna(nan)
73
+ temp._data = temp._data.fillna(pd.NA)
75
74
 
76
75
  # Return class instance
77
76
  return temp
@@ -25,7 +25,7 @@ def generate_endpoint_path(
25
25
  # Base path
26
26
  path = f"{granularity.value}/"
27
27
 
28
- if granularity == Granularity.HOURLY and year:
28
+ if granularity in (Granularity.HOURLY, Granularity.DAILY) and year:
29
29
  path += f"{year}/"
30
30
 
31
31
  appendix = ".map" if map_file else ""