meteostat 1.6.8__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
meteostat/__init__.py CHANGED
@@ -12,7 +12,7 @@ The code is licensed under the MIT license.
 """
 
 __appname__ = "meteostat"
-__version__ = "1.6.8"
+__version__ = "1.7.1"
 
 from .interface.base import Base
 from .interface.timeseries import TimeSeries
@@ -22,3 +22,14 @@ from meteostat.interface.hourly import Hourly
 from .interface.daily import Daily
 from .interface.monthly import Monthly
 from .interface.normals import Normals
+
+__all__ = [
+    "Base",
+    "TimeSeries",
+    "Stations",
+    "Point",
+    "Hourly",
+    "Daily",
+    "Monthly",
+    "Normals",
+]
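The added `__all__` makes the package's star-import surface explicit. A quick check of the effect, assuming nothing beyond an installed meteostat 1.7.1:

    import meteostat

    # __all__ now determines what "from meteostat import *" exposes
    print(meteostat.__all__)
    # ['Base', 'TimeSeries', 'Stations', 'Point', 'Hourly', 'Daily', 'Monthly', 'Normals']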
meteostat/core/cache.py CHANGED
@@ -53,7 +53,6 @@ def clear_cache(cls, max_age: int = None) -> None:
     """
 
     if os.path.exists(cls.cache_dir + os.sep + cls.cache_subdir):
-
         # Set max_age
         if max_age is None:
             max_age = cls.max_age
@@ -63,7 +62,6 @@ def clear_cache(cls, max_age: int = None) -> None:
 
         # Go through all files
         for file in os.listdir(cls.cache_dir + os.sep + cls.cache_subdir):
-
             # Get full path
             path = os.path.join(cls.cache_dir + os.sep + cls.cache_subdir, file)
 
meteostat/core/loader.py CHANGED
@@ -8,16 +8,19 @@ under the terms of the Creative Commons Attribution-NonCommercial
 The code is licensed under the MIT license.
 """
 
+from io import BytesIO
+from gzip import GzipFile
+from urllib.request import Request, ProxyHandler, build_opener
 from urllib.error import HTTPError
 from multiprocessing import Pool
 from multiprocessing.pool import ThreadPool
-from typing import Callable, Union
+from typing import Callable, List, Optional
 import pandas as pd
 from meteostat.core.warn import warn
 
 
 def processing_handler(
-    datasets: list, load: Callable[[dict], None], cores: int, threads: int
+    datasets: List, load: Callable[[dict], None], cores: int, threads: int
 ) -> None:
     """
     Load multiple datasets (simultaneously)
@@ -28,10 +31,8 @@ def processing_handler(
 
     # Multi-core processing
     if cores > 1 and len(datasets) > 1:
-
         # Create process pool
         with Pool(cores) as pool:
-
             # Process datasets in pool
             output = pool.starmap(load, datasets)
 
@@ -41,10 +42,8 @@ def processing_handler(
 
     # Multi-thread processing
     elif threads > 1 and len(datasets) > 1:
-
         # Create process pool
         with ThreadPool(threads) as pool:
-
             # Process datasets in pool
             output = pool.starmap(load, datasets)
 
@@ -54,12 +53,11 @@ def processing_handler(
 
     # Single-thread processing
     else:
-
        for dataset in datasets:
            output.append(load(*dataset))
 
     # Remove empty DataFrames
-    filtered = list(filter(lambda df: df.index.size > 0, output))
+    filtered = list(filter(lambda df: not df.empty, output))
 
     return pd.concat(filtered) if len(filtered) > 0 else output[0]
 
@@ -67,36 +65,36 @@ def processing_handler(
 def load_handler(
     endpoint: str,
     path: str,
-    columns: list,
-    types: Union[dict, None],
-    parse_dates: list,
-    coerce_dates: bool = False,
+    proxy: Optional[str] = None,
+    names: Optional[List] = None,
+    dtype: Optional[dict] = None,
+    parse_dates: Optional[List] = None,
+    default_df: Optional[pd.DataFrame] = None,
 ) -> pd.DataFrame:
     """
     Load a single CSV file into a DataFrame
     """
 
     try:
+        handlers = []
+
+        # Set a proxy
+        if proxy:
+            handlers.append(ProxyHandler({"http": proxy, "https": proxy}))
 
         # Read CSV file from Meteostat endpoint
-        df = pd.read_csv(
-            endpoint + path,
-            compression="gzip",
-            names=columns,
-            dtype=types,
-            parse_dates=parse_dates,
-        )
-
-        # Force datetime conversion
-        if coerce_dates:
-            df.iloc[:, parse_dates] = df.iloc[:, parse_dates].apply(
-                pd.to_datetime, errors="coerce"
-            )
+        with build_opener(*handlers).open(Request(endpoint + path)) as response:
+            # Decompress the content
+            with GzipFile(fileobj=BytesIO(response.read()), mode="rb") as file:
+                df = pd.read_csv(
+                    file,
+                    names=names,
+                    dtype=dtype,
+                    parse_dates=parse_dates,
+                )
 
     except (FileNotFoundError, HTTPError):
-
-        # Create empty DataFrane
-        df = pd.DataFrame(columns=[*types])
+        df = default_df if default_df is not None else pd.DataFrame(columns=names)
 
     # Display warning
     warn(f"Cannot load {path} from {endpoint}")
meteostat/core/warn.py CHANGED
@@ -16,7 +16,7 @@ def _format(message, category, _filename, _lineno, _line=None) -> str:
     Print warning on a single line
     """
 
-    return "%s: %s\n" % (category.__name__, message)
+    return f"{category.__name__}: {message}\n"
 
 
 # Set warning format
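For context, the surrounding module assigns `_format` to `warnings.formatwarning` (per the `# Set warning format` comment above), which collapses Python's default multi-line warning output to a single line. A small sketch of the mechanism:

    import warnings

    def _format(message, category, _filename, _lineno, _line=None) -> str:
        return f"{category.__name__}: {message}\n"

    warnings.formatwarning = _format
    warnings.warn("Cannot load daily/10637.csv.gz from https://bulk.meteostat.net/v2/")
    # stderr: UserWarning: Cannot load daily/10637.csv.gz from https://bulk.meteostat.net/v2/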
meteostat/interface/base.py CHANGED
@@ -9,28 +9,31 @@ The code is licensed under the MIT license.
 """
 
 import os
+from typing import Optional
 
 
 class Base:
-
     """
     Base class that provides features which are used across the package
     """
 
     # Base URL of the Meteostat bulk data interface
-    endpoint: str = "https://bulk.meteostat.net/v2/"
+    endpoint = "https://bulk.meteostat.net/v2/"
+
+    # Proxy URL for the Meteostat (bulk) data interface
+    proxy: Optional[str] = None
 
     # Location of the cache directory
-    cache_dir: str = os.path.expanduser("~") + os.sep + ".meteostat" + os.sep + "cache"
+    cache_dir = os.path.expanduser("~") + os.sep + ".meteostat" + os.sep + "cache"
 
     # Auto clean cache directories?
-    autoclean: bool = True
+    autoclean = True
 
     # Maximum age of a cached file in seconds
-    max_age: int = 24 * 60 * 60
+    max_age = 24 * 60 * 60
 
     # Number of processes used for processing files
-    processes: int = 1
+    processes = 1
 
     # Number of threads used for processing files
-    threads: int = 1
+    threads = 1
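Since `proxy` lives on `Base`, every interface inherits it, so a single assignment configures all bulk downloads, assuming the loaders forward it to `load_handler`'s new `proxy` parameter as the signature change above suggests. A usage sketch with a placeholder address:

    from meteostat import Base

    # Route all bulk-endpoint requests through an HTTP proxy (placeholder URL)
    Base.proxy = "http://proxy.example.com:8080"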
meteostat/interface/daily.py CHANGED
@@ -8,7 +8,7 @@ under the terms of the Creative Commons Attribution-NonCommercial
 The code is licensed under the MIT license.
 """
 
-from datetime import datetime
+from datetime import datetime, timedelta
 from typing import Union
 import pandas as pd
 from meteostat.enumerations.granularity import Granularity
@@ -18,33 +18,54 @@ from meteostat.interface.point import Point
 
 
 class Daily(TimeSeries):
-
     """
     Retrieve daily weather observations for one or multiple weather stations or
     a single geographical point
     """
 
     # The cache subdirectory
-    cache_subdir: str = "daily"
+    cache_subdir = "daily"
 
     # Granularity
     granularity = Granularity.DAILY
 
+    # Download data as annual chunks
+    # This cannot be changed and is only kept for backward compatibility
+    chunked = True
+
     # Default frequency
-    _freq: str = "1D"
+    _freq = "1D"
+
+    # Source mappings
+    _source_mappings = {
+        "dwd_daily": "A",
+        "eccc_daily": "A",
+        "ghcnd": "B",
+        "dwd_hourly": "C",
+        "eccc_hourly": "C",
+        "isd_lite": "D",
+        "synop": "E",
+        "dwd_poi": "E",
+        "metar": "F",
+        "model": "G",
+        "dwd_mosmix": "G",
+        "metno_forecast": "G",
+    }
 
     # Flag which represents model data
     _model_flag = "G"
 
     # Columns
-    _columns: list = [
-        "date",
-        "tavg",
+    _columns = [
+        "year",
+        "month",
+        "day",
+        {"tavg": "temp"},
         "tmin",
         "tmax",
         "prcp",
-        "snow",
-        "wdir",
+        {"snow": "snwd"},
+        {"wdir": None},
         "wspd",
         "wpgt",
         "pres",
@@ -52,27 +73,13 @@ class Daily(TimeSeries):
     ]
 
     # Index of first meteorological column
-    _first_met_col = 1
-
-    # Data types
-    _types: dict = {
-        "tavg": "float64",
-        "tmin": "float64",
-        "tmax": "float64",
-        "prcp": "float64",
-        "snow": "float64",
-        "wdir": "float64",
-        "wspd": "float64",
-        "wpgt": "float64",
-        "pres": "float64",
-        "tsun": "float64",
-    }
+    _first_met_col = 3
 
     # Columns for date parsing
-    _parse_dates: dict = {"time": [0]}
+    _parse_dates = ["year", "month", "day"]
 
     # Default aggregation functions
-    aggregations: dict = {
+    aggregations = {
         "tavg": "mean",
         "tmin": "min",
         "tmax": "max",
@@ -88,12 +95,18 @@ class Daily(TimeSeries):
     def __init__(
         self,
         loc: Union[pd.DataFrame, Point, list, str],  # Station(s) or geo point
-        start: datetime = None,
-        end: datetime = None,
-        model: bool = True,  # Include model data?
-        flags: bool = False,  # Load source flags?
+        start=datetime(1781, 1, 1, 0, 0, 0),
+        end=datetime.combine(
+            datetime.today().date() + timedelta(days=10), datetime.max.time()
+        ),
+        model=True,  # Include model data?
+        flags=False,  # Load source flags?
     ) -> None:
-
+        # Extract relevant years
+        if self.chunked:
+            self._annual_steps = [
+                start.year + i for i in range(end.year - start.year + 1)
+            ]
         # Initialize time series
         self._init_time_series(loc, start, end, model, flags)
 
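The raw daily schema changes from a single `date` column to separate `year`/`month`/`day` parts (hence `_parse_dates = ["year", "month", "day"]` and `_first_met_col = 3`), and dict entries in `_columns` now describe renames: `{"tavg": "temp"}` maps the raw `temp` column to the public `tavg` name, `{"snow": "snwd"}` maps `snwd` to `snow`, and `{"wdir": None}` keeps `wdir` without a raw counterpart. A toy sketch of assembling such date parts with pandas (sample values invented):

    import pandas as pd

    # Raw rows roughly as the bulk endpoint now delivers them (values invented)
    raw = pd.DataFrame(
        {"year": [2024, 2024], "month": [1, 1], "day": [14, 15], "temp": [2.1, 3.4]}
    )

    # Build a single datetime index from the parts and apply the rename
    raw["time"] = pd.to_datetime(raw[["year", "month", "day"]])
    df = (
        raw.set_index("time")
        .drop(columns=["year", "month", "day"])
        .rename(columns={"temp": "tavg"})
    )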
meteostat/interface/hourly.py CHANGED
@@ -9,50 +9,66 @@ The code is licensed under the MIT license.
 """
 
 from math import floor
-from datetime import datetime
-from typing import Union
+from datetime import datetime, timedelta
+from typing import Optional, Union
 import pytz
 import pandas as pd
 from meteostat.enumerations.granularity import Granularity
 from meteostat.utilities.aggregations import degree_mean
 from meteostat.interface.timeseries import TimeSeries
 from meteostat.interface.point import Point
+from meteostat.utilities.mutations import calculate_dwpt
 
 
 class Hourly(TimeSeries):
-
     """
     Retrieve hourly weather observations for one or multiple weather stations or
     a single geographical point
     """
 
     # The cache subdirectory
-    cache_subdir: str = "hourly"
+    cache_subdir = "hourly"
 
     # Granularity
     granularity = Granularity.HOURLY
 
     # Download data as annual chunks
-    chunked: bool = True
+    # This cannot be changed and is only kept for backward compatibility
+    chunked = True
 
     # The time zone
-    _timezone: str = None
+    _timezone: Optional[str] = None
 
     # Default frequency
-    _freq: str = "1H"
+    _freq = "1h"
+
+    # Source mappings
+    _source_mappings = {
+        "metar": "D",
+        "model": "E",
+        "isd_lite": "B",
+        "synop": "C",
+        "dwd_poi": "C",
+        "dwd_hourly": "A",
+        "dwd_mosmix": "E",
+        "metno_forecast": "E",
+        "eccc_hourly": "A",
+    }
 
     # Flag which represents model data
     _model_flag = "E"
 
     # Raw data columns
-    _columns: list = [
-        "date",
+    _columns = [
+        "year",
+        "month",
+        "day",
         "hour",
         "temp",
-        "dwpt",
+        {"dwpt": calculate_dwpt},
         "rhum",
         "prcp",
-        "snow",
+        {"snow": "snwd"},
         "wdir",
         "wspd",
         "wpgt",
@@ -62,28 +78,13 @@ class Hourly(TimeSeries):
     ]
 
     # Index of first meteorological column
-    _first_met_col = 2
-
-    # Data types
-    _types: dict = {
-        "temp": "float64",
-        "dwpt": "float64",
-        "rhum": "float64",
-        "prcp": "float64",
-        "snow": "float64",
-        "wdir": "float64",
-        "wspd": "float64",
-        "wpgt": "float64",
-        "pres": "float64",
-        "tsun": "float64",
-        "coco": "float64",
-    }
+    _first_met_col = 4
 
     # Columns for date parsing
-    _parse_dates: dict = {"time": [0, 1]}
+    _parse_dates = ["year", "month", "day", "hour"]
 
     # Default aggregation functions
-    aggregations: dict = {
+    aggregations = {
         "temp": "mean",
         "dwpt": "mean",
         "rhum": "mean",
@@ -98,22 +99,19 @@ class Hourly(TimeSeries):
     }
 
     def _set_time(
-        self, start: datetime = None, end: datetime = None, timezone: str = None
+        self,
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
+        timezone: Optional[str] = None,
     ) -> None:
         """
         Set & adapt the period's time zone
         """
-
-        # Don't use chunks if full dataset is requested
-        if start == None:
-            self.chunked = False
-
         if timezone:
             # Save timezone
             self._timezone = timezone
 
         if start and end:
-
             # Initialize time zone
             timezone = pytz.timezone(self._timezone)
 
@@ -124,7 +122,9 @@ class Hourly(TimeSeries):
             end = timezone.localize(end, is_dst=None).astimezone(pytz.utc)
 
         if self.chunked:
-            self._annual_steps = [start.year + i for i in range(end.year - start.year + 1)]
+            self._annual_steps = [
+                start.year + i for i in range(end.year - start.year + 1)
+            ]
 
         self._start = start
         self._end = end
@@ -132,13 +132,14 @@ class Hourly(TimeSeries):
     def __init__(
         self,
         loc: Union[pd.DataFrame, Point, list, str],  # Station(s) or geo point
-        start: datetime = None,
-        end: datetime = None,
-        timezone: str = None,
-        model: bool = True,  # Include model data?
-        flags: bool = False,  # Load source flags?
+        start=datetime(1890, 1, 1, 0, 0, 0),
+        end=datetime.combine(
+            datetime.today().date() + timedelta(days=10), datetime.max.time()
+        ),
+        timezone: Optional[str] = None,
+        model=True,  # Include model data?
+        flags=False,  # Load source flags?
     ) -> None:
-
         # Set time zone and adapt period
         self._set_time(start, end, timezone)
 
meteostat/interface/meteodata.py CHANGED
@@ -11,20 +11,17 @@ under the terms of the Creative Commons Attribution-NonCommercial
 The code is licensed under the MIT license.
 """
 
-from typing import Union
+from collections.abc import Callable
+from typing import Dict, List, Union
 import pandas as pd
 from meteostat.enumerations.granularity import Granularity
-from meteostat.core.cache import get_local_file_path, file_in_cache
-from meteostat.core.loader import processing_handler, load_handler
-from meteostat.utilities.mutations import localize, filter_time, adjust_temp
-from meteostat.utilities.validations import validate_series
+from meteostat.core.loader import processing_handler
+from meteostat.utilities.mutations import adjust_temp
 from meteostat.utilities.aggregations import weighted_average
-from meteostat.utilities.endpoint import generate_endpoint_path
 from meteostat.interface.base import Base
 
 
 class MeteoData(Base):
-
     """
     A parent class for both time series and
     climate normals data
@@ -36,73 +33,66 @@ class MeteoData(Base):
     # The data frame
     _data: pd.DataFrame = pd.DataFrame()
 
-    def _load_data(self, station: str, year: Union[int, None] = None) -> None:
+    @property
+    def _raw_columns(self) -> List[str]:
         """
-        Load file for a single station from Meteostat
+        Get the list of raw data columns, excluding any dicts with callable values
         """
-
-        # File name
-        file = generate_endpoint_path(self.granularity, station, year)
-
-        # Get local file path
-        path = get_local_file_path(self.cache_dir, self.cache_subdir, file)
-
-        # Check if file in cache
-        if self.max_age > 0 and file_in_cache(path, self.max_age):
-
-            # Read cached data
-            df = pd.read_pickle(path)
-
-        else:
-
-            # Get data from Meteostat
-            df = load_handler(
-                self.endpoint, file, self._columns, self._types, self._parse_dates
+        return [
+            list(col.values())[0] if isinstance(col, dict) else col
+            for col in self._columns
+            if not (
+                isinstance(col, dict)
+                and (
+                    isinstance(list(col.values())[0], Callable)
+                    or list(col.values())[0] is None
+                )
             )
+        ]
 
-        # Validate and prepare data for further processing
-        if self.granularity == Granularity.NORMALS and df.index.size > 0:
-            # Add weather station ID
-            # pylint: disable=unsupported-assignment-operation
-            df["station"] = station
-
-            # Set index
-            df = df.set_index(["station", "start", "end", "month"])
+    @property
+    def _processed_columns(self) -> List[str]:
+        """
+        Get the list of processed data columns, excluding any dicts with callable values
+        """
+        return [
+            list(col.keys())[0] if isinstance(col, dict) else col
+            for col in self._columns[self._first_met_col :]
+        ]
 
-        else:
-            df = validate_series(df, station)
-
-        # Save as Pickle
-        if self.max_age > 0:
-            df.to_pickle(path)
-
-        # Localize time column
-        if (
-            self.granularity == Granularity.HOURLY
-            and self._timezone is not None
-            and len(df.index) > 0
-        ):
-            df = localize(df, self._timezone)
-
-        # Filter time period and append to DataFrame
-        # pylint: disable=no-else-return
-        if self.granularity == Granularity.NORMALS and df.index.size > 0 and self._end:
-            # Get time index
-            end = df.index.get_level_values("end")
-            # Filter & return
-            return df.loc[end == self._end]
-        elif not self.granularity == Granularity.NORMALS:
-            df = filter_time(df, self._start, self._end)
-
-        # Return
-        return df
+    @property
+    def _renamed_columns(self) -> Dict[str, str]:
+        """
+        Get the dict of renamed data columns, including `_source` suffixes
+        """
+        return {
+            new_key: new_val
+            for d in self._columns
+            if isinstance(d, dict)
+            for k, v in d.items()
+            if not isinstance(v, Callable)
+            for new_key, new_val in ((v, k), (f"{v}_source", f"{k}_source"))
+        }
+
+    @property
+    def _virtual_columns(self) -> Dict[str, str]:
+        """
+        Get the dict of virtual data columns
+        """
+        return {
+            k: v
+            for d in self._columns
+            if isinstance(d, dict)
+            for k, v in d.items()
+            if isinstance(v, Callable)
+        }
 
     def _get_datasets(self) -> list:
         """
         Get list of datasets
        """
 
-        if self.granularity == Granularity.HOURLY and self.chunked:
+        if self.granularity in (Granularity.HOURLY, Granularity.DAILY):
             datasets = [
                 (str(station), year)
                 for station in self._stations
@@ -119,7 +109,6 @@ class MeteoData(Base):
         """
 
         if len(self._stations) > 0:
-
            # Get list of datasets
            datasets = self._get_datasets()
 
@@ -140,12 +129,10 @@ class MeteoData(Base):
         """
 
         if self._stations.size == 0 or self._data.size == 0:
-            return None
+            return
 
         if method == "nearest":
-
             if adapt_temp:
-
                 # Join elevation of involved weather stations
                 data = self._data.join(stations["elevation"], on="station")
 
@@ -156,7 +143,6 @@ class MeteoData(Base):
                 data = data.drop("elevation", axis=1).round(1)
 
             else:
-
                 data = self._data
 
             if self.granularity == Granularity.NORMALS:
@@ -168,7 +154,6 @@ class MeteoData(Base):
             ).agg("first")
 
         else:
-
             # Join score and elevation of involved weather stations
             data = self._data.join(stations[["score", "elevation"]], on="station")
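The four new properties all interpret the mixed `_columns` schema introduced in this release: plain strings pass through, `{"new": "raw"}` renames a raw column, `{"new": callable}` declares a virtual column computed after loading, and `{"new": None}` declares a column with no raw counterpart. A toy walk-through of the `_raw_columns` logic against an abbreviated `Daily`-style column list:

    from collections.abc import Callable

    columns = ["year", "month", "day", {"tavg": "temp"}, "tmin", {"wdir": None}]

    # Mirrors the _raw_columns property: keep plain strings and rename sources,
    # drop entries whose value is a callable or None
    raw = [
        list(col.values())[0] if isinstance(col, dict) else col
        for col in columns
        if not (
            isinstance(col, dict)
            and (
                isinstance(list(col.values())[0], Callable)
                or list(col.values())[0] is None
            )
        )
    ]
    assert raw == ["year", "month", "day", "temp", "tmin"]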