meteostat 1.6.8__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
meteostat/__init__.py CHANGED
@@ -12,7 +12,7 @@ The code is licensed under the MIT license.
 """
 
 __appname__ = "meteostat"
-__version__ = "1.6.8"
+__version__ = "1.7.1"
 
 from .interface.base import Base
 from .interface.timeseries import TimeSeries
@@ -22,3 +22,14 @@ from meteostat.interface.hourly import Hourly
 from .interface.daily import Daily
 from .interface.monthly import Monthly
 from .interface.normals import Normals
+
+__all__ = [
+    "Base",
+    "TimeSeries",
+    "Stations",
+    "Point",
+    "Hourly",
+    "Daily",
+    "Monthly",
+    "Normals",
+]
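The added `__all__` makes the package's star-import surface explicit. A quick check of the effect, assuming nothing beyond an installed meteostat 1.7.1:

    import meteostat

    # __all__ now determines what "from meteostat import *" exposes
    print(meteostat.__all__)
    # ['Base', 'TimeSeries', 'Stations', 'Point', 'Hourly', 'Daily', 'Monthly', 'Normals']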
meteostat/core/cache.py CHANGED
@@ -53,7 +53,6 @@ def clear_cache(cls, max_age: int = None) -> None:
     """
 
     if os.path.exists(cls.cache_dir + os.sep + cls.cache_subdir):
-
         # Set max_age
         if max_age is None:
             max_age = cls.max_age
@@ -63,7 +62,6 @@ def clear_cache(cls, max_age: int = None) -> None:
 
         # Go through all files
         for file in os.listdir(cls.cache_dir + os.sep + cls.cache_subdir):
-
             # Get full path
             path = os.path.join(cls.cache_dir + os.sep + cls.cache_subdir, file)
 
meteostat/core/loader.py CHANGED
@@ -8,16 +8,19 @@ under the terms of the Creative Commons Attribution-NonCommercial
 The code is licensed under the MIT license.
 """
 
+from io import BytesIO
+from gzip import GzipFile
+from urllib.request import Request, ProxyHandler, build_opener
 from urllib.error import HTTPError
 from multiprocessing import Pool
 from multiprocessing.pool import ThreadPool
-from typing import Callable, Union
+from typing import Callable, List, Optional
 import pandas as pd
 from meteostat.core.warn import warn
 
 
 def processing_handler(
-    datasets: list, load: Callable[[dict], None], cores: int, threads: int
+    datasets: List, load: Callable[[dict], None], cores: int, threads: int
 ) -> None:
     """
     Load multiple datasets (simultaneously)
@@ -28,10 +31,8 @@ def processing_handler(
 
     # Multi-core processing
     if cores > 1 and len(datasets) > 1:
-
         # Create process pool
         with Pool(cores) as pool:
-
             # Process datasets in pool
             output = pool.starmap(load, datasets)
 
@@ -41,10 +42,8 @@ def processing_handler(
 
     # Multi-thread processing
     elif threads > 1 and len(datasets) > 1:
-
         # Create process pool
         with ThreadPool(threads) as pool:
-
             # Process datasets in pool
             output = pool.starmap(load, datasets)
 
@@ -54,12 +53,11 @@ def processing_handler(
 
     # Single-thread processing
     else:
-
        for dataset in datasets:
            output.append(load(*dataset))
 
     # Remove empty DataFrames
-    filtered = list(filter(lambda df: df.index.size > 0, output))
+    filtered = list(filter(lambda df: not df.empty, output))
 
     return pd.concat(filtered) if len(filtered) > 0 else output[0]
 
@@ -67,36 +65,36 @@ def processing_handler(
 def load_handler(
     endpoint: str,
     path: str,
-    columns: list,
-    types: Union[dict, None],
-    parse_dates: list,
-    coerce_dates: bool = False,
+    proxy: Optional[str] = None,
+    names: Optional[List] = None,
+    dtype: Optional[dict] = None,
+    parse_dates: Optional[List] = None,
+    default_df: Optional[pd.DataFrame] = None,
 ) -> pd.DataFrame:
     """
     Load a single CSV file into a DataFrame
     """
 
     try:
+        handlers = []
+
+        # Set a proxy
+        if proxy:
+            handlers.append(ProxyHandler({"http": proxy, "https": proxy}))
 
         # Read CSV file from Meteostat endpoint
-        df = pd.read_csv(
-            endpoint + path,
-            compression="gzip",
-            names=columns,
-            dtype=types,
-            parse_dates=parse_dates,
-        )
-
-        # Force datetime conversion
-        if coerce_dates:
-            df.iloc[:, parse_dates] = df.iloc[:, parse_dates].apply(
-                pd.to_datetime, errors="coerce"
-            )
+        with build_opener(*handlers).open(Request(endpoint + path)) as response:
+            # Decompress the content
+            with GzipFile(fileobj=BytesIO(response.read()), mode="rb") as file:
+                df = pd.read_csv(
+                    file,
+                    names=names,
+                    dtype=dtype,
+                    parse_dates=parse_dates,
+                )
 
     except (FileNotFoundError, HTTPError):
-
-        # Create empty DataFrane
-        df = pd.DataFrame(columns=[*types])
+        df = default_df if default_df is not None else pd.DataFrame(columns=names)
 
     # Display warning
     warn(f"Cannot load {path} from {endpoint}")
meteostat/core/warn.py CHANGED
@@ -16,7 +16,7 @@ def _format(message, category, _filename, _lineno, _line=None) -> str:
     Print warning on a single line
     """
 
-    return "%s: %s\n" % (category.__name__, message)
+    return f"{category.__name__}: {message}\n"
 
 
 # Set warning format
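For context, the surrounding module assigns `_format` to `warnings.formatwarning` (per the `# Set warning format` comment above), which collapses Python's default multi-line warning output to a single line. A small sketch of the mechanism:

    import warnings

    def _format(message, category, _filename, _lineno, _line=None) -> str:
        return f"{category.__name__}: {message}\n"

    warnings.formatwarning = _format
    warnings.warn("Cannot load daily/10637.csv.gz from https://bulk.meteostat.net/v2/")
    # stderr: UserWarning: Cannot load daily/10637.csv.gz from https://bulk.meteostat.net/v2/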
meteostat/interface/base.py CHANGED
@@ -9,28 +9,31 @@ The code is licensed under the MIT license.
 """
 
 import os
+from typing import Optional
 
 
 class Base:
-
     """
     Base class that provides features which are used across the package
     """
 
     # Base URL of the Meteostat bulk data interface
-    endpoint: str = "https://bulk.meteostat.net/v2/"
+    endpoint = "https://bulk.meteostat.net/v2/"
+
+    # Proxy URL for the Meteostat (bulk) data interface
+    proxy: Optional[str] = None
 
     # Location of the cache directory
-    cache_dir: str = os.path.expanduser("~") + os.sep + ".meteostat" + os.sep + "cache"
+    cache_dir = os.path.expanduser("~") + os.sep + ".meteostat" + os.sep + "cache"
 
     # Auto clean cache directories?
-    autoclean: bool = True
+    autoclean = True
 
     # Maximum age of a cached file in seconds
-    max_age: int = 24 * 60 * 60
+    max_age = 24 * 60 * 60
 
     # Number of processes used for processing files
-    processes: int = 1
+    processes = 1
 
     # Number of threads used for processing files
-    threads: int = 1
+    threads = 1
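Since `proxy` lives on `Base`, every interface inherits it, so a single assignment configures all bulk downloads, assuming the loaders forward it to `load_handler`'s new `proxy` parameter as the signature change above suggests. A usage sketch with a placeholder address:

    from meteostat import Base

    # Route all bulk-endpoint requests through an HTTP proxy (placeholder URL)
    Base.proxy = "http://proxy.example.com:8080"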
meteostat/interface/daily.py CHANGED
@@ -8,7 +8,7 @@ under the terms of the Creative Commons Attribution-NonCommercial
 The code is licensed under the MIT license.
 """
 
-from datetime import datetime
+from datetime import datetime, timedelta
 from typing import Union
 import pandas as pd
 from meteostat.enumerations.granularity import Granularity
@@ -18,33 +18,54 @@ from meteostat.interface.point import Point
 
 
 class Daily(TimeSeries):
-
     """
     Retrieve daily weather observations for one or multiple weather stations or
     a single geographical point
     """
 
     # The cache subdirectory
-    cache_subdir: str = "daily"
+    cache_subdir = "daily"
 
     # Granularity
     granularity = Granularity.DAILY
 
+    # Download data as annual chunks
+    # This cannot be changed and is only kept for backward compatibility
+    chunked = True
+
     # Default frequency
-    _freq: str = "1D"
+    _freq = "1D"
+
+    # Source mappings
+    _source_mappings = {
+        "dwd_daily": "A",
+        "eccc_daily": "A",
+        "ghcnd": "B",
+        "dwd_hourly": "C",
+        "eccc_hourly": "C",
+        "isd_lite": "D",
+        "synop": "E",
+        "dwd_poi": "E",
+        "metar": "F",
+        "model": "G",
+        "dwd_mosmix": "G",
+        "metno_forecast": "G",
+    }
 
     # Flag which represents model data
     _model_flag = "G"
 
     # Columns
-    _columns: list = [
-        "date",
-        "tavg",
+    _columns = [
+        "year",
+        "month",
+        "day",
+        {"tavg": "temp"},
         "tmin",
         "tmax",
         "prcp",
-        "snow",
-        "wdir",
+        {"snow": "snwd"},
+        {"wdir": None},
         "wspd",
         "wpgt",
         "pres",
@@ -52,27 +73,13 @@ class Daily(TimeSeries):
     ]
 
     # Index of first meteorological column
-    _first_met_col = 1
-
-    # Data types
-    _types: dict = {
-        "tavg": "float64",
-        "tmin": "float64",
-        "tmax": "float64",
-        "prcp": "float64",
-        "snow": "float64",
-        "wdir": "float64",
-        "wspd": "float64",
-        "wpgt": "float64",
-        "pres": "float64",
-        "tsun": "float64",
-    }
+    _first_met_col = 3
 
     # Columns for date parsing
-    _parse_dates: dict = {"time": [0]}
+    _parse_dates = ["year", "month", "day"]
 
     # Default aggregation functions
-    aggregations: dict = {
+    aggregations = {
         "tavg": "mean",
         "tmin": "min",
         "tmax": "max",
@@ -88,12 +95,18 @@ class Daily(TimeSeries):
     def __init__(
         self,
         loc: Union[pd.DataFrame, Point, list, str],  # Station(s) or geo point
-        start: datetime = None,
-        end: datetime = None,
-        model: bool = True,  # Include model data?
-        flags: bool = False,  # Load source flags?
+        start=datetime(1781, 1, 1, 0, 0, 0),
+        end=datetime.combine(
+            datetime.today().date() + timedelta(days=10), datetime.max.time()
+        ),
+        model=True,  # Include model data?
+        flags=False,  # Load source flags?
     ) -> None:
-
+        # Extract relevant years
+        if self.chunked:
+            self._annual_steps = [
+                start.year + i for i in range(end.year - start.year + 1)
+            ]
         # Initialize time series
         self._init_time_series(loc, start, end, model, flags)
 
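The raw daily schema changes from a single `date` column to separate `year`/`month`/`day` parts (hence `_parse_dates = ["year", "month", "day"]` and `_first_met_col = 3`), and dict entries in `_columns` now describe renames: `{"tavg": "temp"}` maps the raw `temp` column to the public `tavg` name, `{"snow": "snwd"}` maps `snwd` to `snow`, and `{"wdir": None}` keeps `wdir` without a raw counterpart. A toy sketch of assembling such date parts with pandas (sample values invented):

    import pandas as pd

    # Raw rows roughly as the bulk endpoint now delivers them (values invented)
    raw = pd.DataFrame(
        {"year": [2024, 2024], "month": [1, 1], "day": [14, 15], "temp": [2.1, 3.4]}
    )

    # Build a single datetime index from the parts and apply the rename
    raw["time"] = pd.to_datetime(raw[["year", "month", "day"]])
    df = (
        raw.set_index("time")
        .drop(columns=["year", "month", "day"])
        .rename(columns={"temp": "tavg"})
    )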
meteostat/interface/hourly.py CHANGED
@@ -9,50 +9,66 @@ The code is licensed under the MIT license.
 """
 
 from math import floor
-from datetime import datetime
-from typing import Union
+from datetime import datetime, timedelta
+from typing import Optional, Union
 import pytz
 import pandas as pd
 from meteostat.enumerations.granularity import Granularity
 from meteostat.utilities.aggregations import degree_mean
 from meteostat.interface.timeseries import TimeSeries
 from meteostat.interface.point import Point
+from meteostat.utilities.mutations import calculate_dwpt
 
 
 class Hourly(TimeSeries):
-
     """
     Retrieve hourly weather observations for one or multiple weather stations or
     a single geographical point
     """
 
     # The cache subdirectory
-    cache_subdir: str = "hourly"
+    cache_subdir = "hourly"
 
     # Granularity
     granularity = Granularity.HOURLY
 
     # Download data as annual chunks
-    chunked: bool = True
+    # This cannot be changed and is only kept for backward compatibility
+    chunked = True
 
     # The time zone
-    _timezone: str = None
+    _timezone: Optional[str] = None
 
     # Default frequency
-    _freq: str = "1H"
+    _freq = "1h"
+
+    # Source mappings
+    _source_mappings = {
+        "metar": "D",
+        "model": "E",
+        "isd_lite": "B",
+        "synop": "C",
+        "dwd_poi": "C",
+        "dwd_hourly": "A",
+        "dwd_mosmix": "E",
+        "metno_forecast": "E",
+        "eccc_hourly": "A",
+    }
 
     # Flag which represents model data
     _model_flag = "E"
 
     # Raw data columns
-    _columns: list = [
-        "date",
+    _columns = [
+        "year",
+        "month",
+        "day",
         "hour",
         "temp",
-        "dwpt",
+        {"dwpt": calculate_dwpt},
         "rhum",
         "prcp",
-        "snow",
+        {"snow": "snwd"},
         "wdir",
         "wspd",
         "wpgt",
@@ -62,28 +78,13 @@ class Hourly(TimeSeries):
     ]
 
     # Index of first meteorological column
-    _first_met_col = 2
-
-    # Data types
-    _types: dict = {
-        "temp": "float64",
-        "dwpt": "float64",
-        "rhum": "float64",
-        "prcp": "float64",
-        "snow": "float64",
-        "wdir": "float64",
-        "wspd": "float64",
-        "wpgt": "float64",
-        "pres": "float64",
-        "tsun": "float64",
-        "coco": "float64",
-    }
+    _first_met_col = 4
 
     # Columns for date parsing
-    _parse_dates: dict = {"time": [0, 1]}
+    _parse_dates = ["year", "month", "day", "hour"]
 
     # Default aggregation functions
-    aggregations: dict = {
+    aggregations = {
         "temp": "mean",
         "dwpt": "mean",
         "rhum": "mean",
@@ -98,22 +99,19 @@ class Hourly(TimeSeries):
     }
 
     def _set_time(
-        self, start: datetime = None, end: datetime = None, timezone: str = None
+        self,
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
+        timezone: Optional[str] = None,
     ) -> None:
         """
         Set & adapt the period's time zone
         """
-
-        # Don't use chunks if full dataset is requested
-        if start == None:
-            self.chunked = False
-
         if timezone:
             # Save timezone
             self._timezone = timezone
 
         if start and end:
-
             # Initialize time zone
             timezone = pytz.timezone(self._timezone)
 
@@ -124,7 +122,9 @@ class Hourly(TimeSeries):
             end = timezone.localize(end, is_dst=None).astimezone(pytz.utc)
 
         if self.chunked:
-            self._annual_steps = [start.year + i for i in range(end.year - start.year + 1)]
+            self._annual_steps = [
+                start.year + i for i in range(end.year - start.year + 1)
+            ]
 
         self._start = start
         self._end = end
@@ -132,13 +132,14 @@ class Hourly(TimeSeries):
     def __init__(
         self,
         loc: Union[pd.DataFrame, Point, list, str],  # Station(s) or geo point
-        start: datetime = None,
-        end: datetime = None,
-        timezone: str = None,
-        model: bool = True,  # Include model data?
-        flags: bool = False,  # Load source flags?
+        start=datetime(1890, 1, 1, 0, 0, 0),
+        end=datetime.combine(
+            datetime.today().date() + timedelta(days=10), datetime.max.time()
+        ),
+        timezone: Optional[str] = None,
+        model=True,  # Include model data?
+        flags=False,  # Load source flags?
     ) -> None:
-
         # Set time zone and adapt period
         self._set_time(start, end, timezone)
 
meteostat/interface/meteodata.py CHANGED
@@ -11,20 +11,17 @@ under the terms of the Creative Commons Attribution-NonCommercial
 The code is licensed under the MIT license.
 """
 
-from typing import Union
+from collections.abc import Callable
+from typing import Dict, List, Union
 import pandas as pd
 from meteostat.enumerations.granularity import Granularity
-from meteostat.core.cache import get_local_file_path, file_in_cache
-from meteostat.core.loader import processing_handler, load_handler
-from meteostat.utilities.mutations import localize, filter_time, adjust_temp
-from meteostat.utilities.validations import validate_series
+from meteostat.core.loader import processing_handler
+from meteostat.utilities.mutations import adjust_temp
 from meteostat.utilities.aggregations import weighted_average
-from meteostat.utilities.endpoint import generate_endpoint_path
 from meteostat.interface.base import Base
 
 
 class MeteoData(Base):
-
     """
     A parent class for both time series and
     climate normals data
@@ -36,73 +33,66 @@ class MeteoData(Base):
     # The data frame
     _data: pd.DataFrame = pd.DataFrame()
 
-    def _load_data(self, station: str, year: Union[int, None] = None) -> None:
+    @property
+    def _raw_columns(self) -> List[str]:
         """
-        Load file for a single station from Meteostat
+        Get the list of raw data columns, excluding any dicts with callable values
         """
-
-        # File name
-        file = generate_endpoint_path(self.granularity, station, year)
-
-        # Get local file path
-        path = get_local_file_path(self.cache_dir, self.cache_subdir, file)
-
-        # Check if file in cache
-        if self.max_age > 0 and file_in_cache(path, self.max_age):
-
-            # Read cached data
-            df = pd.read_pickle(path)
-
-        else:
-
-            # Get data from Meteostat
-            df = load_handler(
-                self.endpoint, file, self._columns, self._types, self._parse_dates
+        return [
+            list(col.values())[0] if isinstance(col, dict) else col
+            for col in self._columns
+            if not (
+                isinstance(col, dict)
+                and (
+                    isinstance(list(col.values())[0], Callable)
+                    or list(col.values())[0] is None
+                )
             )
+        ]
 
-        # Validate and prepare data for further processing
-        if self.granularity == Granularity.NORMALS and df.index.size > 0:
-            # Add weather station ID
-            # pylint: disable=unsupported-assignment-operation
-            df["station"] = station
-
-            # Set index
-            df = df.set_index(["station", "start", "end", "month"])
+    @property
+    def _processed_columns(self) -> List[str]:
+        """
+        Get the list of processed data columns, excluding any dicts with callable values
+        """
+        return [
+            list(col.keys())[0] if isinstance(col, dict) else col
+            for col in self._columns[self._first_met_col :]
+        ]
 
-        else:
-            df = validate_series(df, station)
-
-        # Save as Pickle
-        if self.max_age > 0:
-            df.to_pickle(path)
-
-        # Localize time column
-        if (
-            self.granularity == Granularity.HOURLY
-            and self._timezone is not None
-            and len(df.index) > 0
-        ):
-            df = localize(df, self._timezone)
-
-        # Filter time period and append to DataFrame
-        # pylint: disable=no-else-return
-        if self.granularity == Granularity.NORMALS and df.index.size > 0 and self._end:
-            # Get time index
-            end = df.index.get_level_values("end")
-            # Filter & return
-            return df.loc[end == self._end]
-        elif not self.granularity == Granularity.NORMALS:
-            df = filter_time(df, self._start, self._end)
-
-        # Return
-        return df
+    @property
+    def _renamed_columns(self) -> Dict[str, str]:
+        """
+        Get the dict of renamed data columns, including `_source` suffixes
+        """
+        return {
+            new_key: new_val
+            for d in self._columns
+            if isinstance(d, dict)
+            for k, v in d.items()
+            if not isinstance(v, Callable)
+            for new_key, new_val in ((v, k), (f"{v}_source", f"{k}_source"))
+        }
+
+    @property
+    def _virtual_columns(self) -> Dict[str, str]:
+        """
+        Get the dict of virtual data columns
+        """
+        return {
+            k: v
+            for d in self._columns
+            if isinstance(d, dict)
+            for k, v in d.items()
+            if isinstance(v, Callable)
+        }
 
     def _get_datasets(self) -> list:
         """
         Get list of datasets
        """
 
-        if self.granularity == Granularity.HOURLY and self.chunked:
+        if self.granularity in (Granularity.HOURLY, Granularity.DAILY):
             datasets = [
                 (str(station), year)
                 for station in self._stations
@@ -119,7 +109,6 @@ class MeteoData(Base):
         """
 
         if len(self._stations) > 0:
-
            # Get list of datasets
            datasets = self._get_datasets()
 
@@ -140,12 +129,10 @@ class MeteoData(Base):
         """
 
         if self._stations.size == 0 or self._data.size == 0:
-            return None
+            return
 
         if method == "nearest":
-
             if adapt_temp:
-
                 # Join elevation of involved weather stations
                 data = self._data.join(stations["elevation"], on="station")
 
@@ -156,7 +143,6 @@ class MeteoData(Base):
                 data = data.drop("elevation", axis=1).round(1)
 
             else:
-
                 data = self._data
 
             if self.granularity == Granularity.NORMALS:
@@ -168,7 +154,6 @@ class MeteoData(Base):
             ).agg("first")
 
         else:
-
             # Join score and elevation of involved weather stations
             data = self._data.join(stations[["score", "elevation"]], on="station")
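The four new properties all interpret the mixed `_columns` schema introduced in this release: plain strings pass through, `{"new": "raw"}` renames a raw column, `{"new": callable}` declares a virtual column computed after loading, and `{"new": None}` declares a column with no raw counterpart. A toy walk-through of the `_raw_columns` logic against an abbreviated `Daily`-style column list:

    from collections.abc import Callable

    columns = ["year", "month", "day", {"tavg": "temp"}, "tmin", {"wdir": None}]

    # Mirrors the _raw_columns property: keep plain strings and rename sources,
    # drop entries whose value is a callable or None
    raw = [
        list(col.values())[0] if isinstance(col, dict) else col
        for col in columns
        if not (
            isinstance(col, dict)
            and (
                isinstance(list(col.values())[0], Callable)
                or list(col.values())[0] is None
            )
        )
    ]
    assert raw == ["year", "month", "day", "temp", "tmin"]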