PyPI - meteostat - Versions diffs - 1.6.8__py3-none-any.whl → 1.7.1__py3-none-any.whl - Mend

meteostat 1.6.8__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

meteostat/__init__.py +12 -1
meteostat/core/cache.py +0 -2
meteostat/core/loader.py +26 -28
meteostat/core/warn.py +1 -1
meteostat/interface/base.py +10 -7
meteostat/interface/daily.py +44 -31
meteostat/interface/hourly.py +44 -43
meteostat/interface/meteodata.py +54 -69
meteostat/interface/monthly.py +24 -19
meteostat/interface/normals.py +59 -19
meteostat/interface/point.py +1 -4
meteostat/interface/stations.py +9 -8
meteostat/interface/timeseries.py +97 -65
meteostat/series/aggregate.py +0 -1
meteostat/series/convert.py +1 -1
meteostat/series/interpolate.py +12 -2
meteostat/series/normalize.py +4 -5
meteostat/utilities/endpoint.py +1 -1
meteostat/utilities/helpers.py +38 -0
meteostat/utilities/mutations.py +10 -0
{meteostat-1.6.8.dist-info → meteostat-1.7.1.dist-info}/METADATA +4 -4
meteostat-1.7.1.dist-info/RECORD +39 -0
{meteostat-1.6.8.dist-info → meteostat-1.7.1.dist-info}/WHEEL +1 -1
meteostat-1.6.8.dist-info/RECORD +0 -39
{meteostat-1.6.8.dist-info → meteostat-1.7.1.dist-info}/LICENSE +0 -0
{meteostat-1.6.8.dist-info → meteostat-1.7.1.dist-info}/top_level.txt +0 -0

meteostat/interface/monthly.py CHANGED Viewed

@@ -17,29 +17,46 @@ from meteostat.interface.point import Point
 class Monthly(TimeSeries):
     """
     Retrieve monthly weather data for one or multiple weather stations or
     a single geographical point
     """
     # The cache subdirectory
-    cache_subdir: str = "monthly"
+    cache_subdir = "monthly"
     # Granularity
     granularity = Granularity.MONTHLY
     # Default frequency
-    _freq: str = "1MS"
+    _freq = "1MS"
+    # Source mappings
+    _source_mappings = {
+        "dwd_monthly": "A",
+        "eccc_monthly": "A",
+        "dwd_daily": "C",
+        "eccc_daily": "C",
+        "ghcnd": "D",
+        "dwd_hourly": "E",
+        "eccc_hourly": "E",
+        "isd_lite": "F",
+        "synop": "G",
+        "dwd_poi": "G",
+        "metar": "H",
+        "model": "I",
+        "dwd_mosmix": "I",
+        "metno_forecast": "I",
+    }
     # Flag which represents model data
     _model_flag = "I"
     # Columns
-    _columns: list = [
+    _columns = [
         "year",
         "month",
-        "tavg",
+        {"tavg": "temp"},
         "tmin",
         "tmax",
         "prcp",
@@ -51,22 +68,11 @@ class Monthly(TimeSeries):
     # Index of first meteorological column
     _first_met_col = 2
-    # Data types
-    _types: dict = {
-        "tavg": "float64",
-        "tmin": "float64",
-        "tmax": "float64",
-        "prcp": "float64",
-        "wspd": "float64",
-        "pres": "float64",
-        "tsun": "float64",
-    }
     # Columns for date parsing
-    _parse_dates: dict = {"time": [0, 1]}
+    _parse_dates = ["year", "month"]
     # Default aggregation functions
-    aggregations: dict = {
+    aggregations = {
         "tavg": "mean",
         "tmin": "mean",
         "tmax": "mean",
@@ -84,7 +90,6 @@ class Monthly(TimeSeries):
         model: bool = True,  # Include model data?
         flags: bool = False,  # Load source flags?
     ) -> None:
         # Set start date
         if start is not None:
             start = start.replace(day=1)

meteostat/interface/normals.py CHANGED Viewed

@@ -9,10 +9,13 @@ The code is licensed under the MIT license.
 """
 from copy import copy
-from typing import Union
+from typing import Optional, Union
 from datetime import datetime
 import numpy as np
 import pandas as pd
+from meteostat.core.cache import file_in_cache, get_local_file_path
+from meteostat.core.loader import load_handler
+from meteostat.utilities.endpoint import generate_endpoint_path
 from meteostat.enumerations.granularity import Granularity
 from meteostat.core.warn import warn
 from meteostat.interface.meteodata import MeteoData
@@ -20,32 +23,31 @@ from meteostat.interface.point import Point
 class Normals(MeteoData):
     """
     Retrieve climate normals for one or multiple weather stations or
     a single geographical point
     """
     # The cache subdirectory
-    cache_subdir: str = "normals"
+    cache_subdir = "normals"
     # Granularity
     granularity = Granularity.NORMALS
     # The list of weather Stations
-    _stations: pd.Index = None
+    _stations: Optional[pd.Index] = None
     # The first year of the period
-    _start: int = None
+    _start: Optional[int] = None
     # The last year of the period
-    _end: int = None
+    _end: Optional[int] = None
     # The data frame
     _data: pd.DataFrame = pd.DataFrame()
     # Columns
-    _columns: list = [
+    _columns = [
         "start",
         "end",
         "month",
@@ -60,26 +62,62 @@ class Normals(MeteoData):
     # Index of first meteorological column
     _first_met_col = 3
-    # Data types
-    _types: dict = {
-        "tmin": "float64",
-        "tmax": "float64",
-        "prcp": "float64",
-        "wspd": "float64",
-        "pres": "float64",
-        "tsun": "float64",
-    }
     # Which columns should be parsed as dates?
     _parse_dates = None
+    def _load_data(self, station: str, year: Optional[int] = None) -> None:
+        """
+        Load file for a single station from Meteostat
+        """
+        # File name
+        file = generate_endpoint_path(self.granularity, station, year)
+        # Get local file path
+        path = get_local_file_path(self.cache_dir, self.cache_subdir, file)
+        # Check if file in cache
+        if self.max_age > 0 and file_in_cache(path, self.max_age):
+            # Read cached data
+            df = pd.read_pickle(path)
+        else:
+            # Get data from Meteostat
+            df = load_handler(
+                self.endpoint,
+                file,
+                self.proxy,
+                self._columns,
+            )
+            # Validate and prepare data for further processing
+            if not df.empty:
+                # Add weather station ID
+                df["station"] = station
+                # Set index
+                df = df.set_index(["station", "start", "end", "month"])
+            # Save as Pickle
+            if self.max_age > 0:
+                df.to_pickle(path)
+        # Filter time period and append to DataFrame
+        if self.granularity == Granularity.NORMALS and not df.empty and self._end:
+            # Get time index
+            end = df.index.get_level_values("end")
+            # Filter & return
+            return df.loc[end == self._end]
+        # Return
+        return df
     def __init__(
         self,
         loc: Union[pd.DataFrame, Point, list, str],
         start: int = None,
         end: int = None,
     ) -> None:
         # Set list of weather stations
         if isinstance(loc, pd.DataFrame):
             self._stations = loc.index
@@ -146,7 +184,9 @@ class Normals(MeteoData):
             # Go through all periods
             for period in periods:
                 # Create DataFrame
-                df = pd.DataFrame(columns=temp._columns[temp._first_met_col :])
+                df = pd.DataFrame(
+                    columns=temp._columns[temp._first_met_col :], dtype="float64"
+                )
                 # Populate index columns
                 df["month"] = range(1, 13)
                 df["station"] = station

meteostat/interface/point.py CHANGED Viewed

@@ -14,7 +14,6 @@ from meteostat.interface.stations import Stations
 class Point:
     """
     Automatically select weather stations by geographic location
     """
@@ -53,7 +52,6 @@ class Point:
     _alt: int = None
     def __init__(self, lat: float, lon: float, alt: int = None) -> None:
         self._lat = lat
         self._lon = lon
         self._alt = alt
@@ -90,7 +88,7 @@ class Point:
         # Apply inventory filter
         if freq and start and end:
             age = (datetime.now() - end).days
-            if model == False or age > 180:
+            if model is False or age > 180:
                 stations = stations.inventory(freq, (start, end))
         # Apply altitude filter
@@ -110,7 +108,6 @@ class Point:
         # Score values
         if self.radius:
             # Calculate score values
             stations["score"] = (
                 (1 - (stations["distance"] / self.radius)) * self.weight_dist

meteostat/interface/stations.py CHANGED Viewed

@@ -19,7 +19,6 @@ from meteostat.utilities.helpers import get_distance
 class Stations(Base):
     """
     Select weather stations from the full list of stations
     """
@@ -80,15 +79,18 @@ class Stations(Base):
         # Check if file in cache
         if self.max_age > 0 and file_in_cache(path, self.max_age):
             # Read cached data
             df = pd.read_pickle(path)
         else:
             # Get data from Meteostat
             df = load_handler(
-                self.endpoint, file, self._columns, self._types, self._parse_dates, True
+                self.endpoint,
+                file,
+                self.proxy,
+                self._columns,
+                self._types,
+                self._parse_dates,
             )
             # Add index
@@ -102,7 +104,6 @@ class Stations(Base):
         self._data = df
     def __init__(self) -> None:
         # Get all weather stations
         self._load()
@@ -179,12 +180,12 @@ class Stations(Base):
         if required is True:
             # Make sure data exists at all
-            temp._data = temp._data[(pd.isna(temp._data[freq + "_start"]) == False)]
+            temp._data = temp._data[~pd.isna(temp._data[f"{freq}_start"])]
         elif isinstance(required, tuple):
             # Make sure data exists across period
             temp._data = temp._data[
-                (pd.isna(temp._data[freq + "_start"]) == False)
+                (~pd.isna(temp._data[f"{freq}_start"]))
                 & (temp._data[freq + "_start"] <= required[0])
                 & (
                     temp._data[freq + "_end"] + timedelta(seconds=temp.max_age)
@@ -195,7 +196,7 @@ class Stations(Base):
         else:
             # Make sure data exists on a certain day
             temp._data = temp._data[
-                (pd.isna(temp._data[freq + "_start"]) == False)
+                (~pd.isna(temp._data[f"{freq}_start"]))
                 & (temp._data[freq + "_start"] <= required)
                 & (
                     temp._data[freq + "_end"] + timedelta(seconds=temp.max_age)

meteostat/interface/timeseries.py CHANGED Viewed

@@ -9,72 +9,118 @@ The code is licensed under the MIT license.
 """
 from datetime import datetime
-from typing import Union
-import numpy as np
+from typing import Optional, Union
 import pandas as pd
+from meteostat.core.cache import file_in_cache, get_local_file_path
+from meteostat.core.loader import load_handler
 from meteostat.enumerations.granularity import Granularity
-from meteostat.core.cache import get_local_file_path, file_in_cache
-from meteostat.core.loader import processing_handler, load_handler
-from meteostat.utilities.mutations import localize, filter_time
-from meteostat.utilities.validations import validate_series
 from meteostat.utilities.endpoint import generate_endpoint_path
+from meteostat.utilities.mutations import filter_time, localize
+from meteostat.utilities.validations import validate_series
+from meteostat.utilities.helpers import get_flag_from_source_factory, with_suffix
 from meteostat.interface.point import Point
 from meteostat.interface.meteodata import MeteoData
 class TimeSeries(MeteoData):
     """
     TimeSeries class which provides features which are
     used across all time series classes
     """
+    # Base URL of the Meteostat bulk data interface
+    endpoint = "https://data.meteostat.net/"
     # The list of origin weather Stations
-    _origin_stations: Union[pd.Index, None] = None
+    _origin_stations: Optional[pd.Index] = None
     # The start date
-    _start: Union[datetime, None] = None
+    _start: Optional[datetime] = None
     # The end date
-    _end: Union[datetime, None] = None
+    _end: Optional[datetime] = None
     # Include model data?
-    _model: bool = True
+    _model = True
     # Fetch source flags?
-    _flags = bool = False
+    _flags = False
-    def _load_flags(self, station: str, year: Union[int, None] = None) -> None:
+    def _load_data(self, station: str, year: Optional[int] = None) -> None:
         """
-        Load flag file for a single station from Meteostat
+        Load file for a single station from Meteostat
         """
         # File name
-        file = generate_endpoint_path(self.granularity, station, year, True)
+        file = generate_endpoint_path(self.granularity, station, year)
         # Get local file path
         path = get_local_file_path(self.cache_dir, self.cache_subdir, file)
         # Check if file in cache
         if self.max_age > 0 and file_in_cache(path, self.max_age):
             # Read cached data
             df = pd.read_pickle(path)
         else:
             # Get data from Meteostat
             df = load_handler(
                 self.endpoint,
                 file,
-                self._columns,
-                {key: "string" for key in self._columns[self._first_met_col :]},
-                self._parse_dates,
+                self.proxy,
+                default_df=pd.DataFrame(
+                    columns=self._raw_columns
+                    + with_suffix(self._raw_columns, "_source")
+                ),
+            )
+            # Add time column and drop original columns
+            if len(self._parse_dates) < 3:
+                df["day"] = 1
+            df["time"] = pd.to_datetime(
+                df[
+                    (
+                        self._parse_dates
+                        if len(self._parse_dates) > 2
+                        else self._parse_dates + ["day"]
+                    )
+                ]
             )
+            df = df.drop(self._parse_dates, axis=1)
-            # Validate Series
+            # Validate and prepare data for further processing
             df = validate_series(df, station)
+            # Rename columns
+            df = df.rename(columns=self._renamed_columns, errors="ignore")
+            # Convert sources to flags
+            for col in df.columns:
+                basecol = col[:-7] if col.endswith("_source") else col
+                if basecol not in self._processed_columns:
+                    df.drop(col, axis=1, inplace=True)
+                    continue
+                if basecol == col:
+                    df[col] = df[col].astype("Float64")
+                if col.endswith("_source"):
+                    flagcol = f"{basecol}_flag"
+                    df[flagcol] = pd.NA
+                    df[flagcol] = df[flagcol].astype("string")
+                    mask = df[col].notna()
+                    df.loc[mask, flagcol] = df.loc[mask, col].apply(
+                        get_flag_from_source_factory(
+                            self._source_mappings, self._model_flag
+                        )
+                    )
+                    df.drop(col, axis=1, inplace=True)
+            # Process virtual columns
+            for key, value in self._virtual_columns.items():
+                df = value(df, key)
             # Save as Pickle
             if self.max_age > 0:
                 df.to_pickle(path)
@@ -88,59 +134,33 @@ class TimeSeries(MeteoData):
             df = localize(df, self._timezone)
         # Filter time period and append to DataFrame
-        if self._start and self._end:
-            df = filter_time(df, self._start, self._end)
+        df = filter_time(df, self._start, self._end)
+        # Return
         return df
-    def _get_flags(self) -> None:
-        """
-        Get all source flags
-        """
-        if len(self._stations) > 0:
-            # Get list of datasets
-            datasets = self._get_datasets()
-            # Data Processings
-            return processing_handler(
-                datasets, self._load_flags, self.processes, self.threads
-            )
-        # Empty DataFrame
-        return pd.DataFrame(columns=[*self._types])
     def _filter_model(self) -> None:
         """
         Remove model data from time series
         """
-        columns = self._columns[self._first_met_col :]
-        for col_name in columns:
+        for col_name in self._processed_columns:
             self._data.loc[
                 (pd.isna(self._data[f"{col_name}_flag"]))
                 | (self._data[f"{col_name}_flag"].str.contains(self._model_flag)),
                 col_name,
-            ] = np.nan
-        # Conditionally, remove flags from DataFrame
-        if not self._flags:
-            self._data.drop(
-                map(lambda col_name: f"{col_name}_flag", columns), axis=1, inplace=True
-            )
+            ] = pd.NA
         # Drop nan-only rows
-        self._data.dropna(how="all", subset=columns, inplace=True)
+        self._data.dropna(how="all", subset=self._processed_columns, inplace=True)
     def _init_time_series(
         self,
         loc: Union[pd.DataFrame, Point, list, str],  # Station(s) or geo point
         start: datetime = None,
         end: datetime = None,
-        model: bool = True,  # Include model data?
-        flags: bool = False,  # Load source flags?
+        model=True,  # Include model data?
+        flags=False,  # Load source flags?
     ) -> None:
         """
         Common initialization for all time series, regardless
@@ -169,20 +189,32 @@ class TimeSeries(MeteoData):
         # Get data for all weather stations
         self._data = self._get_data()
-        # Load source flags through map file
-        # if flags are explicitly requested or
-        # model data is excluded
-        if flags or not model:
-            flags = self._get_flags()
-            self._data = self._data.merge(
-                flags, on=["station", "time"], how="left", suffixes=[None, "_flag"]
-            )
+        # Fill columns if they don't exist
+        for col in self._processed_columns:
+            if col not in self._data.columns:
+                self._data[col] = pd.NA
+                self._data[col] = self._data[col].astype("Float64")
+                self._data[f"{col}_flag"] = pd.NA
+                self._data[f"{col}_flag"] = self._data[f"{col}_flag"].astype("string")
-        # Remove model data from DataFrame and
-        # drop flags if not specified otherwise
+        # Reorder the DataFrame
+        self._data = self._data[
+            self._processed_columns + with_suffix(self._processed_columns, "_flag")
+        ]
+        # Remove model data from DataFrame
         if not model:
             self._filter_model()
+        # Conditionally, remove flags from DataFrame
+        if not self._flags:
+            self._data.drop(
+                with_suffix(self._processed_columns, "_flag"),
+                axis=1,
+                errors="ignore",
+                inplace=True,
+            )
         # Interpolate data spatially if requested
         # location is a geographical point
         if isinstance(loc, Point):

meteostat/series/aggregate.py CHANGED Viewed

@@ -19,7 +19,6 @@ def aggregate(self, freq: str = None, spatial: bool = False):
     """
     if self.count() > 0 and not self._data.isnull().values.all():
         # Create temporal instance
         temp = copy(self)

meteostat/series/convert.py CHANGED Viewed

@@ -21,7 +21,7 @@ def convert(self, units: dict):
     # Change data units
     for parameter, unit in units.items():
-        if parameter in temp._columns:
+        if parameter in temp._processed_columns:
             temp._data[parameter] = temp._data[parameter].apply(unit)
     # Return class instance

meteostat/series/interpolate.py CHANGED Viewed

@@ -9,6 +9,7 @@ The code is licensed under the MIT license.
 """
 from copy import copy
+import numpy as np
 from meteostat.core.warn import warn
@@ -18,17 +19,26 @@ def interpolate(self, limit: int = 3):
     """
     if self.count() > 0 and not self._data.isnull().values.all():
         # Create temporal instance
         temp = copy(self)
+        # Convert to float64
+        temp._data = temp._data.astype("float64")
         # Apply interpolation
         temp._data = temp._data.groupby("station", group_keys=False).apply(
             lambda group: group.interpolate(
-                method="linear", limit=limit, limit_direction="both", axis=0
+                method="linear",
+                limit=limit,
+                limit_direction="both",
+                axis=0,
+                fill_value=np.nan,
             )
         )
+        # Convert to original type
+        temp._data = temp._data.astype("Float64")
         # Return class instance
         return temp

meteostat/series/normalize.py CHANGED Viewed

@@ -27,9 +27,8 @@ def normalize(self):
     temp = copy(self)
     if temp._start and temp._end and temp.coverage() < 1:
         # Create result DataFrame
-        result = pd.DataFrame(columns=temp._columns[temp._first_met_col :])
+        result = pd.DataFrame(columns=temp._processed_columns, dtype="Float64")
         # Handle tz-aware date ranges
         if hasattr(temp, "_timezone") and temp._timezone is not None:
@@ -43,7 +42,7 @@ def normalize(self):
         # Go through list of weather stations
         for station in temp._stations:
             # Create data frame
-            df = pd.DataFrame(columns=temp._columns[temp._first_met_col :])
+            df = pd.DataFrame(columns=temp._processed_columns, dtype="Float64")
             # Add time series
             df["time"] = pd.date_range(
                 start,
@@ -54,7 +53,7 @@ def normalize(self):
             # Add station ID
             df["station"] = station
             # Add columns
-            for column in temp._columns[temp._first_met_col :]:
+            for column in temp._processed_columns:
                 # Add column to DataFrame
                 df[column] = nan
@@ -71,7 +70,7 @@ def normalize(self):
         )
         # None -> nan
-        temp._data = temp._data.fillna(nan)
+        temp._data = temp._data.fillna(pd.NA)
     # Return class instance
     return temp

meteostat/utilities/endpoint.py CHANGED Viewed

@@ -25,7 +25,7 @@ def generate_endpoint_path(
     # Base path
     path = f"{granularity.value}/"
-    if granularity == Granularity.HOURLY and year:
+    if granularity in (Granularity.HOURLY, Granularity.DAILY) and year:
         path += f"{year}/"
     appendix = ".map" if map_file else ""

meteostat 1.6.8__py3-none-any.whl → 1.7.1__py3-none-any.whl

meteostat 1.6.8__py3-none-any.whl → 1.7.1__py3-none-any.whl