meteostat 1.7.6__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- meteostat/__init__.py +38 -19
- meteostat/api/config.py +158 -0
- meteostat/api/daily.py +76 -0
- meteostat/api/hourly.py +80 -0
- meteostat/api/interpolate.py +378 -0
- meteostat/api/inventory.py +59 -0
- meteostat/api/merge.py +103 -0
- meteostat/api/monthly.py +73 -0
- meteostat/api/normals.py +144 -0
- meteostat/api/point.py +30 -0
- meteostat/api/stations.py +234 -0
- meteostat/api/timeseries.py +334 -0
- meteostat/core/cache.py +212 -59
- meteostat/core/data.py +203 -0
- meteostat/core/logger.py +9 -0
- meteostat/core/network.py +82 -0
- meteostat/core/parameters.py +112 -0
- meteostat/core/providers.py +184 -0
- meteostat/core/schema.py +170 -0
- meteostat/core/validator.py +38 -0
- meteostat/enumerations.py +149 -0
- meteostat/interpolation/idw.py +120 -0
- meteostat/interpolation/lapserate.py +91 -0
- meteostat/interpolation/nearest.py +31 -0
- meteostat/parameters.py +354 -0
- meteostat/providers/dwd/climat.py +166 -0
- meteostat/providers/dwd/daily.py +144 -0
- meteostat/providers/dwd/hourly.py +218 -0
- meteostat/providers/dwd/monthly.py +138 -0
- meteostat/providers/dwd/mosmix.py +351 -0
- meteostat/providers/dwd/poi.py +117 -0
- meteostat/providers/dwd/shared.py +155 -0
- meteostat/providers/eccc/daily.py +87 -0
- meteostat/providers/eccc/hourly.py +104 -0
- meteostat/providers/eccc/monthly.py +66 -0
- meteostat/providers/eccc/shared.py +45 -0
- meteostat/providers/index.py +496 -0
- meteostat/providers/meteostat/daily.py +65 -0
- meteostat/providers/meteostat/daily_derived.py +110 -0
- meteostat/providers/meteostat/hourly.py +66 -0
- meteostat/providers/meteostat/monthly.py +45 -0
- meteostat/providers/meteostat/monthly_derived.py +106 -0
- meteostat/providers/meteostat/shared.py +93 -0
- meteostat/providers/metno/forecast.py +186 -0
- meteostat/providers/noaa/ghcnd.py +228 -0
- meteostat/providers/noaa/isd_lite.py +142 -0
- meteostat/providers/noaa/metar.py +163 -0
- meteostat/typing.py +113 -0
- meteostat/utils/conversions.py +231 -0
- meteostat/utils/data.py +194 -0
- meteostat/utils/geo.py +28 -0
- meteostat/utils/guards.py +51 -0
- meteostat/utils/parsers.py +161 -0
- meteostat/utils/types.py +113 -0
- meteostat/utils/validators.py +31 -0
- meteostat-2.0.1.dist-info/METADATA +130 -0
- meteostat-2.0.1.dist-info/RECORD +64 -0
- {meteostat-1.7.6.dist-info → meteostat-2.0.1.dist-info}/WHEEL +1 -2
- meteostat/core/loader.py +0 -103
- meteostat/core/warn.py +0 -34
- meteostat/enumerations/granularity.py +0 -22
- meteostat/interface/base.py +0 -39
- meteostat/interface/daily.py +0 -118
- meteostat/interface/hourly.py +0 -154
- meteostat/interface/meteodata.py +0 -210
- meteostat/interface/monthly.py +0 -109
- meteostat/interface/normals.py +0 -245
- meteostat/interface/point.py +0 -143
- meteostat/interface/stations.py +0 -252
- meteostat/interface/timeseries.py +0 -237
- meteostat/series/aggregate.py +0 -48
- meteostat/series/convert.py +0 -28
- meteostat/series/count.py +0 -17
- meteostat/series/coverage.py +0 -20
- meteostat/series/fetch.py +0 -28
- meteostat/series/interpolate.py +0 -47
- meteostat/series/normalize.py +0 -76
- meteostat/series/stations.py +0 -22
- meteostat/units.py +0 -149
- meteostat/utilities/__init__.py +0 -0
- meteostat/utilities/aggregations.py +0 -37
- meteostat/utilities/endpoint.py +0 -33
- meteostat/utilities/helpers.py +0 -70
- meteostat/utilities/mutations.py +0 -89
- meteostat/utilities/validations.py +0 -30
- meteostat-1.7.6.dist-info/METADATA +0 -112
- meteostat-1.7.6.dist-info/RECORD +0 -39
- meteostat-1.7.6.dist-info/top_level.txt +0 -1
- /meteostat/{core → api}/__init__.py +0 -0
- /meteostat/{enumerations → interpolation}/__init__.py +0 -0
- /meteostat/{interface → providers}/__init__.py +0 -0
- /meteostat/{interface/interpolate.py → py.typed} +0 -0
- /meteostat/{series → utils}/__init__.py +0 -0
- {meteostat-1.7.6.dist-info → meteostat-2.0.1.dist-info/licenses}/LICENSE +0 -0
meteostat/api/interpolate.py
ADDED
@@ -0,0 +1,378 @@
+"""
+Interpolation Module
+
+Provides spatial interpolation functions for meteorological data.
+"""
+
+from typing import Optional, Union
+
+import numpy as np
+import pandas as pd
+
+from meteostat.api.point import Point
+from meteostat.api.timeseries import TimeSeries
+from meteostat.typing import Station
+from meteostat.enumerations import Parameter
+from meteostat.interpolation.lapserate import apply_lapse_rate
+from meteostat.interpolation.nearest import nearest_neighbor
+from meteostat.interpolation.idw import inverse_distance_weighting
+from meteostat.utils.data import aggregate_sources, reshape_by_source, stations_to_df
+from meteostat.utils.geo import get_distance
+from meteostat.utils.parsers import parse_station
+from meteostat.core.schema import schema_service
+from meteostat.core.logger import logger
+
+
+# Parameters that are categorical and should not use IDW interpolation
+CATEGORICAL_PARAMETERS = {Parameter.WDIR, Parameter.CLDC, Parameter.COCO}
+
+
+def _create_timeseries(
+    ts: TimeSeries, point: Point, df: Optional[pd.DataFrame] = None
+) -> TimeSeries:
+    """
+    Create a TimeSeries object from an interpolated DataFrame
+    """
+    parsed = parse_station(point)
+    stations_list = [parsed] if isinstance(parsed, Station) else parsed
+
+    # Convert stations to DataFrame
+    stations_df = stations_to_df(stations_list)
+
+    return TimeSeries(
+        ts.granularity,
+        stations_df,
+        df=df,
+        start=ts.start,
+        end=ts.end,
+        timezone=ts.timezone,
+    )
+
+
+def _add_source_columns(
+    result: pd.DataFrame,
+    df: pd.DataFrame,
+) -> pd.DataFrame:
+    """
+    Add source columns to the result DataFrame
+    """
+    source_cols = [c for c in df.columns if c.endswith("_source")]
+    if source_cols:
+        grouped = df.groupby("time")[source_cols].agg(aggregate_sources)
+        if isinstance(grouped, pd.Series):
+            grouped = grouped.to_frame(name=source_cols[0])
+        grouped.index.name = "time"
+
+        # Safely align on time and add/fill source columns without causing overlaps
+        result_has_time_col = "time" in result.columns
+        if result_has_time_col:
+            result = result.set_index("time")
+
+        # Ensure both frames align on the same index (time)
+        # For each source column, add it if missing or fill NaNs if present
+        for col in source_cols:
+            if col in grouped.columns:
+                if col in result.columns:
+                    # Fill missing values in result using aggregated sources
+                    result[col] = result[col].where(result[col].notna(), grouped[col])
+                else:
+                    # Add aggregated source column
+                    result[col] = grouped[col]
+
+        if result_has_time_col:
+            result = result.reset_index()
+    return result
+
+
+def _prepare_data_with_distances(
+    df: pd.DataFrame, point: Point, elevation_weight: float
+) -> pd.DataFrame:
+    """
+    Add distance and elevation calculations to the DataFrame
+    """
+    # Add distance column
+    df["distance"] = get_distance(
+        point.latitude, point.longitude, df["latitude"], df["longitude"]
+    )
+
+    # Add effective distance column if elevation is available
+    if point.elevation is not None and "elevation" in df.columns:
+        elev_diff = np.abs(df["elevation"] - point.elevation)
+        df["effective_distance"] = np.sqrt(
+            df["distance"] ** 2 + (elev_diff * elevation_weight) ** 2
+        )
+    else:
+        df["effective_distance"] = df["distance"]
+
+    # Add elevation difference column
+    if "elevation" in df.columns and point.elevation is not None:
+        df["elevation_diff"] = np.abs(df["elevation"] - point.elevation)
+    else:
+        df["elevation_diff"] = np.nan
+
+    return df
+
+
+def _should_use_nearest_neighbor(
+    df: pd.DataFrame,
+    point: Point,
+    distance_threshold: Union[int, None],
+    elevation_threshold: Union[int, None],
+) -> bool:
+    """
+    Determine if nearest neighbor should be used based on thresholds
+    """
+    min_distance = df["distance"].min()
+    use_nearest = distance_threshold is None or min_distance <= distance_threshold
+
+    if use_nearest and point.elevation is not None and "elevation" in df.columns:
+        min_elev_diff = np.abs(df["elevation"] - point.elevation).min()
+        use_nearest = (
+            elevation_threshold is None or min_elev_diff <= elevation_threshold
+        )
+
+    return use_nearest
+
+
+def _get_categorical_columns(df: pd.DataFrame) -> list:
+    """
+    Identify categorical columns in the data (excluding source columns)
+    """
+    data_cols = [c for c in df.columns if not c.endswith("_source")]
+    return [c for c in data_cols if c in CATEGORICAL_PARAMETERS]
+
+
+def _interpolate_with_nearest_neighbor(
+    df: pd.DataFrame,
+    ts: TimeSeries,
+    point: Point,
+    distance_threshold: Union[int, None],
+    elevation_threshold: Union[int, None],
+) -> Optional[pd.DataFrame]:
+    """
+    Perform nearest neighbor interpolation with threshold filtering
+    """
+    distance_filter = (
+        pd.Series([True] * len(df), index=df.index)
+        if distance_threshold is None
+        else (df["distance"] <= distance_threshold)
+    )
+    elevation_filter = (
+        pd.Series([True] * len(df), index=df.index)
+        if elevation_threshold is None
+        else (np.abs(df["elevation"] - point.elevation) <= elevation_threshold)
+    )
+    df_filtered = df[distance_filter & elevation_filter]
+    return nearest_neighbor(df_filtered, ts, point)
+
+
+def _interpolate_with_idw_and_categorical(
+    df: pd.DataFrame,
+    ts: TimeSeries,
+    point: Point,
+    categorical_cols: list,
+    power: float,
+) -> Optional[pd.DataFrame]:
+    """
+    Perform IDW interpolation for non-categorical parameters and nearest neighbor for categorical
+    """
+    # For categorical parameters, always use nearest neighbor
+    if categorical_cols:
+        df_categorical = nearest_neighbor(df, ts, point)
+        # Keep only categorical columns that exist in the result
+        existing_categorical = [
+            c for c in categorical_cols if c in df_categorical.columns
+        ]
+        df_categorical = (
+            df_categorical[existing_categorical]
+            if existing_categorical
+            else pd.DataFrame()
+        )
+    else:
+        df_categorical = pd.DataFrame()
+
+    # Perform IDW interpolation for all parameters
+    idw_func = inverse_distance_weighting(power=power)
+    df_idw = idw_func(df, ts, point)
+
+    # Remove categorical columns from IDW result if they exist
+    if not df_categorical.empty and df_idw is not None:
+        # Drop categorical columns from IDW result
+        idw_cols_to_keep = [c for c in df_idw.columns if c not in categorical_cols]
+        df_idw = df_idw[idw_cols_to_keep] if idw_cols_to_keep else pd.DataFrame()
+
+    # Combine categorical (nearest) and non-categorical (IDW) results
+    if not df_categorical.empty and not df_idw.empty:
+        return pd.concat([df_idw, df_categorical], axis=1)
+    elif not df_categorical.empty:
+        return df_categorical
+    else:
+        return df_idw
+
+
+def _merge_interpolation_results(
+    df_nearest: Optional[pd.DataFrame],
+    df_idw: Optional[pd.DataFrame],
+    use_nearest: bool,
+) -> Optional[pd.DataFrame]:
+    """
+    Merge nearest neighbor and IDW results with appropriate priority
+    """
+    if use_nearest and df_nearest is not None and len(df_nearest) > 0:
+        if df_idw is not None:
+            # Combine nearest and IDW results, prioritizing nearest values
+            return df_nearest.combine_first(df_idw)
+        else:
+            return df_nearest
+    else:
+        return df_idw
+
+
+def _postprocess_result(
+    result: pd.DataFrame, df: pd.DataFrame, ts: TimeSeries
+) -> pd.DataFrame:
+    """
+    Post-process the interpolation result: drop location columns, add sources, format, reshape
+    """
+    # Drop location-related columns
+    result = result.drop(
+        [
+            "latitude",
+            "longitude",
+            "elevation",
+            "distance",
+            "effective_distance",
+            "elevation_diff",
+        ],
+        axis=1,
+        errors="ignore",
+    )
+
+    # Add source columns
+    result = _add_source_columns(result, df)
+
+    # Reshape by source
+    result = reshape_by_source(result)
+
+    # Add station index
+    result["station"] = "$0001"
+    result = result.set_index("station", append=True).reorder_levels(
+        ["station", "time", "source"]
+    )
+
+    # Reorder columns to match the canonical schema order
+    result = schema_service.purge(result, ts.parameters)
+
+    # Format the result using schema_service to apply proper rounding
+    result = schema_service.format(result, ts.granularity)
+
+    return result
+
+
+def interpolate(
+    ts: TimeSeries,
+    point: Point,
+    distance_threshold: Union[int, None] = 5000,
+    elevation_threshold: Union[int, None] = 50,
+    elevation_weight: float = 10,
+    power: float = 2.0,
+    lapse_rate: Union[float, None] = 6.5,
+    lapse_rate_threshold: int = 50,
+) -> TimeSeries:
+    """
+    Interpolate time series data spatially to a specific point.
+
+    Parameters
+    ----------
+    ts : TimeSeries
+        The time series to interpolate.
+    point : Point
+        The point to interpolate the data for.
+    distance_threshold : int, optional
+        Maximum distance (in meters) to use nearest neighbor (default: 5000).
+        Beyond this, IDW is used.
+    elevation_threshold : int, optional
+        Maximum elevation difference (in meters) to use nearest neighbor (default: 50).
+        Beyond this, IDW is used even if distance is within threshold.
+    elevation_weight : float, optional
+        Weight for elevation difference in distance calculation (default: 10).
+        The effective distance is calculated as:
+        sqrt(horizontal_distance^2 + (elevation_diff * elevation_weight)^2)
+    power : float, optional
+        Power parameter for IDW (default: 2.0). Higher values give more
+        weight to closer stations.
+    lapse_rate : float, optional
+        Apply lapse rate correction based on elevation difference (default: 6.5).
+    lapse_rate_threshold : int, optional
+        Elevation difference threshold (in meters) to apply lapse rate correction
+        (default: 50). If the elevation difference between the point and stations
+        is less than this, no correction is applied.
+
+    Returns
+    -------
+    TimeSeries
+        A TimeSeries containing the interpolated data for the specified point;
+        empty if no data is available.
+    """
+    # Fetch DataFrame, filling missing values and adding location data
+    df = ts.fetch(fill=True, location=True, sources=True)
+
+    # If no data is available, return an empty TimeSeries
+    if df is None:
+        logger.debug("No data available for interpolation. Returning empty TimeSeries.")
+        return _create_timeseries(ts, point)
+
+    # Prepare data with distance and elevation calculations
+    df = _prepare_data_with_distances(df, point, elevation_weight)
+
+    # Apply lapse rate if specified and elevation is available
+    if (
+        lapse_rate
+        and point.elevation
+        and df["elevation_diff"].max() >= lapse_rate_threshold
+    ):
+        logger.debug("Applying lapse rate correction.")
+        df = apply_lapse_rate(df, point.elevation, lapse_rate)
+
+    # Determine if nearest neighbor should be used
+    use_nearest = _should_use_nearest_neighbor(
+        df, point, distance_threshold, elevation_threshold
+    )
+
+    # Identify categorical columns
+    categorical_cols = _get_categorical_columns(df)
+    logger.debug(f"Categorical columns identified: {categorical_cols}")
+
+    # Perform interpolation
+    df_nearest = None
+    df_idw = None
+
+    if use_nearest:
+        logger.debug("Using nearest neighbor interpolation.")
+        df_nearest = _interpolate_with_nearest_neighbor(
+            df, ts, point, distance_threshold, elevation_threshold
+        )
+
+    # Use IDW if nearest neighbor doesn't provide complete data
+    if (
+        not use_nearest
+        or df_nearest is None
+        or len(df_nearest) == 0
+        or df_nearest.isna().any().any()
+    ):
+        logger.debug("Using IDW interpolation.")
+        df_idw = _interpolate_with_idw_and_categorical(
+            df, ts, point, categorical_cols, power
+        )
+
+    # Merge results
+    result = _merge_interpolation_results(df_nearest, df_idw, use_nearest)
+
+    # If interpolation produced no data, return an empty TimeSeries
+    if result is None or result.empty:
+        return _create_timeseries(ts, point)
+
+    # Post-process result
+    result = _postprocess_result(result, df, ts)
+
+    return _create_timeseries(ts, point, result)
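Taken together, these helpers implement a two-tier strategy: nearest neighbor when a station is close enough both horizontally and vertically, inverse distance weighting otherwise, with categorical parameters (wind direction, cloud cover, condition code) always taken from the nearest station. Below is a minimal usage sketch; note that the `hourly` entry point (inferred from `meteostat/api/hourly.py` in the file list and the `monthly` signature later in this diff) and the `Point` constructor order are assumptions, not confirmed API.

```python
from datetime import datetime

# Assumed entry point; the diff adds meteostat/api/hourly.py but its
# signature is not shown here.
from meteostat.api.hourly import hourly
from meteostat.api.interpolate import interpolate
from meteostat.api.point import Point

# A point with an explicit elevation (lat, lon, elevation in meters is
# an assumed constructor order), so both the effective-distance
# weighting and the lapse-rate correction can take effect.
point = Point(50.05, 8.6, 113)

# Fetch hourly data around the point, then interpolate it spatially.
# Thresholds mirror the defaults: nearest neighbor within 5 km and
# 50 m of elevation difference, IDW beyond that.
ts = hourly(point, datetime(2024, 1, 1), datetime(2024, 1, 7))
ts_point = interpolate(ts, point, distance_threshold=5000, elevation_threshold=50)

df = ts_point.fetch()
```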
meteostat/api/inventory.py
ADDED
@@ -0,0 +1,59 @@
+"""
+Inventory Module
+
+Provides classes for working with weather station data inventories.
+"""
+
+from datetime import date, datetime
+from typing import List, Optional
+
+import pandas as pd
+
+from meteostat.enumerations import Parameter
+
+
+class Inventory:
+    """
+    A weather station's data inventory
+    """
+
+    df: Optional[pd.DataFrame] = None
+
+    def __init__(self, df: Optional[pd.DataFrame] = None):
+        if df is not None and not df.empty:
+            self.df = df
+
+    @property
+    def start(self) -> Optional[date]:
+        """
+        Get the earliest start date from the inventory
+        """
+        return (
+            datetime.strptime(self.df["start"].min(), "%Y-%m-%d").date()
+            if self.df is not None
+            else None
+        )
+
+    @property
+    def end(self) -> Optional[date]:
+        """
+        Get the latest end date from the inventory
+        """
+        return (
+            datetime.strptime(self.df["end"].max(), "%Y-%m-%d").date()
+            if self.df is not None
+            else None
+        )
+
+    @property
+    def parameters(self) -> Optional[List[Parameter]]:
+        """
+        Get the list of available parameters from the inventory
+        """
+        if self.df is None:
+            return []
+
+        return [
+            Parameter[parameter.upper()]
+            for parameter in self.df.index.get_level_values("parameter").unique()
+        ]
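A small sketch of how the class behaves, assuming an inventory frame with ISO-formatted `start`/`end` string columns and a `parameter` index level; that layout is inferred from what the properties read, and the exact index structure is an assumption:

```python
import pandas as pd

from meteostat.api.inventory import Inventory

# Hypothetical inventory layout: one row per (provider, parameter),
# with ISO-formatted date strings, matching what the properties parse.
df = pd.DataFrame(
    {"start": ["2000-01-01", "2010-06-01"], "end": ["2024-12-31", "2024-12-31"]},
    index=pd.MultiIndex.from_tuples(
        [("hourly", "temp"), ("hourly", "prcp")], names=["provider", "parameter"]
    ),
)

inv = Inventory(df)
print(inv.start)       # date(2000, 1, 1) -- earliest start across rows
print(inv.end)         # date(2024, 12, 31) -- latest end across rows
print(inv.parameters)  # [Parameter.TEMP, Parameter.PRCP]
```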
meteostat/api/merge.py
ADDED
@@ -0,0 +1,103 @@
+"""
+Concatenation Module
+
+Provides functions to concatenate multiple time series objects into one.
+"""
+
+from copy import copy
+from datetime import datetime
+from typing import List, Optional
+
+import pandas as pd
+
+from meteostat.core.data import data_service
+from meteostat.core.schema import schema_service
+from meteostat.api.timeseries import TimeSeries
+
+
+def _get_dt(
+    dt_a: Optional[datetime], dt_b: Optional[datetime], start=True
+) -> Optional[datetime]:
+    """
+    Return the earlier or later (depending on "start" argument) of two datetimes,
+    considering None as 'no value'.
+
+    If both are None, return None.
+    """
+    if dt_a is None:
+        return dt_b
+    if dt_b is None:
+        return dt_a
+    return min(dt_a, dt_b) if start else max(dt_a, dt_b)
+
+
+def merge(objs: List[TimeSeries]) -> TimeSeries:
+    """
+    Merge one or multiple Meteostat time series into a common one
+
+    In case of a duplicate index, the last row will be preferred.
+    Hence, please pass newest data last.
+
+    Parameters
+    ----------
+    objs : List[TimeSeries]
+        List of time series objects to concatenate
+
+    Returns
+    -------
+    TimeSeries
+        Concatenated time series object
+
+    Raises
+    ------
+    ValueError
+        If the time series objects have divergent granularity or time zone
+    """
+    ts = objs[0]
+
+    if not all(
+        obj.granularity == ts.granularity and obj.timezone == ts.timezone
+        for obj in objs[1:]
+    ):
+        raise ValueError(
+            "Can't concatenate time series objects with divergent granularity or time zone"
+        )
+
+    stations = copy(ts.stations)
+    start = copy(ts.start)
+    end = copy(ts.end)
+    parameters = ts.parameters
+    multi_station = ts._multi_station
+
+    for obj in objs[1:]:
+        stations = (
+            pd.concat([stations, obj.stations])
+            .reset_index()
+            .drop_duplicates(subset=["id"])
+            .set_index("id")
+        )
+        start = _get_dt(start, obj.start)
+        end = _get_dt(end, obj.end, False)
+        parameters.extend(obj.parameters)
+        if (
+            obj._multi_station
+            or stations.index.get_level_values("id")[0]
+            != obj.stations.index.get_level_values("id")[0]
+        ):
+            multi_station = True
+
+    df = data_service.concat_fragments(
+        [obj._df for obj in objs if obj._df is not None],
+        list(dict.fromkeys(parameters)),
+    )
+    df = schema_service.format(df, ts.granularity)
+
+    return TimeSeries(
+        ts.granularity,
+        stations,
+        df,
+        start,
+        end,
+        ts.timezone,
+        multi_station=multi_station,
+    )
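In short, `merge` unions the station metadata, widens the start/end range, deduplicates the parameter list, and delegates row-level concatenation to `data_service.concat_fragments`. A sketch of the intended call pattern follows; the `hourly` entry point is assumed by analogy with the `monthly` function below, and the station ID is hypothetical:

```python
from datetime import datetime

from meteostat.api.hourly import hourly  # assumed entry point
from meteostat.api.merge import merge

# Hypothetical station ID. Both series share granularity and time
# zone, which merge() requires; on duplicate indices the later object
# wins, so the newer fragment is passed last.
ts_2023 = hourly("10637", datetime(2023, 1, 1), datetime(2023, 12, 31))
ts_2024 = hourly("10637", datetime(2024, 1, 1), datetime(2024, 12, 31))

combined = merge([ts_2023, ts_2024])
print(combined.start, combined.end)  # spans 2023-01-01 through 2024-12-31
```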
meteostat/api/monthly.py
ADDED
@@ -0,0 +1,73 @@
+"""
+Monthly Time Series Data
+
+Access monthly time series data for one or multiple weather stations.
+"""
+
+from typing import List, Optional
+from datetime import datetime, date
+
+import pandas as pd
+
+from meteostat.core.data import data_service
+from meteostat.enumerations import Parameter, Provider, Granularity
+from meteostat.typing import Station, Request
+from meteostat.api.point import Point
+from meteostat.utils.parsers import parse_station, parse_time
+
+DEFAULT_PARAMETERS = [
+    Parameter.TEMP,
+    Parameter.TMIN,
+    Parameter.TMAX,
+    Parameter.TXMN,
+    Parameter.TXMX,
+    Parameter.PRCP,
+    Parameter.PRES,
+    Parameter.TSUN,
+]
+
+
+def monthly(
+    station: str | Station | Point | List[str | Station | Point] | pd.DataFrame,
+    start: Optional[datetime | date],
+    end: Optional[datetime | date],
+    parameters: Optional[List[Parameter]] = None,
+    providers: Optional[List[Provider]] = None,
+):
+    """
+    Access monthly time series data.
+
+    Parameters
+    ----------
+    station : str, Station, Point, List[str | Station | Point], or pd.DataFrame
+        Weather station(s) or Point(s) to query data for. Can be a single station/point or a list.
+        Points are converted to virtual stations with IDs like $0001, $0002, etc.
+    start : datetime, date, optional
+        Start date for the data query. If None, the earliest available date will be used.
+    end : datetime, date, optional
+        End date for the data query. If None, the latest available date will be used.
+    parameters : List[Parameter], optional
+        List of parameters to include in the data query. Defaults to a set of common parameters.
+    providers : List[Provider], optional
+        List of data providers to use for the query. Defaults to the monthly provider.
+
+    Returns
+    -------
+    TimeSeries
+        A TimeSeries object containing the monthly data for the specified stations and parameters.
+    """
+    if parameters is None:
+        parameters = DEFAULT_PARAMETERS
+    if providers is None:
+        providers = [Provider.MONTHLY]
+
+    req = Request(
+        granularity=Granularity.MONTHLY,
+        providers=providers,
+        parameters=parameters,
+        station=parse_station(station),
+        start=parse_time(start),
+        end=parse_time(end, is_end=True),
+    )
+
+    return data_service.fetch(req)
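A usage sketch for the new entry point; the station ID is hypothetical, and the no-argument `fetch()` call is an assumption extrapolated from the keyword form used in the `interpolate` module above:

```python
from datetime import date

from meteostat.api.monthly import monthly
from meteostat.enumerations import Parameter

# Hypothetical station ID. Omitting parameters/providers falls back to
# DEFAULT_PARAMETERS and Provider.MONTHLY as defined above.
ts = monthly(
    "10637",
    date(2020, 1, 1),
    date(2024, 12, 31),
    parameters=[Parameter.TEMP, Parameter.PRCP],
)

df = ts.fetch()  # assumed no-arg fetch; the kwarg form appears in interpolate.py
print(df.head())
```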