meteostat 1.7.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meteostat/__init__.py +32 -19
- meteostat/api/daily.py +76 -0
- meteostat/api/hourly.py +80 -0
- meteostat/api/interpolate.py +240 -0
- meteostat/api/inventory.py +59 -0
- meteostat/api/merge.py +103 -0
- meteostat/api/monthly.py +73 -0
- meteostat/api/normals.py +144 -0
- meteostat/api/point.py +30 -0
- meteostat/api/stations.py +234 -0
- meteostat/api/timeseries.py +334 -0
- meteostat/core/cache.py +212 -59
- meteostat/core/config.py +158 -0
- meteostat/core/data.py +199 -0
- meteostat/core/logger.py +9 -0
- meteostat/core/network.py +82 -0
- meteostat/core/parameters.py +112 -0
- meteostat/core/providers.py +184 -0
- meteostat/core/schema.py +170 -0
- meteostat/core/validator.py +38 -0
- meteostat/enumerations.py +149 -0
- meteostat/interpolation/idw.py +120 -0
- meteostat/interpolation/lapserate.py +91 -0
- meteostat/interpolation/nearest.py +31 -0
- meteostat/parameters.py +354 -0
- meteostat/providers/dwd/climat.py +166 -0
- meteostat/providers/dwd/daily.py +144 -0
- meteostat/providers/dwd/hourly.py +218 -0
- meteostat/providers/dwd/monthly.py +138 -0
- meteostat/providers/dwd/mosmix.py +351 -0
- meteostat/providers/dwd/poi.py +117 -0
- meteostat/providers/dwd/shared.py +155 -0
- meteostat/providers/eccc/daily.py +87 -0
- meteostat/providers/eccc/hourly.py +104 -0
- meteostat/providers/eccc/monthly.py +66 -0
- meteostat/providers/eccc/shared.py +45 -0
- meteostat/providers/index.py +496 -0
- meteostat/providers/meteostat/daily.py +65 -0
- meteostat/providers/meteostat/daily_derived.py +110 -0
- meteostat/providers/meteostat/hourly.py +66 -0
- meteostat/providers/meteostat/monthly.py +45 -0
- meteostat/providers/meteostat/monthly_derived.py +106 -0
- meteostat/providers/meteostat/shared.py +93 -0
- meteostat/providers/metno/forecast.py +186 -0
- meteostat/providers/noaa/ghcnd.py +228 -0
- meteostat/providers/noaa/isd_lite.py +142 -0
- meteostat/providers/noaa/metar.py +163 -0
- meteostat/typing.py +113 -0
- meteostat/utils/conversions.py +231 -0
- meteostat/utils/data.py +194 -0
- meteostat/utils/geo.py +28 -0
- meteostat/utils/parsers.py +168 -0
- meteostat/utils/types.py +113 -0
- meteostat/utils/validators.py +31 -0
- meteostat-2.0.0.dist-info/METADATA +134 -0
- meteostat-2.0.0.dist-info/RECORD +63 -0
- {meteostat-1.7.5.dist-info → meteostat-2.0.0.dist-info}/WHEEL +1 -2
- meteostat/core/loader.py +0 -103
- meteostat/core/warn.py +0 -34
- meteostat/enumerations/granularity.py +0 -22
- meteostat/interface/base.py +0 -39
- meteostat/interface/daily.py +0 -118
- meteostat/interface/hourly.py +0 -154
- meteostat/interface/meteodata.py +0 -210
- meteostat/interface/monthly.py +0 -109
- meteostat/interface/normals.py +0 -245
- meteostat/interface/point.py +0 -143
- meteostat/interface/stations.py +0 -252
- meteostat/interface/timeseries.py +0 -237
- meteostat/series/aggregate.py +0 -48
- meteostat/series/convert.py +0 -28
- meteostat/series/count.py +0 -17
- meteostat/series/coverage.py +0 -20
- meteostat/series/fetch.py +0 -28
- meteostat/series/interpolate.py +0 -47
- meteostat/series/normalize.py +0 -76
- meteostat/series/stations.py +0 -22
- meteostat/units.py +0 -149
- meteostat/utilities/__init__.py +0 -0
- meteostat/utilities/aggregations.py +0 -37
- meteostat/utilities/endpoint.py +0 -33
- meteostat/utilities/helpers.py +0 -70
- meteostat/utilities/mutations.py +0 -85
- meteostat/utilities/validations.py +0 -30
- meteostat-1.7.5.dist-info/METADATA +0 -112
- meteostat-1.7.5.dist-info/RECORD +0 -39
- meteostat-1.7.5.dist-info/top_level.txt +0 -1
- /meteostat/{core → api}/__init__.py +0 -0
- /meteostat/{enumerations → interpolation}/__init__.py +0 -0
- /meteostat/{interface → providers}/__init__.py +0 -0
- /meteostat/{interface/interpolate.py → py.typed} +0 -0
- /meteostat/{series → utils}/__init__.py +0 -0
- {meteostat-1.7.5.dist-info → meteostat-2.0.0.dist-info/licenses}/LICENSE +0 -0
meteostat/__init__.py
CHANGED
|
@@ -2,34 +2,47 @@
|
|
|
2
2
|
█▀▄▀█ █▀▀ ▀█▀ █▀▀ █▀█ █▀ ▀█▀ ▄▀█ ▀█▀
|
|
3
3
|
█░▀░█ ██▄ ░█░ ██▄ █▄█ ▄█ ░█░ █▀█ ░█░
|
|
4
4
|
|
|
5
|
-
A Python library for accessing open weather and climate data
|
|
5
|
+
A Python library for accessing open weather and climate data.
|
|
6
6
|
|
|
7
7
|
Meteorological data provided by Meteostat (https://dev.meteostat.net)
|
|
8
|
-
under the terms of the Creative Commons Attribution
|
|
9
|
-
|
|
8
|
+
under the terms of the Creative Commons Attribution 4.0 International
|
|
9
|
+
License.
|
|
10
10
|
|
|
11
11
|
The code is licensed under the MIT license.
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
__appname__ = "meteostat"
|
|
15
|
-
__version__ = "
|
|
15
|
+
__version__ = "2.0.0"
|
|
16
16
|
|
|
17
|
-
from .
|
|
18
|
-
from .
|
|
19
|
-
from .
|
|
20
|
-
from .
|
|
21
|
-
from .
|
|
22
|
-
from .
|
|
23
|
-
from .
|
|
24
|
-
from .
|
|
17
|
+
from meteostat.api.daily import daily
|
|
18
|
+
from meteostat.api.hourly import hourly
|
|
19
|
+
from meteostat.api.interpolate import interpolate
|
|
20
|
+
from meteostat.api.merge import merge
|
|
21
|
+
from meteostat.api.monthly import monthly
|
|
22
|
+
from meteostat.api.normals import normals
|
|
23
|
+
from meteostat.api.point import Point
|
|
24
|
+
from meteostat.api.stations import stations
|
|
25
|
+
from meteostat.core.cache import purge
|
|
26
|
+
from meteostat.core.config import config
|
|
27
|
+
from meteostat.enumerations import Parameter, Provider, UnitSystem
|
|
28
|
+
from meteostat.interpolation.lapserate import lapse_rate
|
|
29
|
+
from meteostat.typing import Station
|
|
25
30
|
|
|
31
|
+
# Export public API
|
|
26
32
|
__all__ = [
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
33
|
+
"config",
|
|
34
|
+
"daily",
|
|
35
|
+
"hourly",
|
|
36
|
+
"interpolate",
|
|
37
|
+
"lapse_rate",
|
|
38
|
+
"merge",
|
|
39
|
+
"monthly",
|
|
40
|
+
"normals",
|
|
41
|
+
"Parameter",
|
|
30
42
|
"Point",
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
"
|
|
43
|
+
"Provider",
|
|
44
|
+
"purge",
|
|
45
|
+
"Station",
|
|
46
|
+
"stations",
|
|
47
|
+
"UnitSystem",
|
|
35
48
|
]
|
meteostat/api/daily.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Daily Time Series Data
|
|
3
|
+
|
|
4
|
+
Access daily time series data for one or multiple weather stations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
from datetime import datetime, date
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from meteostat.core.data import data_service
|
|
13
|
+
from meteostat.enumerations import Parameter, Provider, Granularity
|
|
14
|
+
from meteostat.typing import Station, Request
|
|
15
|
+
from meteostat.api.point import Point
|
|
16
|
+
from meteostat.utils.parsers import parse_station, parse_time
|
|
17
|
+
|
|
18
|
+
DEFAULT_PARAMETERS = [
|
|
19
|
+
Parameter.TEMP,
|
|
20
|
+
Parameter.TMIN,
|
|
21
|
+
Parameter.TMAX,
|
|
22
|
+
Parameter.RHUM,
|
|
23
|
+
Parameter.PRCP,
|
|
24
|
+
Parameter.SNWD,
|
|
25
|
+
Parameter.WSPD,
|
|
26
|
+
Parameter.WPGT,
|
|
27
|
+
Parameter.PRES,
|
|
28
|
+
Parameter.TSUN,
|
|
29
|
+
Parameter.CLDC,
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def daily(
|
|
34
|
+
station: str | Station | Point | List[str | Station | Point] | pd.DataFrame,
|
|
35
|
+
start: Optional[datetime | date],
|
|
36
|
+
end: Optional[datetime | date],
|
|
37
|
+
parameters: Optional[List[Parameter]] = None,
|
|
38
|
+
providers: Optional[List[Provider]] = None,
|
|
39
|
+
):
|
|
40
|
+
"""
|
|
41
|
+
Access daily time series data.
|
|
42
|
+
|
|
43
|
+
Parameters
|
|
44
|
+
----------
|
|
45
|
+
station : str, Station, Point, List[str | Station | Point], pd.Index, pd.Series
|
|
46
|
+
Weather station(s) or Point(s) to query data for. Can be a single station/point or a list.
|
|
47
|
+
Points are converted to virtual stations with IDs like $0001, $0002, etc.
|
|
48
|
+
start : datetime, date, optional
|
|
49
|
+
Start date for the data query. If None, the earliest available date will be used.
|
|
50
|
+
end : datetime, date, optional
|
|
51
|
+
End date for the data query. If None, the latest available date will be used.
|
|
52
|
+
parameters : List[Parameter], optional
|
|
53
|
+
List of parameters to include in the data query. Defaults to a set of common parameters.
|
|
54
|
+
providers : List[Provider], optional
|
|
55
|
+
List of data providers to use for the query. Defaults to the daily provider.
|
|
56
|
+
|
|
57
|
+
Returns
|
|
58
|
+
-------
|
|
59
|
+
TimeSeries
|
|
60
|
+
A TimeSeries object containing the daily data for the specified stations and parameters.
|
|
61
|
+
"""
|
|
62
|
+
if parameters is None:
|
|
63
|
+
parameters = DEFAULT_PARAMETERS
|
|
64
|
+
if providers is None:
|
|
65
|
+
providers = [Provider.DAILY]
|
|
66
|
+
|
|
67
|
+
req = Request(
|
|
68
|
+
granularity=Granularity.DAILY,
|
|
69
|
+
providers=providers,
|
|
70
|
+
parameters=parameters,
|
|
71
|
+
station=parse_station(station),
|
|
72
|
+
start=parse_time(start),
|
|
73
|
+
end=parse_time(end, is_end=True),
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
return data_service.fetch(req)
|
meteostat/api/hourly.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hourly Time Series Data
|
|
3
|
+
|
|
4
|
+
Access hourly time series data for one or multiple weather stations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
from datetime import datetime, date
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from meteostat.core.data import data_service
|
|
13
|
+
from meteostat.enumerations import Parameter, Provider, Granularity
|
|
14
|
+
from meteostat.typing import Station, Request
|
|
15
|
+
from meteostat.api.point import Point
|
|
16
|
+
from meteostat.utils.parsers import parse_station, parse_time
|
|
17
|
+
|
|
18
|
+
DEFAULT_PARAMETERS = [
|
|
19
|
+
Parameter.TEMP,
|
|
20
|
+
Parameter.RHUM,
|
|
21
|
+
Parameter.PRCP,
|
|
22
|
+
Parameter.SNWD,
|
|
23
|
+
Parameter.WDIR,
|
|
24
|
+
Parameter.WSPD,
|
|
25
|
+
Parameter.WPGT,
|
|
26
|
+
Parameter.PRES,
|
|
27
|
+
Parameter.TSUN,
|
|
28
|
+
Parameter.CLDC,
|
|
29
|
+
Parameter.COCO,
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def hourly(
|
|
34
|
+
station: str | Station | Point | List[str | Station | Point] | pd.DataFrame,
|
|
35
|
+
start: Optional[datetime | date],
|
|
36
|
+
end: Optional[datetime | date],
|
|
37
|
+
timezone: Optional[str] = None,
|
|
38
|
+
parameters: Optional[List[Parameter]] = None,
|
|
39
|
+
providers: Optional[List[Provider]] = None,
|
|
40
|
+
):
|
|
41
|
+
"""
|
|
42
|
+
Access hourly time series data.
|
|
43
|
+
|
|
44
|
+
Parameters
|
|
45
|
+
----------
|
|
46
|
+
station : str, Station, Point, List[str | Station | Point], pd.Index, pd.Series
|
|
47
|
+
Weather station(s) or Point(s) to query data for. Can be a single station/point or a list.
|
|
48
|
+
Points are converted to virtual stations with IDs like $0001, $0002, etc.
|
|
49
|
+
start : datetime, date, optional
|
|
50
|
+
Start date for the data query. If None, the earliest available date will be used.
|
|
51
|
+
end : datetime, date, optional
|
|
52
|
+
End date for the data query. If None, the latest available date will be used.
|
|
53
|
+
timezone : str, optional
|
|
54
|
+
Time zone for the data query. If None, UTC will be used.
|
|
55
|
+
parameters : List[Parameter], optional
|
|
56
|
+
List of parameters to include in the data query. Defaults to a set of common parameters.
|
|
57
|
+
providers : List[Provider], optional
|
|
58
|
+
List of data providers to use for the query. Defaults to the hourly provider.
|
|
59
|
+
|
|
60
|
+
Returns
|
|
61
|
+
-------
|
|
62
|
+
TimeSeries
|
|
63
|
+
A TimeSeries object containing the hourly data for the specified stations and parameters.
|
|
64
|
+
"""
|
|
65
|
+
if parameters is None:
|
|
66
|
+
parameters = DEFAULT_PARAMETERS
|
|
67
|
+
if providers is None:
|
|
68
|
+
providers = [Provider.HOURLY]
|
|
69
|
+
|
|
70
|
+
req = Request(
|
|
71
|
+
granularity=Granularity.HOURLY,
|
|
72
|
+
providers=providers,
|
|
73
|
+
parameters=parameters,
|
|
74
|
+
station=parse_station(station),
|
|
75
|
+
start=parse_time(start, timezone),
|
|
76
|
+
end=parse_time(end, timezone, is_end=True),
|
|
77
|
+
timezone=timezone,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
return data_service.fetch(req)
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Interpolation Module
|
|
3
|
+
|
|
4
|
+
Provides spatial interpolation functions for meteorological data.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Optional, Union
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from meteostat.api.point import Point
|
|
13
|
+
from meteostat.api.timeseries import TimeSeries
|
|
14
|
+
from meteostat.typing import Station
|
|
15
|
+
from meteostat.interpolation.lapserate import apply_lapse_rate
|
|
16
|
+
from meteostat.interpolation.nearest import nearest_neighbor
|
|
17
|
+
from meteostat.interpolation.idw import inverse_distance_weighting
|
|
18
|
+
from meteostat.utils.data import aggregate_sources, reshape_by_source, stations_to_df
|
|
19
|
+
from meteostat.utils.geo import get_distance
|
|
20
|
+
from meteostat.utils.parsers import parse_station
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _create_timeseries(
|
|
24
|
+
ts: TimeSeries, point: Point, df: Optional[pd.DataFrame] = None
|
|
25
|
+
) -> TimeSeries:
|
|
26
|
+
"""
|
|
27
|
+
Create a TimeSeries object from interpolated DataFrame
|
|
28
|
+
"""
|
|
29
|
+
parsed = parse_station(point)
|
|
30
|
+
stations_list = [parsed] if isinstance(parsed, Station) else parsed
|
|
31
|
+
|
|
32
|
+
# Convert stations to DataFrame
|
|
33
|
+
stations_df = stations_to_df(stations_list)
|
|
34
|
+
|
|
35
|
+
return TimeSeries(
|
|
36
|
+
ts.granularity,
|
|
37
|
+
stations_df,
|
|
38
|
+
df=df,
|
|
39
|
+
start=ts.start,
|
|
40
|
+
end=ts.end,
|
|
41
|
+
timezone=ts.timezone,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _add_source_columns(
|
|
46
|
+
result: pd.DataFrame,
|
|
47
|
+
df: pd.DataFrame,
|
|
48
|
+
) -> pd.DataFrame:
|
|
49
|
+
"""
|
|
50
|
+
Add source columns to the result DataFrame
|
|
51
|
+
"""
|
|
52
|
+
source_cols = [c for c in df.columns if c.endswith("_source")]
|
|
53
|
+
if source_cols:
|
|
54
|
+
grouped = df.groupby("time")[source_cols].agg(aggregate_sources)
|
|
55
|
+
if isinstance(grouped, pd.Series):
|
|
56
|
+
grouped = grouped.to_frame(name=source_cols[0])
|
|
57
|
+
grouped.index.name = "time"
|
|
58
|
+
|
|
59
|
+
# Safely align on time and add/fill source columns without causing overlaps
|
|
60
|
+
result_has_time_col = "time" in result.columns
|
|
61
|
+
if result_has_time_col:
|
|
62
|
+
result = result.set_index("time")
|
|
63
|
+
|
|
64
|
+
# Ensure both frames align on the same index (time)
|
|
65
|
+
# For each source column, add it if missing or fill NaNs if present
|
|
66
|
+
for col in source_cols:
|
|
67
|
+
if col in grouped.columns:
|
|
68
|
+
if col in result.columns:
|
|
69
|
+
# Fill missing values in result using aggregated sources
|
|
70
|
+
result[col] = result[col].where(result[col].notna(), grouped[col])
|
|
71
|
+
else:
|
|
72
|
+
# Add aggregated source column
|
|
73
|
+
result[col] = grouped[col]
|
|
74
|
+
|
|
75
|
+
if result_has_time_col:
|
|
76
|
+
result = result.reset_index()
|
|
77
|
+
return result
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def interpolate(
|
|
81
|
+
ts: TimeSeries,
|
|
82
|
+
point: Point,
|
|
83
|
+
distance_threshold: Union[int, None] = 5000,
|
|
84
|
+
elevation_threshold: Union[int, None] = 50,
|
|
85
|
+
elevation_weight: float = 10,
|
|
86
|
+
power: float = 2.0,
|
|
87
|
+
lapse_rate: Union[float, None] = 6.5,
|
|
88
|
+
lapse_rate_threshold: int = 50,
|
|
89
|
+
) -> TimeSeries:
|
|
90
|
+
"""
|
|
91
|
+
Interpolate time series data spatially to a specific point.
|
|
92
|
+
|
|
93
|
+
Parameters
|
|
94
|
+
----------
|
|
95
|
+
ts : TimeSeries
|
|
96
|
+
The time series to interpolate.
|
|
97
|
+
point : Point
|
|
98
|
+
The point to interpolate the data for.
|
|
99
|
+
distance_threshold : int, optional
|
|
100
|
+
Maximum distance (in meters) to use nearest neighbor (default: 5000).
|
|
101
|
+
Beyond this, IDW is used.
|
|
102
|
+
elevation_threshold : int, optional
|
|
103
|
+
Maximum elevation difference (in meters) to use nearest neighbor (default: 50).
|
|
104
|
+
Beyond this, IDW is used even if distance is within threshold.
|
|
105
|
+
elevation_weight : float, optional
|
|
106
|
+
Weight for elevation difference in distance calculation (default: 10).
|
|
107
|
+
The effective distance is calculated as:
|
|
108
|
+
sqrt(horizontal_distance^2 + (elevation_diff * elevation_weight)^2)
|
|
109
|
+
power : float, optional
|
|
110
|
+
Power parameter for IDW (default: 2.0). Higher values give more
|
|
111
|
+
weight to closer stations.
|
|
112
|
+
lapse_rate : float, optional
|
|
113
|
+
Apply lapse rate correction based on elevation difference (default: 6.5).
|
|
114
|
+
lapse_rate_threshold : int, optional
|
|
115
|
+
Elevation difference threshold (in meters) to apply lapse rate correction
|
|
116
|
+
(default: 50). If the elevation difference between the point and stations
|
|
117
|
+
is less than this, no correction is applied.
|
|
118
|
+
|
|
119
|
+
Returns
|
|
120
|
+
-------
|
|
121
|
+
TimeSeries
|
|
122
|
+
A TimeSeries containing the interpolated data for the specified point,
|
|
123
|
+
or an empty TimeSeries if no data is available.
|
|
124
|
+
"""
|
|
125
|
+
# Fetch DataFrame, filling missing values and adding location data
|
|
126
|
+
df = ts.fetch(fill=True, location=True, sources=True)
|
|
127
|
+
|
|
128
|
+
# If no data is returned, return None
|
|
129
|
+
if df is None:
|
|
130
|
+
return _create_timeseries(ts, point)
|
|
131
|
+
|
|
132
|
+
# Add distance column
|
|
133
|
+
df["distance"] = get_distance(
|
|
134
|
+
point.latitude, point.longitude, df["latitude"], df["longitude"]
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
# Add effective distance column if elevation is available
|
|
138
|
+
if point.elevation is not None and "elevation" in df.columns:
|
|
139
|
+
elev_diff = np.abs(df["elevation"] - point.elevation)
|
|
140
|
+
df["effective_distance"] = np.sqrt(
|
|
141
|
+
df["distance"] ** 2 + (elev_diff * elevation_weight) ** 2
|
|
142
|
+
)
|
|
143
|
+
else:
|
|
144
|
+
df["effective_distance"] = df["distance"]
|
|
145
|
+
|
|
146
|
+
# Add elevation difference column
|
|
147
|
+
if "elevation" in df.columns and point.elevation is not None:
|
|
148
|
+
df["elevation_diff"] = np.abs(df["elevation"] - point.elevation)
|
|
149
|
+
else:
|
|
150
|
+
df["elevation_diff"] = np.nan
|
|
151
|
+
|
|
152
|
+
# Apply lapse rate if specified and elevation is available
|
|
153
|
+
if (
|
|
154
|
+
lapse_rate
|
|
155
|
+
and point.elevation
|
|
156
|
+
and df["elevation_diff"].max() >= lapse_rate_threshold
|
|
157
|
+
):
|
|
158
|
+
df = apply_lapse_rate(df, point.elevation, lapse_rate)
|
|
159
|
+
|
|
160
|
+
# Check if any stations are close enough for nearest neighbor
|
|
161
|
+
min_distance = df["distance"].min()
|
|
162
|
+
use_nearest = distance_threshold is None or min_distance <= distance_threshold
|
|
163
|
+
if use_nearest and point.elevation is not None and "elevation" in df.columns:
|
|
164
|
+
# Calculate minimum elevation difference
|
|
165
|
+
min_elev_diff = np.abs(df["elevation"] - point.elevation).min()
|
|
166
|
+
use_nearest = (
|
|
167
|
+
elevation_threshold is None or min_elev_diff <= elevation_threshold
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
# Initialize variables
|
|
171
|
+
df_nearest = None
|
|
172
|
+
df_idw = None
|
|
173
|
+
|
|
174
|
+
# Perform nearest neighbor if applicable
|
|
175
|
+
if use_nearest:
|
|
176
|
+
# Filter applicable stations based on thresholds
|
|
177
|
+
distance_filter = (
|
|
178
|
+
pd.Series([True] * len(df), index=df.index)
|
|
179
|
+
if distance_threshold is None
|
|
180
|
+
else (df["distance"] <= distance_threshold)
|
|
181
|
+
)
|
|
182
|
+
elevation_filter = (
|
|
183
|
+
pd.Series([True] * len(df), index=df.index)
|
|
184
|
+
if elevation_threshold is None
|
|
185
|
+
else (np.abs(df["elevation"] - point.elevation) <= elevation_threshold)
|
|
186
|
+
)
|
|
187
|
+
df_filtered = df[distance_filter & elevation_filter]
|
|
188
|
+
df_nearest = nearest_neighbor(df_filtered, ts, point)
|
|
189
|
+
|
|
190
|
+
# Check if we need to use IDW
|
|
191
|
+
if (
|
|
192
|
+
not use_nearest
|
|
193
|
+
or df_nearest is None
|
|
194
|
+
or len(df_nearest) == 0
|
|
195
|
+
or df_nearest.isna().any().any()
|
|
196
|
+
):
|
|
197
|
+
# Perform IDW interpolation
|
|
198
|
+
idw_func = inverse_distance_weighting(power=power)
|
|
199
|
+
df_idw = idw_func(df, ts, point)
|
|
200
|
+
|
|
201
|
+
# Merge DataFrames with priority to nearest neighbor
|
|
202
|
+
if use_nearest and df_nearest is not None and len(df_nearest) > 0:
|
|
203
|
+
if df_idw is not None:
|
|
204
|
+
# Combine nearest and IDW results, prioritizing nearest values
|
|
205
|
+
result = df_nearest.combine_first(df_idw)
|
|
206
|
+
else:
|
|
207
|
+
result = df_nearest
|
|
208
|
+
else:
|
|
209
|
+
result = df_idw
|
|
210
|
+
|
|
211
|
+
# If no data is returned, return None
|
|
212
|
+
if result is None or result.empty:
|
|
213
|
+
return _create_timeseries(ts, point)
|
|
214
|
+
|
|
215
|
+
# Drop location-related columns & return
|
|
216
|
+
result = result.drop(
|
|
217
|
+
[
|
|
218
|
+
"latitude",
|
|
219
|
+
"longitude",
|
|
220
|
+
"elevation",
|
|
221
|
+
"distance",
|
|
222
|
+
"effective_distance",
|
|
223
|
+
"elevation_diff",
|
|
224
|
+
],
|
|
225
|
+
axis=1,
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
# Add source columns: aggregate all columns that end with "_source"
|
|
229
|
+
result = _add_source_columns(result, df)
|
|
230
|
+
|
|
231
|
+
# Reshape by source
|
|
232
|
+
result = reshape_by_source(result)
|
|
233
|
+
|
|
234
|
+
# Add station index
|
|
235
|
+
result["station"] = "$0001"
|
|
236
|
+
result = result.set_index("station", append=True).reorder_levels(
|
|
237
|
+
["station", "time", "source"]
|
|
238
|
+
)
|
|
239
|
+
|
|
240
|
+
return _create_timeseries(ts, point, result)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Inventory Module
|
|
3
|
+
|
|
4
|
+
Provides classes for working with weather station data inventories.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from datetime import date, datetime
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from meteostat.enumerations import Parameter
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Inventory:
|
|
16
|
+
"""
|
|
17
|
+
A weather station's data inventory
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
df: Optional[pd.DataFrame] = None
|
|
21
|
+
|
|
22
|
+
def __init__(self, df: Optional[pd.DataFrame] = None):
|
|
23
|
+
if df is not None and not df.empty:
|
|
24
|
+
self.df = df
|
|
25
|
+
|
|
26
|
+
@property
|
|
27
|
+
def start(self) -> Optional[date]:
|
|
28
|
+
"""
|
|
29
|
+
Get the earliest start date from the inventory
|
|
30
|
+
"""
|
|
31
|
+
return (
|
|
32
|
+
datetime.strptime(self.df["start"].min(), "%Y-%m-%d").date()
|
|
33
|
+
if self.df is not None
|
|
34
|
+
else None
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
@property
|
|
38
|
+
def end(self) -> Optional[date]:
|
|
39
|
+
"""
|
|
40
|
+
Get the latest end date from the inventory
|
|
41
|
+
"""
|
|
42
|
+
return (
|
|
43
|
+
datetime.strptime(self.df["end"].max(), "%Y-%m-%d").date()
|
|
44
|
+
if self.df is not None
|
|
45
|
+
else None
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
@property
|
|
49
|
+
def parameters(self) -> Optional[List[Parameter]]:
|
|
50
|
+
"""
|
|
51
|
+
Get the list of available parameters from the inventory
|
|
52
|
+
"""
|
|
53
|
+
if self.df is None:
|
|
54
|
+
return []
|
|
55
|
+
|
|
56
|
+
return [
|
|
57
|
+
Parameter[parameter.upper()]
|
|
58
|
+
for parameter in self.df.index.get_level_values("parameter").unique()
|
|
59
|
+
]
|
meteostat/api/merge.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Concatenation Module
|
|
3
|
+
|
|
4
|
+
Provides functions to concatenate multiple time series objects into one.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from copy import copy
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from typing import List, Optional
|
|
10
|
+
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
13
|
+
from meteostat.core.data import data_service
|
|
14
|
+
from meteostat.core.schema import schema_service
|
|
15
|
+
from meteostat.api.timeseries import TimeSeries
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _get_dt(
|
|
19
|
+
dt_a: Optional[datetime], dt_b: Optional[datetime], start=True
|
|
20
|
+
) -> Optional[datetime]:
|
|
21
|
+
"""
|
|
22
|
+
Return the earlier or later (depending on "start" argument) of two datetimes,
|
|
23
|
+
considering None as 'no value'.
|
|
24
|
+
|
|
25
|
+
If both are None, return None.
|
|
26
|
+
"""
|
|
27
|
+
if dt_a is None:
|
|
28
|
+
return dt_b
|
|
29
|
+
if dt_b is None:
|
|
30
|
+
return dt_a
|
|
31
|
+
return min(dt_a, dt_b) if start else max(dt_a, dt_b)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def merge(objs: List[TimeSeries]) -> TimeSeries:
|
|
35
|
+
"""
|
|
36
|
+
Merge one or multiple Meteostat time series into a common one
|
|
37
|
+
|
|
38
|
+
In case of a duplicate index, the last row will be preferred.
|
|
39
|
+
Hence, please pass newest data last.
|
|
40
|
+
|
|
41
|
+
Parameters
|
|
42
|
+
----------
|
|
43
|
+
objs : List[TimeSeries]
|
|
44
|
+
List of time series objects to concatenate
|
|
45
|
+
|
|
46
|
+
Returns
|
|
47
|
+
-------
|
|
48
|
+
TimeSeries
|
|
49
|
+
Concatenated time series object
|
|
50
|
+
|
|
51
|
+
Raises
|
|
52
|
+
------
|
|
53
|
+
ValueError
|
|
54
|
+
If the time series objects have divergent granularity or time zone
|
|
55
|
+
"""
|
|
56
|
+
ts = objs[0]
|
|
57
|
+
|
|
58
|
+
if not all(
|
|
59
|
+
obj.granularity == ts.granularity and obj.timezone == ts.timezone
|
|
60
|
+
for obj in objs[1:]
|
|
61
|
+
):
|
|
62
|
+
raise ValueError(
|
|
63
|
+
"Can't concatenate time series objects with divergent granularity or time zone"
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
stations = copy(ts.stations)
|
|
67
|
+
start = copy(ts.start)
|
|
68
|
+
end = copy(ts.end)
|
|
69
|
+
parameters = ts.parameters
|
|
70
|
+
multi_station = ts._multi_station
|
|
71
|
+
|
|
72
|
+
for obj in objs[1:]:
|
|
73
|
+
stations = (
|
|
74
|
+
pd.concat([stations, obj.stations])
|
|
75
|
+
.reset_index()
|
|
76
|
+
.drop_duplicates(subset=["id"])
|
|
77
|
+
.set_index("id")
|
|
78
|
+
)
|
|
79
|
+
start = _get_dt(start, obj.start)
|
|
80
|
+
end = _get_dt(end, obj.end, False)
|
|
81
|
+
parameters.extend(obj.parameters)
|
|
82
|
+
if (
|
|
83
|
+
obj._multi_station
|
|
84
|
+
or stations.index.get_level_values("id")[0]
|
|
85
|
+
!= obj.stations.index.get_level_values("id")[0]
|
|
86
|
+
):
|
|
87
|
+
multi_station = True
|
|
88
|
+
|
|
89
|
+
df = data_service.concat_fragments(
|
|
90
|
+
[obj._df for obj in objs if obj._df is not None],
|
|
91
|
+
list(dict.fromkeys(parameters)),
|
|
92
|
+
)
|
|
93
|
+
df = schema_service.format(df, ts.granularity)
|
|
94
|
+
|
|
95
|
+
return TimeSeries(
|
|
96
|
+
ts.granularity,
|
|
97
|
+
stations,
|
|
98
|
+
df,
|
|
99
|
+
start,
|
|
100
|
+
end,
|
|
101
|
+
ts.timezone,
|
|
102
|
+
multi_station=multi_station,
|
|
103
|
+
)
|
meteostat/api/monthly.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Monthly Time Series Data
|
|
3
|
+
|
|
4
|
+
Access monthly time series data for one or multiple weather stations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
from datetime import datetime, date
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from meteostat.core.data import data_service
|
|
13
|
+
from meteostat.enumerations import Parameter, Provider, Granularity
|
|
14
|
+
from meteostat.typing import Station, Request
|
|
15
|
+
from meteostat.api.point import Point
|
|
16
|
+
from meteostat.utils.parsers import parse_station, parse_time
|
|
17
|
+
|
|
18
|
+
DEFAULT_PARAMETERS = [
|
|
19
|
+
Parameter.TEMP,
|
|
20
|
+
Parameter.TMIN,
|
|
21
|
+
Parameter.TMAX,
|
|
22
|
+
Parameter.TXMN,
|
|
23
|
+
Parameter.TXMX,
|
|
24
|
+
Parameter.PRCP,
|
|
25
|
+
Parameter.PRES,
|
|
26
|
+
Parameter.TSUN,
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def monthly(
|
|
31
|
+
station: str | Station | Point | List[str | Station | Point] | pd.DataFrame,
|
|
32
|
+
start: Optional[datetime | date],
|
|
33
|
+
end: Optional[datetime | date],
|
|
34
|
+
parameters: Optional[List[Parameter]] = None,
|
|
35
|
+
providers: Optional[List[Provider]] = None,
|
|
36
|
+
):
|
|
37
|
+
"""
|
|
38
|
+
Access monthly time series data.
|
|
39
|
+
|
|
40
|
+
Parameters
|
|
41
|
+
----------
|
|
42
|
+
station : str, Station, Point, List[str | Station | Point], pd.Index, pd.Series
|
|
43
|
+
Weather station(s) or Point(s) to query data for. Can be a single station/point or a list.
|
|
44
|
+
Points are converted to virtual stations with IDs like $0001, $0002, etc.
|
|
45
|
+
start : datetime, date, optional
|
|
46
|
+
Start date for the data query. If None, the earliest available date will be used.
|
|
47
|
+
end : datetime, date, optional
|
|
48
|
+
End date for the data query. If None, the latest available date will be used.
|
|
49
|
+
parameters : List[Parameter], optional
|
|
50
|
+
List of parameters to include in the data query. Defaults to a set of common parameters.
|
|
51
|
+
providers : List[Provider], optional
|
|
52
|
+
List of data providers to use for the query. Defaults to the monthly provider.
|
|
53
|
+
|
|
54
|
+
Returns
|
|
55
|
+
-------
|
|
56
|
+
TimeSeries
|
|
57
|
+
A TimeSeries object containing the monthly data for the specified stations and parameters.
|
|
58
|
+
"""
|
|
59
|
+
if parameters is None:
|
|
60
|
+
parameters = DEFAULT_PARAMETERS
|
|
61
|
+
if providers is None:
|
|
62
|
+
providers = [Provider.MONTHLY]
|
|
63
|
+
|
|
64
|
+
req = Request(
|
|
65
|
+
granularity=Granularity.MONTHLY,
|
|
66
|
+
providers=providers,
|
|
67
|
+
parameters=parameters,
|
|
68
|
+
station=parse_station(station),
|
|
69
|
+
start=parse_time(start),
|
|
70
|
+
end=parse_time(end, is_end=True),
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
return data_service.fetch(req)
|