meteostat 1.7.6__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meteostat/__init__.py +32 -19
- meteostat/api/daily.py +76 -0
- meteostat/api/hourly.py +80 -0
- meteostat/api/interpolate.py +240 -0
- meteostat/api/inventory.py +59 -0
- meteostat/api/merge.py +103 -0
- meteostat/api/monthly.py +73 -0
- meteostat/api/normals.py +144 -0
- meteostat/api/point.py +30 -0
- meteostat/api/stations.py +234 -0
- meteostat/api/timeseries.py +334 -0
- meteostat/core/cache.py +212 -59
- meteostat/core/config.py +158 -0
- meteostat/core/data.py +199 -0
- meteostat/core/logger.py +9 -0
- meteostat/core/network.py +82 -0
- meteostat/core/parameters.py +112 -0
- meteostat/core/providers.py +184 -0
- meteostat/core/schema.py +170 -0
- meteostat/core/validator.py +38 -0
- meteostat/enumerations.py +149 -0
- meteostat/interpolation/idw.py +120 -0
- meteostat/interpolation/lapserate.py +91 -0
- meteostat/interpolation/nearest.py +31 -0
- meteostat/parameters.py +354 -0
- meteostat/providers/dwd/climat.py +166 -0
- meteostat/providers/dwd/daily.py +144 -0
- meteostat/providers/dwd/hourly.py +218 -0
- meteostat/providers/dwd/monthly.py +138 -0
- meteostat/providers/dwd/mosmix.py +351 -0
- meteostat/providers/dwd/poi.py +117 -0
- meteostat/providers/dwd/shared.py +155 -0
- meteostat/providers/eccc/daily.py +87 -0
- meteostat/providers/eccc/hourly.py +104 -0
- meteostat/providers/eccc/monthly.py +66 -0
- meteostat/providers/eccc/shared.py +45 -0
- meteostat/providers/index.py +496 -0
- meteostat/providers/meteostat/daily.py +65 -0
- meteostat/providers/meteostat/daily_derived.py +110 -0
- meteostat/providers/meteostat/hourly.py +66 -0
- meteostat/providers/meteostat/monthly.py +45 -0
- meteostat/providers/meteostat/monthly_derived.py +106 -0
- meteostat/providers/meteostat/shared.py +93 -0
- meteostat/providers/metno/forecast.py +186 -0
- meteostat/providers/noaa/ghcnd.py +228 -0
- meteostat/providers/noaa/isd_lite.py +142 -0
- meteostat/providers/noaa/metar.py +163 -0
- meteostat/typing.py +113 -0
- meteostat/utils/conversions.py +231 -0
- meteostat/utils/data.py +194 -0
- meteostat/utils/geo.py +28 -0
- meteostat/utils/parsers.py +168 -0
- meteostat/utils/types.py +113 -0
- meteostat/utils/validators.py +31 -0
- meteostat-2.0.0.dist-info/METADATA +134 -0
- meteostat-2.0.0.dist-info/RECORD +63 -0
- {meteostat-1.7.6.dist-info → meteostat-2.0.0.dist-info}/WHEEL +1 -2
- meteostat/core/loader.py +0 -103
- meteostat/core/warn.py +0 -34
- meteostat/enumerations/granularity.py +0 -22
- meteostat/interface/base.py +0 -39
- meteostat/interface/daily.py +0 -118
- meteostat/interface/hourly.py +0 -154
- meteostat/interface/meteodata.py +0 -210
- meteostat/interface/monthly.py +0 -109
- meteostat/interface/normals.py +0 -245
- meteostat/interface/point.py +0 -143
- meteostat/interface/stations.py +0 -252
- meteostat/interface/timeseries.py +0 -237
- meteostat/series/aggregate.py +0 -48
- meteostat/series/convert.py +0 -28
- meteostat/series/count.py +0 -17
- meteostat/series/coverage.py +0 -20
- meteostat/series/fetch.py +0 -28
- meteostat/series/interpolate.py +0 -47
- meteostat/series/normalize.py +0 -76
- meteostat/series/stations.py +0 -22
- meteostat/units.py +0 -149
- meteostat/utilities/__init__.py +0 -0
- meteostat/utilities/aggregations.py +0 -37
- meteostat/utilities/endpoint.py +0 -33
- meteostat/utilities/helpers.py +0 -70
- meteostat/utilities/mutations.py +0 -89
- meteostat/utilities/validations.py +0 -30
- meteostat-1.7.6.dist-info/METADATA +0 -112
- meteostat-1.7.6.dist-info/RECORD +0 -39
- meteostat-1.7.6.dist-info/top_level.txt +0 -1
- /meteostat/{core → api}/__init__.py +0 -0
- /meteostat/{enumerations → interpolation}/__init__.py +0 -0
- /meteostat/{interface → providers}/__init__.py +0 -0
- /meteostat/{interface/interpolate.py → py.typed} +0 -0
- /meteostat/{series → utils}/__init__.py +0 -0
- {meteostat-1.7.6.dist-info → meteostat-2.0.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Provider Service
|
|
3
|
+
|
|
4
|
+
The Provider Service provides methods to interact with data providers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from importlib import import_module
|
|
9
|
+
from statistics import fmean
|
|
10
|
+
from typing import List, Optional, TypeGuard, cast
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
from meteostat.core.logger import logger
|
|
15
|
+
from meteostat.enumerations import Granularity, Priority, Provider
|
|
16
|
+
from meteostat.providers.index import DEFAULT_PROVIDERS
|
|
17
|
+
from meteostat.typing import (
|
|
18
|
+
ProviderRequest,
|
|
19
|
+
ProviderSpec,
|
|
20
|
+
Station,
|
|
21
|
+
Request,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ProviderService:
    """
    Provider Service

    Holds the registry of data providers and offers lookup,
    priority scoring, request-based filtering and data fetching.
    """

    _providers: List[ProviderSpec]

    def __init__(self, providers: List[ProviderSpec]) -> None:
        self._providers = providers

    @property
    def providers(self) -> List[ProviderSpec]:
        """
        Get supported providers
        """
        return self._providers

    def register(self, provider: ProviderSpec) -> None:
        """
        Register a provider
        """
        self._providers.append(provider)

    def get_provider(self, provider_id: Provider | str) -> Optional[ProviderSpec]:
        """
        Get provider by ID

        Returns None if no provider with the given ID is registered.
        """
        return next(
            (provider for provider in self._providers if provider.id == provider_id),
            None,
        )

    def _get_provider_priority(self, provider_id: Provider | str) -> int:
        """
        Get priority of a provider by its ID

        A granularity-dependent baseline is added to the provider's own
        priority so providers of a coarser granularity always rank above
        finer-grained ones. Unknown providers yield Priority.NONE (0).
        """
        baselines = {
            Granularity.HOURLY: 0,
            Granularity.DAILY: 100,
            Granularity.MONTHLY: 200,
            Granularity.NORMALS: 300,
        }

        provider = self.get_provider(provider_id)

        if not provider:
            return Priority.NONE

        baseline = baselines[provider.granularity]

        return int(provider.priority + baseline)

    def get_source_priority(self, source: str) -> float:
        """
        Get priority of a source string

        A source string holds one or more space-separated provider IDs.
        For multiple IDs, the arithmetic mean of their priorities is returned.
        """
        provider_ids = source.split(" ")

        if len(provider_ids) == 1:
            return self._get_provider_priority(provider_ids[0])

        priorities = [
            self._get_provider_priority(provider) for provider in provider_ids
        ]

        return fmean(priorities)

    def filter_providers(self, query: Request, station: Station) -> List[Provider]:
        """
        Get a filtered list of providers

        Drops requested providers which are unknown, have a diverging
        granularity, share no parameters with the request, do not serve
        the station's country, or do not overlap the requested time range.
        """

        def _filter(provider_id: Provider) -> TypeGuard[Provider]:
            provider = self.get_provider(provider_id)

            if provider is None:
                return False

            # Filter out providers with diverging granularities
            if provider.granularity is not query.granularity:
                logger.error(
                    "Provider '%s' does not support granularity '%s'",
                    provider_id,
                    query.granularity,
                )
                return False

            # Filter out providers with no overlap in parameters
            if set(provider.parameters).isdisjoint(query.parameters):
                logger.info(
                    "Provider '%s' does not support any requested parameter",
                    provider_id,
                )
                return False

            # Filter out providers which do not serve the station's country
            if provider.countries and station.country not in provider.countries:
                logger.info(
                    "Skipping provider '%s' as it does not serve the station's country ('%s')",
                    provider_id,
                    station.country,
                )
                return False

            # Filter out providers which only started providing data after the request's end date
            # NOTE: provider.start may be None (see fetch_data), so guard it here as well
            if (
                query.end
                and provider.start
                and query.end < datetime.combine(provider.start, datetime.min.time())
            ):
                logger.info(
                    "Skipping provider '%s' as it only started providing data after request end",
                    provider_id,
                )
                return False

            # Filter out providers which stopped providing data before the request's start date
            if (
                provider.end is not None
                and query.start is not None
                and query.start > datetime.combine(provider.end, datetime.max.time())
            ):
                logger.info(
                    "Skipping provider '%s' as it stopped providing data before request start",
                    provider_id,
                )
                return False

            return True

        return list(filter(_filter, query.providers))

    def fetch_data(
        self, provider_id: Provider, req: Request, station: Station
    ) -> Optional[pd.DataFrame]:
        """
        Fetch data from a given provider

        Missing request bounds are defaulted from the provider's own
        start/end dates (end falls back to the current time).
        Returns None for unknown providers.
        """
        provider = self.get_provider(provider_id)

        if not provider:
            return None

        query = ProviderRequest(
            station=station,
            start=req.start
            or (
                datetime.combine(provider.start, datetime.min.time())
                if provider.start
                else None
            ),
            end=req.end or (provider.end or datetime.now()),
            parameters=req.parameters,
        )

        # Providers are implemented as modules exposing a fetch(query) function
        module = import_module(cast(str, provider.module))
        df = module.fetch(query)

        return df


provider_service = ProviderService(providers=DEFAULT_PROVIDERS)
|
meteostat/core/schema.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema Service
|
|
3
|
+
|
|
4
|
+
The Schema Service provides methods to clean and format
|
|
5
|
+
DataFrames based on a set of parameters.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from copy import copy
|
|
9
|
+
from inspect import isfunction
|
|
10
|
+
from typing import Callable, List
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
from meteostat.core.logger import logger
|
|
15
|
+
from meteostat.core.parameters import parameter_service
|
|
16
|
+
from meteostat.core.validator import Validator
|
|
17
|
+
from meteostat.enumerations import Granularity, Parameter, UnitSystem
|
|
18
|
+
from meteostat.utils.conversions import CONVERSION_MAPPINGS, to_condition, to_direction
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class SchemaService:
    """
    Schema service

    Cleans, formats and converts DataFrames based on the parameter
    definitions registered with the parameter service.
    """

    @staticmethod
    def _apply_validator(
        validator: Validator | Callable, df: pd.DataFrame, col: str
    ) -> pd.Series:
        """
        Apply a validator

        Returns a boolean Series aligned with df.index where True means
        the check passed (or was skipped) for that row.
        """
        # A plain function is treated as a factory that produces the Validator
        if isfunction(validator):
            v: Validator = validator()  # type: ignore
        else:
            v = validator

        # Anything that is not a Validator counts as "always passes"
        if not isinstance(v, Validator):
            return pd.Series(data=True, index=df.index, dtype=bool)

        if v.ignore_na:
            # Run the check on non-null rows only; null rows default to True
            result = pd.Series(data=True, index=df.index, dtype=bool)
            test_result = v.test(
                df.loc[df[col].notnull()][col],
                df.loc[df[col].notnull()],
                col,
            )
            # Merge the partial result back into the all-True default
            if isinstance(test_result, pd.Series):
                result.update(test_result)
            return result.astype(bool)

        test_result = v.test(df[col], df, col)
        # A scalar bool applies to every row of the column
        if isinstance(test_result, bool):
            return pd.Series(data=test_result, index=df.index, dtype=bool)
        return test_result

    @staticmethod
    def purge(df: pd.DataFrame, parameters: List[Parameter]) -> pd.DataFrame:
        """
        Remove DataFrame columns which are not a known parameter
        """
        columns = [parameter for parameter in parameters if parameter in df.columns]
        return df[columns]

    @staticmethod
    def fill(df: pd.DataFrame, parameters: List[Parameter]) -> pd.DataFrame:
        """
        Add missing schema columns to DataFrame

        Missing parameter columns are added with None values.
        Note: mutates and returns the passed DataFrame.
        """
        for parameter_id in parameters:
            if parameter_id not in df:
                df[parameter_id] = None

        return df

    @staticmethod
    def format(df: pd.DataFrame, granularity: Granularity) -> pd.DataFrame:
        """
        Set data types and round values

        Columns not known to the parameter service are logged and left
        untouched. Returns a copy; the input DataFrame is not modified.
        """
        temp = copy(df)

        for col in df.columns:
            parameter = parameter_service.get_parameter(col, granularity)

            if not parameter:
                logger.warning(
                    "Column %s is not a valid column name and won't be formatted", col
                )
                continue

            # Integer parameters are rounded before the cast so values such
            # as 1.6 become 2 rather than being truncated
            if "int" in str(parameter.dtype).lower():
                temp[col] = pd.to_numeric(temp[col]).round(0)

            temp[col] = temp[col].astype(parameter.dtype, errors="ignore")

            # Float parameters are rounded to one decimal place
            if "float" in str(parameter.dtype).lower():
                temp[col] = temp[col].round(1)

        return temp

    @classmethod
    def clean(
        cls, df: pd.DataFrame, granularity: Granularity, fill=None
    ) -> pd.DataFrame:
        """
        Remove invalid data from a DataFrame

        Values failing any of a parameter's validators are replaced with
        `fill` (None by default). Source columns ("*_source") are skipped.
        Returns a copy; the input DataFrame is not modified.
        """
        temp = copy(df)

        for col in temp.columns:
            if "_source" in col:
                continue

            parameter = parameter_service.get_parameter(col, granularity)

            if not parameter:
                logger.warning(
                    "Column %s is not a valid column name and won't be cleaned", col
                )
                continue

            # Apply each validator in sequence; failures are overwritten
            # with the fill value before the next validator runs
            for validator in parameter.validators:
                test = cls._apply_validator(validator, temp, col)
                temp.loc[~test, col] = fill

        return temp

    def humanize(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Convert wind direction and condition codes to human-readable values
        """
        temp = copy(df)

        if Parameter.WDIR in temp.columns:
            temp[Parameter.WDIR] = temp[Parameter.WDIR].apply(to_direction)

        if Parameter.COCO in temp.columns:
            temp[Parameter.COCO] = temp[Parameter.COCO].apply(to_condition)

        return temp

    @classmethod
    def convert(cls, df, granularity: Granularity, units: UnitSystem) -> pd.DataFrame:
        """
        Convert units in a DataFrame

        Applies the conversion function registered for the parameter's
        unit and the target unit system, if one exists. Source columns
        ("*_source") and unknown columns are skipped.
        """
        temp = copy(df)

        for col in temp.columns:
            if "_source" in col:
                continue

            parameter = parameter_service.get_parameter(col, granularity)

            if not parameter:
                logger.warning(
                    "Column %s is not a valid column name and won't be converted", col
                )
                continue

            if parameter.unit in CONVERSION_MAPPINGS:
                if units in CONVERSION_MAPPINGS[parameter.unit]:
                    conversion_func = CONVERSION_MAPPINGS[parameter.unit][units]
                    temp[col] = temp[col].apply(conversion_func)

        return temp


schema_service = SchemaService()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema Column Validator
|
|
3
|
+
|
|
4
|
+
This class is used to validate DataFrame columns based
|
|
5
|
+
on a parameter specification.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from inspect import signature
|
|
9
|
+
from typing import Callable
|
|
10
|
+
|
|
11
|
+
from pandas import DataFrame, Series
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Validator:
|
|
15
|
+
"""
|
|
16
|
+
Schema Column Validator
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
func: Callable
|
|
20
|
+
ignore_na = False
|
|
21
|
+
is_relational = False
|
|
22
|
+
|
|
23
|
+
def __init__(self, func: Callable, ignore_na=False, is_relational=False):
|
|
24
|
+
self.func = func
|
|
25
|
+
self.ignore_na = ignore_na
|
|
26
|
+
self.is_relational = is_relational
|
|
27
|
+
|
|
28
|
+
def test(self, series: Series, df: DataFrame, column: str) -> bool | Series:
|
|
29
|
+
"""
|
|
30
|
+
Run validator
|
|
31
|
+
|
|
32
|
+
Returns a bool series:
|
|
33
|
+
True -> Check passed
|
|
34
|
+
False -> Check failed
|
|
35
|
+
"""
|
|
36
|
+
arg_count = len((signature(self.func)).parameters)
|
|
37
|
+
args = [series, df, column]
|
|
38
|
+
return self.func(*args[0:arg_count])
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Meteostat Enumerations
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from enum import StrEnum, IntEnum
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Granularity(StrEnum):
    """
    The different levels of time series granularity
    """

    HOURLY = "hourly"
    DAILY = "daily"
    MONTHLY = "monthly"
    NORMALS = "normals"  # long-term climate normals
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Frequency(StrEnum):
    """
    The different levels of time series frequency

    Values are pandas offset aliases ("h" = hourly, "D" = calendar day,
    "MS" = month start), usable e.g. with pd.Grouper(freq=...).
    """

    HOURLY = "h"
    DAILY = "D"
    MONTHLY = "MS"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Parameter(StrEnum):
    """
    The different meteorological parameters supported by Meteostat

    The string values double as DataFrame column names.
    """

    TEMP = "temp"  # Air temperature (aggregation: mean)
    TMIN = "tmin"  # Daily minimum air temperature (aggregation: mean)
    TMAX = "tmax"  # Daily maximum air temperature (aggregation: mean)
    TXMN = "txmn"  # Extreme minimum air temperature (aggregation: min)
    TXMX = "txmx"  # Extreme maximum air temperature (aggregation: max)
    DWPT = "dwpt"  # Dew point (aggregation: mean)
    PRCP = "prcp"  # Precipitation (aggregation: sum)
    PDAY = "pday"  # Days with precipitation equal to or greater than 1 millimeter (aggregation: sum)
    WDIR = "wdir"  # Wind direction at observation time
    WSPD = "wspd"  # Wind speed (aggregation: mean)
    WPGT = "wpgt"  # Peak wind gust (aggregation: max)
    RHUM = "rhum"  # Relative humidity (aggregation: mean)
    PRES = "pres"  # Air pressure at MSL (aggregation: mean)
    SNWD = "snwd"  # Snow depth on ground
    SNOW = "snow"  # Snowfall (aggregation: sum)
    TSUN = "tsun"  # Sunshine duration (aggregation: sum)
    SGHI = "sghi"  # TBD (presumably solar global horizontal irradiance -- confirm)
    SDNI = "sdni"  # TBD (presumably solar direct normal irradiance -- confirm)
    SDHI = "sdhi"  # TBD (presumably solar diffuse horizontal irradiance -- confirm)
    CLDC = "cldc"  # Cloud cover (aggregation: mean)
    VSBY = "vsby"  # Visibility (aggregation: mean)
    COCO = "coco"  # Weather condition code at time of observation
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class Unit(StrEnum):
    """
    Data Units

    Display symbols for the units attached to parameter definitions;
    also used as keys of the unit conversion mappings.
    """

    CELSIUS = "°C"
    FAHRENHEIT = "°F"
    KELVIN = "K"
    PERCENTAGE = "%"
    HPA = "hPa"  # hectopascal (air pressure)
    MILLIMETERS = "mm"
    CENTIMETERS = "cm"
    METERS = "m"
    KMH = "km/h"  # kilometers per hour
    DEGREES = "°"  # compass degrees (wind direction)
    MINUTES = "min"
    OKTAS = "okta"  # eighths of sky covered (cloud cover)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class UnitSystem(StrEnum):
    """
    Unit Systems

    Target systems for unit conversion of time series data.
    """

    SI = "si"
    METRIC = "metric"
    IMPERIAL = "imperial"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class Provider(StrEnum):
    """
    Providers supported by Meteostat
    """

    # Agency-specific providers (NOAA, DWD, ECCC, MET Norway modules)
    ISD_LITE = "isd_lite"
    METAR = "metar"
    GHCND = "ghcnd"
    CLIMAT = "climat"
    DWD_HOURLY = "dwd_hourly"
    DWD_POI = "dwd_poi"
    DWD_MOSMIX = "dwd_mosmix"
    DWD_DAILY = "dwd_daily"
    DWD_MONTHLY = "dwd_monthly"
    ECCC_HOURLY = "eccc_hourly"
    ECCC_DAILY = "eccc_daily"
    ECCC_MONTHLY = "eccc_monthly"
    METNO_FORECAST = "metno_forecast"

    # Meteostat bulk datasets (see providers/meteostat/)
    HOURLY = "hourly"
    DAILY = "daily"
    DAILY_DERIVED = "daily_derived"
    MONTHLY = "monthly"
    MONTHLY_DERIVED = "monthly_derived"

    # NOTE(review): no provider modules visible for these -- presumably
    # legacy source identifiers; confirm against providers/index.py
    SYNOP = "synop"
    METAR_LEGACY = "metar_legacy"
    MODEL = "model"
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class Priority(IntEnum):
    """
    Provider priorities

    Relative ranking of providers within one granularity.
    NONE (0) is returned for unknown providers.
    """

    HIGHEST = 25
    HIGH = 20
    MEDIUM = 15
    LOW = 10
    LOWEST = 5
    NONE = 0
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class Grade(IntEnum):
    """
    Provider quality grades

    Higher values presumably indicate more authoritative data
    (record/observation over analysis/forecast) -- confirm usage.
    """

    RECORD = 4
    OBSERVATION = 3
    ANALYSIS = 2
    FORECAST = 1
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class TTL(IntEnum):
    """
    Cache TTLs

    Durations in seconds.
    """

    HOUR = 60 * 60  # one hour
    DAY = 60 * 60 * 24  # one day
    WEEK = 60 * 60 * 24 * 7  # one week
    MONTH = 60 * 60 * 24 * 30  # 30 days
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
Inverse Distance Weighting (IDW) Interpolation

Implements IDW interpolation for spatial weather data. Station distances
are read from a precomputed "effective_distance" column, which may
incorporate elevation weighting computed upstream.
"""
|
|
7
|
+
|
|
8
|
+
from typing import Callable
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
13
|
+
from meteostat.api.point import Point
|
|
14
|
+
from meteostat.api.timeseries import TimeSeries
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def inverse_distance_weighting(
    power: float = 2.0,
) -> Callable:
    """
    Create an Inverse Distance Weighting (IDW) interpolation function.

    The returned callable interpolates station records onto a single
    geographic point, computing each value as a distance-weighted average
    of the contributing stations (weight = 1 / distance**power).

    Parameters
    ----------
    power : float, optional
        Power parameter for IDW (default: 2.0). Higher values give more
        weight to closer stations.

    Notes
    -----
    - Each input row must provide a precomputed "effective_distance"
      column; whether that distance includes an elevation component is
      decided upstream, not here.
    - If any station has an effective distance of exactly zero, only the
      zero-distance station(s) contribute (weight 1 before normalization,
      all others 0).
    - All numeric columns except location-related ones are interpolated;
      NaN values are skipped and the weights re-normalized over the
      remaining stations.
    """

    # Columns describing the station location itself - never interpolated
    location_cols = ["latitude", "longitude", "elevation", "distance"]

    def _get_df(
        df: "pd.DataFrame",
        ts: "TimeSeries",
        point: "Point",
    ) -> "pd.DataFrame":
        # Group by time to interpolate each timestamp separately
        grouped = df.groupby(pd.Grouper(level="time", freq=ts.freq))

        # Interpolated rows, one per non-empty time bucket
        interpolated_results = []

        for time_idx, group in grouped:
            if group.empty:
                continue

            effective_distance = group["effective_distance"]

            # Calculate weights using IDW formula: w = 1 / d^p
            # Handle zero distance case (station at exact location)
            min_distance = effective_distance.min()
            if min_distance == 0:
                # If any station is at the exact location, use only that station
                weights = (effective_distance == 0).astype(float)
            else:
                # Standard IDW weights
                weights = 1.0 / (effective_distance**power)

            # Normalize weights so they sum to 1
            weights = weights / weights.sum()

            # Numeric columns to interpolate (location columns excluded)
            numeric_cols = [
                col
                for col in group.columns
                if col not in location_cols
                and pd.api.types.is_numeric_dtype(group[col])
            ]

            # Calculate weighted average for each numeric column
            interpolated_row = {}
            for col in numeric_cols:
                # Only use non-NaN values for interpolation
                valid_mask = group[col].notna()
                if valid_mask.any():
                    valid_values = group.loc[valid_mask, col]
                    valid_weights = weights[valid_mask]
                    # Re-normalize weights for valid values only
                    valid_weights = valid_weights / valid_weights.sum()
                    interpolated_row[col] = (valid_values * valid_weights).sum()
                else:
                    # If all values are NaN, result is NaN
                    interpolated_row[col] = np.nan

            # Add location information from the point
            interpolated_row["latitude"] = point.latitude
            interpolated_row["longitude"] = point.longitude
            if point.elevation is not None:
                interpolated_row["elevation"] = point.elevation
            interpolated_row["distance"] = 0  # Distance from point to itself

            # Create a DataFrame row with the time index
            result_df = pd.DataFrame(
                [interpolated_row], index=pd.DatetimeIndex([time_idx])
            )
            result_df.index.name = "time"
            interpolated_results.append(result_df)

        # Combine all time periods; empty frame if nothing was interpolated
        if interpolated_results:
            return pd.concat(interpolated_results)
        return pd.DataFrame()

    return _get_df