meteostat 1.7.6__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meteostat/__init__.py +38 -19
- meteostat/api/config.py +158 -0
- meteostat/api/daily.py +76 -0
- meteostat/api/hourly.py +80 -0
- meteostat/api/interpolate.py +378 -0
- meteostat/api/inventory.py +59 -0
- meteostat/api/merge.py +103 -0
- meteostat/api/monthly.py +73 -0
- meteostat/api/normals.py +144 -0
- meteostat/api/point.py +30 -0
- meteostat/api/stations.py +234 -0
- meteostat/api/timeseries.py +334 -0
- meteostat/core/cache.py +212 -59
- meteostat/core/data.py +203 -0
- meteostat/core/logger.py +9 -0
- meteostat/core/network.py +82 -0
- meteostat/core/parameters.py +112 -0
- meteostat/core/providers.py +184 -0
- meteostat/core/schema.py +170 -0
- meteostat/core/validator.py +38 -0
- meteostat/enumerations.py +149 -0
- meteostat/interpolation/idw.py +120 -0
- meteostat/interpolation/lapserate.py +91 -0
- meteostat/interpolation/nearest.py +31 -0
- meteostat/parameters.py +354 -0
- meteostat/providers/dwd/climat.py +166 -0
- meteostat/providers/dwd/daily.py +144 -0
- meteostat/providers/dwd/hourly.py +218 -0
- meteostat/providers/dwd/monthly.py +138 -0
- meteostat/providers/dwd/mosmix.py +351 -0
- meteostat/providers/dwd/poi.py +117 -0
- meteostat/providers/dwd/shared.py +155 -0
- meteostat/providers/eccc/daily.py +87 -0
- meteostat/providers/eccc/hourly.py +104 -0
- meteostat/providers/eccc/monthly.py +66 -0
- meteostat/providers/eccc/shared.py +45 -0
- meteostat/providers/index.py +496 -0
- meteostat/providers/meteostat/daily.py +65 -0
- meteostat/providers/meteostat/daily_derived.py +110 -0
- meteostat/providers/meteostat/hourly.py +66 -0
- meteostat/providers/meteostat/monthly.py +45 -0
- meteostat/providers/meteostat/monthly_derived.py +106 -0
- meteostat/providers/meteostat/shared.py +93 -0
- meteostat/providers/metno/forecast.py +186 -0
- meteostat/providers/noaa/ghcnd.py +228 -0
- meteostat/providers/noaa/isd_lite.py +142 -0
- meteostat/providers/noaa/metar.py +163 -0
- meteostat/typing.py +113 -0
- meteostat/utils/conversions.py +231 -0
- meteostat/utils/data.py +194 -0
- meteostat/utils/geo.py +28 -0
- meteostat/utils/guards.py +51 -0
- meteostat/utils/parsers.py +161 -0
- meteostat/utils/types.py +113 -0
- meteostat/utils/validators.py +31 -0
- meteostat-2.0.1.dist-info/METADATA +130 -0
- meteostat-2.0.1.dist-info/RECORD +64 -0
- {meteostat-1.7.6.dist-info → meteostat-2.0.1.dist-info}/WHEEL +1 -2
- meteostat/core/loader.py +0 -103
- meteostat/core/warn.py +0 -34
- meteostat/enumerations/granularity.py +0 -22
- meteostat/interface/base.py +0 -39
- meteostat/interface/daily.py +0 -118
- meteostat/interface/hourly.py +0 -154
- meteostat/interface/meteodata.py +0 -210
- meteostat/interface/monthly.py +0 -109
- meteostat/interface/normals.py +0 -245
- meteostat/interface/point.py +0 -143
- meteostat/interface/stations.py +0 -252
- meteostat/interface/timeseries.py +0 -237
- meteostat/series/aggregate.py +0 -48
- meteostat/series/convert.py +0 -28
- meteostat/series/count.py +0 -17
- meteostat/series/coverage.py +0 -20
- meteostat/series/fetch.py +0 -28
- meteostat/series/interpolate.py +0 -47
- meteostat/series/normalize.py +0 -76
- meteostat/series/stations.py +0 -22
- meteostat/units.py +0 -149
- meteostat/utilities/__init__.py +0 -0
- meteostat/utilities/aggregations.py +0 -37
- meteostat/utilities/endpoint.py +0 -33
- meteostat/utilities/helpers.py +0 -70
- meteostat/utilities/mutations.py +0 -89
- meteostat/utilities/validations.py +0 -30
- meteostat-1.7.6.dist-info/METADATA +0 -112
- meteostat-1.7.6.dist-info/RECORD +0 -39
- meteostat-1.7.6.dist-info/top_level.txt +0 -1
- /meteostat/{core → api}/__init__.py +0 -0
- /meteostat/{enumerations → interpolation}/__init__.py +0 -0
- /meteostat/{interface → providers}/__init__.py +0 -0
- /meteostat/{interface/interpolate.py → py.typed} +0 -0
- /meteostat/{series → utils}/__init__.py +0 -0
- {meteostat-1.7.6.dist-info → meteostat-2.0.1.dist-info/licenses}/LICENSE +0 -0
meteostat/core/schema.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema Service
|
|
3
|
+
|
|
4
|
+
The Schema Service provides methods to clean and format
|
|
5
|
+
DataFrames based on a set of parameters.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from copy import copy
|
|
9
|
+
from inspect import isfunction
|
|
10
|
+
from typing import Callable, List
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
from meteostat.core.logger import logger
|
|
15
|
+
from meteostat.core.parameters import parameter_service
|
|
16
|
+
from meteostat.core.validator import Validator
|
|
17
|
+
from meteostat.enumerations import Granularity, Parameter, UnitSystem
|
|
18
|
+
from meteostat.utils.conversions import CONVERSION_MAPPINGS, to_condition, to_direction
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class SchemaService:
    """
    Schema service.

    Cleans and formats DataFrames based on the parameter definitions
    registered with the parameter service. Every public method returns a
    new DataFrame and leaves its input untouched.
    """

    @staticmethod
    def _apply_validator(
        validator: "Validator | Callable", df: pd.DataFrame, col: str
    ) -> pd.Series:
        """
        Apply a single validator to a DataFrame column.

        Returns a boolean Series aligned with df.index where True means
        the value passed the check. Objects which are not (or do not
        produce) a Validator accept every row.
        """
        # A plain function acts as a factory returning a Validator instance
        if isfunction(validator):
            v: "Validator" = validator()  # type: ignore
        else:
            v = validator

        if not isinstance(v, Validator):
            # Unknown validator object -> accept all rows
            return pd.Series(data=True, index=df.index, dtype=bool)

        if v.ignore_na:
            # Run the check on non-null rows only; null rows pass by default
            result = pd.Series(data=True, index=df.index, dtype=bool)
            test_result = v.test(
                df.loc[df[col].notnull()][col],
                df.loc[df[col].notnull()],
                col,
            )
            # NOTE(review): a scalar bool test_result is ignored on this
            # path (result stays all-True) — confirm this is intended
            if isinstance(test_result, pd.Series):
                result.update(test_result)
            return result.astype(bool)

        test_result = v.test(df[col], df, col)
        if isinstance(test_result, bool):
            # Broadcast a scalar verdict to every row
            return pd.Series(data=test_result, index=df.index, dtype=bool)
        return test_result

    @staticmethod
    def purge(df: pd.DataFrame, parameters: "List[Parameter]") -> pd.DataFrame:
        """
        Remove DataFrame columns which are not a known parameter.

        Column order follows the order of `parameters`.
        """
        columns = [parameter for parameter in parameters if parameter in df.columns]
        return df[columns]

    @staticmethod
    def fill(df: pd.DataFrame, parameters: "List[Parameter]") -> pd.DataFrame:
        """
        Add missing schema columns (filled with None) to a DataFrame.

        Returns a copy; previously this method mutated the caller's
        DataFrame in place, inconsistent with format/clean/convert.
        """
        temp = copy(df)

        for parameter_id in parameters:
            if parameter_id not in temp:
                temp[parameter_id] = None

        return temp

    @staticmethod
    def format(df: pd.DataFrame, granularity: "Granularity") -> pd.DataFrame:
        """
        Set data types and round values according to each parameter's spec.

        Unknown columns are left unchanged (a warning is logged).
        """
        temp = copy(df)

        for col in df.columns:
            parameter = parameter_service.get_parameter(col, granularity)

            if not parameter:
                logger.warning(
                    "Column %s is not a valid column name and won't be formatted", col
                )
                continue

            # Integer parameters are rounded before the cast so float-like
            # raw values survive the dtype conversion
            if "int" in str(parameter.dtype).lower():
                temp[col] = pd.to_numeric(temp[col]).round(0)

            temp[col] = temp[col].astype(parameter.dtype, errors="ignore")

            if "float" in str(parameter.dtype).lower():
                temp[col] = temp[col].round(1)

        return temp

    @classmethod
    def clean(
        cls, df: pd.DataFrame, granularity: "Granularity", fill=None
    ) -> pd.DataFrame:
        """
        Remove invalid data from a DataFrame.

        Values which fail a validator are replaced with `fill`
        (default: None). Source reference columns (containing "_source")
        and unknown columns are left untouched.
        """
        temp = copy(df)

        for col in temp.columns:
            if "_source" in col:
                continue

            parameter = parameter_service.get_parameter(col, granularity)

            if not parameter:
                logger.warning(
                    "Column %s is not a valid column name and won't be cleaned", col
                )
                continue

            for validator in parameter.validators:
                test = cls._apply_validator(validator, temp, col)
                temp.loc[~test, col] = fill

        return temp

    def humanize(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Convert wind direction and condition codes to human-readable values.
        """
        temp = copy(df)

        if Parameter.WDIR in temp.columns:
            temp[Parameter.WDIR] = temp[Parameter.WDIR].apply(to_direction)

        if Parameter.COCO in temp.columns:
            temp[Parameter.COCO] = temp[Parameter.COCO].apply(to_condition)

        return temp

    @classmethod
    def convert(
        cls, df, granularity: "Granularity", units: "UnitSystem"
    ) -> pd.DataFrame:
        """
        Convert units in a DataFrame to the requested unit system.

        Columns without a known parameter or without a registered
        conversion are left unchanged.
        """
        temp = copy(df)

        for col in temp.columns:
            if "_source" in col:
                continue

            parameter = parameter_service.get_parameter(col, granularity)

            if not parameter:
                logger.warning(
                    "Column %s is not a valid column name and won't be converted", col
                )
                continue

            if parameter.unit in CONVERSION_MAPPINGS:
                if units in CONVERSION_MAPPINGS[parameter.unit]:
                    conversion_func = CONVERSION_MAPPINGS[parameter.unit][units]
                    temp[col] = temp[col].apply(conversion_func)

        return temp


schema_service = SchemaService()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema Column Validator
|
|
3
|
+
|
|
4
|
+
This class is used to validate DataFrame columns based
|
|
5
|
+
on a parameter specification.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from inspect import signature
|
|
9
|
+
from typing import Callable
|
|
10
|
+
|
|
11
|
+
from pandas import DataFrame, Series
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Validator:
|
|
15
|
+
"""
|
|
16
|
+
Schema Column Validator
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
func: Callable
|
|
20
|
+
ignore_na = False
|
|
21
|
+
is_relational = False
|
|
22
|
+
|
|
23
|
+
def __init__(self, func: Callable, ignore_na=False, is_relational=False):
|
|
24
|
+
self.func = func
|
|
25
|
+
self.ignore_na = ignore_na
|
|
26
|
+
self.is_relational = is_relational
|
|
27
|
+
|
|
28
|
+
def test(self, series: Series, df: DataFrame, column: str) -> bool | Series:
|
|
29
|
+
"""
|
|
30
|
+
Run validator
|
|
31
|
+
|
|
32
|
+
Returns a bool series:
|
|
33
|
+
True -> Check passed
|
|
34
|
+
False -> Check failed
|
|
35
|
+
"""
|
|
36
|
+
arg_count = len((signature(self.func)).parameters)
|
|
37
|
+
args = [series, df, column]
|
|
38
|
+
return self.func(*args[0:arg_count])
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Meteostat Enumerations
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from enum import StrEnum, IntEnum
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Granularity(StrEnum):
    """
    The different levels of time series granularity
    """

    HOURLY = "hourly"  # one record per hour
    DAILY = "daily"  # one record per day
    MONTHLY = "monthly"  # one record per month
    NORMALS = "normals"  # long-term climate normals (see api/normals)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Frequency(StrEnum):
    """
    The different levels of time series frequency
    """

    # Values are pandas offset aliases (used e.g. with pd.Grouper(freq=...))
    HOURLY = "h"
    DAILY = "D"  # calendar day
    MONTHLY = "MS"  # month start
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Parameter(StrEnum):
    """
    The different meteorological parameters supported by Meteostat
    """

    TEMP = "temp"  # Air temperature (aggregation: mean)
    TMIN = "tmin"  # Daily minimum air temperature (aggregation: mean)
    TMAX = "tmax"  # Daily maximum air temperature (aggregation: mean)
    TXMN = "txmn"  # Extreme minimum air temperature (aggregation: min)
    TXMX = "txmx"  # Extreme maximum air temperature (aggregation: max)
    DWPT = "dwpt"  # Dew point (aggregation: mean)
    PRCP = "prcp"  # Precipitation (aggregation: sum)
    PDAY = "pday"  # Days with precipitation equal to or greater than 1 millimeter (aggregation: sum)
    WDIR = "wdir"  # Wind direction at observation time
    WSPD = "wspd"  # Wind speed (aggregation: mean)
    WPGT = "wpgt"  # Peak wind gust (aggregation: max)
    RHUM = "rhum"  # Relative humidity (aggregation: mean)
    PRES = "pres"  # Air pressure at MSL (aggregation: mean)
    SNWD = "snwd"  # Snow depth on ground
    SNOW = "snow"  # Snowfall (aggregation: sum)
    TSUN = "tsun"  # Sunshine duration (aggregation: sum)
    SGHI = "sghi"  # TBD — presumably solar global horizontal irradiance; confirm
    SDNI = "sdni"  # TBD — presumably solar direct normal irradiance; confirm
    SDHI = "sdhi"  # TBD — presumably solar diffuse horizontal irradiance; confirm
    CLDC = "cldc"  # Cloud cover (aggregation: mean)
    VSBY = "vsby"  # Visibility (aggregation: mean)
    COCO = "coco"  # Weather condition code at time of observation
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class Unit(StrEnum):
    """
    Data Units
    """

    CELSIUS = "°C"
    FAHRENHEIT = "°F"
    KELVIN = "K"
    PERCENTAGE = "%"
    HPA = "hPa"  # hectopascal (air pressure)
    MILLIMETERS = "mm"
    CENTIMETERS = "cm"
    METERS = "m"
    KMH = "km/h"  # kilometers per hour
    DEGREES = "°"  # angular degrees (e.g. wind direction)
    MINUTES = "min"
    OKTAS = "okta"  # cloud cover in eighths
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class UnitSystem(StrEnum):
    """
    Unit Systems
    """

    # Target systems for unit conversion (see utils/conversions)
    SI = "si"
    METRIC = "metric"
    IMPERIAL = "imperial"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class Provider(StrEnum):
    """
    Providers supported by Meteostat
    """

    # External data sources (see meteostat/providers/noaa, dwd, eccc, metno)
    ISD_LITE = "isd_lite"
    METAR = "metar"
    GHCND = "ghcnd"
    CLIMAT = "climat"
    DWD_HOURLY = "dwd_hourly"
    DWD_POI = "dwd_poi"
    DWD_MOSMIX = "dwd_mosmix"
    DWD_DAILY = "dwd_daily"
    DWD_MONTHLY = "dwd_monthly"
    ECCC_HOURLY = "eccc_hourly"
    ECCC_DAILY = "eccc_daily"
    ECCC_MONTHLY = "eccc_monthly"
    METNO_FORECAST = "metno_forecast"

    # Meteostat bulk datasets (see meteostat/providers/meteostat)
    HOURLY = "hourly"
    DAILY = "daily"
    DAILY_DERIVED = "daily_derived"
    MONTHLY = "monthly"
    MONTHLY_DERIVED = "monthly_derived"

    # NOTE(review): presumably legacy source identifiers — confirm usage
    SYNOP = "synop"
    METAR_LEGACY = "metar_legacy"
    MODEL = "model"
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class Priority(IntEnum):
    """
    Provider priorities
    """

    # NOTE(review): higher value presumably means preferred when merging
    # data from multiple providers — confirm against core/providers
    HIGHEST = 25
    HIGH = 20
    MEDIUM = 15
    LOW = 10
    LOWEST = 5
    NONE = 0
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class Grade(IntEnum):
    """
    Provider quality grades
    """

    # Higher value = higher data quality
    RECORD = 4  # official climate records
    OBSERVATION = 3  # direct observations
    ANALYSIS = 2  # model analysis / derived data
    FORECAST = 1  # forecast data
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class TTL(IntEnum):
    """
    Cache TTLs
    """

    # Durations in seconds
    HOUR = 60 * 60
    DAY = 60 * 60 * 24
    WEEK = 60 * 60 * 24 * 7
    MONTH = 60 * 60 * 24 * 30  # 30-day approximation of a month
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Inverse Distance Weighting (IDW) Interpolation
|
|
3
|
+
|
|
4
|
+
Implements IDW interpolation for spatial weather data with support for
|
|
5
|
+
elevation-weighted distance calculations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Callable
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
13
|
+
from meteostat.api.point import Point
|
|
14
|
+
from meteostat.api.timeseries import TimeSeries
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def inverse_distance_weighting(
    power: float = 2.0,
) -> Callable:
    """
    Interpolate values using Inverse Distance Weighting (IDW).

    This method calculates interpolated values as a weighted average of nearby
    stations, where weights decrease with distance.

    Parameters
    ----------
    power : float, optional
        Power parameter for IDW (default: 2.0). Higher values give more
        weight to closer stations.

    Returns
    -------
    Callable
        A function (df, ts, point) -> pd.DataFrame which interpolates the
        station data in df onto the given point, one row per time step.

    Notes
    -----
    - The "effective_distance" column is expected to be pre-computed by the
      caller (it may already include an elevation component).
    - Stations with zero effective distance get weight of 1.0, all others get 0.
    - All numeric columns except location-related ones are interpolated.
    """

    def _get_df(
        df: pd.DataFrame,
        ts: "TimeSeries",
        point: "Point",
    ) -> pd.DataFrame:
        # Group by time to interpolate each timestamp separately
        grouped = df.groupby(pd.Grouper(level="time", freq=ts.freq))

        # List to store interpolated results for each time period
        interpolated_results = []

        for time_idx, group in grouped:
            if group.empty:
                continue

            effective_distance = group["effective_distance"]

            # Calculate weights using IDW formula: w = 1 / d^p
            # Handle zero distance case (station at exact location)
            min_distance = effective_distance.min()
            if min_distance == 0:
                # If any station is at the exact location, use only that station
                weights = (effective_distance == 0).astype(float)
            else:
                # Standard IDW weights
                weights = 1.0 / (effective_distance**power)

            # Normalize weights so they sum to 1
            weights = weights / weights.sum()

            # Get numeric columns to interpolate (exclude location-related columns)
            location_cols = ["latitude", "longitude", "elevation", "distance"]
            numeric_cols = [
                col
                for col in group.columns
                if col not in location_cols
                and pd.api.types.is_numeric_dtype(group[col])
            ]

            # Calculate weighted average for each numeric column
            interpolated_row = {}
            for col in numeric_cols:
                # Only use non-NaN values for interpolation
                valid_mask = group[col].notna()
                if valid_mask.any():
                    valid_values = group.loc[valid_mask, col]
                    valid_weights = weights[valid_mask]
                    # Re-normalize weights for valid values only
                    valid_weights = valid_weights / valid_weights.sum()
                    interpolated_row[col] = (valid_values * valid_weights).sum()
                else:
                    # If all values are NaN, result is NaN
                    interpolated_row[col] = np.nan

            # Add location information from the point
            interpolated_row["latitude"] = point.latitude
            interpolated_row["longitude"] = point.longitude
            if point.elevation is not None:
                interpolated_row["elevation"] = point.elevation
            interpolated_row["distance"] = 0  # Distance from point to itself

            # Create a DataFrame row with the time index
            result_df = pd.DataFrame(
                [interpolated_row], index=pd.DatetimeIndex([time_idx])
            )
            result_df.index.name = "time"
            interpolated_results.append(result_df)

        # Combine all time periods
        if interpolated_results:
            return pd.concat(interpolated_results)
        return pd.DataFrame()

    return _get_df
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from itertools import combinations
|
|
2
|
+
from statistics import mean
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from meteostat.api.timeseries import TimeSeries
|
|
8
|
+
from meteostat.api.config import config
|
|
9
|
+
from meteostat.enumerations import Parameter
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def lapse_rate(ts: TimeSeries, parameter: Parameter = Parameter.TEMP) -> float | None:
    """
    Calculate the mean lapse rate (temperature gradient) in degrees Celsius
    per kilometer from the station data behind a time series.

    Parameters
    ----------
    ts : TimeSeries
        Time series whose station data (including elevation) is evaluated.
    parameter : Parameter, optional
        The parameter to derive the gradient for (default: air temperature).

    Returns
    -------
    float or None
        Mean lapse rate in degrees Celsius per kilometer, or None if it
        cannot be derived (missing columns or fewer than two usable stations).
    """
    df = ts.fetch(location=True)

    if df is None or "elevation" not in df.columns or parameter not in df.columns:
        return None

    elev_by_station = df["elevation"].groupby(level="station").first()
    temp_by_station = df[parameter].groupby(level="station").mean()

    if len(elev_by_station) < 2 or len(temp_by_station) < 2:
        return None

    # Renamed from "lapse_rates"/"lapse_rate" to avoid shadowing this function
    rates = []

    for a, b in combinations(elev_by_station.index, 2):
        # Skip pairs with missing data or identical elevation (div by zero)
        if (
            pd.isna(elev_by_station[a])
            or pd.isna(elev_by_station[b])
            or pd.isna(temp_by_station[a])
            or pd.isna(temp_by_station[b])
            or elev_by_station[a] == elev_by_station[b]
        ):
            continue

        temp_diff = temp_by_station[a] - temp_by_station[b]
        elev_diff = elev_by_station[a] - elev_by_station[b]

        # multiply by -1 to get positive lapse rate for decreasing temp
        # with increasing elevation
        rates.append((temp_diff / elev_diff) * 1000 * -1)

    if not rates:
        return None

    return mean(rates)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def apply_lapse_rate(
    df: pd.DataFrame, elevation: int, lapse_rate: float
) -> pd.DataFrame:
    """
    Calculate approximate temperature at target elevation
    using a given lapse rate.

    Adjusts the configured lapse-rate parameters in place and returns the
    same DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing the data to be adjusted (must provide an
        "elevation" column with the stations' elevations).
    elevation : int
        Target elevation in meters.
    lapse_rate : float
        Lapse rate (temperature gradient) in degrees Celsius per kilometer.

    Returns
    -------
    pd.DataFrame
        DataFrame with adjusted temperature values.
    """
    for col in config.lapse_rate_parameters:
        if col in df.columns:
            # Only adjust rows with an actual value. The previous mask
            # (df[col] != np.nan) was always True because NaN never
            # compares equal to anything; NaN rows stay NaN either way,
            # but notna() states the intent.
            mask = df[col].notna()
            df.loc[mask, col] = round(
                df[col] + ((lapse_rate / 1000) * (df["elevation"] - elevation)), 1
            )

    return df
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
from meteostat.api.point import Point
|
|
4
|
+
from meteostat.api.timeseries import TimeSeries
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def nearest_neighbor(df: pd.DataFrame, ts: TimeSeries, _point: Point) -> pd.DataFrame:
    """
    Reduce multi-station data to the nearest station's value per time step.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing the data to be adjusted.
    ts : TimeSeries
        TimeSeries object containing the target data.
    _point : Point
        Point object representing the target location (unused here).

    Returns
    -------
    pd.DataFrame
        One row per time step, holding each column's first available value
        when stations are ordered by distance.
    """
    by_distance = df.sort_values("distance")
    time_groups = pd.Grouper(level="time", freq=ts.freq)

    return by_distance.groupby(time_groups).agg("first")
|