meteostat 1.6.8__py3-none-any.whl → 1.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meteostat/__init__.py +12 -1
- meteostat/core/cache.py +0 -2
- meteostat/core/loader.py +26 -28
- meteostat/core/warn.py +1 -1
- meteostat/interface/base.py +10 -7
- meteostat/interface/daily.py +44 -31
- meteostat/interface/hourly.py +44 -43
- meteostat/interface/meteodata.py +54 -69
- meteostat/interface/monthly.py +24 -19
- meteostat/interface/normals.py +59 -19
- meteostat/interface/point.py +1 -4
- meteostat/interface/stations.py +9 -8
- meteostat/interface/timeseries.py +97 -65
- meteostat/series/aggregate.py +0 -1
- meteostat/series/convert.py +1 -1
- meteostat/series/interpolate.py +12 -2
- meteostat/series/normalize.py +4 -5
- meteostat/utilities/endpoint.py +1 -1
- meteostat/utilities/helpers.py +38 -0
- meteostat/utilities/mutations.py +10 -0
- {meteostat-1.6.8.dist-info → meteostat-1.7.1.dist-info}/METADATA +4 -4
- meteostat-1.7.1.dist-info/RECORD +39 -0
- {meteostat-1.6.8.dist-info → meteostat-1.7.1.dist-info}/WHEEL +1 -1
- meteostat-1.6.8.dist-info/RECORD +0 -39
- {meteostat-1.6.8.dist-info → meteostat-1.7.1.dist-info}/LICENSE +0 -0
- {meteostat-1.6.8.dist-info → meteostat-1.7.1.dist-info}/top_level.txt +0 -0
meteostat/interface/monthly.py
CHANGED
|
@@ -17,29 +17,46 @@ from meteostat.interface.point import Point
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class Monthly(TimeSeries):
|
|
20
|
-
|
|
21
20
|
"""
|
|
22
21
|
Retrieve monthly weather data for one or multiple weather stations or
|
|
23
22
|
a single geographical point
|
|
24
23
|
"""
|
|
25
24
|
|
|
26
25
|
# The cache subdirectory
|
|
27
|
-
cache_subdir
|
|
26
|
+
cache_subdir = "monthly"
|
|
28
27
|
|
|
29
28
|
# Granularity
|
|
30
29
|
granularity = Granularity.MONTHLY
|
|
31
30
|
|
|
32
31
|
# Default frequency
|
|
33
|
-
_freq
|
|
32
|
+
_freq = "1MS"
|
|
33
|
+
|
|
34
|
+
# Source mappings
|
|
35
|
+
_source_mappings = {
|
|
36
|
+
"dwd_monthly": "A",
|
|
37
|
+
"eccc_monthly": "A",
|
|
38
|
+
"dwd_daily": "C",
|
|
39
|
+
"eccc_daily": "C",
|
|
40
|
+
"ghcnd": "D",
|
|
41
|
+
"dwd_hourly": "E",
|
|
42
|
+
"eccc_hourly": "E",
|
|
43
|
+
"isd_lite": "F",
|
|
44
|
+
"synop": "G",
|
|
45
|
+
"dwd_poi": "G",
|
|
46
|
+
"metar": "H",
|
|
47
|
+
"model": "I",
|
|
48
|
+
"dwd_mosmix": "I",
|
|
49
|
+
"metno_forecast": "I",
|
|
50
|
+
}
|
|
34
51
|
|
|
35
52
|
# Flag which represents model data
|
|
36
53
|
_model_flag = "I"
|
|
37
54
|
|
|
38
55
|
# Columns
|
|
39
|
-
_columns
|
|
56
|
+
_columns = [
|
|
40
57
|
"year",
|
|
41
58
|
"month",
|
|
42
|
-
"tavg",
|
|
59
|
+
{"tavg": "temp"},
|
|
43
60
|
"tmin",
|
|
44
61
|
"tmax",
|
|
45
62
|
"prcp",
|
|
@@ -51,22 +68,11 @@ class Monthly(TimeSeries):
|
|
|
51
68
|
# Index of first meteorological column
|
|
52
69
|
_first_met_col = 2
|
|
53
70
|
|
|
54
|
-
# Data types
|
|
55
|
-
_types: dict = {
|
|
56
|
-
"tavg": "float64",
|
|
57
|
-
"tmin": "float64",
|
|
58
|
-
"tmax": "float64",
|
|
59
|
-
"prcp": "float64",
|
|
60
|
-
"wspd": "float64",
|
|
61
|
-
"pres": "float64",
|
|
62
|
-
"tsun": "float64",
|
|
63
|
-
}
|
|
64
|
-
|
|
65
71
|
# Columns for date parsing
|
|
66
|
-
_parse_dates
|
|
72
|
+
_parse_dates = ["year", "month"]
|
|
67
73
|
|
|
68
74
|
# Default aggregation functions
|
|
69
|
-
aggregations
|
|
75
|
+
aggregations = {
|
|
70
76
|
"tavg": "mean",
|
|
71
77
|
"tmin": "mean",
|
|
72
78
|
"tmax": "mean",
|
|
@@ -84,7 +90,6 @@ class Monthly(TimeSeries):
|
|
|
84
90
|
model: bool = True, # Include model data?
|
|
85
91
|
flags: bool = False, # Load source flags?
|
|
86
92
|
) -> None:
|
|
87
|
-
|
|
88
93
|
# Set start date
|
|
89
94
|
if start is not None:
|
|
90
95
|
start = start.replace(day=1)
|
meteostat/interface/normals.py
CHANGED
|
@@ -9,10 +9,13 @@ The code is licensed under the MIT license.
|
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
11
|
from copy import copy
|
|
12
|
-
from typing import Union
|
|
12
|
+
from typing import Optional, Union
|
|
13
13
|
from datetime import datetime
|
|
14
14
|
import numpy as np
|
|
15
15
|
import pandas as pd
|
|
16
|
+
from meteostat.core.cache import file_in_cache, get_local_file_path
|
|
17
|
+
from meteostat.core.loader import load_handler
|
|
18
|
+
from meteostat.utilities.endpoint import generate_endpoint_path
|
|
16
19
|
from meteostat.enumerations.granularity import Granularity
|
|
17
20
|
from meteostat.core.warn import warn
|
|
18
21
|
from meteostat.interface.meteodata import MeteoData
|
|
@@ -20,32 +23,31 @@ from meteostat.interface.point import Point
|
|
|
20
23
|
|
|
21
24
|
|
|
22
25
|
class Normals(MeteoData):
|
|
23
|
-
|
|
24
26
|
"""
|
|
25
27
|
Retrieve climate normals for one or multiple weather stations or
|
|
26
28
|
a single geographical point
|
|
27
29
|
"""
|
|
28
30
|
|
|
29
31
|
# The cache subdirectory
|
|
30
|
-
cache_subdir
|
|
32
|
+
cache_subdir = "normals"
|
|
31
33
|
|
|
32
34
|
# Granularity
|
|
33
35
|
granularity = Granularity.NORMALS
|
|
34
36
|
|
|
35
37
|
# The list of weather Stations
|
|
36
|
-
_stations: pd.Index = None
|
|
38
|
+
_stations: Optional[pd.Index] = None
|
|
37
39
|
|
|
38
40
|
# The first year of the period
|
|
39
|
-
_start: int = None
|
|
41
|
+
_start: Optional[int] = None
|
|
40
42
|
|
|
41
43
|
# The last year of the period
|
|
42
|
-
_end: int = None
|
|
44
|
+
_end: Optional[int] = None
|
|
43
45
|
|
|
44
46
|
# The data frame
|
|
45
47
|
_data: pd.DataFrame = pd.DataFrame()
|
|
46
48
|
|
|
47
49
|
# Columns
|
|
48
|
-
_columns
|
|
50
|
+
_columns = [
|
|
49
51
|
"start",
|
|
50
52
|
"end",
|
|
51
53
|
"month",
|
|
@@ -60,26 +62,62 @@ class Normals(MeteoData):
|
|
|
60
62
|
# Index of first meteorological column
|
|
61
63
|
_first_met_col = 3
|
|
62
64
|
|
|
63
|
-
# Data types
|
|
64
|
-
_types: dict = {
|
|
65
|
-
"tmin": "float64",
|
|
66
|
-
"tmax": "float64",
|
|
67
|
-
"prcp": "float64",
|
|
68
|
-
"wspd": "float64",
|
|
69
|
-
"pres": "float64",
|
|
70
|
-
"tsun": "float64",
|
|
71
|
-
}
|
|
72
|
-
|
|
73
65
|
# Which columns should be parsed as dates?
|
|
74
66
|
_parse_dates = None
|
|
75
67
|
|
|
68
|
+
def _load_data(self, station: str, year: Optional[int] = None) -> None:
|
|
69
|
+
"""
|
|
70
|
+
Load file for a single station from Meteostat
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
# File name
|
|
74
|
+
file = generate_endpoint_path(self.granularity, station, year)
|
|
75
|
+
|
|
76
|
+
# Get local file path
|
|
77
|
+
path = get_local_file_path(self.cache_dir, self.cache_subdir, file)
|
|
78
|
+
|
|
79
|
+
# Check if file in cache
|
|
80
|
+
if self.max_age > 0 and file_in_cache(path, self.max_age):
|
|
81
|
+
# Read cached data
|
|
82
|
+
df = pd.read_pickle(path)
|
|
83
|
+
|
|
84
|
+
else:
|
|
85
|
+
# Get data from Meteostat
|
|
86
|
+
df = load_handler(
|
|
87
|
+
self.endpoint,
|
|
88
|
+
file,
|
|
89
|
+
self.proxy,
|
|
90
|
+
self._columns,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# Validate and prepare data for further processing
|
|
94
|
+
if not df.empty:
|
|
95
|
+
# Add weather station ID
|
|
96
|
+
df["station"] = station
|
|
97
|
+
|
|
98
|
+
# Set index
|
|
99
|
+
df = df.set_index(["station", "start", "end", "month"])
|
|
100
|
+
|
|
101
|
+
# Save as Pickle
|
|
102
|
+
if self.max_age > 0:
|
|
103
|
+
df.to_pickle(path)
|
|
104
|
+
|
|
105
|
+
# Filter time period and append to DataFrame
|
|
106
|
+
if self.granularity == Granularity.NORMALS and not df.empty and self._end:
|
|
107
|
+
# Get time index
|
|
108
|
+
end = df.index.get_level_values("end")
|
|
109
|
+
# Filter & return
|
|
110
|
+
return df.loc[end == self._end]
|
|
111
|
+
|
|
112
|
+
# Return
|
|
113
|
+
return df
|
|
114
|
+
|
|
76
115
|
def __init__(
|
|
77
116
|
self,
|
|
78
117
|
loc: Union[pd.DataFrame, Point, list, str],
|
|
79
118
|
start: int = None,
|
|
80
119
|
end: int = None,
|
|
81
120
|
) -> None:
|
|
82
|
-
|
|
83
121
|
# Set list of weather stations
|
|
84
122
|
if isinstance(loc, pd.DataFrame):
|
|
85
123
|
self._stations = loc.index
|
|
@@ -146,7 +184,9 @@ class Normals(MeteoData):
|
|
|
146
184
|
# Go through all periods
|
|
147
185
|
for period in periods:
|
|
148
186
|
# Create DataFrame
|
|
149
|
-
df = pd.DataFrame(
|
|
187
|
+
df = pd.DataFrame(
|
|
188
|
+
columns=temp._columns[temp._first_met_col :], dtype="float64"
|
|
189
|
+
)
|
|
150
190
|
# Populate index columns
|
|
151
191
|
df["month"] = range(1, 13)
|
|
152
192
|
df["station"] = station
|
meteostat/interface/point.py
CHANGED
|
@@ -14,7 +14,6 @@ from meteostat.interface.stations import Stations
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class Point:
|
|
17
|
-
|
|
18
17
|
"""
|
|
19
18
|
Automatically select weather stations by geographic location
|
|
20
19
|
"""
|
|
@@ -53,7 +52,6 @@ class Point:
|
|
|
53
52
|
_alt: int = None
|
|
54
53
|
|
|
55
54
|
def __init__(self, lat: float, lon: float, alt: int = None) -> None:
|
|
56
|
-
|
|
57
55
|
self._lat = lat
|
|
58
56
|
self._lon = lon
|
|
59
57
|
self._alt = alt
|
|
@@ -90,7 +88,7 @@ class Point:
|
|
|
90
88
|
# Apply inventory filter
|
|
91
89
|
if freq and start and end:
|
|
92
90
|
age = (datetime.now() - end).days
|
|
93
|
-
if model
|
|
91
|
+
if model is False or age > 180:
|
|
94
92
|
stations = stations.inventory(freq, (start, end))
|
|
95
93
|
|
|
96
94
|
# Apply altitude filter
|
|
@@ -110,7 +108,6 @@ class Point:
|
|
|
110
108
|
|
|
111
109
|
# Score values
|
|
112
110
|
if self.radius:
|
|
113
|
-
|
|
114
111
|
# Calculate score values
|
|
115
112
|
stations["score"] = (
|
|
116
113
|
(1 - (stations["distance"] / self.radius)) * self.weight_dist
|
meteostat/interface/stations.py
CHANGED
|
@@ -19,7 +19,6 @@ from meteostat.utilities.helpers import get_distance
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class Stations(Base):
|
|
22
|
-
|
|
23
22
|
"""
|
|
24
23
|
Select weather stations from the full list of stations
|
|
25
24
|
"""
|
|
@@ -80,15 +79,18 @@ class Stations(Base):
|
|
|
80
79
|
|
|
81
80
|
# Check if file in cache
|
|
82
81
|
if self.max_age > 0 and file_in_cache(path, self.max_age):
|
|
83
|
-
|
|
84
82
|
# Read cached data
|
|
85
83
|
df = pd.read_pickle(path)
|
|
86
84
|
|
|
87
85
|
else:
|
|
88
|
-
|
|
89
86
|
# Get data from Meteostat
|
|
90
87
|
df = load_handler(
|
|
91
|
-
self.endpoint,
|
|
88
|
+
self.endpoint,
|
|
89
|
+
file,
|
|
90
|
+
self.proxy,
|
|
91
|
+
self._columns,
|
|
92
|
+
self._types,
|
|
93
|
+
self._parse_dates,
|
|
92
94
|
)
|
|
93
95
|
|
|
94
96
|
# Add index
|
|
@@ -102,7 +104,6 @@ class Stations(Base):
|
|
|
102
104
|
self._data = df
|
|
103
105
|
|
|
104
106
|
def __init__(self) -> None:
|
|
105
|
-
|
|
106
107
|
# Get all weather stations
|
|
107
108
|
self._load()
|
|
108
109
|
|
|
@@ -179,12 +180,12 @@ class Stations(Base):
|
|
|
179
180
|
|
|
180
181
|
if required is True:
|
|
181
182
|
# Make sure data exists at all
|
|
182
|
-
temp._data = temp._data[
|
|
183
|
+
temp._data = temp._data[~pd.isna(temp._data[f"{freq}_start"])]
|
|
183
184
|
|
|
184
185
|
elif isinstance(required, tuple):
|
|
185
186
|
# Make sure data exists across period
|
|
186
187
|
temp._data = temp._data[
|
|
187
|
-
(pd.isna(temp._data[freq
|
|
188
|
+
(~pd.isna(temp._data[f"{freq}_start"]))
|
|
188
189
|
& (temp._data[freq + "_start"] <= required[0])
|
|
189
190
|
& (
|
|
190
191
|
temp._data[freq + "_end"] + timedelta(seconds=temp.max_age)
|
|
@@ -195,7 +196,7 @@ class Stations(Base):
|
|
|
195
196
|
else:
|
|
196
197
|
# Make sure data exists on a certain day
|
|
197
198
|
temp._data = temp._data[
|
|
198
|
-
(pd.isna(temp._data[freq
|
|
199
|
+
(~pd.isna(temp._data[f"{freq}_start"]))
|
|
199
200
|
& (temp._data[freq + "_start"] <= required)
|
|
200
201
|
& (
|
|
201
202
|
temp._data[freq + "_end"] + timedelta(seconds=temp.max_age)
|
meteostat/interface/timeseries.py
CHANGED
|
@@ -9,72 +9,118 @@ The code is licensed under the MIT license.
|
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
11
|
from datetime import datetime
|
|
12
|
-
from typing import Union
|
|
13
|
-
import numpy as np
|
|
12
|
+
from typing import Optional, Union
|
|
14
13
|
import pandas as pd
|
|
14
|
+
from meteostat.core.cache import file_in_cache, get_local_file_path
|
|
15
|
+
from meteostat.core.loader import load_handler
|
|
15
16
|
from meteostat.enumerations.granularity import Granularity
|
|
16
|
-
from meteostat.core.cache import get_local_file_path, file_in_cache
|
|
17
|
-
from meteostat.core.loader import processing_handler, load_handler
|
|
18
|
-
from meteostat.utilities.mutations import localize, filter_time
|
|
19
|
-
from meteostat.utilities.validations import validate_series
|
|
20
17
|
from meteostat.utilities.endpoint import generate_endpoint_path
|
|
18
|
+
from meteostat.utilities.mutations import filter_time, localize
|
|
19
|
+
from meteostat.utilities.validations import validate_series
|
|
20
|
+
from meteostat.utilities.helpers import get_flag_from_source_factory, with_suffix
|
|
21
21
|
from meteostat.interface.point import Point
|
|
22
22
|
from meteostat.interface.meteodata import MeteoData
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class TimeSeries(MeteoData):
|
|
26
|
-
|
|
27
26
|
"""
|
|
28
27
|
TimeSeries class which provides features which are
|
|
29
28
|
used across all time series classes
|
|
30
29
|
"""
|
|
31
30
|
|
|
31
|
+
# Base URL of the Meteostat bulk data interface
|
|
32
|
+
endpoint = "https://data.meteostat.net/"
|
|
33
|
+
|
|
32
34
|
# The list of origin weather Stations
|
|
33
|
-
_origin_stations:
|
|
35
|
+
_origin_stations: Optional[pd.Index] = None
|
|
34
36
|
|
|
35
37
|
# The start date
|
|
36
|
-
_start:
|
|
38
|
+
_start: Optional[datetime] = None
|
|
37
39
|
|
|
38
40
|
# The end date
|
|
39
|
-
_end:
|
|
41
|
+
_end: Optional[datetime] = None
|
|
40
42
|
|
|
41
43
|
# Include model data?
|
|
42
|
-
_model
|
|
44
|
+
_model = True
|
|
43
45
|
|
|
44
46
|
# Fetch source flags?
|
|
45
|
-
_flags =
|
|
47
|
+
_flags = False
|
|
46
48
|
|
|
47
|
-
def
|
|
49
|
+
def _load_data(self, station: str, year: Optional[int] = None) -> None:
|
|
48
50
|
"""
|
|
49
|
-
Load
|
|
51
|
+
Load file for a single station from Meteostat
|
|
50
52
|
"""
|
|
51
|
-
|
|
52
53
|
# File name
|
|
53
|
-
file = generate_endpoint_path(self.granularity, station, year
|
|
54
|
+
file = generate_endpoint_path(self.granularity, station, year)
|
|
54
55
|
|
|
55
56
|
# Get local file path
|
|
56
57
|
path = get_local_file_path(self.cache_dir, self.cache_subdir, file)
|
|
57
58
|
|
|
58
59
|
# Check if file in cache
|
|
59
60
|
if self.max_age > 0 and file_in_cache(path, self.max_age):
|
|
60
|
-
|
|
61
61
|
# Read cached data
|
|
62
62
|
df = pd.read_pickle(path)
|
|
63
63
|
|
|
64
64
|
else:
|
|
65
|
-
|
|
66
65
|
# Get data from Meteostat
|
|
67
66
|
df = load_handler(
|
|
68
67
|
self.endpoint,
|
|
69
68
|
file,
|
|
70
|
-
self.
|
|
71
|
-
|
|
72
|
-
|
|
69
|
+
self.proxy,
|
|
70
|
+
default_df=pd.DataFrame(
|
|
71
|
+
columns=self._raw_columns
|
|
72
|
+
+ with_suffix(self._raw_columns, "_source")
|
|
73
|
+
),
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Add time column and drop original columns
|
|
77
|
+
if len(self._parse_dates) < 3:
|
|
78
|
+
df["day"] = 1
|
|
79
|
+
|
|
80
|
+
df["time"] = pd.to_datetime(
|
|
81
|
+
df[
|
|
82
|
+
(
|
|
83
|
+
self._parse_dates
|
|
84
|
+
if len(self._parse_dates) > 2
|
|
85
|
+
else self._parse_dates + ["day"]
|
|
86
|
+
)
|
|
87
|
+
]
|
|
73
88
|
)
|
|
89
|
+
df = df.drop(self._parse_dates, axis=1)
|
|
74
90
|
|
|
75
|
-
# Validate
|
|
91
|
+
# Validate and prepare data for further processing
|
|
76
92
|
df = validate_series(df, station)
|
|
77
93
|
|
|
94
|
+
# Rename columns
|
|
95
|
+
df = df.rename(columns=self._renamed_columns, errors="ignore")
|
|
96
|
+
|
|
97
|
+
# Convert sources to flags
|
|
98
|
+
for col in df.columns:
|
|
99
|
+
basecol = col[:-7] if col.endswith("_source") else col
|
|
100
|
+
|
|
101
|
+
if basecol not in self._processed_columns:
|
|
102
|
+
df.drop(col, axis=1, inplace=True)
|
|
103
|
+
continue
|
|
104
|
+
|
|
105
|
+
if basecol == col:
|
|
106
|
+
df[col] = df[col].astype("Float64")
|
|
107
|
+
|
|
108
|
+
if col.endswith("_source"):
|
|
109
|
+
flagcol = f"{basecol}_flag"
|
|
110
|
+
df[flagcol] = pd.NA
|
|
111
|
+
df[flagcol] = df[flagcol].astype("string")
|
|
112
|
+
mask = df[col].notna()
|
|
113
|
+
df.loc[mask, flagcol] = df.loc[mask, col].apply(
|
|
114
|
+
get_flag_from_source_factory(
|
|
115
|
+
self._source_mappings, self._model_flag
|
|
116
|
+
)
|
|
117
|
+
)
|
|
118
|
+
df.drop(col, axis=1, inplace=True)
|
|
119
|
+
|
|
120
|
+
# Process virtual columns
|
|
121
|
+
for key, value in self._virtual_columns.items():
|
|
122
|
+
df = value(df, key)
|
|
123
|
+
|
|
78
124
|
# Save as Pickle
|
|
79
125
|
if self.max_age > 0:
|
|
80
126
|
df.to_pickle(path)
|
|
@@ -88,59 +134,33 @@ class TimeSeries(MeteoData):
|
|
|
88
134
|
df = localize(df, self._timezone)
|
|
89
135
|
|
|
90
136
|
# Filter time period and append to DataFrame
|
|
91
|
-
|
|
92
|
-
df = filter_time(df, self._start, self._end)
|
|
137
|
+
df = filter_time(df, self._start, self._end)
|
|
93
138
|
|
|
139
|
+
# Return
|
|
94
140
|
return df
|
|
95
141
|
|
|
96
|
-
def _get_flags(self) -> None:
|
|
97
|
-
"""
|
|
98
|
-
Get all source flags
|
|
99
|
-
"""
|
|
100
|
-
|
|
101
|
-
if len(self._stations) > 0:
|
|
102
|
-
|
|
103
|
-
# Get list of datasets
|
|
104
|
-
datasets = self._get_datasets()
|
|
105
|
-
|
|
106
|
-
# Data Processings
|
|
107
|
-
return processing_handler(
|
|
108
|
-
datasets, self._load_flags, self.processes, self.threads
|
|
109
|
-
)
|
|
110
|
-
|
|
111
|
-
# Empty DataFrame
|
|
112
|
-
return pd.DataFrame(columns=[*self._types])
|
|
113
|
-
|
|
114
142
|
def _filter_model(self) -> None:
|
|
115
143
|
"""
|
|
116
144
|
Remove model data from time series
|
|
117
145
|
"""
|
|
118
146
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
for col_name in columns:
|
|
147
|
+
for col_name in self._processed_columns:
|
|
122
148
|
self._data.loc[
|
|
123
149
|
(pd.isna(self._data[f"{col_name}_flag"]))
|
|
124
150
|
| (self._data[f"{col_name}_flag"].str.contains(self._model_flag)),
|
|
125
151
|
col_name,
|
|
126
|
-
] =
|
|
127
|
-
|
|
128
|
-
# Conditionally, remove flags from DataFrame
|
|
129
|
-
if not self._flags:
|
|
130
|
-
self._data.drop(
|
|
131
|
-
map(lambda col_name: f"{col_name}_flag", columns), axis=1, inplace=True
|
|
132
|
-
)
|
|
152
|
+
] = pd.NA
|
|
133
153
|
|
|
134
154
|
# Drop nan-only rows
|
|
135
|
-
self._data.dropna(how="all", subset=
|
|
155
|
+
self._data.dropna(how="all", subset=self._processed_columns, inplace=True)
|
|
136
156
|
|
|
137
157
|
def _init_time_series(
|
|
138
158
|
self,
|
|
139
159
|
loc: Union[pd.DataFrame, Point, list, str], # Station(s) or geo point
|
|
140
160
|
start: datetime = None,
|
|
141
161
|
end: datetime = None,
|
|
142
|
-
model
|
|
143
|
-
flags
|
|
162
|
+
model=True, # Include model data?
|
|
163
|
+
flags=False, # Load source flags?
|
|
144
164
|
) -> None:
|
|
145
165
|
"""
|
|
146
166
|
Common initialization for all time series, regardless
|
|
@@ -169,20 +189,32 @@ class TimeSeries(MeteoData):
|
|
|
169
189
|
# Get data for all weather stations
|
|
170
190
|
self._data = self._get_data()
|
|
171
191
|
|
|
172
|
-
#
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
)
|
|
192
|
+
# Fill columns if they don't exist
|
|
193
|
+
for col in self._processed_columns:
|
|
194
|
+
if col not in self._data.columns:
|
|
195
|
+
self._data[col] = pd.NA
|
|
196
|
+
self._data[col] = self._data[col].astype("Float64")
|
|
197
|
+
self._data[f"{col}_flag"] = pd.NA
|
|
198
|
+
self._data[f"{col}_flag"] = self._data[f"{col}_flag"].astype("string")
|
|
180
199
|
|
|
181
|
-
#
|
|
182
|
-
|
|
200
|
+
# Reorder the DataFrame
|
|
201
|
+
self._data = self._data[
|
|
202
|
+
self._processed_columns + with_suffix(self._processed_columns, "_flag")
|
|
203
|
+
]
|
|
204
|
+
|
|
205
|
+
# Remove model data from DataFrame
|
|
183
206
|
if not model:
|
|
184
207
|
self._filter_model()
|
|
185
208
|
|
|
209
|
+
# Conditionally, remove flags from DataFrame
|
|
210
|
+
if not self._flags:
|
|
211
|
+
self._data.drop(
|
|
212
|
+
with_suffix(self._processed_columns, "_flag"),
|
|
213
|
+
axis=1,
|
|
214
|
+
errors="ignore",
|
|
215
|
+
inplace=True,
|
|
216
|
+
)
|
|
217
|
+
|
|
186
218
|
# Interpolate data spatially if requested
|
|
187
219
|
# location is a geographical point
|
|
188
220
|
if isinstance(loc, Point):
|
meteostat/series/aggregate.py
CHANGED
meteostat/series/convert.py
CHANGED
|
@@ -21,7 +21,7 @@ def convert(self, units: dict):
|
|
|
21
21
|
|
|
22
22
|
# Change data units
|
|
23
23
|
for parameter, unit in units.items():
|
|
24
|
-
if parameter in temp.
|
|
24
|
+
if parameter in temp._processed_columns:
|
|
25
25
|
temp._data[parameter] = temp._data[parameter].apply(unit)
|
|
26
26
|
|
|
27
27
|
# Return class instance
|
meteostat/series/interpolate.py
CHANGED
|
@@ -9,6 +9,7 @@ The code is licensed under the MIT license.
|
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
11
|
from copy import copy
|
|
12
|
+
import numpy as np
|
|
12
13
|
from meteostat.core.warn import warn
|
|
13
14
|
|
|
14
15
|
|
|
@@ -18,17 +19,26 @@ def interpolate(self, limit: int = 3):
|
|
|
18
19
|
"""
|
|
19
20
|
|
|
20
21
|
if self.count() > 0 and not self._data.isnull().values.all():
|
|
21
|
-
|
|
22
22
|
# Create temporal instance
|
|
23
23
|
temp = copy(self)
|
|
24
24
|
|
|
25
|
+
# Convert to float64
|
|
26
|
+
temp._data = temp._data.astype("float64")
|
|
27
|
+
|
|
25
28
|
# Apply interpolation
|
|
26
29
|
temp._data = temp._data.groupby("station", group_keys=False).apply(
|
|
27
30
|
lambda group: group.interpolate(
|
|
28
|
-
method="linear",
|
|
31
|
+
method="linear",
|
|
32
|
+
limit=limit,
|
|
33
|
+
limit_direction="both",
|
|
34
|
+
axis=0,
|
|
35
|
+
fill_value=np.nan,
|
|
29
36
|
)
|
|
30
37
|
)
|
|
31
38
|
|
|
39
|
+
# Convert to original type
|
|
40
|
+
temp._data = temp._data.astype("Float64")
|
|
41
|
+
|
|
32
42
|
# Return class instance
|
|
33
43
|
return temp
|
|
34
44
|
|
meteostat/series/normalize.py
CHANGED
|
@@ -27,9 +27,8 @@ def normalize(self):
|
|
|
27
27
|
temp = copy(self)
|
|
28
28
|
|
|
29
29
|
if temp._start and temp._end and temp.coverage() < 1:
|
|
30
|
-
|
|
31
30
|
# Create result DataFrame
|
|
32
|
-
result = pd.DataFrame(columns=temp.
|
|
31
|
+
result = pd.DataFrame(columns=temp._processed_columns, dtype="Float64")
|
|
33
32
|
|
|
34
33
|
# Handle tz-aware date ranges
|
|
35
34
|
if hasattr(temp, "_timezone") and temp._timezone is not None:
|
|
@@ -43,7 +42,7 @@ def normalize(self):
|
|
|
43
42
|
# Go through list of weather stations
|
|
44
43
|
for station in temp._stations:
|
|
45
44
|
# Create data frame
|
|
46
|
-
df = pd.DataFrame(columns=temp.
|
|
45
|
+
df = pd.DataFrame(columns=temp._processed_columns, dtype="Float64")
|
|
47
46
|
# Add time series
|
|
48
47
|
df["time"] = pd.date_range(
|
|
49
48
|
start,
|
|
@@ -54,7 +53,7 @@ def normalize(self):
|
|
|
54
53
|
# Add station ID
|
|
55
54
|
df["station"] = station
|
|
56
55
|
# Add columns
|
|
57
|
-
for column in temp.
|
|
56
|
+
for column in temp._processed_columns:
|
|
58
57
|
# Add column to DataFrame
|
|
59
58
|
df[column] = nan
|
|
60
59
|
|
|
@@ -71,7 +70,7 @@ def normalize(self):
|
|
|
71
70
|
)
|
|
72
71
|
|
|
73
72
|
# None -> nan
|
|
74
|
-
temp._data = temp._data.fillna(
|
|
73
|
+
temp._data = temp._data.fillna(pd.NA)
|
|
75
74
|
|
|
76
75
|
# Return class instance
|
|
77
76
|
return temp
|
meteostat/utilities/endpoint.py
CHANGED
|
@@ -25,7 +25,7 @@ def generate_endpoint_path(
|
|
|
25
25
|
# Base path
|
|
26
26
|
path = f"{granularity.value}/"
|
|
27
27
|
|
|
28
|
-
if granularity
|
|
28
|
+
if granularity in (Granularity.HOURLY, Granularity.DAILY) and year:
|
|
29
29
|
path += f"{year}/"
|
|
30
30
|
|
|
31
31
|
appendix = ".map" if map_file else ""
|