anemoi-datasets 0.4.5__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/create.py +3 -2
- anemoi/datasets/commands/inspect.py +1 -1
- anemoi/datasets/commands/publish.py +30 -0
- anemoi/datasets/create/__init__.py +72 -35
- anemoi/datasets/create/check.py +6 -0
- anemoi/datasets/create/config.py +4 -3
- anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/rename.py +2 -3
- anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
- anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
- anemoi/datasets/create/functions/sources/__init__.py +7 -1
- anemoi/datasets/create/functions/sources/accumulations.py +2 -0
- anemoi/datasets/create/functions/sources/grib.py +87 -2
- anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
- anemoi/datasets/create/functions/sources/mars.py +9 -3
- anemoi/datasets/create/functions/sources/xarray/__init__.py +6 -1
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +6 -1
- anemoi/datasets/create/functions/sources/xarray/field.py +20 -5
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +16 -16
- anemoi/datasets/create/functions/sources/xarray/flavour.py +126 -12
- anemoi/datasets/create/functions/sources/xarray/grid.py +106 -17
- anemoi/datasets/create/functions/sources/xarray/metadata.py +6 -12
- anemoi/datasets/create/functions/sources/xarray/time.py +1 -5
- anemoi/datasets/create/functions/sources/xarray/variable.py +10 -10
- anemoi/datasets/create/input/__init__.py +69 -0
- anemoi/datasets/create/input/action.py +123 -0
- anemoi/datasets/create/input/concat.py +92 -0
- anemoi/datasets/create/input/context.py +59 -0
- anemoi/datasets/create/input/data_sources.py +71 -0
- anemoi/datasets/create/input/empty.py +42 -0
- anemoi/datasets/create/input/filter.py +76 -0
- anemoi/datasets/create/input/function.py +122 -0
- anemoi/datasets/create/input/join.py +57 -0
- anemoi/datasets/create/input/misc.py +85 -0
- anemoi/datasets/create/input/pipe.py +33 -0
- anemoi/datasets/create/input/repeated_dates.py +217 -0
- anemoi/datasets/create/input/result.py +413 -0
- anemoi/datasets/create/input/step.py +99 -0
- anemoi/datasets/create/{template.py → input/template.py} +0 -42
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/statistics/__init__.py +1 -1
- anemoi/datasets/create/utils.py +3 -0
- anemoi/datasets/create/zarr.py +4 -2
- anemoi/datasets/data/dataset.py +11 -1
- anemoi/datasets/data/debug.py +5 -1
- anemoi/datasets/data/masked.py +2 -2
- anemoi/datasets/data/rescale.py +147 -0
- anemoi/datasets/data/stores.py +20 -7
- anemoi/datasets/dates/__init__.py +113 -30
- anemoi/datasets/dates/groups.py +92 -19
- anemoi/datasets/fields.py +66 -0
- anemoi/datasets/utils/fields.py +47 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/METADATA +10 -19
- anemoi_datasets-0.5.5.dist-info/RECORD +121 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/input.py +0 -1065
- anemoi_datasets-0.4.5.dist-info/RECORD +0 -96
- /anemoi/datasets/create/{trace.py → input/trace.py} +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/top_level.txt +0 -0
@@ -10,8 +10,11 @@ import datetime
 import warnings
 
 # from anemoi.utils.dates import as_datetime
+from anemoi.utils.dates import DateTimes
 from anemoi.utils.dates import as_datetime
+from anemoi.utils.dates import frequency_to_string
 from anemoi.utils.dates import frequency_to_timedelta
+from anemoi.utils.hindcasts import HindcastDatesTimes
 from anemoi.utils.humanize import print_dates
 
 
@@ -30,32 +33,32 @@ def extend(x):
             step = frequency_to_timedelta(step)
             while start <= end:
                 yield start
-                start +=
+                start += step
             return
 
     yield as_datetime(x)
 
 
-class Dates:
+class DatesProvider:
     """Base class for date generation.
 
-    >>>
+    >>> DatesProvider.from_config(**{"start": "2023-01-01 00:00", "end": "2023-01-02 00:00", "frequency": "1d"}).values
    [datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 2, 0, 0)]
 
-    >>>
+    >>> DatesProvider.from_config(**{"start": "2023-01-01 00:00", "end": "2023-01-03 00:00", "frequency": "18h"}).values
    [datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 1, 18, 0), datetime.datetime(2023, 1, 2, 12, 0)]
 
-    >>>
+    >>> DatesProvider.from_config(start="2023-01-01 00:00", end="2023-01-02 00:00", frequency=6).as_dict()
    {'start': '2023-01-01T00:00:00', 'end': '2023-01-02T00:00:00', 'frequency': '6h'}
 
-    >>> len(
+    >>> len(DatesProvider.from_config(start="2023-01-01 00:00", end="2023-01-02 00:00", frequency=12))
    3
-    >>> len(
+    >>> len(DatesProvider.from_config(start="2023-01-01 00:00",
    ...         end="2023-01-02 00:00",
    ...         frequency=12,
    ...         missing=["2023-01-01 12:00"]))
    3
-    >>> len(
+    >>> len(DatesProvider.from_config(start="2023-01-01 00:00",
    ...         end="2023-01-02 00:00",
    ...         frequency=12,
    ...         missing=["2099-01-01 12:00"]))
@@ -67,12 +70,18 @@ class Dates:
             missing = []
         self.missing = list(extend(missing))
         if set(self.missing) - set(self.values):
-
+            diff = set(self.missing) - set(self.values)
+            warnings.warn(f"Missing dates {len(diff)=} not in list.")
 
     @classmethod
     def from_config(cls, **kwargs):
+
+        if kwargs.pop("hindcasts", False):
+            return HindcastsDates(**kwargs)
+
         if "values" in kwargs:
             return ValuesDates(**kwargs)
+
         return StartEndDates(**kwargs)
 
     def __iter__(self):
@@ -89,7 +98,7 @@ class Dates:
         return f"📅 {self.values[0]} ... {self.values[-1]}"
 
 
-class ValuesDates(Dates):
+class ValuesDates(DatesProvider):
     def __init__(self, values, **kwargs):
         self.values = sorted([as_datetime(_) for _ in values])
         super().__init__(**kwargs)
@@ -101,8 +110,9 @@ class ValuesDates(Dates):
         return {"values": self.values[0]}
 
 
-class StartEndDates(Dates):
-    def __init__(self, start, end, frequency=1,
+class StartEndDates(DatesProvider):
+    def __init__(self, start, end, frequency=1, **kwargs):
+
         frequency = frequency_to_timedelta(frequency)
         assert isinstance(frequency, datetime.timedelta), frequency
 
@@ -123,35 +133,108 @@ class StartEndDates(Dates):
         start = as_datetime(start)
         end = as_datetime(end)
 
-        # if end <= start:
-        #     raise ValueError(f"End date {end} must be after start date {start}")
-
-        increment = frequency
-
         self.start = start
         self.end = end
         self.frequency = frequency
 
-
-        self.values = []
-        while date <= end:
+        missing = kwargs.pop("missing", [])
 
-
-
-            date += increment
-            continue
+        self.values = list(DateTimes(start, end, increment=frequency, **kwargs))
+        self.kwargs = kwargs
 
-
-            date += increment
-
-        super().__init__(**kwargs)
+        super().__init__(missing=missing)
 
     def as_dict(self):
         return {
             "start": self.start.isoformat(),
             "end": self.end.isoformat(),
-            "frequency":
-        }
+            "frequency": frequency_to_string(self.frequency),
+        }.update(self.kwargs)
+
+
+class Hindcast:
+
+    def __init__(self, date, refdate, hdate, step):
+        self.date = date
+        self.refdate = refdate
+        self.hdate = hdate
+        self.step = step
+
+
+class HindcastsDates(DatesProvider):
+    def __init__(self, start, end, steps=[0], years=20, **kwargs):
+
+        if not isinstance(start, list):
+            start = [start]
+            end = [end]
+
+        reference_dates = []
+        for s, e in zip(start, end):
+            reference_dates.extend(list(DateTimes(s, e, increment=24, **kwargs)))
+        # reference_dates = list(DateTimes(start, end, increment=24, **kwargs))
+        dates = []
+
+        seen = {}
+
+        for hdate, refdate in HindcastDatesTimes(reference_dates=reference_dates, years=years):
+            assert refdate - hdate >= datetime.timedelta(days=365), (refdate - hdate, refdate, hdate)
+            for step in steps:
+
+                date = hdate + datetime.timedelta(hours=step)
+
+                if date in seen:
+                    raise ValueError(f"Duplicate date {date}={hdate}+{step} for {refdate} and {seen[date]}")
+
+                seen[date] = Hindcast(date, refdate, hdate, step)
+
+                assert refdate - date > datetime.timedelta(days=360), (refdate - date, refdate, date, hdate, step)
+
+                dates.append(date)
+
+        dates = sorted(dates)
+
+        mindelta = None
+        for a, b in zip(dates, dates[1:]):
+            delta = b - a
+            assert isinstance(delta, datetime.timedelta), delta
+            if mindelta is None:
+                mindelta = delta
+            else:
+                mindelta = min(mindelta, delta)
+
+        self.frequency = mindelta
+        assert mindelta.total_seconds() > 0, mindelta
+
+        print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", dates[0], dates[-1], mindelta)
+
+        # Use all values between start and end by frequency, and set the ones that are missing
+        self.values = []
+        missing = []
+        date = dates[0]
+        last = date
+        print("------", date, dates[-1])
+        dateset = set(dates)
+        while date <= dates[-1]:
+            self.values.append(date)
+            if date not in dateset:
+                missing.append(date)
+                seen[date] = seen[last]
+            else:
+                last = date
+            date = date + mindelta
+
+        self.mapping = seen
+
+        print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", self.values[0], self.values[-1], mindelta)
+        print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", f"{len(self.values)=} - {len(missing)=}")
+
+        super().__init__(missing=missing)
+
+    def __repr__(self):
+        return f"{self.__class__.__name__}({self.values[0]}..{self.values[-1]})"
+
+    def as_dict(self):
+        return {"hindcasts": self.hindcasts}
 
 
 if __name__ == "__main__":
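
The new `hindcasts` branch in `DatesProvider.from_config` is easiest to see from the call side. Below is a minimal editor's sketch, not part of the diff: it assumes anemoi-datasets 0.5.5 and its anemoi-utils dependency are installed, and the start/end/steps values are purely illustrative.

    import datetime

    from anemoi.datasets.dates import DatesProvider

    # Regular case: start/end/frequency builds a StartEndDates provider,
    # as in the doctests above.
    dates = DatesProvider.from_config(start="2023-01-01 00:00", end="2023-01-02 00:00", frequency="6h")
    print(len(dates.values), dates.missing)

    # With hindcasts=True, from_config() pops the flag and returns a
    # HindcastsDates provider instead; it derives `values`, `missing`,
    # `frequency` and a date -> Hindcast `mapping` from the reference
    # dates and the requested steps.
    hindcasts = DatesProvider.from_config(
        hindcasts=True,
        start=datetime.datetime(2023, 5, 1),
        end=datetime.datetime(2023, 5, 10),
        steps=[0, 6],
        years=20,
    )
    print(hindcasts.frequency, len(hindcasts.values), len(hindcasts.mapping))
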
anemoi/datasets/dates/groups.py
CHANGED

@@ -7,11 +7,42 @@
 
 
 import itertools
+from functools import cached_property
 
-from anemoi.datasets.dates import
+from anemoi.datasets.dates import DatesProvider
 from anemoi.datasets.dates import as_datetime
 
 
+def _shorten(dates):
+    if isinstance(dates, (list, tuple)):
+        dates = [d.isoformat() for d in dates]
+        if len(dates) > 5:
+            return f"{dates[0]}...{dates[-1]}"
+    return dates
+
+
+class GroupOfDates:
+    def __init__(self, dates, provider, partial_ok=False):
+        assert isinstance(provider, DatesProvider), type(provider)
+        assert isinstance(dates, list)
+
+        self.dates = dates
+        self.provider = provider
+        self.partial_ok = partial_ok
+
+    def __len__(self):
+        return len(self.dates)
+
+    def __iter__(self):
+        return iter(self.dates)
+
+    def __repr__(self) -> str:
+        return f"GroupOfDates(dates={_shorten(self.dates)})"
+
+    def __eq__(self, other: object) -> bool:
+        return isinstance(other, GroupOfDates) and self.dates == other.dates
+
+
 class Groups:
     """>>> list(Groups(group_by="daily", start="2023-01-01 00:00", end="2023-01-05 00:00", frequency=12))[0]
     [datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 1, 12, 0)]
@@ -41,33 +72,48 @@ class Groups:
 
     def __init__(self, **kwargs):
         group_by = kwargs.pop("group_by")
-        self.
-        self.
-        self.
+        self._dates = DatesProvider.from_config(**kwargs)
+        self._grouper = Grouper.from_config(group_by)
+        self._filter = Filter(self._dates.missing)
+
+    @property
+    def provider(self):
+        return self._dates
 
     def __iter__(self):
-        for
-            dates = self.
+        for go in self._grouper(self._dates):
+            dates = self._filter(go.dates)
             if not dates:
                 continue
-            yield dates
+            yield GroupOfDates(dates, go.provider)
 
     def __len__(self):
-
-
-
+        return self._len
+
+    @cached_property
+    def _len(self):
+        n = 0
+        for go in self._grouper(self._dates):
+            dates = self._filter(go.dates)
             if not dates:
                 continue
-
-            return
+            n += 1
+        return n
 
     def __repr__(self):
-        return f"{self.__class__.__name__}(dates={len(self)})"
+        return f"{self.__class__.__name__}(dates={len(self)},{_shorten(self._dates)})"
+
+    def describe(self):
+        return self.dates.summary
+
+    def one_date(self):
+        go = next(iter(self))
+        return GroupOfDates([go.dates[0]], go.provider)
 
 
 class Filter:
     def __init__(self, missing):
-        self.missing =
+        self.missing = set(as_datetime(m) for m in missing)
 
     def __call__(self, dates):
         return [d for d in dates if d not in self.missing]
@@ -76,10 +122,16 @@ class Filter:
 class Grouper:
     @classmethod
     def from_config(cls, group_by):
+
         if isinstance(group_by, int) and group_by > 0:
             return GrouperByFixedSize(group_by)
+
         if group_by is None:
             return GrouperOneGroup()
+
+        if group_by == "reference_date":
+            return ReferenceDateGroup()
+
         key = {
             "monthly": lambda dt: (dt.year, dt.month),
             "daily": lambda dt: (dt.year, dt.month, dt.day),
@@ -89,30 +141,51 @@ class Grouper:
         return GrouperByKey(key)
 
 
+class ReferenceDateGroup(Grouper):
+    def __call__(self, dates):
+        assert isinstance(dates, DatesProvider), type(dates)
+
+        mapping = dates.mapping
+
+        def same_refdate(dt):
+            return mapping[dt].refdate
+
+        for _, g in itertools.groupby(sorted(dates, key=same_refdate), key=same_refdate):
+            yield GroupOfDates(list(g), dates)
+
+
 class GrouperOneGroup(Grouper):
     def __call__(self, dates):
-
+        assert isinstance(dates, DatesProvider), type(dates)
+
+        yield GroupOfDates(dates.values, dates)
 
 
 class GrouperByKey(Grouper):
+    """Group dates by a key."""
+
     def __init__(self, key):
         self.key = key
 
     def __call__(self, dates):
-        for _, g in itertools.groupby(dates, key=self.key):
-            yield list(g)
+        for _, g in itertools.groupby(sorted(dates, key=self.key), key=self.key):
+            yield GroupOfDates(list(g), dates)
 
 
 class GrouperByFixedSize(Grouper):
+    """Group dates by a fixed size."""
+
     def __init__(self, size):
         self.size = size
 
     def __call__(self, dates):
         batch = []
+
         for d in dates:
             batch.append(d)
             if len(batch) == self.size:
-                yield batch
+                yield GroupOfDates(batch, dates)
                 batch = []
+
         if batch:
-            yield batch
+            yield GroupOfDates(batch, dates)
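
Iterating a `Groups` object now yields `GroupOfDates` wrappers rather than bare lists; each wrapper keeps a reference to the `DatesProvider` it was built from. A minimal editor's sketch, not part of the diff (assumes anemoi-datasets 0.5.5; the dates are the same illustrative values as in the docstring above):

    from anemoi.datasets.dates.groups import Groups

    groups = Groups(group_by="daily", start="2023-01-01 00:00", end="2023-01-05 00:00", frequency=12)
    print(len(groups))                        # number of non-empty groups

    for group in groups:                      # each item is a GroupOfDates
        print(len(group), group.provider, list(group)[:2])
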
anemoi/datasets/fields.py
ADDED

@@ -0,0 +1,66 @@
+# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+
+from earthkit.data.indexing.fieldlist import FieldArray
+
+
+def list_to_fieldlist(fields):
+    return FieldArray(fields)
+
+
+def empty_fieldlist():
+    return FieldArray([])
+
+
+class WrappedField:
+    def __init__(self, field):
+        self._field = field
+
+    def __getattr__(self, name):
+        return getattr(self._field, name)
+
+    def __repr__(self) -> str:
+        return repr(self._field)
+
+
+class NewDataField(WrappedField):
+    def __init__(self, field, data):
+        super().__init__(field)
+        self._data = data
+        self.shape = data.shape
+
+    def to_numpy(self, flatten=False, dtype=None, index=None):
+        data = self._data
+        if dtype is not None:
+            data = data.astype(dtype)
+        if flatten:
+            data = data.flatten()
+        if index is not None:
+            data = data[index]
+        return data
+
+
+class NewMetadataField(WrappedField):
+    def __init__(self, field, **kwargs):
+        super().__init__(field)
+        self._metadata = kwargs
+
+    def metadata(self, *args, **kwargs):
+        if len(args) == 1 and args[0] in self._metadata:
+            return self._metadata[args[0]]
+        return self._field.metadata(*args, **kwargs)
+
+
+class NewValidDateTimeField(NewMetadataField):
+    def __init__(self, field, valid_datetime):
+        date = valid_datetime.date().strftime("%Y%m%d")
+        time = valid_datetime.time().strftime("%H%M")
+
+        self.valid_datetime = valid_datetime
+
+        super().__init__(field, date=date, time=time, step=0, valid_datetime=valid_datetime.isoformat())
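
These wrappers override selected metadata (or data) while delegating everything else to the wrapped field. A short editor's sketch of the intent, not part of the diff, using a hypothetical stand-in field instead of a real earthkit-data field (assumes anemoi-datasets 0.5.5 and earthkit-data are installed):

    import datetime

    from anemoi.datasets.fields import NewValidDateTimeField, list_to_fieldlist


    class StubField:
        """Hypothetical stand-in for an earthkit-data field, for illustration only."""

        def metadata(self, *args, **kwargs):
            values = {"param": "2t", "levtype": "sfc"}
            return values.get(args[0]) if args else values


    field = NewValidDateTimeField(StubField(), datetime.datetime(2023, 1, 1, 6))

    print(field.metadata("date"), field.metadata("time"), field.metadata("step"))  # 20230101 0600 0
    print(field.metadata("param"))   # "2t", delegated to the wrapped field

    fields = list_to_fieldlist([field])  # wrap into an earthkit FieldArray
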
anemoi/datasets/utils/fields.py
ADDED

@@ -0,0 +1,47 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+
+class WrappedField:
+    def __init__(self, field):
+        self._field = field
+
+    def __getattr__(self, name):
+        return getattr(self._field, name)
+
+    def __repr__(self) -> str:
+        return repr(self._field)
+
+
+class NewDataField(WrappedField):
+    def __init__(self, field, data):
+        super().__init__(field)
+        self._data = data
+        self.shape = data.shape
+
+    def to_numpy(self, flatten=False, dtype=None, index=None):
+        data = self._data
+        if dtype is not None:
+            data = data.astype(dtype)
+        if flatten:
+            data = data.flatten()
+        if index is not None:
+            data = data[index]
+        return data
+
+
+class NewMetadataField(WrappedField):
+    def __init__(self, field, **kwargs):
+        super().__init__(field)
+        self._metadata = kwargs
+
+    def metadata(self, *args, **kwargs):
+        if len(args) == 1 and args[0] in self._metadata:
+            return self._metadata[args[0]]
+        return self._field.metadata(*args, **kwargs)
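
`NewDataField` swaps the array returned by `to_numpy()` while keeping the original field's metadata. A minimal editor's sketch, not part of the diff (hypothetical stand-in field; assumes numpy and anemoi-datasets 0.5.5):

    import numpy as np

    from anemoi.datasets.utils.fields import NewDataField


    class StubField:
        """Hypothetical stand-in for an earthkit-data field."""

        def metadata(self, *args, **kwargs):
            return "2t" if args and args[0] == "param" else None


    patched = NewDataField(StubField(), np.arange(6.0).reshape(2, 3))

    print(patched.shape)                                     # (2, 3), taken from the new array
    print(patched.to_numpy(flatten=True, dtype=np.float32))  # flattened float32 copy
    print(patched.metadata("param"))                         # "2t", delegated via __getattr__
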
{anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: anemoi-datasets
-Version: 0.4.5
+Version: 0.5.5
 Summary: A package to hold various functions to support training of ML models on ECMWF data.
 Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
 License: Apache License
@@ -224,38 +224,36 @@ Classifier: Programming Language :: Python :: Implementation :: PyPy
 Requires-Python: >=3.9
 License-File: LICENSE
 Requires-Dist: anemoi-utils[provenance] >=0.3.15
+Requires-Dist: cfunits
 Requires-Dist: numpy
 Requires-Dist: pyyaml
 Requires-Dist: semantic-version
 Requires-Dist: tqdm
-Requires-Dist: zarr
+Requires-Dist: zarr <=2.17
 Provides-Extra: all
-Requires-Dist: aiohttp ; extra == 'all'
 Requires-Dist: boto3 ; extra == 'all'
 Requires-Dist: earthkit-data[mars] >=0.9 ; extra == 'all'
 Requires-Dist: earthkit-geo >=0.2 ; extra == 'all'
 Requires-Dist: earthkit-meteo ; extra == 'all'
-Requires-Dist:
+Requires-Dist: ecmwflibs >=0.6.3 ; extra == 'all'
 Requires-Dist: entrypoints ; extra == 'all'
 Requires-Dist: gcsfs ; extra == 'all'
 Requires-Dist: kerchunk ; extra == 'all'
 Requires-Dist: pyproj ; extra == 'all'
 Requires-Dist: requests ; extra == 'all'
-Requires-Dist: s3fs ; extra == 'all'
 Provides-Extra: create
 Requires-Dist: earthkit-data[mars] >=0.9 ; extra == 'create'
 Requires-Dist: earthkit-geo >=0.2 ; extra == 'create'
 Requires-Dist: earthkit-meteo ; extra == 'create'
-Requires-Dist:
+Requires-Dist: ecmwflibs >=0.6.3 ; extra == 'create'
 Requires-Dist: entrypoints ; extra == 'create'
 Requires-Dist: pyproj ; extra == 'create'
 Provides-Extra: dev
-Requires-Dist: aiohttp ; extra == 'dev'
 Requires-Dist: boto3 ; extra == 'dev'
 Requires-Dist: earthkit-data[mars] >=0.9 ; extra == 'dev'
 Requires-Dist: earthkit-geo >=0.2 ; extra == 'dev'
 Requires-Dist: earthkit-meteo ; extra == 'dev'
-Requires-Dist:
+Requires-Dist: ecmwflibs >=0.6.3 ; extra == 'dev'
 Requires-Dist: entrypoints ; extra == 'dev'
 Requires-Dist: gcsfs ; extra == 'dev'
 Requires-Dist: kerchunk ; extra == 'dev'
@@ -264,27 +262,20 @@ Requires-Dist: pandoc ; extra == 'dev'
 Requires-Dist: pyproj ; extra == 'dev'
 Requires-Dist: pytest ; extra == 'dev'
 Requires-Dist: requests ; extra == 'dev'
-Requires-Dist: rstfmt ; extra == 'dev'
-Requires-Dist: s3fs ; extra == 'dev'
 Requires-Dist: sphinx ; extra == 'dev'
-Requires-Dist: sphinx-argparse <0.5 ; extra == 'dev'
 Requires-Dist: sphinx-rtd-theme ; extra == 'dev'
 Provides-Extra: docs
 Requires-Dist: nbsphinx ; extra == 'docs'
 Requires-Dist: pandoc ; extra == 'docs'
-Requires-Dist: rstfmt ; extra == 'docs'
 Requires-Dist: sphinx ; extra == 'docs'
-Requires-Dist: sphinx-argparse
+Requires-Dist: sphinx-argparse ; extra == 'docs'
 Requires-Dist: sphinx-rtd-theme ; extra == 'docs'
-Provides-Extra: kerchunk
-Requires-Dist: gcsfs ; extra == 'kerchunk'
-Requires-Dist: kerchunk ; extra == 'kerchunk'
-Requires-Dist: s3fs ; extra == 'kerchunk'
 Provides-Extra: remote
-Requires-Dist: aiohttp ; extra == 'remote'
 Requires-Dist: boto3 ; extra == 'remote'
 Requires-Dist: requests ; extra == 'remote'
-Requires-Dist: s3fs ; extra == 'remote'
 Provides-Extra: tests
 Requires-Dist: pytest ; extra == 'tests'
+Provides-Extra: xarray
+Requires-Dist: gcsfs ; extra == 'xarray'
+Requires-Dist: kerchunk ; extra == 'xarray'
 