anemoi-datasets 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/cleanup.py +44 -0
  3. anemoi/datasets/commands/create.py +52 -21
  4. anemoi/datasets/commands/finalise-additions.py +45 -0
  5. anemoi/datasets/commands/finalise.py +39 -0
  6. anemoi/datasets/commands/init-additions.py +45 -0
  7. anemoi/datasets/commands/init.py +67 -0
  8. anemoi/datasets/commands/inspect.py +1 -1
  9. anemoi/datasets/commands/load-additions.py +47 -0
  10. anemoi/datasets/commands/load.py +47 -0
  11. anemoi/datasets/commands/patch.py +39 -0
  12. anemoi/datasets/create/__init__.py +959 -146
  13. anemoi/datasets/create/check.py +5 -3
  14. anemoi/datasets/create/config.py +54 -2
  15. anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
  16. anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
  17. anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
  18. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
  19. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
  20. anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
  21. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
  22. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
  23. anemoi/datasets/create/functions/sources/grib.py +86 -1
  24. anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
  25. anemoi/datasets/create/functions/sources/mars.py +9 -3
  26. anemoi/datasets/create/functions/sources/xarray/__init__.py +12 -2
  27. anemoi/datasets/create/functions/sources/xarray/coordinates.py +7 -0
  28. anemoi/datasets/create/functions/sources/xarray/field.py +8 -2
  29. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +0 -2
  30. anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -1
  31. anemoi/datasets/create/functions/sources/xarray/metadata.py +40 -40
  32. anemoi/datasets/create/functions/sources/xarray/time.py +63 -30
  33. anemoi/datasets/create/functions/sources/xarray/variable.py +15 -38
  34. anemoi/datasets/create/input.py +62 -39
  35. anemoi/datasets/create/persistent.py +1 -1
  36. anemoi/datasets/create/statistics/__init__.py +39 -23
  37. anemoi/datasets/create/utils.py +6 -2
  38. anemoi/datasets/data/__init__.py +1 -0
  39. anemoi/datasets/data/concat.py +46 -2
  40. anemoi/datasets/data/dataset.py +119 -34
  41. anemoi/datasets/data/debug.py +5 -1
  42. anemoi/datasets/data/forwards.py +17 -8
  43. anemoi/datasets/data/grids.py +17 -3
  44. anemoi/datasets/data/interpolate.py +133 -0
  45. anemoi/datasets/data/masked.py +2 -2
  46. anemoi/datasets/data/misc.py +56 -66
  47. anemoi/datasets/data/missing.py +240 -0
  48. anemoi/datasets/data/rescale.py +147 -0
  49. anemoi/datasets/data/select.py +7 -1
  50. anemoi/datasets/data/stores.py +23 -10
  51. anemoi/datasets/data/subset.py +47 -5
  52. anemoi/datasets/data/unchecked.py +20 -22
  53. anemoi/datasets/data/xy.py +125 -0
  54. anemoi/datasets/dates/__init__.py +124 -95
  55. anemoi/datasets/dates/groups.py +85 -20
  56. anemoi/datasets/grids.py +66 -48
  57. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/METADATA +8 -17
  58. anemoi_datasets-0.5.0.dist-info/RECORD +105 -0
  59. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/WHEEL +1 -1
  60. anemoi/datasets/create/loaders.py +0 -936
  61. anemoi_datasets-0.4.4.dist-info/RECORD +0 -86
  62. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/LICENSE +0 -0
  63. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/entry_points.txt +0 -0
  64. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/top_level.txt +0 -0
@@ -5,6 +5,7 @@
5
5
  # granted to it by virtue of its status as an intergovernmental organisation
6
6
  # nor does it submit to any jurisdiction.
7
7
 
8
+
8
9
  import logging
9
10
  import os
10
11
  import warnings
@@ -13,6 +14,7 @@ from urllib.parse import urlparse
13
14
 
14
15
  import numpy as np
15
16
  import zarr
17
+ from anemoi.utils.dates import frequency_to_timedelta
16
18
 
17
19
  from . import MissingDateError
18
20
  from .dataset import Dataset
@@ -82,6 +84,8 @@ class S3Store(ReadOnlyStore):
82
84
 
83
85
 
84
86
  class DebugStore(ReadOnlyStore):
87
+ """A store to debug the zarr loading."""
88
+
85
89
  def __init__(self, store):
86
90
  assert not isinstance(store, DebugStore)
87
91
  self.store = store
@@ -147,6 +151,8 @@ def open_zarr(path, dont_fail=False, cache=None):
147
151
 
148
152
 
149
153
  class Zarr(Dataset):
154
+ """A zarr dataset."""
155
+
150
156
  def __init__(self, path):
151
157
  if isinstance(path, zarr.hierarchy.Group):
152
158
  self.was_zarr = True
@@ -243,14 +249,20 @@ class Zarr(Dataset):
243
249
  delta = self.frequency
244
250
  if isinstance(delta, int):
245
251
  delta = f"{delta}h"
246
- from anemoi.datasets.create.loaders import TendenciesStatisticsAddition
252
+ from anemoi.utils.dates import frequency_to_string
253
+ from anemoi.utils.dates import frequency_to_timedelta
254
+
255
+ delta = frequency_to_timedelta(delta)
256
+ delta = frequency_to_string(delta)
257
+
258
+ def func(k):
259
+ return f"statistics_tendencies_{delta}_{k}"
247
260
 
248
- func = TendenciesStatisticsAddition.final_storage_name_from_delta
249
261
  return dict(
250
- mean=self.z[func("mean", delta)][:],
251
- stdev=self.z[func("stdev", delta)][:],
252
- maximum=self.z[func("maximum", delta)][:],
253
- minimum=self.z[func("minimum", delta)][:],
262
+ mean=self.z[func("mean")][:],
263
+ stdev=self.z[func("stdev")][:],
264
+ maximum=self.z[func("maximum")][:],
265
+ minimum=self.z[func("minimum")][:],
254
266
  )
255
267
 
256
268
  @property
@@ -268,12 +280,11 @@ class Zarr(Dataset):
268
280
  @property
269
281
  def frequency(self):
270
282
  try:
271
- return self.z.attrs["frequency"]
283
+ return frequency_to_timedelta(self.z.attrs["frequency"])
272
284
  except KeyError:
273
285
  LOG.warning("No 'frequency' in %r, computing from 'dates'", self)
274
286
  dates = self.dates
275
- delta = dates[1].astype(object) - dates[0].astype(object)
276
- return int(delta.total_seconds() / 3600)
287
+ return dates[1].astype(object) - dates[0].astype(object)
277
288
 
278
289
  @property
279
290
  def name_to_index(self):
@@ -322,11 +333,13 @@ class Zarr(Dataset):
322
333
 
323
334
 
324
335
  class ZarrWithMissingDates(Zarr):
336
+ """A zarr dataset with missing dates."""
337
+
325
338
  def __init__(self, path):
326
339
  super().__init__(path)
327
340
 
328
341
  missing_dates = self.z.attrs.get("missing_dates", [])
329
- missing_dates = [np.datetime64(x) for x in missing_dates]
342
+ missing_dates = set([np.datetime64(x) for x in missing_dates])
330
343
  self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
331
344
  self.missing = set(self.missing_to_dates)
332
345
 
@@ -9,6 +9,7 @@ import logging
9
9
  from functools import cached_property
10
10
 
11
11
  import numpy as np
12
+ from anemoi.utils.dates import frequency_to_timedelta
12
13
 
13
14
  from .debug import Node
14
15
  from .debug import Source
@@ -23,13 +24,51 @@ from .indexing import update_tuple
23
24
  LOG = logging.getLogger(__name__)
24
25
 
25
26
 
27
+ def _default(a, b, dates):
28
+ return [a, b]
29
+
30
+
31
+ def _start(a, b, dates):
32
+ from .misc import as_first_date
33
+
34
+ c = as_first_date(a, dates)
35
+ d = as_first_date(b, dates)
36
+ if c < d:
37
+ return b
38
+ else:
39
+ return a
40
+
41
+
42
+ def _end(a, b, dates):
43
+ from .misc import as_last_date
44
+
45
+ c = as_last_date(a, dates)
46
+ d = as_last_date(b, dates)
47
+ if c < d:
48
+ return a
49
+ else:
50
+ return b
51
+
52
+
53
+ def _combine_reasons(reason1, reason2, dates):
54
+
55
+ reason = reason1.copy()
56
+ for k, v in reason2.items():
57
+ if k not in reason:
58
+ reason[k] = v
59
+ else:
60
+ func = globals().get(f"_{k}", _default)
61
+ reason[k] = func(reason[k], v, dates)
62
+ return reason
63
+
64
+
26
65
  class Subset(Forwards):
27
66
  """Select a subset of the dates."""
28
67
 
29
68
  def __init__(self, dataset, indices, reason):
30
69
  while isinstance(dataset, Subset):
31
70
  indices = [dataset.indices[i] for i in indices]
32
- reason = {**reason, **dataset.reason}
71
+ reason = _combine_reasons(reason, dataset.reason, dataset.dates)
33
72
  dataset = dataset.dataset
34
73
 
35
74
  self.dataset = dataset
@@ -39,6 +78,12 @@ class Subset(Forwards):
39
78
  # Forward other properties to the super dataset
40
79
  super().__init__(dataset)
41
80
 
81
+ def clone(self, dataset):
82
+ return self.__class__(dataset, self.indices, self.reason).mutate()
83
+
84
+ def mutate(self):
85
+ return self.forward.swap_with_parent(parent=self)
86
+
42
87
  @debug_indexing
43
88
  def __getitem__(self, n):
44
89
  if isinstance(n, tuple):
@@ -66,10 +111,8 @@ class Subset(Forwards):
66
111
  @expand_list_indexing
67
112
  def _get_tuple(self, n):
68
113
  index, changes = index_to_slices(n, self.shape)
69
- # print('INDEX', index, changes)
70
114
  indices = [self.indices[i] for i in range(*index[0].indices(self._len))]
71
115
  indices = make_slice_or_index_from_list_or_tuple(indices)
72
- # print('INDICES', indices)
73
116
  index, _ = update_tuple(index, 0, indices)
74
117
  result = self.dataset[index]
75
118
  result = apply_index_to_slices_changes(result, changes)
@@ -89,8 +132,7 @@ class Subset(Forwards):
89
132
  @cached_property
90
133
  def frequency(self):
91
134
  dates = self.dates
92
- delta = dates[1].astype(object) - dates[0].astype(object)
93
- return int(delta.total_seconds() / 3600)
135
+ return frequency_to_timedelta(dates[1].astype(object) - dates[0].astype(object))
94
136
 
95
137
  def source(self, index):
96
138
  return Source(self, index, self.forward.source(index))
@@ -104,22 +104,29 @@ class Unchecked(Combined):
104
104
  def shape(self):
105
105
  raise NotImplementedError()
106
106
 
107
- @property
108
- def dtype(self):
109
- raise NotImplementedError()
107
+ # @property
108
+ # def field_shape(self):
109
+ # return tuple(d.shape for d in self.datasets)
110
110
 
111
- @property
112
- def grids(self):
113
- raise NotImplementedError()
111
+ # @property
112
+ # def latitudes(self):
113
+ # return tuple(d.latitudes for d in self.datasets)
114
114
 
115
+ # @property
116
+ # def longitudes(self):
117
+ # return tuple(d.longitudes for d in self.datasets)
115
118
 
116
- class Zip(Unchecked):
119
+ # @property
120
+ # def statistics(self):
121
+ # return tuple(d.statistics for d in self.datasets)
117
122
 
118
- def __len__(self):
119
- return min(len(d) for d in self.datasets)
123
+ # @property
124
+ # def resolution(self):
125
+ # return tuple(d.resolution for d in self.datasets)
120
126
 
121
- def __getitem__(self, n):
122
- return tuple(d[n] for d in self.datasets)
127
+ # @property
128
+ # def name_to_index(self):
129
+ # return tuple(d.name_to_index for d in self.datasets)
123
130
 
124
131
  @cached_property
125
132
  def missing(self):
@@ -142,17 +149,8 @@ class Chain(ConcatMixin, Unchecked):
142
149
  def dates(self):
143
150
  raise NotImplementedError()
144
151
 
145
-
146
- def zip_factory(args, kwargs):
147
-
148
- zip = kwargs.pop("zip")
149
- assert len(args) == 0
150
- assert isinstance(zip, (list, tuple))
151
-
152
- datasets = [_open(e) for e in zip]
153
- datasets, kwargs = _auto_adjust(datasets, kwargs)
154
-
155
- return Zip(datasets)._subset(**kwargs)
152
+ def dataset_metadata(self):
153
+ return {"multiple": [d.dataset_metadata() for d in self.datasets]}
156
154
 
157
155
 
158
156
  def chain_factory(args, kwargs):
@@ -0,0 +1,125 @@
1
+ # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
2
+ # This software is licensed under the terms of the Apache Licence Version 2.0
3
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
4
+ # In applying this licence, ECMWF does not waive the privileges and immunities
5
+ # granted to it by virtue of its status as an intergovernmental organisation
6
+ # nor does it submit to any jurisdiction.
7
+
8
+ import logging
9
+ from functools import cached_property
10
+
11
+ from .debug import Node
12
+ from .forwards import Combined
13
+ from .misc import _auto_adjust
14
+ from .misc import _open
15
+
16
+ LOG = logging.getLogger(__name__)
17
+
18
+
19
+ class ZipBase(Combined):
20
+
21
+ def swap_with_parent(self, parent):
22
+ new_parents = [parent.clone(ds) for ds in self.datasets]
23
+ return self.clone(new_parents)
24
+
25
+ def clone(self, datasets):
26
+ return self.__class__(datasets)
27
+
28
+ def tree(self):
29
+ return Node(self, [d.tree() for d in self.datasets])
30
+
31
+ def __len__(self):
32
+ return min(len(d) for d in self.datasets)
33
+
34
+ def __getitem__(self, n):
35
+ return tuple(d[n] for d in self.datasets)
36
+
37
+ def check_same_resolution(self, d1, d2):
38
+ pass
39
+
40
+ def check_same_grid(self, d1, d2):
41
+ pass
42
+
43
+ def check_same_variables(self, d1, d2):
44
+ pass
45
+
46
+ @cached_property
47
+ def missing(self):
48
+ result = set()
49
+ for d in self.datasets:
50
+ result = result | d.missing
51
+ return result
52
+
53
+ @property
54
+ def shape(self):
55
+ return tuple(d.shape for d in self.datasets)
56
+
57
+ @property
58
+ def field_shape(self):
59
+ return tuple(d.shape for d in self.datasets)
60
+
61
+ @property
62
+ def latitudes(self):
63
+ return tuple(d.latitudes for d in self.datasets)
64
+
65
+ @property
66
+ def longitudes(self):
67
+ return tuple(d.longitudes for d in self.datasets)
68
+
69
+ @property
70
+ def dtype(self):
71
+ return tuple(d.dtype for d in self.datasets)
72
+
73
+ @property
74
+ def grids(self):
75
+ return tuple(d.grids for d in self.datasets)
76
+
77
+ @property
78
+ def statistics(self):
79
+ return tuple(d.statistics for d in self.datasets)
80
+
81
+ @property
82
+ def resolution(self):
83
+ return tuple(d.resolution for d in self.datasets)
84
+
85
+ @property
86
+ def name_to_index(self):
87
+ return tuple(d.name_to_index for d in self.datasets)
88
+
89
+
90
+ class Zip(ZipBase):
91
+ pass
92
+
93
+
94
+ class XY(ZipBase):
95
+ pass
96
+
97
+
98
+ def xy_factory(args, kwargs):
99
+
100
+ if "xy" in kwargs:
101
+ xy = kwargs.pop("xy")
102
+ else:
103
+ xy = [kwargs.pop("x"), kwargs.pop("y")]
104
+
105
+ assert len(args) == 0
106
+ assert isinstance(xy, (list, tuple))
107
+
108
+ datasets = [_open(e) for e in xy]
109
+ datasets, kwargs = _auto_adjust(datasets, kwargs)
110
+
111
+ assert len(datasets) == 2
112
+
113
+ return XY(datasets)._subset(**kwargs)
114
+
115
+
116
+ def zip_factory(args, kwargs):
117
+
118
+ zip = kwargs.pop("zip")
119
+ assert len(args) == 0
120
+ assert isinstance(zip, (list, tuple))
121
+
122
+ datasets = [_open(e) for e in zip]
123
+ datasets, kwargs = _auto_adjust(datasets, kwargs)
124
+
125
+ return Zip(datasets)._subset(**kwargs)
@@ -9,64 +9,12 @@
9
9
  import datetime
10
10
  import warnings
11
11
 
12
+ # from anemoi.utils.dates import as_datetime
13
+ from anemoi.utils.dates import DateTimes
12
14
  from anemoi.utils.dates import as_datetime
13
-
14
-
15
- def _compress_dates(dates):
16
- dates = sorted(dates)
17
- if len(dates) < 3:
18
- yield dates
19
- return
20
-
21
- prev = first = dates.pop(0)
22
- curr = dates.pop(0)
23
- delta = curr - prev
24
- while curr - prev == delta:
25
- prev = curr
26
- if not dates:
27
- break
28
- curr = dates.pop(0)
29
-
30
- yield (first, prev, delta)
31
- if dates:
32
- yield from _compress_dates([curr] + dates)
33
-
34
-
35
- def compress_dates(dates):
36
- dates = [as_datetime(_) for _ in dates]
37
- result = []
38
-
39
- for n in _compress_dates(dates):
40
- if isinstance(n, list):
41
- result.extend([str(_) for _ in n])
42
- else:
43
- result.append(" ".join([str(n[0]), "to", str(n[1]), "by", str(n[2])]))
44
-
45
- return result
46
-
47
-
48
- def print_dates(dates):
49
- print(compress_dates(dates))
50
-
51
-
52
- def no_time_zone(date):
53
- return date.replace(tzinfo=None)
54
-
55
-
56
- def frequency_to_hours(frequency):
57
- if isinstance(frequency, int):
58
- return frequency
59
- assert isinstance(frequency, str), (type(frequency), frequency)
60
-
61
- unit = frequency[-1].lower()
62
- v = int(frequency[:-1])
63
- return {"h": v, "d": v * 24}[unit]
64
-
65
-
66
- def normalize_date(x):
67
- if isinstance(x, str):
68
- return no_time_zone(datetime.datetime.fromisoformat(x))
69
- return x
15
+ from anemoi.utils.dates import frequency_to_timedelta
16
+ from anemoi.utils.hindcasts import HindcastDatesTimes
17
+ from anemoi.utils.humanize import print_dates
70
18
 
71
19
 
72
20
  def extend(x):
@@ -79,37 +27,37 @@ def extend(x):
79
27
  if isinstance(x, str):
80
28
  if "/" in x:
81
29
  start, end, step = x.split("/")
82
- start = normalize_date(start)
83
- end = normalize_date(end)
84
- step = frequency_to_hours(step)
30
+ start = as_datetime(start)
31
+ end = as_datetime(end)
32
+ step = frequency_to_timedelta(step)
85
33
  while start <= end:
86
34
  yield start
87
- start += datetime.timedelta(hours=step)
35
+ start += step
88
36
  return
89
37
 
90
- yield normalize_date(x)
38
+ yield as_datetime(x)
91
39
 
92
40
 
93
- class Dates:
41
+ class DatesProvider:
94
42
  """Base class for date generation.
95
43
 
96
- >>> Dates.from_config(**{"start": "2023-01-01 00:00", "end": "2023-01-02 00:00", "frequency": "1d"}).values
44
+ >>> DatesProvider.from_config(**{"start": "2023-01-01 00:00", "end": "2023-01-02 00:00", "frequency": "1d"}).values
97
45
  [datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 2, 0, 0)]
98
46
 
99
- >>> Dates.from_config(**{"start": "2023-01-01 00:00", "end": "2023-01-03 00:00", "frequency": "18h"}).values
47
+ >>> DatesProvider.from_config(**{"start": "2023-01-01 00:00", "end": "2023-01-03 00:00", "frequency": "18h"}).values
100
48
  [datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 1, 18, 0), datetime.datetime(2023, 1, 2, 12, 0)]
101
49
 
102
- >>> Dates.from_config(start="2023-01-01 00:00", end="2023-01-02 00:00", frequency=6).as_dict()
50
+ >>> DatesProvider.from_config(start="2023-01-01 00:00", end="2023-01-02 00:00", frequency=6).as_dict()
103
51
  {'start': '2023-01-01T00:00:00', 'end': '2023-01-02T00:00:00', 'frequency': '6h'}
104
52
 
105
- >>> len(Dates.from_config(start="2023-01-01 00:00", end="2023-01-02 00:00", frequency=12))
53
+ >>> len(DatesProvider.from_config(start="2023-01-01 00:00", end="2023-01-02 00:00", frequency=12))
106
54
  3
107
- >>> len(Dates.from_config(start="2023-01-01 00:00",
55
+ >>> len(DatesProvider.from_config(start="2023-01-01 00:00",
108
56
  ... end="2023-01-02 00:00",
109
57
  ... frequency=12,
110
58
  ... missing=["2023-01-01 12:00"]))
111
59
  3
112
- >>> len(Dates.from_config(start="2023-01-01 00:00",
60
+ >>> len(DatesProvider.from_config(start="2023-01-01 00:00",
113
61
  ... end="2023-01-02 00:00",
114
62
  ... frequency=12,
115
63
  ... missing=["2099-01-01 12:00"]))
@@ -121,12 +69,18 @@ class Dates:
121
69
  missing = []
122
70
  self.missing = list(extend(missing))
123
71
  if set(self.missing) - set(self.values):
124
- warnings.warn(f"Missing dates {self.missing} not in list.")
72
+ diff = set(self.missing) - set(self.values)
73
+ warnings.warn(f"Missing dates {len(diff)=} not in list.")
125
74
 
126
75
  @classmethod
127
76
  def from_config(cls, **kwargs):
77
+
78
+ if kwargs.pop("hindcasts", False):
79
+ return HindcastsDates(**kwargs)
80
+
128
81
  if "values" in kwargs:
129
82
  return ValuesDates(**kwargs)
83
+
130
84
  return StartEndDates(**kwargs)
131
85
 
132
86
  def __iter__(self):
@@ -143,9 +97,9 @@ class Dates:
143
97
  return f"📅 {self.values[0]} ... {self.values[-1]}"
144
98
 
145
99
 
146
- class ValuesDates(Dates):
100
+ class ValuesDates(DatesProvider):
147
101
  def __init__(self, values, **kwargs):
148
- self.values = sorted([no_time_zone(_) for _ in values])
102
+ self.values = sorted([as_datetime(_) for _ in values])
149
103
  super().__init__(**kwargs)
150
104
 
151
105
  def __repr__(self):
@@ -155,9 +109,11 @@ class ValuesDates(Dates):
155
109
  return {"values": self.values[0]}
156
110
 
157
111
 
158
- class StartEndDates(Dates):
159
- def __init__(self, start, end, frequency=1, months=None, **kwargs):
160
- frequency = frequency_to_hours(frequency)
112
+ class StartEndDates(DatesProvider):
113
+ def __init__(self, start, end, frequency=1, **kwargs):
114
+
115
+ frequency = frequency_to_timedelta(frequency)
116
+ assert isinstance(frequency, datetime.timedelta), frequency
161
117
 
162
118
  def _(x):
163
119
  if isinstance(x, str):
@@ -173,38 +129,111 @@ class StartEndDates(Dates):
173
129
  if isinstance(end, datetime.date) and not isinstance(end, datetime.datetime):
174
130
  end = datetime.datetime(end.year, end.month, end.day)
175
131
 
176
- start = no_time_zone(start)
177
- end = no_time_zone(end)
178
-
179
- # if end <= start:
180
- # raise ValueError(f"End date {end} must be after start date {start}")
181
-
182
- increment = datetime.timedelta(hours=frequency)
132
+ start = as_datetime(start)
133
+ end = as_datetime(end)
183
134
 
184
135
  self.start = start
185
136
  self.end = end
186
137
  self.frequency = frequency
187
138
 
188
- date = start
189
- self.values = []
190
- while date <= end:
139
+ missing = kwargs.pop("missing", [])
191
140
 
192
- if months is not None:
193
- if date.month not in months:
194
- date += increment
195
- continue
141
+ self.values = list(DateTimes(start, end, increment=frequency, **kwargs))
142
+ self.kwargs = kwargs
196
143
 
197
- self.values.append(date)
198
- date += increment
199
-
200
- super().__init__(**kwargs)
144
+ super().__init__(missing=missing)
201
145
 
202
146
  def as_dict(self):
203
147
  return {
204
148
  "start": self.start.isoformat(),
205
149
  "end": self.end.isoformat(),
206
- "frequency": f"{self.frequency}h",
207
- }
150
+ "frequency": frequency_to_string(self.frequency),
151
+ }.update(self.kwargs)
152
+
153
+
154
+ class Hindcast:
155
+
156
+ def __init__(self, date, refdate, hdate, step):
157
+ self.date = date
158
+ self.refdate = refdate
159
+ self.hdate = hdate
160
+ self.step = step
161
+
162
+
163
+ class HindcastsDates(DatesProvider):
164
+ def __init__(self, start, end, steps=[0], years=20, **kwargs):
165
+
166
+ if not isinstance(start, list):
167
+ start = [start]
168
+ end = [end]
169
+
170
+ reference_dates = []
171
+ for s, e in zip(start, end):
172
+ reference_dates.extend(list(DateTimes(s, e, increment=24, **kwargs)))
173
+ # reference_dates = list(DateTimes(start, end, increment=24, **kwargs))
174
+ dates = []
175
+
176
+ seen = {}
177
+
178
+ for hdate, refdate in HindcastDatesTimes(reference_dates=reference_dates, years=years):
179
+ assert refdate - hdate >= datetime.timedelta(days=365), (refdate - hdate, refdate, hdate)
180
+ for step in steps:
181
+
182
+ date = hdate + datetime.timedelta(hours=step)
183
+
184
+ if date in seen:
185
+ raise ValueError(f"Duplicate date {date}={hdate}+{step} for {refdate} and {seen[date]}")
186
+
187
+ seen[date] = Hindcast(date, refdate, hdate, step)
188
+
189
+ assert refdate - date > datetime.timedelta(days=360), (refdate - date, refdate, date, hdate, step)
190
+
191
+ dates.append(date)
192
+
193
+ dates = sorted(dates)
194
+
195
+ mindelta = None
196
+ for a, b in zip(dates, dates[1:]):
197
+ delta = b - a
198
+ assert isinstance(delta, datetime.timedelta), delta
199
+ if mindelta is None:
200
+ mindelta = delta
201
+ else:
202
+ mindelta = min(mindelta, delta)
203
+
204
+ self.frequency = mindelta
205
+ assert mindelta.total_seconds() > 0, mindelta
206
+
207
+ print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", dates[0], dates[-1], mindelta)
208
+
209
+ # Use all values between start and end by frequency, and set the ones that are missing
210
+ self.values = []
211
+ missing = []
212
+ date = dates[0]
213
+ last = date
214
+ print("------", date, dates[-1])
215
+ dateset = set(dates)
216
+ while date <= dates[-1]:
217
+ self.values.append(date)
218
+ if date not in dateset:
219
+ missing.append(date)
220
+ seen[date] = seen[last]
221
+ else:
222
+ last = date
223
+ date = date + mindelta
224
+
225
+ self.mapping = seen
226
+
227
+ print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", self.values[0], self.values[-1], mindelta)
228
+ print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", f"{len(self.values)=} - {len(missing)=}")
229
+
230
+ super().__init__(missing=missing)
231
+
232
+ def __repr__(self):
233
+ return f"{self.__class__.__name__}({self.values[0]}..{self.values[-1]})"
234
+
235
+ def as_dict(self):
236
+ return {"hindcasts": self.hindcasts}
208
237
 
209
238
 
210
239
  if __name__ == "__main__":