anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/inspect.py +27 -35
  7. anemoi/datasets/commands/recipe/__init__.py +93 -0
  8. anemoi/datasets/commands/recipe/format.py +55 -0
  9. anemoi/datasets/commands/recipe/migrate.py +555 -0
  10. anemoi/datasets/commands/validate.py +59 -0
  11. anemoi/datasets/compute/recentre.py +3 -6
  12. anemoi/datasets/create/__init__.py +64 -26
  13. anemoi/datasets/create/check.py +10 -12
  14. anemoi/datasets/create/chunks.py +1 -2
  15. anemoi/datasets/create/config.py +5 -6
  16. anemoi/datasets/create/input/__init__.py +44 -65
  17. anemoi/datasets/create/input/action.py +296 -238
  18. anemoi/datasets/create/input/context/__init__.py +71 -0
  19. anemoi/datasets/create/input/context/field.py +54 -0
  20. anemoi/datasets/create/input/data_sources.py +7 -9
  21. anemoi/datasets/create/input/misc.py +2 -75
  22. anemoi/datasets/create/input/repeated_dates.py +11 -130
  23. anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
  24. anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
  25. anemoi/datasets/create/input/trace.py +1 -1
  26. anemoi/datasets/create/patch.py +1 -2
  27. anemoi/datasets/create/persistent.py +3 -5
  28. anemoi/datasets/create/size.py +1 -3
  29. anemoi/datasets/create/sources/accumulations.py +120 -145
  30. anemoi/datasets/create/sources/accumulations2.py +20 -53
  31. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  32. anemoi/datasets/create/sources/constants.py +39 -40
  33. anemoi/datasets/create/sources/empty.py +22 -19
  34. anemoi/datasets/create/sources/fdb.py +133 -0
  35. anemoi/datasets/create/sources/forcings.py +29 -29
  36. anemoi/datasets/create/sources/grib.py +94 -78
  37. anemoi/datasets/create/sources/grib_index.py +57 -55
  38. anemoi/datasets/create/sources/hindcasts.py +57 -59
  39. anemoi/datasets/create/sources/legacy.py +10 -62
  40. anemoi/datasets/create/sources/mars.py +121 -149
  41. anemoi/datasets/create/sources/netcdf.py +28 -25
  42. anemoi/datasets/create/sources/opendap.py +28 -26
  43. anemoi/datasets/create/sources/patterns.py +4 -6
  44. anemoi/datasets/create/sources/recentre.py +46 -48
  45. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  46. anemoi/datasets/create/sources/source.py +26 -51
  47. anemoi/datasets/create/sources/tendencies.py +68 -98
  48. anemoi/datasets/create/sources/xarray.py +4 -6
  49. anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
  50. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
  51. anemoi/datasets/create/sources/xarray_support/field.py +20 -16
  52. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  53. anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
  54. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  55. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  56. anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
  57. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  58. anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
  59. anemoi/datasets/create/sources/xarray_zarr.py +28 -25
  60. anemoi/datasets/create/sources/zenodo.py +43 -41
  61. anemoi/datasets/create/statistics/__init__.py +3 -6
  62. anemoi/datasets/create/testing.py +4 -0
  63. anemoi/datasets/create/typing.py +1 -2
  64. anemoi/datasets/create/utils.py +0 -43
  65. anemoi/datasets/create/zarr.py +7 -2
  66. anemoi/datasets/data/__init__.py +15 -6
  67. anemoi/datasets/data/complement.py +7 -12
  68. anemoi/datasets/data/concat.py +5 -8
  69. anemoi/datasets/data/dataset.py +48 -47
  70. anemoi/datasets/data/debug.py +7 -9
  71. anemoi/datasets/data/ensemble.py +4 -6
  72. anemoi/datasets/data/fill_missing.py +7 -10
  73. anemoi/datasets/data/forwards.py +22 -26
  74. anemoi/datasets/data/grids.py +12 -168
  75. anemoi/datasets/data/indexing.py +9 -12
  76. anemoi/datasets/data/interpolate.py +7 -15
  77. anemoi/datasets/data/join.py +8 -12
  78. anemoi/datasets/data/masked.py +6 -11
  79. anemoi/datasets/data/merge.py +5 -9
  80. anemoi/datasets/data/misc.py +41 -45
  81. anemoi/datasets/data/missing.py +11 -16
  82. anemoi/datasets/data/observations/__init__.py +8 -14
  83. anemoi/datasets/data/padded.py +3 -5
  84. anemoi/datasets/data/records/backends/__init__.py +2 -2
  85. anemoi/datasets/data/rescale.py +5 -12
  86. anemoi/datasets/data/rolling_average.py +141 -0
  87. anemoi/datasets/data/select.py +13 -16
  88. anemoi/datasets/data/statistics.py +4 -7
  89. anemoi/datasets/data/stores.py +22 -29
  90. anemoi/datasets/data/subset.py +8 -11
  91. anemoi/datasets/data/unchecked.py +7 -11
  92. anemoi/datasets/data/xy.py +25 -21
  93. anemoi/datasets/dates/__init__.py +15 -18
  94. anemoi/datasets/dates/groups.py +7 -10
  95. anemoi/datasets/dumper.py +76 -0
  96. anemoi/datasets/grids.py +4 -185
  97. anemoi/datasets/schemas/recipe.json +131 -0
  98. anemoi/datasets/testing.py +93 -7
  99. anemoi/datasets/validate.py +598 -0
  100. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
  101. anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
  102. anemoi/datasets/create/filter.py +0 -48
  103. anemoi/datasets/create/input/concat.py +0 -164
  104. anemoi/datasets/create/input/context.py +0 -89
  105. anemoi/datasets/create/input/empty.py +0 -54
  106. anemoi/datasets/create/input/filter.py +0 -118
  107. anemoi/datasets/create/input/function.py +0 -233
  108. anemoi/datasets/create/input/join.py +0 -130
  109. anemoi/datasets/create/input/pipe.py +0 -66
  110. anemoi/datasets/create/input/step.py +0 -177
  111. anemoi/datasets/create/input/template.py +0 -162
  112. anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
  113. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  114. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  115. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  116. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
anemoi/datasets/grids.py CHANGED
@@ -8,15 +8,11 @@
8
8
  # nor does it submit to any jurisdiction.
9
9
 
10
10
 
11
- import base64
12
11
  import logging
13
12
  from typing import Any
14
- from typing import List
15
- from typing import Optional
16
- from typing import Tuple
17
- from typing import Union
18
13
 
19
14
  import numpy as np
15
+ from anemoi.utils.grids import latlon_to_xyz
20
16
  from numpy.typing import NDArray
21
17
 
22
18
  LOG = logging.getLogger(__name__)
@@ -92,71 +88,6 @@ def plot_mask(
92
88
  plt.savefig(path + "-global-zoomed.png")
93
89
 
94
90
 
95
- # TODO: Use the one from anemoi.utils.grids instead
96
- # from anemoi.utils.grids import ...
97
- def xyz_to_latlon(x: NDArray[Any], y: NDArray[Any], z: NDArray[Any]) -> Tuple[NDArray[Any], NDArray[Any]]:
98
- """Convert Cartesian coordinates to latitude and longitude.
99
-
100
- Parameters
101
- ----------
102
- x : NDArray[Any]
103
- X coordinates.
104
- y : NDArray[Any]
105
- Y coordinates.
106
- z : NDArray[Any]
107
- Z coordinates.
108
-
109
- Returns
110
- -------
111
- Tuple[NDArray[Any], NDArray[Any]]
112
- Latitude and longitude coordinates.
113
- """
114
- return (
115
- np.rad2deg(np.arcsin(np.minimum(1.0, np.maximum(-1.0, z)))),
116
- np.rad2deg(np.arctan2(y, x)),
117
- )
118
-
119
-
120
- # TODO: Use the one from anemoi.utils.grids instead
121
- # from anemoi.utils.grids import ...
122
- def latlon_to_xyz(
123
- lat: NDArray[Any], lon: NDArray[Any], radius: float = 1.0
124
- ) -> Tuple[NDArray[Any], NDArray[Any], NDArray[Any]]:
125
- """Convert latitude and longitude to Cartesian coordinates.
126
-
127
- Parameters
128
- ----------
129
- lat : NDArray[Any]
130
- Latitude coordinates.
131
- lon : NDArray[Any]
132
- Longitude coordinates.
133
- radius : float, optional
134
- Radius of the sphere. Defaults to 1.0.
135
-
136
- Returns
137
- -------
138
- Tuple[NDArray[Any], NDArray[Any], NDArray[Any]]
139
- X, Y, and Z coordinates.
140
- """
141
- # https://en.wikipedia.org/wiki/Geographic_coordinate_conversion#From_geodetic_to_ECEF_coordinates
142
- # We assume that the Earth is a sphere of radius 1 so N(phi) = 1
143
- # We assume h = 0
144
- #
145
- phi = np.deg2rad(lat)
146
- lda = np.deg2rad(lon)
147
-
148
- cos_phi = np.cos(phi)
149
- cos_lda = np.cos(lda)
150
- sin_phi = np.sin(phi)
151
- sin_lda = np.sin(lda)
152
-
153
- x = cos_phi * cos_lda * radius
154
- y = cos_phi * sin_lda * radius
155
- z = sin_phi * radius
156
-
157
- return x, y, z
158
-
159
-
160
91
  class Triangle3D:
161
92
  """A class to represent a 3D triangle and perform intersection tests with rays."""
162
93
 
@@ -272,8 +203,8 @@ def cutout_mask(
272
203
  global_lons: NDArray[Any],
273
204
  cropping_distance: float = 2.0,
274
205
  neighbours: int = 5,
275
- min_distance_km: Optional[Union[int, float]] = None,
276
- plot: Optional[str] = None,
206
+ min_distance_km: int | float | None = None,
207
+ plot: str | None = None,
277
208
  ) -> NDArray[Any]:
278
209
  """Return a mask for the points in [global_lats, global_lons] that are inside of [lats, lons].
279
210
 
@@ -465,7 +396,7 @@ def thinning_mask(
465
396
  return np.array([i for i in indices])
466
397
 
467
398
 
468
- def outline(lats: NDArray[Any], lons: NDArray[Any], neighbours: int = 5) -> List[int]:
399
+ def outline(lats: NDArray[Any], lons: NDArray[Any], neighbours: int = 5) -> list[int]:
469
400
  """Find the outline of the grid points.
470
401
 
471
402
  Parameters
@@ -513,92 +444,6 @@ def outline(lats: NDArray[Any], lons: NDArray[Any], neighbours: int = 5) -> List
513
444
  return outside
514
445
 
515
446
 
516
- def deserialise_mask(encoded: str) -> NDArray[Any]:
517
- """Deserialise a mask from a base64 encoded string.
518
-
519
- Parameters
520
- ----------
521
- encoded : str
522
- Base64 encoded string.
523
-
524
- Returns
525
- -------
526
- NDArray[Any]
527
- Deserialised mask array.
528
- """
529
- import pickle
530
- import zlib
531
-
532
- packed = pickle.loads(zlib.decompress(base64.b64decode(encoded)))
533
-
534
- mask = []
535
- value = False
536
- for count in packed:
537
- mask.extend([value] * count)
538
- value = not value
539
- return np.array(mask, dtype=bool)
540
-
541
-
542
- def _serialise_mask(mask: NDArray[Any]) -> str:
543
- """Serialise a mask to a base64 encoded string.
544
-
545
- Parameters
546
- ----------
547
- mask : NDArray[Any]
548
- Mask array.
549
-
550
- Returns
551
- -------
552
- str
553
- Base64 encoded string.
554
- """
555
- import pickle
556
- import zlib
557
-
558
- assert len(mask.shape) == 1
559
- assert len(mask)
560
-
561
- packed = []
562
- last = mask[0]
563
- count = 1
564
-
565
- for value in mask[1:]:
566
- if value == last:
567
- count += 1
568
- else:
569
- packed.append(count)
570
- last = value
571
- count = 1
572
-
573
- packed.append(count)
574
-
575
- # We always start with an 'off' value
576
- # So if the first value is 'on', we need to add a zero
577
- if mask[0]:
578
- packed.insert(0, 0)
579
-
580
- return base64.b64encode(zlib.compress(pickle.dumps(packed))).decode("utf-8")
581
-
582
-
583
- def serialise_mask(mask: NDArray[Any]) -> str:
584
- """Serialise a mask and ensure it can be deserialised.
585
-
586
- Parameters
587
- ----------
588
- mask : NDArray[Any]
589
- Mask array.
590
-
591
- Returns
592
- -------
593
- str
594
- Base64 encoded string.
595
- """
596
- result = _serialise_mask(mask)
597
- # Make sure we can deserialise it
598
- assert np.all(mask == deserialise_mask(result))
599
- return result
600
-
601
-
602
447
  def nearest_grid_points(
603
448
  source_latitudes: NDArray[Any],
604
449
  source_longitudes: NDArray[Any],
@@ -644,29 +489,3 @@ def nearest_grid_points(
644
489
  else:
645
490
  distances, indices = cKDTree(source_points).query(target_points, k=k, distance_upper_bound=max_distance)
646
491
  return distances, indices
647
-
648
-
649
- if __name__ == "__main__":
650
- global_lats, global_lons = np.meshgrid(
651
- np.linspace(90, -90, 90),
652
- np.linspace(-180, 180, 180),
653
- )
654
- global_lats = global_lats.flatten()
655
- global_lons = global_lons.flatten()
656
-
657
- lats, lons = np.meshgrid(
658
- np.linspace(50, 40, 100),
659
- np.linspace(-10, 15, 100),
660
- )
661
- lats = lats.flatten()
662
- lons = lons.flatten()
663
-
664
- mask = cutout_mask(lats, lons, global_lats, global_lons, cropping_distance=5.0)
665
-
666
- import matplotlib.pyplot as plt
667
-
668
- fig = plt.figure(figsize=(10, 5))
669
- plt.scatter(global_lons, global_lats, s=0.01, marker="o", c="r")
670
- plt.scatter(global_lons[mask], global_lats[mask], s=0.1, c="k")
671
- # plt.scatter(lons, lats, s=0.01)
672
- plt.savefig("cutout.png")
@@ -0,0 +1,131 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "type": "object",
4
+ "$id": "https://ecmwf.int/anemoi-datasets-recipe.schema.json",
5
+ "title": "Product",
6
+ "description": "Anemoi datasets recipe configuration",
7
+ "additionalProperties": false,
8
+ "$defs": {
9
+ "source-or-filter": {
10
+ "type": "object",
11
+ "minProperties": 1,
12
+ "maxProperties": 1
13
+ },
14
+ "pipe": {
15
+ "type": "array",
16
+ "items": {
17
+ "$ref": "#/$defs/input-object"
18
+ }
19
+ },
20
+ "join": {
21
+ "type": "array",
22
+ "items": {
23
+ "$ref": "#/$defs/input-object"
24
+ }
25
+ },
26
+ "concat": {
27
+ "type": "array",
28
+ "items": {
29
+ "type": "object",
30
+ "minProperties": 2,
31
+ "maxProperties": 2,
32
+ "required": [
33
+ "dates"
34
+ ]
35
+ }
36
+ },
37
+ "input-object": {
38
+ "oneOf": [
39
+ {
40
+ "$ref": "#/$defs/pipe"
41
+ },
42
+ {
43
+ "$ref": "#/$defs/join"
44
+ },
45
+ {
46
+ "$ref": "#/$defs/concat"
47
+ },
48
+ {
49
+ "$ref": "#/$defs/source-or-filter"
50
+ }
51
+ ]
52
+ }
53
+ },
54
+ "properties": {
55
+ "env": {
56
+ "type": "object"
57
+ },
58
+ "description": {
59
+ "type": "string"
60
+ },
61
+ "name": {
62
+ "type": "string"
63
+ },
64
+ "licence": {
65
+ "type": "string"
66
+ },
67
+ "attribution": {
68
+ "type": "string"
69
+ },
70
+ "dates": {
71
+ "type": "object",
72
+ "required": [
73
+ "start",
74
+ "end"
75
+ ],
76
+ "properties": {
77
+ "start": {
78
+ "type": "string",
79
+ "format": "date"
80
+ },
81
+ "end": {
82
+ "type": "string",
83
+ "format": "date"
84
+ },
85
+ "frequency": {
86
+ "type": [
87
+ "integer",
88
+ "string"
89
+ ]
90
+ },
91
+ "group_by": {
92
+ "type": [
93
+ "integer",
94
+ "string"
95
+ ]
96
+ }
97
+ }
98
+ },
99
+ "input": {
100
+ "$ref": "#/$defs/input-object"
101
+ },
102
+ "data_sources": {
103
+ "type": "object",
104
+ "patternProperties": {
105
+ "^[a-zA-Z_][a-zA-Z0-9_]*$": {
106
+ "$ref": "#/$defs/input-object"
107
+ }
108
+ },
109
+ "additionalProperties": false
110
+ },
111
+ "output": {
112
+ "type": "object"
113
+ },
114
+ "statistics": {
115
+ "type": "object"
116
+ },
117
+ "build": {
118
+ "type": "object"
119
+ },
120
+ "common": {
121
+ "type": "object"
122
+ },
123
+ "platform": {
124
+ "type": "object"
125
+ }
126
+ },
127
+ "required": [
128
+ "dates",
129
+ "input"
130
+ ]
131
+ }
@@ -12,19 +12,17 @@
12
12
 
13
13
  import logging
14
14
  from typing import Any
15
- from typing import List
16
- from typing import Optional
17
15
 
18
16
  LOG = logging.getLogger(__name__)
19
17
 
20
18
 
21
19
  def assert_field_list(
22
- fs: List[Any],
23
- size: Optional[int] = None,
24
- start: Optional[Any] = None,
25
- end: Optional[Any] = None,
20
+ fs: list[Any],
21
+ size: int | None = None,
22
+ start: Any | None = None,
23
+ end: Any | None = None,
26
24
  constant: bool = False,
27
- skip: Optional[Any] = None,
25
+ skip: Any | None = None,
28
26
  ) -> None:
29
27
  """Asserts various properties of a list of fields.
30
28
 
@@ -85,3 +83,91 @@ def assert_field_list(
85
83
  assert south >= -90, south
86
84
  assert east <= 360, east
87
85
  assert west >= -180, west
86
+
87
+
88
+ class IndexTester:
89
+ """Class to test indexing of datasets."""
90
+
91
+ def __init__(self, ds: Any) -> None:
92
+ """Initialise the IndexTester.
93
+
94
+ Parameters
95
+ ----------
96
+ ds : Any
97
+ Dataset.
98
+ """
99
+ self.ds = ds
100
+ self.np = ds[:] # Numpy array
101
+
102
+ assert self.ds.shape == self.np.shape, (self.ds.shape, self.np.shape)
103
+ assert (self.ds == self.np).all()
104
+
105
+ def __getitem__(self, index: Any) -> None:
106
+ """Test indexing.
107
+
108
+ Parameters
109
+ ----------
110
+ index : Any
111
+ Index.
112
+ """
113
+ LOG.info("IndexTester: %s", index)
114
+ if self.ds[index] is None:
115
+ assert False, (self.ds, index)
116
+
117
+ if not (self.ds[index] == self.np[index]).all():
118
+ assert (self.ds[index] == self.np[index]).all()
119
+
120
+
121
+ def default_test_indexing(ds):
122
+
123
+ t = IndexTester(ds)
124
+
125
+ t[0:10, :, 0]
126
+ t[:, 0:3, 0]
127
+ # t[:, :, 0]
128
+ t[0:10, 0:3, 0]
129
+ t[:, :, :]
130
+
131
+ if ds.shape[1] > 2: # Variable dimension
132
+ t[:, (1, 2), :]
133
+ t[:, (1, 2)]
134
+
135
+ t[0]
136
+ t[0, :]
137
+ t[0, 0, :]
138
+ t[0, 0, 0, :]
139
+
140
+ if ds.shape[2] > 1: # Ensemble dimension
141
+ t[0:10, :, (0, 1)]
142
+
143
+ for i in range(3):
144
+ t[i]
145
+ start = 5 * i
146
+ end = len(ds) - 5 * i
147
+ step = len(ds) // 10
148
+
149
+ t[start:end:step]
150
+ t[start:end]
151
+ t[start:]
152
+ t[:end]
153
+ t[::step]
154
+
155
+
156
+ class Trace:
157
+
158
+ def __init__(self, ds):
159
+ self.ds = ds
160
+ self.f = open("trace.txt", "a")
161
+
162
+ def __getattr__(self, name: str) -> Any:
163
+
164
+ print(name, file=self.f, flush=True)
165
+ return getattr(self.ds, name)
166
+
167
+ def __len__(self) -> int:
168
+ print("__len__", file=self.f, flush=True)
169
+ return len(self.ds)
170
+
171
+ def __getitem__(self, index: Any) -> Any:
172
+ print("__getitem__", file=self.f, flush=True)
173
+ return self.ds[index]