anemoi-datasets 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/compare.py +59 -0
  3. anemoi/datasets/commands/create.py +84 -3
  4. anemoi/datasets/commands/inspect.py +3 -3
  5. anemoi/datasets/create/__init__.py +44 -17
  6. anemoi/datasets/create/check.py +6 -5
  7. anemoi/datasets/create/chunks.py +1 -1
  8. anemoi/datasets/create/config.py +5 -26
  9. anemoi/datasets/create/functions/filters/rename.py +9 -1
  10. anemoi/datasets/create/functions/filters/rotate_winds.py +10 -1
  11. anemoi/datasets/create/functions/sources/__init__.py +39 -0
  12. anemoi/datasets/create/functions/sources/accumulations.py +11 -41
  13. anemoi/datasets/create/functions/sources/constants.py +3 -0
  14. anemoi/datasets/create/functions/sources/grib.py +4 -0
  15. anemoi/datasets/create/functions/sources/hindcasts.py +32 -377
  16. anemoi/datasets/create/functions/sources/mars.py +53 -22
  17. anemoi/datasets/create/functions/sources/netcdf.py +2 -60
  18. anemoi/datasets/create/functions/sources/opendap.py +3 -2
  19. anemoi/datasets/create/functions/sources/xarray/__init__.py +73 -0
  20. anemoi/datasets/create/functions/sources/xarray/coordinates.py +234 -0
  21. anemoi/datasets/create/functions/sources/xarray/field.py +109 -0
  22. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +171 -0
  23. anemoi/datasets/create/functions/sources/xarray/flavour.py +330 -0
  24. anemoi/datasets/create/functions/sources/xarray/grid.py +46 -0
  25. anemoi/datasets/create/functions/sources/xarray/metadata.py +161 -0
  26. anemoi/datasets/create/functions/sources/xarray/time.py +98 -0
  27. anemoi/datasets/create/functions/sources/xarray/variable.py +198 -0
  28. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +42 -0
  29. anemoi/datasets/create/functions/sources/xarray_zarr.py +15 -0
  30. anemoi/datasets/create/functions/sources/zenodo.py +40 -0
  31. anemoi/datasets/create/input.py +290 -172
  32. anemoi/datasets/create/loaders.py +120 -71
  33. anemoi/datasets/create/patch.py +17 -14
  34. anemoi/datasets/create/persistent.py +1 -1
  35. anemoi/datasets/create/size.py +4 -5
  36. anemoi/datasets/create/statistics/__init__.py +49 -16
  37. anemoi/datasets/create/template.py +11 -61
  38. anemoi/datasets/create/trace.py +91 -0
  39. anemoi/datasets/create/utils.py +0 -48
  40. anemoi/datasets/create/zarr.py +24 -10
  41. anemoi/datasets/data/misc.py +9 -37
  42. anemoi/datasets/data/stores.py +29 -14
  43. anemoi/datasets/dates/__init__.py +7 -1
  44. anemoi/datasets/dates/groups.py +3 -0
  45. {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/METADATA +18 -3
  46. anemoi_datasets-0.4.2.dist-info/RECORD +86 -0
  47. {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/WHEEL +1 -1
  48. anemoi_datasets-0.4.0.dist-info/RECORD +0 -73
  49. {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/LICENSE +0 -0
  50. {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/entry_points.txt +0 -0
  51. {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/top_level.txt +0 -0

anemoi/datasets/_version.py
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.4.0'
-__version_tuple__ = version_tuple = (0, 4, 0)
+__version__ = version = '0.4.2'
+__version_tuple__ = version_tuple = (0, 4, 2)

anemoi/datasets/commands/compare.py
@@ -8,6 +8,10 @@
 # nor does it submit to any jurisdiction.
 #
 
+import numpy as np
+import tqdm
+import zarr
+
 from anemoi.datasets import open_dataset
 
 from . import Command
@@ -19,6 +23,8 @@ class Compare(Command):
     def add_arguments(self, command_parser):
         command_parser.add_argument("dataset1")
         command_parser.add_argument("dataset2")
+        command_parser.add_argument("--data", action="store_true", help="Compare the data.")
+        command_parser.add_argument("--statistics", action="store_true", help="Compare the statistics.")
 
     def run(self, args):
         ds1 = open_dataset(args.dataset1)
@@ -42,5 +48,58 @@ class Compare(Command):
                 f"{ds2.statistics['mean'][ds2.name_to_index[v]]:14g}",
             )
 
+        if args.data:
+            print()
+            print("Data:")
+            print("-----")
+            print()
+
+            diff = 0
+            for a, b in tqdm.tqdm(zip(ds1, ds2)):
+                if not np.array_equal(a, b, equal_nan=True):
+                    diff += 1
+
+            print(f"Number of different rows: {diff}/{len(ds1)}")
+
+        if args.data:
+            print()
+            print("Data 2:")
+            print("-----")
+            print()
+
+            ds1 = zarr.open(args.dataset1, mode="r")
+            ds2 = zarr.open(args.dataset2, mode="r")
+
+            for name in (
+                "data",
+                "count",
+                "sums",
+                "squares",
+                "mean",
+                "stdev",
+                "minimum",
+                "maximum",
+                "latitudes",
+                "longitudes",
+            ):
+                a1 = ds1[name]
+                a2 = ds2[name]
+
+                if len(a1) != len(a2):
+                    print(f"{name}: lengths mismatch {len(a1)} != {len(a2)}")
+                    continue
+
+                diff = 0
+                for a, b in tqdm.tqdm(zip(a1, a2), leave=False):
+                    if not np.array_equal(a, b, equal_nan=True):
+                        if diff == 0:
+                            print(f"\n{name}: first different row:")
+                            print(a[a != b])
+                            print(b[a != b])
+
+                        diff += 1
+
+                print(f"{name}: {diff} different rows out of {len(a1)}")
+
 
 command = Compare
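
The new `--data` option walks both datasets row by row, treating NaNs in matching positions as equal. A minimal sketch of that comparison, assuming two hypothetical local dataset paths:

```python
import numpy as np
import tqdm

from anemoi.datasets import open_dataset

# Hypothetical paths; any two openable datasets of the same shape will do.
ds1 = open_dataset("dataset-a.zarr")
ds2 = open_dataset("dataset-b.zarr")

diff = 0
for a, b in tqdm.tqdm(zip(ds1, ds2), total=len(ds1)):
    if not np.array_equal(a, b, equal_nan=True):  # NaN == NaN for this check
        diff += 1

print(f"Number of different rows: {diff}/{len(ds1)}")
```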
@@ -1,7 +1,39 @@
1
- from anemoi.datasets.create import Creator
1
+ import datetime
2
+ import logging
3
+ import time
4
+ from concurrent.futures import ProcessPoolExecutor
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from concurrent.futures import as_completed
7
+
8
+ import tqdm
9
+ from anemoi.utils.humanize import seconds_to_human
10
+
11
+ from anemoi.datasets.create.trace import enable_trace
2
12
 
3
13
  from . import Command
4
14
 
15
+ LOG = logging.getLogger(__name__)
16
+
17
+
18
+ def task(what, options, *args, **kwargs):
19
+ """
20
+ Make sure `import Creator` is done in the sub-processes, and not in the main one.
21
+ """
22
+
23
+ now = datetime.datetime.now()
24
+ LOG.debug(f"Task {what}({args},{kwargs}) starting")
25
+
26
+ from anemoi.datasets.create import Creator
27
+
28
+ if "trace" in options:
29
+ enable_trace(options["trace"])
30
+
31
+ c = Creator(**options)
32
+ result = getattr(c, what)(*args, **kwargs)
33
+
34
+ LOG.debug(f"Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
35
+ return result
36
+
5
37
 
6
38
  class Create(Command):
7
39
  """Create a dataset."""
@@ -22,12 +54,61 @@ class Create(Command):
22
54
  )
23
55
  command_parser.add_argument("config", help="Configuration yaml file defining the recipe to create the dataset.")
24
56
  command_parser.add_argument("path", help="Path to store the created data.")
57
+ group = command_parser.add_mutually_exclusive_group()
58
+ group.add_argument("--threads", help="Use `n` parallel thread workers.", type=int, default=0)
59
+ group.add_argument("--processes", help="Use `n` parallel process workers.", type=int, default=0)
60
+ command_parser.add_argument("--trace", action="store_true")
25
61
 
26
62
  def run(self, args):
27
- kwargs = vars(args)
63
+ now = time.time()
64
+ if args.threads + args.processes:
65
+ self.parallel_create(args)
66
+ else:
67
+ self.serial_create(args)
68
+ LOG.info(f"Create completed in {seconds_to_human(time.time()-now)}")
28
69
 
29
- c = Creator(**kwargs)
70
+ def serial_create(self, args):
71
+ from anemoi.datasets.create import Creator
72
+
73
+ options = vars(args)
74
+ c = Creator(**options)
30
75
  c.create()
31
76
 
77
+ def parallel_create(self, args):
78
+ """Some modules, like fsspec do not work well with fork()
79
+ Other modules may not be thread safe. So we implement
80
+ parallel loadining using multiprocessing before any
81
+ of the modules are imported.
82
+ """
83
+
84
+ options = vars(args)
85
+ parallel = args.threads + args.processes
86
+ args.use_threads = args.threads > 0
87
+
88
+ if args.use_threads:
89
+ ExecutorClass = ThreadPoolExecutor
90
+ else:
91
+ ExecutorClass = ProcessPoolExecutor
92
+
93
+ with ExecutorClass(max_workers=1) as executor:
94
+ total = executor.submit(task, "init", options).result()
95
+
96
+ futures = []
97
+
98
+ with ExecutorClass(max_workers=parallel) as executor:
99
+ for n in range(total):
100
+ futures.append(executor.submit(task, "load", options, parts=f"{n+1}/{total}"))
101
+
102
+ for future in tqdm.tqdm(
103
+ as_completed(futures), desc="Loading", total=len(futures), colour="green", position=parallel + 1
104
+ ):
105
+ future.result()
106
+
107
+ with ExecutorClass(max_workers=1) as executor:
108
+ executor.submit(task, "statistics", options).result()
109
+ executor.submit(task, "additions", options).result()
110
+ executor.submit(task, "cleanup", options).result()
111
+ executor.submit(task, "verify", options).result()
112
+
32
113
 
33
114
  command = Create
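
Condensed, the new parallel path is a scatter/gather over the dataset parts: one worker runs `init` to learn how many parts there are, a pool then runs one `load` task per part, and the finalisation steps run one after the other. A sketch under the assumption of a hypothetical recipe/output pair and four process workers:

```python
from concurrent.futures import ProcessPoolExecutor, as_completed


def task(what, options, *args, **kwargs):
    # As in the command above: the heavy import happens inside the worker, not the parent.
    from anemoi.datasets.create import Creator

    return getattr(Creator(**options), what)(*args, **kwargs)


if __name__ == "__main__":  # required for process pools on spawn-based platforms
    options = {"config": "recipe.yaml", "path": "output.zarr", "use_threads": False}

    with ProcessPoolExecutor(max_workers=1) as executor:
        total = executor.submit(task, "init", options).result()

    with ProcessPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(task, "load", options, parts=f"{n + 1}/{total}") for n in range(total)]
        for future in as_completed(futures):
            future.result()

    with ProcessPoolExecutor(max_workers=1) as executor:
        for step in ("statistics", "additions", "cleanup", "verify"):
            executor.submit(task, step, options).result()
```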
anemoi/datasets/commands/inspect.py
@@ -16,7 +16,7 @@ import numpy as np
 import semantic_version
 import tqdm
 from anemoi.utils.humanize import bytes
-from anemoi.utils.humanize import number
+from anemoi.utils.humanize import bytes_to_human
 from anemoi.utils.humanize import when
 from anemoi.utils.text import dotted_line
 from anemoi.utils.text import progress
@@ -215,9 +215,9 @@ class Version:
         total_size, n = compute_directory_size(self.path)
 
         if total_size is not None:
-            print(f"💽 Size : {bytes(total_size)} ({number(total_size)})")
+            print(f"💽 Size : {bytes(total_size)} ({bytes_to_human(total_size)})")
         if n is not None:
-            print(f"📁 Files : {number(n)}")
+            print(f"📁 Files : {n:,}")
 
     @property
     def statistics(self):
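
In the report, `number()` is replaced by `bytes_to_human()` for the size and a plain format specifier for the file count. A small illustration; the humanized wording is whatever anemoi-utils produces:

```python
from anemoi.utils.humanize import bytes_to_human

print(bytes_to_human(123_456_789))  # something like "117.7 MiB"; exact wording is up to anemoi-utils
print(f"{123_456_789:,}")           # "123,456,789", plain thousands separators for the file count
```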
anemoi/datasets/create/__init__.py
@@ -7,8 +7,15 @@
 # nor does it submit to any jurisdiction.
 #
 
+import logging
 import os
 
+LOG = logging.getLogger(__name__)
+
+
+def _ignore(*args, **kwargs):
+    pass
+
 
 class Creator:
     def __init__(
@@ -16,19 +23,21 @@ class Creator:
         path,
         config=None,
         cache=None,
-        print=print,
+        use_threads=False,
         statistics_tmp=None,
         overwrite=False,
         test=None,
+        progress=None,
         **kwargs,
     ):
         self.path = path  # Output path
         self.config = config
         self.cache = cache
-        self.print = print
+        self.use_threads = use_threads
         self.statistics_tmp = statistics_tmp
         self.overwrite = overwrite
         self.test = test
+        self.progress = progress if progress is not None else _ignore
 
     def init(self, check_name=False):
         # check path
@@ -44,10 +53,11 @@
             path=self.path,
             config=self.config,
             statistics_tmp=self.statistics_tmp,
-            print=self.print,
+            use_threads=self.use_threads,
+            progress=self.progress,
             test=self.test,
         )
-        obj.initialise(check_name=check_name)
+        return obj.initialise(check_name=check_name)
 
     def load(self, parts=None):
         from .loaders import ContentLoader
@@ -56,7 +66,8 @@
         loader = ContentLoader.from_dataset_config(
             path=self.path,
             statistics_tmp=self.statistics_tmp,
-            print=self.print,
+            use_threads=self.use_threads,
+            progress=self.progress,
             parts=parts,
         )
         loader.load()
@@ -66,7 +77,8 @@
 
         loader = StatisticsAdder.from_dataset(
             path=self.path,
-            print=self.print,
+            use_threads=self.use_threads,
+            progress=self.progress,
             statistics_tmp=self.statistics_tmp,
             statistics_output=output,
             recompute=False,
@@ -74,20 +86,22 @@
             statistics_end=end,
         )
         loader.run()
+        assert loader.ready()
 
     def size(self):
         from .loaders import DatasetHandler
         from .size import compute_directory_sizes
 
         metadata = compute_directory_sizes(self.path)
-        handle = DatasetHandler.from_dataset(path=self.path, print=self.print)
+        handle = DatasetHandler.from_dataset(path=self.path, use_threads=self.use_threads)
         handle.update_metadata(**metadata)
+        assert handle.ready()
 
     def cleanup(self):
        from .loaders import DatasetHandlerWithStatistics
 
         cleaner = DatasetHandlerWithStatistics.from_dataset(
-            path=self.path, print=self.print, statistics_tmp=self.statistics_tmp
+            path=self.path, use_threads=self.use_threads, progress=self.progress, statistics_tmp=self.statistics_tmp
         )
         cleaner.tmp_statistics.delete()
         cleaner.registry.clean()
@@ -103,15 +117,17 @@
         from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
 
         if statistics:
-            a = StatisticsAddition.from_dataset(path=self.path, print=self.print)
+            a = StatisticsAddition.from_dataset(path=self.path, use_threads=self.use_threads)
             a.initialise()
 
         for d in delta:
             try:
-                a = TendenciesStatisticsAddition.from_dataset(path=self.path, print=self.print, delta=d)
+                a = TendenciesStatisticsAddition.from_dataset(
+                    path=self.path, use_threads=self.use_threads, progress=self.progress, delta=d
+                )
                 a.initialise()
             except TendenciesStatisticsDeltaNotMultipleOfFrequency:
-                self.print(f"Skipping delta={d} as it is not a multiple of the frequency.")
+                LOG.info(f"Skipping delta={d} as it is not a multiple of the frequency.")
 
     def run_additions(self, parts=None, delta=[1, 3, 6, 12, 24], statistics=True):
         from .loaders import StatisticsAddition
@@ -119,15 +135,17 @@
         from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
 
         if statistics:
-            a = StatisticsAddition.from_dataset(path=self.path, print=self.print)
+            a = StatisticsAddition.from_dataset(path=self.path, use_threads=self.use_threads)
             a.run(parts)
 
         for d in delta:
             try:
-                a = TendenciesStatisticsAddition.from_dataset(path=self.path, print=self.print, delta=d)
+                a = TendenciesStatisticsAddition.from_dataset(
+                    path=self.path, use_threads=self.use_threads, progress=self.progress, delta=d
+                )
                 a.run(parts)
             except TendenciesStatisticsDeltaNotMultipleOfFrequency:
-                self.print(f"Skipping delta={d} as it is not a multiple of the frequency.")
+                LOG.debug(f"Skipping delta={d} as it is not a multiple of the frequency.")
 
     def finalise_additions(self, delta=[1, 3, 6, 12, 24], statistics=True):
         from .loaders import StatisticsAddition
@@ -135,15 +153,17 @@
         from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
 
         if statistics:
-            a = StatisticsAddition.from_dataset(path=self.path, print=self.print)
+            a = StatisticsAddition.from_dataset(path=self.path, use_threads=self.use_threads)
             a.finalise()
 
         for d in delta:
             try:
-                a = TendenciesStatisticsAddition.from_dataset(path=self.path, print=self.print, delta=d)
+                a = TendenciesStatisticsAddition.from_dataset(
+                    path=self.path, use_threads=self.use_threads, progress=self.progress, delta=d
+                )
                 a.finalise()
             except TendenciesStatisticsDeltaNotMultipleOfFrequency:
-                self.print(f"Skipping delta={d} as it is not a multiple of the frequency.")
+                LOG.debug(f"Skipping delta={d} as it is not a multiple of the frequency.")
 
     def finalise(self, **kwargs):
         self.statistics(**kwargs)
@@ -174,3 +194,10 @@
             return True
         except zarr.errors.PathNotFoundError:
             return False
+
+    def verify(self):
+        from .loaders import DatasetVerifier
+
+        handle = DatasetVerifier.from_dataset(path=self.path, use_threads=self.use_threads)
+
+        handle.verify()
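
The same steps can also be driven directly on a `Creator` instance, which is what each worker task in the create command ends up doing. A hedged sketch with a hypothetical recipe and output path; `progress` may be any callable and defaults to a no-op:

```python
from anemoi.datasets.create import Creator


def progress(*args, **kwargs):
    print(*args)


c = Creator(path="output.zarr", config="recipe.yaml", use_threads=False, progress=progress)

total = c.init()                      # prepares the store and returns the number of parts to load
for n in range(total):
    c.load(parts=f"{n + 1}/{total}")  # parts can also be spread across workers, as in the command

c.statistics()
c.additions()
c.cleanup()
c.verify()
```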
anemoi/datasets/create/check.py
@@ -56,7 +56,7 @@ class DatasetName:
            raise ValueError(self.error_message)
 
     def _parse(self, name):
-        pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?(.*)$"
+        pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)$"
         match = re.match(pattern, name)
 
         assert match, (name, pattern)
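
The last capture group of the dataset-name pattern is tightened from `(.*)` to `([a-zA-Z0-9-]+)`, so an arbitrary trailing suffix no longer slips through. A quick check with a made-up name:

```python
import re

OLD = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?(.*)$"
NEW = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)$"

name = "aifs-od-an-oper-1979-2022-6h-v1-extra.tmp"  # hypothetical name with a stray suffix
print(bool(re.match(OLD, name)))  # True: the old pattern accepted any trailing text
print(bool(re.match(NEW, name)))  # False: '.' is not in [a-zA-Z0-9-]

print(bool(re.match(NEW, "aifs-od-an-oper-1979-2022-6h-v1-test")))  # True
```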
@@ -136,18 +136,19 @@ class StatisticsValueError(ValueError):
     pass
 
 
-def check_data_values(arr, *, name: str, log=[], allow_nan=False):
-    if allow_nan is False:
-        allow_nan = lambda x: False  # noqa: E731
+def check_data_values(arr, *, name: str, log=[], allow_nans=False):
 
-    if allow_nan(name):
+    if (isinstance(allow_nans, (set, list, tuple, dict)) and name in allow_nans) or allow_nans:
         arr = arr[~np.isnan(arr)]
 
+    assert arr.size > 0, (name, *log)
+
     min, max = arr.min(), arr.max()
     assert not (np.isnan(arr).any()), (name, min, max, *log)
 
     if min == 9999.0:
         warnings.warn(f"Min value 9999 for {name}")
+
     if max == 9999.0:
         warnings.warn(f"Max value 9999 for {name}")
 
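
`check_data_values` now takes `allow_nans`, which may be a boolean or a collection of variable names; any truthy value masks NaNs out before the min/max checks, and the new assertion rejects an array that was entirely NaN. A short, hedged illustration with made-up values:

```python
import numpy as np

from anemoi.datasets.create.check import check_data_values

check_data_values(np.array([274.0, 275.5, 280.1]), name="2t")

# With NaNs present, pass a truthy allow_nans (True, or a collection naming
# the variable) so they are masked out before the checks run.
check_data_values(np.array([291.0, np.nan, 289.5]), name="sst", allow_nans=True)
check_data_values(np.array([291.0, np.nan, 289.5]), name="sst", allow_nans={"sst"})
```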
anemoi/datasets/create/chunks.py
@@ -57,7 +57,7 @@ class ChunkFilter:
         if not parts:
             warnings.warn(f"Nothing to do for chunk {i}/{n}.")
 
-        LOG.info(f"Running parts: {parts}")
+        LOG.debug(f"Running parts: {parts}")
 
         self.allowed = parts
 
anemoi/datasets/create/config.py
@@ -12,10 +12,10 @@ import os
 from copy import deepcopy
 
 import yaml
+from anemoi.utils.config import DotDict
+from anemoi.utils.config import load_any_dict_format
 from earthkit.data.core.order import normalize_order_by
 
-from .utils import load_json_or_yaml
-
 LOG = logging.getLogger(__name__)
 
 
@@ -43,31 +43,10 @@ def check_dict_value_and_set(dic, key, value):
         if dic[key] == value:
             return
         raise ValueError(f"Cannot use {key}={dic[key]}. Must use {value}.")
-    print(f"Setting {key}={value} in config")
+    LOG.info(f"Setting {key}={value} in config")
     dic[key] = value
 
 
-class DictObj(dict):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        for key, value in self.items():
-            if isinstance(value, dict):
-                self[key] = DictObj(value)
-                continue
-            if isinstance(value, list):
-                self[key] = [DictObj(item) if isinstance(item, dict) else item for item in value]
-                continue
-
-    def __getattr__(self, attr):
-        try:
-            return self[attr]
-        except KeyError:
-            raise AttributeError(attr)
-
-    def __setattr__(self, attr, value):
-        self[attr] = value
-
-
 def resolve_includes(config):
     if isinstance(config, list):
         return [resolve_includes(c) for c in config]
@@ -79,11 +58,11 @@ def resolve_includes(config):
     return config
 
 
-class Config(DictObj):
+class Config(DotDict):
     def __init__(self, config=None, **kwargs):
         if isinstance(config, str):
             self.config_path = os.path.realpath(config)
-            config = load_json_or_yaml(config)
+            config = load_any_dict_format(config)
         else:
             config = deepcopy(config if config is not None else {})
         config = resolve_includes(config)
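
The in-tree `DictObj` shim is gone; `Config` now inherits attribute-style access from `anemoi.utils.config.DotDict`, and string arguments are parsed with `load_any_dict_format` instead of the local `load_json_or_yaml` helper. A minimal sketch with an illustrative recipe fragment; the nested wrapping is assumed to mirror the removed DictObj:

```python
from anemoi.datasets.create.config import Config

config = Config({"dates": {"start": "2020-01-01", "end": "2020-12-31", "frequency": "6h"}})

print(config.dates.start)      # attribute access on nested mappings, as before
print(config["dates"]["end"])  # plain dict access still works

# A string argument is treated as a path and parsed by load_any_dict_format,
# e.g. Config("recipe.yaml") for a hypothetical recipe file.
```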
anemoi/datasets/create/functions/filters/rename.py
@@ -26,15 +26,23 @@ class RenamedFieldMapping:
         self.what = what
         self.renaming = renaming
 
-    def metadata(self, key, **kwargs):
+    def metadata(self, key=None, **kwargs):
+        if key is None:
+            return self.field.metadata(**kwargs)
+
         value = self.field.metadata(key, **kwargs)
         if key == self.what:
             return self.renaming.get(value, value)
+
         return value
 
     def __getattr__(self, name):
         return getattr(self.field, name)
 
+    def __repr__(self) -> str:
+        return repr(self.field)
+        return f"{self.field} -> {self.what} -> {self.renaming}"
+
 
 class RenamedFieldFormat:
     """Rename a field based on a format string.
anemoi/datasets/create/functions/filters/rotate_winds.py
@@ -9,6 +9,8 @@
 
 from collections import defaultdict
 
+import tqdm
+from anemoi.utils.humanize import plural
 from earthkit.data.indexing.fieldlist import FieldArray
 from earthkit.geo.rotate import rotate_vector
 
@@ -24,6 +26,9 @@ class NewDataField:
     def __getattr__(self, name):
         return getattr(self.field, name)
 
+    def __repr__(self) -> str:
+        return repr(self.field)
+
 
 
 def execute(
@@ -35,6 +40,8 @@ def execute(
 ):
     from pyproj import CRS
 
+    context.trace("🔄", "Rotating winds (extracting winds from ", plural(len(input), "field"))
+
     result = FieldArray()
 
     wind_params = (x_wind, y_wind)
@@ -55,7 +62,9 @@
 
         wind_pairs[key][param] = f
 
-    for _, pairs in wind_pairs.items():
+    context.trace("🔄", "Rotating", plural(len(wind_pairs), "wind"), "(speed will likely include data download)")
+
+    for _, pairs in tqdm.tqdm(list(wind_pairs.items())):
         if len(pairs) != 2:
             raise ValueError("Missing wind component")
 
anemoi/datasets/create/functions/sources/__init__.py
@@ -6,3 +6,42 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 #
+
+import glob
+import logging
+
+from earthkit.data.utils.patterns import Pattern
+
+LOG = logging.getLogger(__name__)
+
+
+def _expand(paths):
+    for path in paths:
+        if path.startswith("file://"):
+            path = path[7:]
+
+        if path.startswith("http://"):
+            yield path
+            continue
+
+        if path.startswith("https://"):
+            yield path
+            continue
+
+        cnt = 0
+        for p in glob.glob(path):
+            yield p
+            cnt += 1
+        if cnt == 0:
+            yield path
+
+
+def iterate_patterns(path, dates, **kwargs):
+    given_paths = path if isinstance(path, list) else [path]
+
+    dates = [d.isoformat() for d in dates]
+
+    for path in given_paths:
+        paths = Pattern(path, ignore_missing_keys=True).substitute(date=dates, **kwargs)
+        for path in _expand(paths):
+            yield path, dates
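
The new helpers expand a date-templated path for the requested dates and fall back to the literal path when `glob` finds nothing. A hypothetical usage sketch:

```python
import datetime

from anemoi.datasets.create.functions.sources import iterate_patterns

dates = [datetime.datetime(2020, 1, 1, 0), datetime.datetime(2020, 1, 1, 6)]

# "/data/era5/{date}.grib" is a made-up template; {date} is filled with each
# ISO-formatted date and the result is globbed before being yielded.
for path, dates_iso in iterate_patterns("/data/era5/{date}.grib", dates):
    print(path, dates_iso)
```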