anemoi-datasets 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ from ._version import __version__
9
9
  from .data import MissingDateError
10
10
  from .data import add_dataset_path
11
11
  from .data import add_named_dataset
12
+ from .data import list_dataset_names
12
13
  from .data import open_dataset
13
14
 
14
15
  __all__ = [
@@ -16,4 +17,5 @@ __all__ = [
16
17
  "MissingDateError",
17
18
  "add_dataset_path",
18
19
  "add_named_dataset",
20
+ "list_dataset_names",
19
21
  ]
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.3.6'
16
- __version_tuple__ = version_tuple = (0, 3, 6)
15
+ __version__ = version = '0.3.7'
16
+ __version_tuple__ = version_tuple = (0, 3, 7)
@@ -12,6 +12,8 @@ from concurrent.futures import ThreadPoolExecutor
12
12
  from concurrent.futures import as_completed
13
13
 
14
14
  import tqdm
15
+ from anemoi.utils.s3 import download
16
+ from anemoi.utils.s3 import upload
15
17
 
16
18
  from . import Command
17
19
 
@@ -22,26 +24,48 @@ try:
22
24
  except AttributeError:
23
25
  isatty = False
24
26
 
25
- """
26
27
 
27
- ~/.aws/credentials
28
+ class S3Downloader:
29
+ def __init__(self, source, target, transfers, overwrite, resume, progress, **kwargs):
30
+ self.source = source
31
+ self.target = target
32
+ self.transfers = transfers
33
+ self.overwrite = overwrite
34
+ self.resume = resume
35
+ self.progress = progress
28
36
 
29
- [default]
30
- endpoint_url = https://object-store.os-api.cci1.ecmwf.int
31
- aws_access_key_id=xxx
32
- aws_secret_access_key=xxxx
37
+ def run(self):
38
+ download(
39
+ self.source + "/" if not self.source.endswith("/") else self.source,
40
+ self.target,
41
+ overwrite=self.overwrite,
42
+ ignore_existing=self.resume,
43
+ threads=self.transfers,
44
+ show_progress=self.progress,
45
+ )
33
46
 
34
- Then:
35
47
 
36
- anemoi-datasets copy aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v3.zarr/
37
- s3://ml-datasets/stable/aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v3.zarr
48
+ class S3Uploader:
49
+ def __init__(self, source, target, transfers, overwrite, resume, progress, **kwargs):
50
+ self.source = source
51
+ self.target = target
52
+ self.transfers = transfers
53
+ self.overwrite = overwrite
54
+ self.resume = resume
55
+ self.progress = progress
38
56
 
39
- zinfo https://object-store.os-api.cci1.ecmwf.int/
40
- ml-datasets/stable/aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v3.zarr
41
- """
57
+ def run(self):
58
+ upload(
59
+ self.source,
60
+ self.target,
61
+ overwrite=self.overwrite,
62
+ ignore_existing=self.resume,
63
+ threads=self.transfers,
64
+ show_progress=self.progress,
65
+ )
42
66
 
43
67
 
44
- class Copier:
68
+ class DefaultCopier:
45
69
  def __init__(self, source, target, transfers, block_size, overwrite, resume, progress, nested, rechunk, **kwargs):
46
70
  self.source = source
47
71
  self.target = target
@@ -295,7 +319,29 @@ class CopyMixin:
295
319
  command_parser.add_argument("target", help="Target location.")
296
320
 
297
321
  def run(self, args):
298
- Copier(**vars(args)).run()
322
+ if args.source == args.target:
323
+ raise ValueError("Source and target are the same.")
324
+
325
+ kwargs = vars(args)
326
+
327
+ if args.overwrite and args.resume:
328
+ raise ValueError("Cannot use --overwrite and --resume together.")
329
+
330
+ source_in_s3 = args.source.startswith("s3://")
331
+ target_in_s3 = args.target.startswith("s3://")
332
+
333
+ copier = None
334
+
335
+ if args.rechunk or (source_in_s3 and target_in_s3):
336
+ copier = DefaultCopier(**kwargs)
337
+ else:
338
+ if source_in_s3:
339
+ copier = S3Downloader(**kwargs)
340
+
341
+ if target_in_s3:
342
+ copier = S3Uploader(**kwargs)
343
+
344
+ copier.run()
299
345
 
300
346
 
301
347
  class Copy(CopyMixin, Command):
@@ -30,3 +30,10 @@ def open_dataset(*args, **kwargs):
30
30
  ds.arguments = {"args": args, "kwargs": kwargs}
31
31
  ds._check()
32
32
  return ds
33
+
34
+
35
+ def list_dataset_names(*args, **kwargs):
36
+ ds = _open_dataset(*args, **kwargs)
37
+ names = set()
38
+ ds.get_dataset_names(names)
39
+ return sorted(names)
@@ -226,3 +226,6 @@ class Dataset:
226
226
  @property
227
227
  def label(self):
228
228
  return self.__class__.__name__.lower()
229
+
230
+ def get_dataset_names(self, names):
231
+ raise NotImplementedError(self)
@@ -103,6 +103,9 @@ class Forwards(Dataset):
103
103
  f"subclass_metadata_specific() must be implemented in derived class {self.__class__.__name__}"
104
104
  )
105
105
 
106
+ def get_dataset_names(self, names):
107
+ self.forward.get_dataset_names(names)
108
+
106
109
 
107
110
  class Combined(Forwards):
108
111
  def __init__(self, datasets):
@@ -193,6 +196,10 @@ class Combined(Forwards):
193
196
  offset += len(d)
194
197
  return result
195
198
 
199
+ def get_dataset_names(self, names):
200
+ for d in self.datasets:
201
+ d.get_dataset_names(names)
202
+
196
203
 
197
204
  class GivenAxis(Combined):
198
205
  """Given a given axis, combine the datasets along that axis."""
@@ -70,8 +70,12 @@ class Thinning(Masked):
70
70
  self.thinning = thinning
71
71
  self.method = method
72
72
 
73
- latitudes = forward.latitudes.reshape(forward.field_shape)
74
- longitudes = forward.longitudes.reshape(forward.field_shape)
73
+ shape = forward.field_shape
74
+ if len(shape) != 2:
75
+ raise ValueError("Thinning only works latitude/longitude fields")
76
+
77
+ latitudes = forward.latitudes.reshape(shape)
78
+ longitudes = forward.longitudes.reshape(shape)
75
79
  latitudes = latitudes[::thinning, ::thinning].flatten()
76
80
  longitudes = longitudes[::thinning, ::thinning].flatten()
77
81
 
@@ -39,3 +39,7 @@ class Statistics(Forwards):
39
39
 
40
40
  def tree(self):
41
41
  return Node(self, [self.forward.tree()])
42
+
43
+ def get_dataset_names(self, names):
44
+ super().get_dataset_names(names)
45
+ self._statistic.get_dataset_names(names)
@@ -6,6 +6,7 @@
6
6
  # nor does it submit to any jurisdiction.
7
7
 
8
8
  import logging
9
+ import os
9
10
  import warnings
10
11
  from functools import cached_property
11
12
 
@@ -235,7 +236,11 @@ class Zarr(Dataset):
235
236
 
236
237
  @property
237
238
  def field_shape(self):
238
- return tuple(self.z.attrs["field_shape"])
239
+ try:
240
+ return tuple(self.z.attrs["field_shape"])
241
+ except KeyError:
242
+ LOG.warning("No 'field_shape' in %r, assuming 1D fields", self)
243
+ return (self.shape[-1],)
239
244
 
240
245
  @property
241
246
  def frequency(self):
@@ -288,6 +293,10 @@ class Zarr(Dataset):
288
293
  def tree(self):
289
294
  return Node(self, [], path=self.path)
290
295
 
296
+ def get_dataset_names(self, names):
297
+ name, _ = os.path.splitext(os.path.basename(self.path))
298
+ names.add(name)
299
+
291
300
 
292
301
  class ZarrWithMissingDates(Zarr):
293
302
  def __init__(self, path):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: anemoi-datasets
3
- Version: 0.3.6
3
+ Version: 0.3.7
4
4
  Summary: A package to hold various functions to support training of ML models on ECMWF data.
5
5
  Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
6
6
  License: Apache License
@@ -223,7 +223,7 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
223
223
  Classifier: Programming Language :: Python :: Implementation :: PyPy
224
224
  Requires-Python: >=3.9
225
225
  License-File: LICENSE
226
- Requires-Dist: anemoi-utils[provenance] >=0.3
226
+ Requires-Dist: anemoi-utils[provenance] >=0.3.4
227
227
  Requires-Dist: numpy
228
228
  Requires-Dist: pyyaml
229
229
  Requires-Dist: semantic-version
@@ -1,10 +1,10 @@
1
- anemoi/datasets/__init__.py,sha256=DC7ttKT--pmhBQALX_Cn7P28dngsJucKi5y-Ydm28QM,700
1
+ anemoi/datasets/__init__.py,sha256=50-v6XPwmNE4LK9PVmyWu3v42F38EhxShd2EaXt1IIA,763
2
2
  anemoi/datasets/__main__.py,sha256=cLA2PidDTOUHaDGzd0_E5iioKYNe-PSTv567Y2fuwQk,723
3
- anemoi/datasets/_version.py,sha256=IKAQ4gPrCQ2FWMXOFRqouULC2EQI1zCb4iXHsnfbmTQ,411
3
+ anemoi/datasets/_version.py,sha256=vVN20516E2VTC9JNgtvqrQNlj5XptaB_a5z2XL8NFxg,411
4
4
  anemoi/datasets/grids.py,sha256=3YBMMJodgYhavarXPAlMZHaMtDT9v2IbTmAXZTqf8Qo,8481
5
5
  anemoi/datasets/commands/__init__.py,sha256=qAybFZPBBQs0dyx7dZ3X5JsLpE90pwrqt1vSV7cqEIw,706
6
6
  anemoi/datasets/commands/compare.py,sha256=p2jQOAC3JhScCLF0GjTCO8goYLWLN8p7vzy_gf5fFcI,1473
7
- anemoi/datasets/commands/copy.py,sha256=fba-zjD0iTHHXHhPEcm8VhDzsXQXDUxlbtTA1TovyT0,9991
7
+ anemoi/datasets/commands/copy.py,sha256=CHIQuzHUs0uezU9c6jJry6yhilYeGn1wmIubGYatEaY,11450
8
8
  anemoi/datasets/commands/create.py,sha256=POdOsVDlvRrHFFkI3SNXNgNIbSxkVUUPMoo660x7Ma0,987
9
9
  anemoi/datasets/commands/inspect.py,sha256=G3fzcgiLaU8jln7GKvgamN7Y06-qC_JnFw2SbNn1_E4,18646
10
10
  anemoi/datasets/commands/scan.py,sha256=HxsLdCgBMSdEXjlJfPq5M_9LxXHHQIoZ1ZEHO_AoPgA,2881
@@ -45,29 +45,29 @@ anemoi/datasets/create/functions/sources/source.py,sha256=hPQnV_6UIxFw97uRKcTA8T
45
45
  anemoi/datasets/create/functions/sources/tendencies.py,sha256=kwS_GZt8R9kpfs5RrvxPb0Gj-5nDP0sgJgfSRCAwwww,4057
46
46
  anemoi/datasets/create/statistics/__init__.py,sha256=X50drgE-ltuNe7bSIyvyeC4GeTqGTQGbglh2-2aVWKE,15445
47
47
  anemoi/datasets/create/statistics/summary.py,sha256=sgmhA24y3VRyjmDUgTnPIqcHSlWBbFA0qynx6gJ9Xw8,3370
48
- anemoi/datasets/data/__init__.py,sha256=tacn6K_VZ-pYhLmGePG5sze8kmqGpqscYb-bMyQnWtk,888
48
+ anemoi/datasets/data/__init__.py,sha256=to9L_RZVQ4OgyHUpX6lcvt4GqJdZjBa5HCTaWx1aGKo,1046
49
49
  anemoi/datasets/data/concat.py,sha256=AkpyOs16OjW7X0cdyYFQfWSCV6dteXBp-x9WlokO-DI,3550
50
- anemoi/datasets/data/dataset.py,sha256=UDnidq2amyCT2COH05pGfDCJcmkdMj1ubtHk9cl-qcE,7384
50
+ anemoi/datasets/data/dataset.py,sha256=LBUwWhwcAcovLv0FOLT-rA-yNZhcBFUMc03BfVh7UFc,7465
51
51
  anemoi/datasets/data/debug.css,sha256=z2X_ZDSnZ9C3pyZPWnQiEyAxuMxUaxJxET4oaCImTAQ,211
52
52
  anemoi/datasets/data/debug.py,sha256=PcyrjgxaLzeb_vf12pvUtPPVvBRHNm1SimythZvqsP4,6303
53
53
  anemoi/datasets/data/ensemble.py,sha256=AsP7Xx0ZHLoZs6a4EC0jtyGYIcOvZvvKXhgNsIvqIN8,1137
54
- anemoi/datasets/data/forwards.py,sha256=t9YQCN7j75VMInt0uP9JUJoh1klF1Z1xnwy5_kDMDQs,7700
54
+ anemoi/datasets/data/forwards.py,sha256=UZOOMUblGS21aaPoFfQa0ONSUaxkqlZQF3KGRhlCr9I,7899
55
55
  anemoi/datasets/data/grids.py,sha256=rooOeR6rvjl4U8B4LO3N23fcgxvGE7ZUmhVryk1QS4M,7493
56
56
  anemoi/datasets/data/indexing.py,sha256=625m__JG5m_tDMrkz1hB6Vydenwt0oHuyAlc-o3Zwos,4799
57
57
  anemoi/datasets/data/join.py,sha256=dtCBbMTicqrRPxfBULi3RwEcQBLhQpIcvCjdN5A3XUU,4892
58
- anemoi/datasets/data/masked.py,sha256=Fzkehyka70CiS0LYSy_uyVYu2gKLwDSxlbm8GiC_pYs,3742
58
+ anemoi/datasets/data/masked.py,sha256=czAv1ZfZ9q6Wr4RqI2Xj8SEm7yoCgJrwMl-CPDs_wSI,3857
59
59
  anemoi/datasets/data/misc.py,sha256=cu2rMNtq8M8yzcR1CGHHQksE0p9C2SLVigOTMH6ilMs,10400
60
60
  anemoi/datasets/data/select.py,sha256=U3AEid80mrJKu0SF4lLc-bRWMVcAZwHNUHUHRehvuHU,3680
61
- anemoi/datasets/data/statistics.py,sha256=PKRgcCiZEb1HjkIveVGhE3TzUy9Qe3AYWGFD72Urah8,1514
62
- anemoi/datasets/data/stores.py,sha256=oEjUra0zzIysiUvh-RBQRzcbviggejEQiMO5RfpjPyM,10896
61
+ anemoi/datasets/data/statistics.py,sha256=lZCcKw9s7ttMBEp6ANyxtbXoZZvchhE7SClq-D4AUR8,1645
62
+ anemoi/datasets/data/stores.py,sha256=yy914zMHIYKm5q6mHOqGeK0dC_26VFeqKLXyb7x9NXE,11190
63
63
  anemoi/datasets/data/subset.py,sha256=9urVTXdnwCgqn0_BRYquMi8oiXn4ubAf0n4586hWfKw,3814
64
64
  anemoi/datasets/data/unchecked.py,sha256=xhdMg-ToI1UfBWHNsWyn1y2meZWngZtHx-33L0KqKp8,4037
65
65
  anemoi/datasets/dates/__init__.py,sha256=4ItowfLLh90T8L_JOjtv98lE6M7gAaWt7dV3niUrFvk,4473
66
66
  anemoi/datasets/dates/groups.py,sha256=iq310Pi7ullglOhcNblv14MmcT8FPgYCD5s45qAfV_s,3383
67
67
  anemoi/datasets/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
- anemoi_datasets-0.3.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
- anemoi_datasets-0.3.6.dist-info/METADATA,sha256=sEnXtS3eh-ix9Z1aR8RFDqBlnmK-XE6A42ga2uE_Utc,16017
70
- anemoi_datasets-0.3.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
71
- anemoi_datasets-0.3.6.dist-info/entry_points.txt,sha256=yR-o-4uiPEA_GLBL81SkMYnUoxq3CAV3hHulQiRtGG0,66
72
- anemoi_datasets-0.3.6.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
73
- anemoi_datasets-0.3.6.dist-info/RECORD,,
68
+ anemoi_datasets-0.3.7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
+ anemoi_datasets-0.3.7.dist-info/METADATA,sha256=MviDtSJZZDW9XL00ALGMtwUjOJzfozG1wrxzq4tb3DA,16019
70
+ anemoi_datasets-0.3.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
71
+ anemoi_datasets-0.3.7.dist-info/entry_points.txt,sha256=yR-o-4uiPEA_GLBL81SkMYnUoxq3CAV3hHulQiRtGG0,66
72
+ anemoi_datasets-0.3.7.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
73
+ anemoi_datasets-0.3.7.dist-info/RECORD,,