anemoi-datasets 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (48)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/cleanup.py +44 -0
  3. anemoi/datasets/commands/create.py +50 -20
  4. anemoi/datasets/commands/finalise-additions.py +45 -0
  5. anemoi/datasets/commands/finalise.py +39 -0
  6. anemoi/datasets/commands/init-additions.py +45 -0
  7. anemoi/datasets/commands/init.py +67 -0
  8. anemoi/datasets/commands/inspect.py +1 -1
  9. anemoi/datasets/commands/load-additions.py +47 -0
  10. anemoi/datasets/commands/load.py +47 -0
  11. anemoi/datasets/commands/patch.py +39 -0
  12. anemoi/datasets/create/__init__.py +961 -146
  13. anemoi/datasets/create/check.py +5 -3
  14. anemoi/datasets/create/config.py +53 -2
  15. anemoi/datasets/create/functions/sources/xarray/__init__.py +12 -2
  16. anemoi/datasets/create/functions/sources/xarray/coordinates.py +7 -0
  17. anemoi/datasets/create/functions/sources/xarray/field.py +1 -1
  18. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +0 -2
  19. anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -1
  20. anemoi/datasets/create/functions/sources/xarray/metadata.py +27 -29
  21. anemoi/datasets/create/functions/sources/xarray/time.py +63 -30
  22. anemoi/datasets/create/functions/sources/xarray/variable.py +15 -38
  23. anemoi/datasets/create/input.py +23 -22
  24. anemoi/datasets/create/statistics/__init__.py +39 -23
  25. anemoi/datasets/create/utils.py +3 -2
  26. anemoi/datasets/data/__init__.py +1 -0
  27. anemoi/datasets/data/concat.py +46 -2
  28. anemoi/datasets/data/dataset.py +109 -34
  29. anemoi/datasets/data/forwards.py +17 -8
  30. anemoi/datasets/data/grids.py +17 -3
  31. anemoi/datasets/data/interpolate.py +133 -0
  32. anemoi/datasets/data/misc.py +56 -66
  33. anemoi/datasets/data/missing.py +240 -0
  34. anemoi/datasets/data/select.py +7 -1
  35. anemoi/datasets/data/stores.py +3 -3
  36. anemoi/datasets/data/subset.py +47 -5
  37. anemoi/datasets/data/unchecked.py +20 -22
  38. anemoi/datasets/data/xy.py +125 -0
  39. anemoi/datasets/dates/__init__.py +13 -66
  40. anemoi/datasets/dates/groups.py +2 -2
  41. anemoi/datasets/grids.py +66 -48
  42. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/METADATA +5 -5
  43. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/RECORD +47 -37
  44. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/WHEEL +1 -1
  45. anemoi/datasets/create/loaders.py +0 -936
  46. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/LICENSE +0 -0
  47. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/entry_points.txt +0 -0
  48. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/top_level.txt +0 -0
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.4.4'
16
- __version_tuple__ = version_tuple = (0, 4, 4)
15
+ __version__ = version = '0.4.5'
16
+ __version_tuple__ = version_tuple = (0, 4, 5)
@@ -0,0 +1,44 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class Cleanup(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, subparser):
20
+ subparser.add_argument("path", help="Path to store the created data.")
21
+ subparser.add_argument(
22
+ "--delta",
23
+ help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.",
24
+ nargs="+",
25
+ )
26
+
27
+ def run(self, args):
28
+ options = vars(args)
29
+ options.pop("command")
30
+ now = time.time()
31
+ step = self.__class__.__name__.lower()
32
+
33
+ if "version" in options:
34
+ options.pop("version")
35
+
36
+ if "debug" in options:
37
+ options.pop("debug")
38
+
39
+ task(step, options)
40
+
41
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
42
+
43
+
44
+ command = Cleanup
@@ -8,8 +8,6 @@ from concurrent.futures import as_completed
8
8
  import tqdm
9
9
  from anemoi.utils.humanize import seconds_to_human
10
10
 
11
- from anemoi.datasets.create.trace import enable_trace
12
-
13
11
  from . import Command
14
12
 
15
13
  LOG = logging.getLogger(__name__)
@@ -21,15 +19,14 @@ def task(what, options, *args, **kwargs):
21
19
  """
22
20
 
23
21
  now = datetime.datetime.now()
24
- LOG.debug(f"Task {what}({args},{kwargs}) starting")
22
+ LOG.info(f"Task {what}({args},{kwargs}) starting")
25
23
 
26
- from anemoi.datasets.create import Creator
24
+ from anemoi.datasets.create import creator_factory
27
25
 
28
- if "trace" in options:
29
- enable_trace(options["trace"])
26
+ options = {k: v for k, v in options.items() if v is not None}
30
27
 
31
- c = Creator(**options)
32
- result = getattr(c, what)(*args, **kwargs)
28
+ c = creator_factory(what.replace("-", "_"), **options)
29
+ result = c.run()
33
30
 
34
31
  LOG.debug(f"Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
35
32
  return result
@@ -68,11 +65,23 @@ class Create(Command):
68
65
  LOG.info(f"Create completed in {seconds_to_human(time.time()-now)}")
69
66
 
70
67
  def serial_create(self, args):
71
- from anemoi.datasets.create import Creator
72
68
 
73
69
  options = vars(args)
74
- c = Creator(**options)
75
- c.create()
70
+ options.pop("command")
71
+ options.pop("threads")
72
+ options.pop("processes")
73
+
74
+ task("init", options)
75
+ task("load", options)
76
+ task("finalise", options)
77
+
78
+ task("patch", options)
79
+
80
+ task("init_additions", options)
81
+ task("run_additions", options)
82
+ task("finalise_additions", options)
83
+ task("cleanup", options)
84
+ task("verify", options)
76
85
 
77
86
  def parallel_create(self, args):
78
87
  """Some modules, like fsspec do not work well with fork()
@@ -82,10 +91,15 @@ class Create(Command):
82
91
  """
83
92
 
84
93
  options = vars(args)
85
- parallel = args.threads + args.processes
86
- args.use_threads = args.threads > 0
94
+ options.pop("command")
87
95
 
88
- if args.use_threads:
96
+ threads = options.pop("threads")
97
+ processes = options.pop("processes")
98
+
99
+ use_threads = threads > 0
100
+ options["use_threads"] = use_threads
101
+
102
+ if use_threads:
89
103
  ExecutorClass = ThreadPoolExecutor
90
104
  else:
91
105
  ExecutorClass = ProcessPoolExecutor
@@ -95,18 +109,34 @@ class Create(Command):
95
109
 
96
110
  futures = []
97
111
 
112
+ parallel = threads + processes
113
+ with ExecutorClass(max_workers=parallel) as executor:
114
+ for n in range(total):
115
+ opt = options.copy()
116
+ opt["parts"] = f"{n+1}/{total}"
117
+ futures.append(executor.submit(task, "load", opt))
118
+
119
+ for future in tqdm.tqdm(as_completed(futures), desc="Loading", total=len(futures), colour="green", position=parallel + 1): # fmt: skip
120
+ future.result()
121
+
122
+ with ExecutorClass(max_workers=1) as executor:
123
+ executor.submit(task, "finalise", options).result()
124
+
125
+ with ExecutorClass(max_workers=1) as executor:
126
+ executor.submit(task, "init-additions", options).result()
127
+
98
128
  with ExecutorClass(max_workers=parallel) as executor:
129
+ opt = options.copy()
130
+ opt["parts"] = f"{n+1}/{total}"
131
+ futures.append(executor.submit(task, "load", opt))
99
132
  for n in range(total):
100
- futures.append(executor.submit(task, "load", options, parts=f"{n+1}/{total}"))
133
+ futures.append(executor.submit(task, "load-additions", opt))
101
134
 
102
- for future in tqdm.tqdm(
103
- as_completed(futures), desc="Loading", total=len(futures), colour="green", position=parallel + 1
104
- ):
135
+ for future in tqdm.tqdm(as_completed(futures), desc="Computing additions", total=len(futures), colour="green", position=parallel + 1): # fmt: skip
105
136
  future.result()
106
137
 
107
138
  with ExecutorClass(max_workers=1) as executor:
108
- executor.submit(task, "statistics", options).result()
109
- executor.submit(task, "additions", options).result()
139
+ executor.submit(task, "finalise-additions", options).result()
110
140
  executor.submit(task, "cleanup", options).result()
111
141
  executor.submit(task, "verify", options).result()
112
142
 
@@ -0,0 +1,45 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class FinaliseAdditions(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, command_parser):
20
+ command_parser.add_argument(
21
+ "--delta",
22
+ help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.",
23
+ nargs="+",
24
+ )
25
+
26
+ command_parser.add_argument("path", help="Path to store the created data.")
27
+ command_parser.add_argument("--trace", action="store_true")
28
+
29
+ def run(self, args):
30
+ options = vars(args)
31
+ options.pop("command")
32
+ step = "finalise-additions"
33
+ now = time.time()
34
+
35
+ if "version" in options:
36
+ options.pop("version")
37
+
38
+ if "debug" in options:
39
+ options.pop("debug")
40
+ task(step, options)
41
+
42
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
43
+
44
+
45
+ command = FinaliseAdditions
@@ -0,0 +1,39 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class Finalise(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, command_parser):
20
+ command_parser.add_argument("path", help="Path to store the created data.")
21
+ command_parser.add_argument("--trace", action="store_true")
22
+
23
+ def run(self, args):
24
+ options = vars(args)
25
+ options.pop("command")
26
+ now = time.time()
27
+ step = "finalise"
28
+
29
+ if "version" in options:
30
+ options.pop("version")
31
+
32
+ if "debug" in options:
33
+ options.pop("debug")
34
+ task(step, options)
35
+
36
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
37
+
38
+
39
+ command = Finalise
@@ -0,0 +1,45 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class InitAdditions(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, command_parser):
20
+ command_parser.add_argument(
21
+ "--delta",
22
+ help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.",
23
+ nargs="+",
24
+ )
25
+
26
+ command_parser.add_argument("path", help="Path to store the created data.")
27
+ command_parser.add_argument("--trace", action="store_true")
28
+
29
+ def run(self, args):
30
+ options = vars(args)
31
+ options.pop("command")
32
+ step = "init-additions"
33
+ now = time.time()
34
+
35
+ if "version" in options:
36
+ options.pop("version")
37
+
38
+ if "debug" in options:
39
+ options.pop("debug")
40
+ task(step, options)
41
+
42
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
43
+
44
+
45
+ command = InitAdditions
@@ -0,0 +1,67 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class Init(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, subparser):
20
+
21
+ subparser.add_argument("config", help="Configuration yaml file defining the recipe to create the dataset.")
22
+ subparser.add_argument("path", help="Path to store the created data.")
23
+
24
+ subparser.add_argument(
25
+ "--overwrite",
26
+ action="store_true",
27
+ help="Overwrite existing files. This will delete the target dataset if it already exists.",
28
+ )
29
+ subparser.add_argument(
30
+ "--test",
31
+ action="store_true",
32
+ help="Build a small dataset, using only the first dates. And, when possible, using low resolution and less ensemble members.",
33
+ )
34
+ subparser.add_argument(
35
+ "--check-name",
36
+ dest="check_name",
37
+ action="store_true",
38
+ help="Check if the dataset name is valid before creating it.",
39
+ )
40
+ subparser.add_argument(
41
+ "--no-check-name",
42
+ dest="check_name",
43
+ action="store_false",
44
+ help="Do not check if the dataset name is valid before creating it.",
45
+ )
46
+ subparser.set_defaults(check_name=False)
47
+ subparser.add_argument("--cache", help="Location to store the downloaded data.", metavar="DIR")
48
+
49
+ subparser.add_argument("--trace", action="store_true")
50
+
51
+ def run(self, args):
52
+ options = vars(args)
53
+ options.pop("command")
54
+ now = time.time()
55
+
56
+ if "version" in options:
57
+ options.pop("version")
58
+
59
+ if "debug" in options:
60
+ options.pop("debug")
61
+
62
+ task("init", options)
63
+
64
+ LOG.info(f"Create step '{self.__class__.__name__.lower()}' completed in {seconds_to_human(time.time()-now)}")
65
+
66
+
67
+ command = Init
@@ -149,7 +149,7 @@ class Version:
149
149
  print()
150
150
  print(f'📅 Start : {self.first_date.strftime("%Y-%m-%d %H:%M")}')
151
151
  print(f'📅 End : {self.last_date.strftime("%Y-%m-%d %H:%M")}')
152
- print(f"⏰ Frequency : {self.frequency}h")
152
+ print(f"⏰ Frequency : {self.frequency}")
153
153
  if self.n_missing_dates is not None:
154
154
  print(f"🚫 Missing : {self.n_missing_dates:,}")
155
155
  print(f"🌎 Resolution : {self.resolution}")
@@ -0,0 +1,47 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class LoadAdditions(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, subparser):
20
+
21
+ subparser.add_argument("--parts", nargs="+", help="Only load the specified parts of the dataset.")
22
+ subparser.add_argument(
23
+ "--delta",
24
+ help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.",
25
+ nargs="+",
26
+ )
27
+
28
+ subparser.add_argument("path", help="Path to store the created data.")
29
+ subparser.add_argument("--trace", action="store_true")
30
+
31
+ def run(self, args):
32
+ options = vars(args)
33
+ options.pop("command")
34
+ now = time.time()
35
+ step = "load-additions"
36
+
37
+ if "version" in options:
38
+ options.pop("version")
39
+
40
+ if "debug" in options:
41
+ options.pop("debug")
42
+ task(step, options)
43
+
44
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
45
+
46
+
47
+ command = LoadAdditions
@@ -0,0 +1,47 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class Load(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, subparser):
20
+
21
+ subparser.add_argument("--parts", nargs="+", help="Only load the specified parts of the dataset.")
22
+ # subparser.add_argument(
23
+ # "--delta",
24
+ # help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.",
25
+ # )
26
+
27
+ subparser.add_argument("path", help="Path to store the created data.")
28
+ subparser.add_argument("--cache", help="Location to store the downloaded data.", metavar="DIR")
29
+ subparser.add_argument("--trace", action="store_true")
30
+
31
+ def run(self, args):
32
+ options = vars(args)
33
+ options.pop("command")
34
+ now = time.time()
35
+ step = "load"
36
+
37
+ if "version" in options:
38
+ options.pop("version")
39
+
40
+ if "debug" in options:
41
+ options.pop("debug")
42
+ task(step, options)
43
+
44
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
45
+
46
+
47
+ command = Load
@@ -0,0 +1,39 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class Patch(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, parser):
20
+ parser.add_argument("path", help="Path to store the created data.")
21
+
22
+ def run(self, args):
23
+ options = vars(args)
24
+ options.pop("command")
25
+ now = time.time()
26
+ step = self.__class__.__name__.lower()
27
+
28
+ if "version" in options:
29
+ options.pop("version")
30
+
31
+ if "debug" in options:
32
+ options.pop("debug")
33
+
34
+ task(step, options)
35
+
36
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
37
+
38
+
39
+ command = Patch