anemoi-datasets 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/cleanup.py +44 -0
  3. anemoi/datasets/commands/create.py +52 -21
  4. anemoi/datasets/commands/finalise-additions.py +45 -0
  5. anemoi/datasets/commands/finalise.py +39 -0
  6. anemoi/datasets/commands/init-additions.py +45 -0
  7. anemoi/datasets/commands/init.py +67 -0
  8. anemoi/datasets/commands/inspect.py +1 -1
  9. anemoi/datasets/commands/load-additions.py +47 -0
  10. anemoi/datasets/commands/load.py +47 -0
  11. anemoi/datasets/commands/patch.py +39 -0
  12. anemoi/datasets/create/__init__.py +959 -146
  13. anemoi/datasets/create/check.py +5 -3
  14. anemoi/datasets/create/config.py +54 -2
  15. anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
  16. anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
  17. anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
  18. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
  19. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
  20. anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
  21. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
  22. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
  23. anemoi/datasets/create/functions/sources/grib.py +86 -1
  24. anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
  25. anemoi/datasets/create/functions/sources/mars.py +9 -3
  26. anemoi/datasets/create/functions/sources/xarray/__init__.py +12 -2
  27. anemoi/datasets/create/functions/sources/xarray/coordinates.py +7 -0
  28. anemoi/datasets/create/functions/sources/xarray/field.py +8 -2
  29. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +0 -2
  30. anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -1
  31. anemoi/datasets/create/functions/sources/xarray/metadata.py +40 -40
  32. anemoi/datasets/create/functions/sources/xarray/time.py +63 -30
  33. anemoi/datasets/create/functions/sources/xarray/variable.py +15 -38
  34. anemoi/datasets/create/input.py +62 -39
  35. anemoi/datasets/create/persistent.py +1 -1
  36. anemoi/datasets/create/statistics/__init__.py +39 -23
  37. anemoi/datasets/create/utils.py +6 -2
  38. anemoi/datasets/data/__init__.py +1 -0
  39. anemoi/datasets/data/concat.py +46 -2
  40. anemoi/datasets/data/dataset.py +119 -34
  41. anemoi/datasets/data/debug.py +5 -1
  42. anemoi/datasets/data/forwards.py +17 -8
  43. anemoi/datasets/data/grids.py +17 -3
  44. anemoi/datasets/data/interpolate.py +133 -0
  45. anemoi/datasets/data/masked.py +2 -2
  46. anemoi/datasets/data/misc.py +56 -66
  47. anemoi/datasets/data/missing.py +240 -0
  48. anemoi/datasets/data/rescale.py +147 -0
  49. anemoi/datasets/data/select.py +7 -1
  50. anemoi/datasets/data/stores.py +23 -10
  51. anemoi/datasets/data/subset.py +47 -5
  52. anemoi/datasets/data/unchecked.py +20 -22
  53. anemoi/datasets/data/xy.py +125 -0
  54. anemoi/datasets/dates/__init__.py +124 -95
  55. anemoi/datasets/dates/groups.py +85 -20
  56. anemoi/datasets/grids.py +66 -48
  57. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/METADATA +8 -17
  58. anemoi_datasets-0.5.0.dist-info/RECORD +105 -0
  59. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/WHEEL +1 -1
  60. anemoi/datasets/create/loaders.py +0 -936
  61. anemoi_datasets-0.4.4.dist-info/RECORD +0 -86
  62. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/LICENSE +0 -0
  63. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/entry_points.txt +0 -0
  64. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/top_level.txt +0 -0
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.4.4'
16
- __version_tuple__ = version_tuple = (0, 4, 4)
15
+ __version__ = version = '0.5.0'
16
+ __version_tuple__ = version_tuple = (0, 5, 0)
@@ -0,0 +1,44 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class Cleanup(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, subparser):
20
+ subparser.add_argument("path", help="Path to store the created data.")
21
+ subparser.add_argument(
22
+ "--delta",
23
+ help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.",
24
+ nargs="+",
25
+ )
26
+
27
+ def run(self, args):
28
+ options = vars(args)
29
+ options.pop("command")
30
+ now = time.time()
31
+ step = self.__class__.__name__.lower()
32
+
33
+ if "version" in options:
34
+ options.pop("version")
35
+
36
+ if "debug" in options:
37
+ options.pop("debug")
38
+
39
+ task(step, options)
40
+
41
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
42
+
43
+
44
+ command = Cleanup
@@ -8,8 +8,6 @@ from concurrent.futures import as_completed
8
8
  import tqdm
9
9
  from anemoi.utils.humanize import seconds_to_human
10
10
 
11
- from anemoi.datasets.create.trace import enable_trace
12
-
13
11
  from . import Command
14
12
 
15
13
  LOG = logging.getLogger(__name__)
@@ -21,17 +19,16 @@ def task(what, options, *args, **kwargs):
21
19
  """
22
20
 
23
21
  now = datetime.datetime.now()
24
- LOG.debug(f"Task {what}({args},{kwargs}) starting")
22
+ LOG.info(f"🎬 Task {what}({args},{kwargs}) starting")
25
23
 
26
- from anemoi.datasets.create import Creator
24
+ from anemoi.datasets.create import creator_factory
27
25
 
28
- if "trace" in options:
29
- enable_trace(options["trace"])
26
+ options = {k: v for k, v in options.items() if v is not None}
30
27
 
31
- c = Creator(**options)
32
- result = getattr(c, what)(*args, **kwargs)
28
+ c = creator_factory(what.replace("-", "_"), **options)
29
+ result = c.run()
33
30
 
34
- LOG.debug(f"Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
31
+ LOG.info(f"🏁 Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
35
32
  return result
36
33
 
37
34
 
@@ -60,6 +57,7 @@ class Create(Command):
60
57
  command_parser.add_argument("--trace", action="store_true")
61
58
 
62
59
  def run(self, args):
60
+
63
61
  now = time.time()
64
62
  if args.threads + args.processes:
65
63
  self.parallel_create(args)
@@ -68,11 +66,23 @@ class Create(Command):
68
66
  LOG.info(f"Create completed in {seconds_to_human(time.time()-now)}")
69
67
 
70
68
  def serial_create(self, args):
71
- from anemoi.datasets.create import Creator
72
69
 
73
70
  options = vars(args)
74
- c = Creator(**options)
75
- c.create()
71
+ options.pop("command")
72
+ options.pop("threads")
73
+ options.pop("processes")
74
+
75
+ task("init", options)
76
+ task("load", options)
77
+ task("finalise", options)
78
+
79
+ task("patch", options)
80
+
81
+ task("init_additions", options)
82
+ task("run_additions", options)
83
+ task("finalise_additions", options)
84
+ task("cleanup", options)
85
+ task("verify", options)
76
86
 
77
87
  def parallel_create(self, args):
78
88
  """Some modules, like fsspec do not work well with fork()
@@ -82,10 +92,15 @@ class Create(Command):
82
92
  """
83
93
 
84
94
  options = vars(args)
85
- parallel = args.threads + args.processes
86
- args.use_threads = args.threads > 0
95
+ options.pop("command")
96
+
97
+ threads = options.pop("threads")
98
+ processes = options.pop("processes")
87
99
 
88
- if args.use_threads:
100
+ use_threads = threads > 0
101
+ options["use_threads"] = use_threads
102
+
103
+ if use_threads:
89
104
  ExecutorClass = ThreadPoolExecutor
90
105
  else:
91
106
  ExecutorClass = ProcessPoolExecutor
@@ -95,18 +110,34 @@ class Create(Command):
95
110
 
96
111
  futures = []
97
112
 
113
+ parallel = threads + processes
114
+ with ExecutorClass(max_workers=parallel) as executor:
115
+ for n in range(total):
116
+ opt = options.copy()
117
+ opt["parts"] = f"{n+1}/{total}"
118
+ futures.append(executor.submit(task, "load", opt))
119
+
120
+ for future in tqdm.tqdm(as_completed(futures), desc="Loading", total=len(futures), colour="green", position=parallel + 1): # fmt: skip
121
+ future.result()
122
+
123
+ with ExecutorClass(max_workers=1) as executor:
124
+ executor.submit(task, "finalise", options).result()
125
+
126
+ with ExecutorClass(max_workers=1) as executor:
127
+ executor.submit(task, "init-additions", options).result()
128
+
98
129
  with ExecutorClass(max_workers=parallel) as executor:
130
+ opt = options.copy()
131
+ opt["parts"] = f"{n+1}/{total}"
132
+ futures.append(executor.submit(task, "load", opt))
99
133
  for n in range(total):
100
- futures.append(executor.submit(task, "load", options, parts=f"{n+1}/{total}"))
134
+ futures.append(executor.submit(task, "load-additions", opt))
101
135
 
102
- for future in tqdm.tqdm(
103
- as_completed(futures), desc="Loading", total=len(futures), colour="green", position=parallel + 1
104
- ):
136
+ for future in tqdm.tqdm(as_completed(futures), desc="Computing additions", total=len(futures), colour="green", position=parallel + 1): # fmt: skip
105
137
  future.result()
106
138
 
107
139
  with ExecutorClass(max_workers=1) as executor:
108
- executor.submit(task, "statistics", options).result()
109
- executor.submit(task, "additions", options).result()
140
+ executor.submit(task, "finalise-additions", options).result()
110
141
  executor.submit(task, "cleanup", options).result()
111
142
  executor.submit(task, "verify", options).result()
112
143
 
@@ -0,0 +1,45 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class FinaliseAdditions(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, command_parser):
20
+ command_parser.add_argument(
21
+ "--delta",
22
+ help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.",
23
+ nargs="+",
24
+ )
25
+
26
+ command_parser.add_argument("path", help="Path to store the created data.")
27
+ command_parser.add_argument("--trace", action="store_true")
28
+
29
+ def run(self, args):
30
+ options = vars(args)
31
+ options.pop("command")
32
+ step = "finalise-additions"
33
+ now = time.time()
34
+
35
+ if "version" in options:
36
+ options.pop("version")
37
+
38
+ if "debug" in options:
39
+ options.pop("debug")
40
+ task(step, options)
41
+
42
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
43
+
44
+
45
+ command = FinaliseAdditions
@@ -0,0 +1,39 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class Finalise(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, command_parser):
20
+ command_parser.add_argument("path", help="Path to store the created data.")
21
+ command_parser.add_argument("--trace", action="store_true")
22
+
23
+ def run(self, args):
24
+ options = vars(args)
25
+ options.pop("command")
26
+ now = time.time()
27
+ step = "finalise"
28
+
29
+ if "version" in options:
30
+ options.pop("version")
31
+
32
+ if "debug" in options:
33
+ options.pop("debug")
34
+ task(step, options)
35
+
36
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
37
+
38
+
39
+ command = Finalise
@@ -0,0 +1,45 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class InitAdditions(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, command_parser):
20
+ command_parser.add_argument(
21
+ "--delta",
22
+ help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.",
23
+ nargs="+",
24
+ )
25
+
26
+ command_parser.add_argument("path", help="Path to store the created data.")
27
+ command_parser.add_argument("--trace", action="store_true")
28
+
29
+ def run(self, args):
30
+ options = vars(args)
31
+ options.pop("command")
32
+ step = "init-additions"
33
+ now = time.time()
34
+
35
+ if "version" in options:
36
+ options.pop("version")
37
+
38
+ if "debug" in options:
39
+ options.pop("debug")
40
+ task(step, options)
41
+
42
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
43
+
44
+
45
+ command = InitAdditions
@@ -0,0 +1,67 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class Init(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, subparser):
20
+
21
+ subparser.add_argument("config", help="Configuration yaml file defining the recipe to create the dataset.")
22
+ subparser.add_argument("path", help="Path to store the created data.")
23
+
24
+ subparser.add_argument(
25
+ "--overwrite",
26
+ action="store_true",
27
+ help="Overwrite existing files. This will delete the target dataset if it already exists.",
28
+ )
29
+ subparser.add_argument(
30
+ "--test",
31
+ action="store_true",
32
+ help="Build a small dataset, using only the first dates. And, when possible, using low resolution and less ensemble members.",
33
+ )
34
+ subparser.add_argument(
35
+ "--check-name",
36
+ dest="check_name",
37
+ action="store_true",
38
+ help="Check if the dataset name is valid before creating it.",
39
+ )
40
+ subparser.add_argument(
41
+ "--no-check-name",
42
+ dest="check_name",
43
+ action="store_false",
44
+ help="Do not check if the dataset name is valid before creating it.",
45
+ )
46
+ subparser.set_defaults(check_name=False)
47
+ subparser.add_argument("--cache", help="Location to store the downloaded data.", metavar="DIR")
48
+
49
+ subparser.add_argument("--trace", action="store_true")
50
+
51
+ def run(self, args):
52
+ options = vars(args)
53
+ options.pop("command")
54
+ now = time.time()
55
+
56
+ if "version" in options:
57
+ options.pop("version")
58
+
59
+ if "debug" in options:
60
+ options.pop("debug")
61
+
62
+ task("init", options)
63
+
64
+ LOG.info(f"Create step '{self.__class__.__name__.lower()}' completed in {seconds_to_human(time.time()-now)}")
65
+
66
+
67
+ command = Init
@@ -149,7 +149,7 @@ class Version:
149
149
  print()
150
150
  print(f'📅 Start : {self.first_date.strftime("%Y-%m-%d %H:%M")}')
151
151
  print(f'📅 End : {self.last_date.strftime("%Y-%m-%d %H:%M")}')
152
- print(f"⏰ Frequency : {self.frequency}h")
152
+ print(f"⏰ Frequency : {self.frequency}")
153
153
  if self.n_missing_dates is not None:
154
154
  print(f"🚫 Missing : {self.n_missing_dates:,}")
155
155
  print(f"🌎 Resolution : {self.resolution}")
@@ -0,0 +1,47 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class LoadAdditions(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, subparser):
20
+
21
+ subparser.add_argument("--parts", nargs="+", help="Only load the specified parts of the dataset.")
22
+ subparser.add_argument(
23
+ "--delta",
24
+ help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.",
25
+ nargs="+",
26
+ )
27
+
28
+ subparser.add_argument("path", help="Path to store the created data.")
29
+ subparser.add_argument("--trace", action="store_true")
30
+
31
+ def run(self, args):
32
+ options = vars(args)
33
+ options.pop("command")
34
+ now = time.time()
35
+ step = "load-additions"
36
+
37
+ if "version" in options:
38
+ options.pop("version")
39
+
40
+ if "debug" in options:
41
+ options.pop("debug")
42
+ task(step, options)
43
+
44
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
45
+
46
+
47
+ command = LoadAdditions
@@ -0,0 +1,47 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class Load(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, subparser):
20
+
21
+ subparser.add_argument("--parts", nargs="+", help="Only load the specified parts of the dataset.")
22
+ # subparser.add_argument(
23
+ # "--delta",
24
+ # help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.",
25
+ # )
26
+
27
+ subparser.add_argument("path", help="Path to store the created data.")
28
+ subparser.add_argument("--cache", help="Location to store the downloaded data.", metavar="DIR")
29
+ subparser.add_argument("--trace", action="store_true")
30
+
31
+ def run(self, args):
32
+ options = vars(args)
33
+ options.pop("command")
34
+ now = time.time()
35
+ step = "load"
36
+
37
+ if "version" in options:
38
+ options.pop("version")
39
+
40
+ if "debug" in options:
41
+ options.pop("debug")
42
+ task(step, options)
43
+
44
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
45
+
46
+
47
+ command = Load
@@ -0,0 +1,39 @@
1
+ import logging
2
+ import time
3
+
4
+ from anemoi.utils.humanize import seconds_to_human
5
+
6
+ from anemoi.datasets.commands.create import task
7
+
8
+ from . import Command
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class Patch(Command):
14
+ """Create a dataset, step by step."""
15
+
16
+ internal = True
17
+ timestamp = True
18
+
19
+ def add_arguments(self, parser):
20
+ parser.add_argument("path", help="Path to store the created data.")
21
+
22
+ def run(self, args):
23
+ options = vars(args)
24
+ options.pop("command")
25
+ now = time.time()
26
+ step = self.__class__.__name__.lower()
27
+
28
+ if "version" in options:
29
+ options.pop("version")
30
+
31
+ if "debug" in options:
32
+ options.pop("debug")
33
+
34
+ task(step, options)
35
+
36
+ LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
37
+
38
+
39
+ command = Patch