anemoi-datasets 0.5.0__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/inspect.py +1 -1
- anemoi/datasets/commands/publish.py +30 -0
- anemoi/datasets/create/__init__.py +42 -3
- anemoi/datasets/create/check.py +6 -0
- anemoi/datasets/create/functions/filters/rename.py +2 -3
- anemoi/datasets/create/functions/sources/__init__.py +7 -1
- anemoi/datasets/create/functions/sources/accumulations.py +2 -0
- anemoi/datasets/create/functions/sources/grib.py +1 -1
- anemoi/datasets/create/functions/sources/xarray/__init__.py +6 -1
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +6 -1
- anemoi/datasets/create/functions/sources/xarray/field.py +13 -4
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +16 -16
- anemoi/datasets/create/functions/sources/xarray/flavour.py +126 -12
- anemoi/datasets/create/functions/sources/xarray/grid.py +106 -17
- anemoi/datasets/create/functions/sources/xarray/metadata.py +3 -11
- anemoi/datasets/create/functions/sources/xarray/time.py +1 -5
- anemoi/datasets/create/functions/sources/xarray/variable.py +10 -10
- anemoi/datasets/create/input/__init__.py +69 -0
- anemoi/datasets/create/input/action.py +123 -0
- anemoi/datasets/create/input/concat.py +92 -0
- anemoi/datasets/create/input/context.py +59 -0
- anemoi/datasets/create/input/data_sources.py +71 -0
- anemoi/datasets/create/input/empty.py +42 -0
- anemoi/datasets/create/input/filter.py +76 -0
- anemoi/datasets/create/input/function.py +122 -0
- anemoi/datasets/create/input/join.py +57 -0
- anemoi/datasets/create/input/misc.py +85 -0
- anemoi/datasets/create/input/pipe.py +33 -0
- anemoi/datasets/create/input/repeated_dates.py +217 -0
- anemoi/datasets/create/input/result.py +413 -0
- anemoi/datasets/create/input/step.py +99 -0
- anemoi/datasets/create/{template.py → input/template.py} +0 -42
- anemoi/datasets/create/statistics/__init__.py +1 -1
- anemoi/datasets/create/zarr.py +4 -2
- anemoi/datasets/dates/__init__.py +1 -0
- anemoi/datasets/dates/groups.py +12 -4
- anemoi/datasets/fields.py +66 -0
- anemoi/datasets/utils/fields.py +47 -0
- {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.5.dist-info}/METADATA +1 -1
- {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.5.dist-info}/RECORD +46 -30
- anemoi/datasets/create/input.py +0 -1087
- anemoi/datasets/create/{trace.py → input/trace.py} +0 -0
- {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.5.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.5.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.5.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.5.dist-info}/top_level.txt +0 -0
anemoi/datasets/_version.py
CHANGED
|
@@ -311,7 +311,7 @@ class Version:
|
|
|
311
311
|
print(f"🕰️ Dataset initialized {when(start)}.")
|
|
312
312
|
if built and latest:
|
|
313
313
|
speed = (latest - start) / built
|
|
314
|
-
eta = datetime.datetime.
|
|
314
|
+
eta = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) + speed * (total - built)
|
|
315
315
|
print(f"🏁 ETA {when(eta)}.")
|
|
316
316
|
else:
|
|
317
317
|
if latest:
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from . import Command
|
|
4
|
+
|
|
5
|
+
LOG = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Publish(Command):
    """Command that publishes the dataset found at a given path.

    Publishing is delegated to ``publish_dataset`` from the optional
    ``anemoi.registry`` package, which is imported only when the command runs.
    """

    # NOTE(review): these flags are presumably read by the command framework;
    # their exact meaning is defined outside this file.
    internal = True
    timestamp = True

    def add_arguments(self, parser):
        """Register the single positional ``path`` argument on ``parser``."""
        parser.add_argument("path", help="Path of the dataset to publish.")

    def run(self, args):
        """Publish the dataset located at ``args.path``.

        When ``anemoi.registry`` cannot be imported, an error is logged and the
        method returns without publishing anything.
        """
        try:
            from anemoi.registry import publish_dataset
        except ImportError:
            LOG.error("anemoi-registry is not installed. Please install it to use this command.")
        else:
            # Runs outside the ``except`` scope, so errors raised while
            # publishing propagate to the caller.
            publish_dataset(args.path)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
command = Publish
|
|
@@ -14,6 +14,7 @@ import os
|
|
|
14
14
|
import time
|
|
15
15
|
import uuid
|
|
16
16
|
import warnings
|
|
17
|
+
from copy import deepcopy
|
|
17
18
|
from functools import cached_property
|
|
18
19
|
|
|
19
20
|
import numpy as np
|
|
@@ -24,9 +25,11 @@ from anemoi.utils.dates import frequency_to_string
|
|
|
24
25
|
from anemoi.utils.dates import frequency_to_timedelta
|
|
25
26
|
from anemoi.utils.humanize import compress_dates
|
|
26
27
|
from anemoi.utils.humanize import seconds_to_human
|
|
28
|
+
from earthkit.data.core.order import build_remapping
|
|
27
29
|
|
|
28
30
|
from anemoi.datasets import MissingDateError
|
|
29
31
|
from anemoi.datasets import open_dataset
|
|
32
|
+
from anemoi.datasets.create.input.trace import enable_trace
|
|
30
33
|
from anemoi.datasets.create.persistent import build_storage
|
|
31
34
|
from anemoi.datasets.data.misc import as_first_date
|
|
32
35
|
from anemoi.datasets.data.misc import as_last_date
|
|
@@ -308,7 +311,6 @@ class HasElementForDataMixin:
|
|
|
308
311
|
|
|
309
312
|
|
|
310
313
|
def build_input_(main_config, output_config):
|
|
311
|
-
from earthkit.data.core.order import build_remapping
|
|
312
314
|
|
|
313
315
|
builder = build_input(
|
|
314
316
|
main_config.input,
|
|
@@ -323,6 +325,43 @@ def build_input_(main_config, output_config):
|
|
|
323
325
|
return builder
|
|
324
326
|
|
|
325
327
|
|
|
328
|
+
def tidy_recipe(config: object):
    """Return a copy of ``config`` with potentially private information removed.

    The input is never modified. The following are replaced by the marker
    ``"*** REMOVED FOR SECURITY ***"``:

    * mapping values whose key is a string starting with ``_``;
    * strings starting with ``_``, ``s3://``, ``gs://``, ``http``, ``ftp``,
      ``file``, ``ssh``, ``scp``, ``rsync`` or ``/``;
    * strings containing ``@``.

    Tuples and lists are returned as lists, mappings keep their keys, and any
    other value is returned as a deep copy.
    """
    redacted = "*** REMOVED FOR SECURITY ***"
    # Prefix matching is deliberately broad: e.g. "file" also matches
    # "filename". This mirrors the original checks one for one.
    private_prefixes = ("_", "s3://", "gs://", "http", "ftp", "file", "ssh", "scp", "rsync", "/")

    def _tidy(node):
        # Works on the private copy made below, so in-place updates are safe.
        if isinstance(node, str):
            if node.startswith(private_prefixes) or "@" in node:
                return redacted
            return node

        if isinstance(node, (tuple, list)):
            return [_tidy(item) for item in node]

        if isinstance(node, (dict, DotDict)):
            # Only existing keys are reassigned, so the mapping size does not
            # change while iterating.
            for key, value in node.items():
                # Keys may be non-strings (e.g. integers coming from YAML);
                # those are never treated as private.
                if isinstance(key, str) and key.startswith("_"):
                    node[key] = redacted
                else:
                    node[key] = _tidy(value)

        return node

    # A single deep copy up front is enough to protect the caller's object.
    return _tidy(deepcopy(config))
|
|
363
|
+
|
|
364
|
+
|
|
326
365
|
class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
|
|
327
366
|
dataset_class = NewDataset
|
|
328
367
|
def __init__(self, path, config, check_name=False, overwrite=False, use_threads=False, statistics_temp_dir=None, progress=None, test=False, cache=None, **kwargs): # fmt: skip
|
|
@@ -409,6 +448,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
409
448
|
metadata.update(self.main_config.get("add_metadata", {}))
|
|
410
449
|
|
|
411
450
|
metadata["_create_yaml_config"] = self.main_config.get_serialisable_dict()
|
|
451
|
+
metadata["recipe"] = tidy_recipe(self.main_config.get_serialisable_dict())
|
|
412
452
|
|
|
413
453
|
metadata["description"] = self.main_config.description
|
|
414
454
|
metadata["licence"] = self.main_config["licence"]
|
|
@@ -524,7 +564,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
524
564
|
# assert isinstance(group[0], datetime.datetime), type(group[0])
|
|
525
565
|
LOG.debug(f"Building data for group {igroup}/{self.n_groups}")
|
|
526
566
|
|
|
527
|
-
result = self.input.select(
|
|
567
|
+
result = self.input.select(group_of_dates=group)
|
|
528
568
|
assert result.group_of_dates == group, (len(result.group_of_dates), len(group), group)
|
|
529
569
|
|
|
530
570
|
# There are several groups.
|
|
@@ -992,7 +1032,6 @@ def chain(tasks):
|
|
|
992
1032
|
|
|
993
1033
|
def creator_factory(name, trace=None, **kwargs):
|
|
994
1034
|
if trace:
|
|
995
|
-
from anemoi.datasets.create.trace import enable_trace
|
|
996
1035
|
|
|
997
1036
|
enable_trace(trace)
|
|
998
1037
|
|
anemoi/datasets/create/check.py
CHANGED
|
@@ -140,9 +140,15 @@ class StatisticsValueError(ValueError):
|
|
|
140
140
|
|
|
141
141
|
def check_data_values(arr, *, name: str, log=[], allow_nans=False):
|
|
142
142
|
|
|
143
|
+
shape = arr.shape
|
|
144
|
+
|
|
143
145
|
if (isinstance(allow_nans, (set, list, tuple, dict)) and name in allow_nans) or allow_nans:
|
|
144
146
|
arr = arr[~np.isnan(arr)]
|
|
145
147
|
|
|
148
|
+
if arr.size == 0:
|
|
149
|
+
warnings.warn(f"Empty array for {name} ({shape})")
|
|
150
|
+
return
|
|
151
|
+
|
|
146
152
|
assert arr.size > 0, (name, *log)
|
|
147
153
|
|
|
148
154
|
min, max = arr.min(), arr.max()
|
|
@@ -32,7 +32,7 @@ class RenamedFieldMapping:
|
|
|
32
32
|
|
|
33
33
|
value = self.field.metadata(key, **kwargs)
|
|
34
34
|
if key == self.what:
|
|
35
|
-
return self.renaming.get(value, value)
|
|
35
|
+
return self.renaming.get(self.what, {}).get(value, value)
|
|
36
36
|
|
|
37
37
|
return value
|
|
38
38
|
|
|
@@ -68,8 +68,7 @@ class RenamedFieldFormat:
|
|
|
68
68
|
|
|
69
69
|
|
|
70
70
|
def execute(context, input, what="param", **kwargs):
|
|
71
|
-
|
|
72
|
-
if what in kwargs:
|
|
71
|
+
if what in kwargs and isinstance(kwargs[what], str):
|
|
73
72
|
return FieldArray([RenamedFieldFormat(fs, kwargs[what]) for fs in input])
|
|
74
73
|
|
|
75
74
|
return FieldArray([RenamedFieldMapping(fs, what, kwargs) for fs in input])
|
|
@@ -16,6 +16,10 @@ LOG = logging.getLogger(__name__)
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def _expand(paths):
|
|
19
|
+
|
|
20
|
+
if not isinstance(paths, list):
|
|
21
|
+
paths = [paths]
|
|
22
|
+
|
|
19
23
|
for path in paths:
|
|
20
24
|
if path.startswith("file://"):
|
|
21
25
|
path = path[7:]
|
|
@@ -40,8 +44,10 @@ def iterate_patterns(path, dates, **kwargs):
|
|
|
40
44
|
given_paths = path if isinstance(path, list) else [path]
|
|
41
45
|
|
|
42
46
|
dates = [d.isoformat() for d in dates]
|
|
47
|
+
if len(dates) > 0:
|
|
48
|
+
kwargs["date"] = dates
|
|
43
49
|
|
|
44
50
|
for path in given_paths:
|
|
45
|
-
paths = Pattern(path, ignore_missing_keys=True).substitute(
|
|
51
|
+
paths = Pattern(path, ignore_missing_keys=True).substitute(**kwargs)
|
|
46
52
|
for path in _expand(paths):
|
|
47
53
|
yield path, dates
|
|
@@ -375,6 +375,8 @@ def accumulations(context, dates, **request):
|
|
|
375
375
|
("od", "elda"): dict(base_times=(6, 18)),
|
|
376
376
|
("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)),
|
|
377
377
|
("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)),
|
|
378
|
+
("rr", "oper"): dict(data_accumulation_period=3, base_times=(0, 3, 6, 9, 12, 15, 18, 21)),
|
|
379
|
+
("l5", "oper"): dict(data_accumulation_period=1, base_times=(0,)),
|
|
378
380
|
}
|
|
379
381
|
|
|
380
382
|
kwargs = KWARGS.get((class_, stream), {})
|
|
@@ -135,7 +135,7 @@ def execute(context, dates, path, latitudes=None, longitudes=None, *args, **kwar
|
|
|
135
135
|
s = s.sel(valid_datetime=dates, **kwargs)
|
|
136
136
|
ds = ds + s
|
|
137
137
|
|
|
138
|
-
if kwargs:
|
|
138
|
+
if kwargs and not context.partial_ok:
|
|
139
139
|
check(ds, given_paths, valid_datetime=dates, **kwargs)
|
|
140
140
|
|
|
141
141
|
if geography is not None:
|
|
@@ -12,6 +12,7 @@ import logging
|
|
|
12
12
|
from earthkit.data.core.fieldlist import MultiFieldList
|
|
13
13
|
|
|
14
14
|
from anemoi.datasets.data.stores import name_to_zarr_store
|
|
15
|
+
from anemoi.datasets.utils.fields import NewMetadataField as NewMetadataField
|
|
15
16
|
|
|
16
17
|
from .. import iterate_patterns
|
|
17
18
|
from .fieldlist import XarrayFieldList
|
|
@@ -49,7 +50,11 @@ def load_one(emoji, context, dates, dataset, options={}, flavour=None, **kwargs)
|
|
|
49
50
|
data = xr.open_dataset(dataset, **options)
|
|
50
51
|
|
|
51
52
|
fs = XarrayFieldList.from_xarray(data, flavour)
|
|
52
|
-
|
|
53
|
+
|
|
54
|
+
if len(dates) == 0:
|
|
55
|
+
return fs.sel(**kwargs)
|
|
56
|
+
else:
|
|
57
|
+
result = MultiFieldList([fs.sel(valid_datetime=date, **kwargs) for date in dates])
|
|
53
58
|
|
|
54
59
|
if len(result) == 0:
|
|
55
60
|
LOG.warning(f"No data found for {dataset} and dates {dates} and {kwargs}")
|
|
@@ -56,6 +56,8 @@ class Coordinate:
|
|
|
56
56
|
is_step = False
|
|
57
57
|
is_date = False
|
|
58
58
|
is_member = False
|
|
59
|
+
is_x = False
|
|
60
|
+
is_y = False
|
|
59
61
|
|
|
60
62
|
def __init__(self, variable):
|
|
61
63
|
self.variable = variable
|
|
@@ -66,10 +68,11 @@ class Coordinate:
|
|
|
66
68
|
return 1 if self.scalar else len(self.variable)
|
|
67
69
|
|
|
68
70
|
def __repr__(self):
|
|
69
|
-
return "%s[name=%s,values=%s]" % (
|
|
71
|
+
return "%s[name=%s,values=%s,shape=%s]" % (
|
|
70
72
|
self.__class__.__name__,
|
|
71
73
|
self.variable.name,
|
|
72
74
|
self.variable.values if self.scalar else len(self),
|
|
75
|
+
self.variable.shape,
|
|
73
76
|
)
|
|
74
77
|
|
|
75
78
|
def reduced(self, i):
|
|
@@ -225,11 +228,13 @@ class LatitudeCoordinate(Coordinate):
|
|
|
225
228
|
|
|
226
229
|
class XCoordinate(Coordinate):
|
|
227
230
|
is_grid = True
|
|
231
|
+
is_x = True
|
|
228
232
|
mars_names = ("x",)
|
|
229
233
|
|
|
230
234
|
|
|
231
235
|
class YCoordinate(Coordinate):
|
|
232
236
|
is_grid = True
|
|
237
|
+
is_y = True
|
|
233
238
|
mars_names = ("y",)
|
|
234
239
|
|
|
235
240
|
|
|
@@ -72,13 +72,18 @@ class XArrayField(Field):
|
|
|
72
72
|
def shape(self):
|
|
73
73
|
return self._shape
|
|
74
74
|
|
|
75
|
-
def to_numpy(self, flatten=False, dtype=None):
|
|
76
|
-
|
|
75
|
+
def to_numpy(self, flatten=False, dtype=None, index=None):
|
|
76
|
+
if index is not None:
|
|
77
|
+
values = self.selection[index]
|
|
78
|
+
else:
|
|
79
|
+
values = self.selection
|
|
77
80
|
|
|
78
81
|
assert dtype is None
|
|
82
|
+
|
|
79
83
|
if flatten:
|
|
80
|
-
return values.flatten()
|
|
81
|
-
|
|
84
|
+
return values.values.flatten()
|
|
85
|
+
|
|
86
|
+
return values # .reshape(self.shape)
|
|
82
87
|
|
|
83
88
|
def _make_metadata(self):
|
|
84
89
|
return XArrayMetadata(self)
|
|
@@ -113,3 +118,7 @@ class XArrayField(Field):
|
|
|
113
118
|
|
|
114
119
|
def __repr__(self):
|
|
115
120
|
return repr(self._metadata)
|
|
121
|
+
|
|
122
|
+
def _values(self):
|
|
123
|
+
# we don't use .values as this will download the data
|
|
124
|
+
return self.selection
|
|
@@ -70,10 +70,10 @@ class XarrayFieldList(FieldList):
|
|
|
70
70
|
skip.update(attr_val.split(" "))
|
|
71
71
|
|
|
72
72
|
for name in ds.data_vars:
|
|
73
|
-
|
|
74
|
-
_skip_attr(
|
|
75
|
-
_skip_attr(
|
|
76
|
-
_skip_attr(
|
|
73
|
+
variable = ds[name]
|
|
74
|
+
_skip_attr(variable, "coordinates")
|
|
75
|
+
_skip_attr(variable, "bounds")
|
|
76
|
+
_skip_attr(variable, "grid_mapping")
|
|
77
77
|
|
|
78
78
|
# Select only geographical variables
|
|
79
79
|
for name in ds.data_vars:
|
|
@@ -81,14 +81,14 @@ class XarrayFieldList(FieldList):
|
|
|
81
81
|
if name in skip:
|
|
82
82
|
continue
|
|
83
83
|
|
|
84
|
-
|
|
84
|
+
variable = ds[name]
|
|
85
85
|
coordinates = []
|
|
86
86
|
|
|
87
|
-
for coord in
|
|
87
|
+
for coord in variable.coords:
|
|
88
88
|
|
|
89
89
|
c = guess.guess(ds[coord], coord)
|
|
90
90
|
assert c, f"Could not guess coordinate for {coord}"
|
|
91
|
-
if coord not in
|
|
91
|
+
if coord not in variable.dims:
|
|
92
92
|
c.is_dim = False
|
|
93
93
|
coordinates.append(c)
|
|
94
94
|
|
|
@@ -98,17 +98,17 @@ class XarrayFieldList(FieldList):
|
|
|
98
98
|
if grid_coords < 2:
|
|
99
99
|
continue
|
|
100
100
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
metadata={},
|
|
109
|
-
)
|
|
101
|
+
v = Variable(
|
|
102
|
+
ds=ds,
|
|
103
|
+
variable=variable,
|
|
104
|
+
coordinates=coordinates,
|
|
105
|
+
grid=guess.grid(coordinates, variable),
|
|
106
|
+
time=Time.from_coordinates(coordinates),
|
|
107
|
+
metadata={},
|
|
110
108
|
)
|
|
111
109
|
|
|
110
|
+
variables.append(v)
|
|
111
|
+
|
|
112
112
|
return cls(ds, variables)
|
|
113
113
|
|
|
114
114
|
def sel(self, **kwargs):
|
|
@@ -8,6 +8,8 @@
|
|
|
8
8
|
#
|
|
9
9
|
|
|
10
10
|
|
|
11
|
+
import logging
|
|
12
|
+
|
|
11
13
|
from .coordinates import DateCoordinate
|
|
12
14
|
from .coordinates import EnsembleCoordinate
|
|
13
15
|
from .coordinates import LatitudeCoordinate
|
|
@@ -18,8 +20,13 @@ from .coordinates import StepCoordinate
|
|
|
18
20
|
from .coordinates import TimeCoordinate
|
|
19
21
|
from .coordinates import XCoordinate
|
|
20
22
|
from .coordinates import YCoordinate
|
|
23
|
+
from .coordinates import is_scalar
|
|
21
24
|
from .grid import MeshedGrid
|
|
25
|
+
from .grid import MeshProjectionGrid
|
|
22
26
|
from .grid import UnstructuredGrid
|
|
27
|
+
from .grid import UnstructuredProjectionGrid
|
|
28
|
+
|
|
29
|
+
LOG = logging.getLogger(__name__)
|
|
23
30
|
|
|
24
31
|
|
|
25
32
|
class CoordinateGuesser:
|
|
@@ -155,31 +162,138 @@ class CoordinateGuesser:
|
|
|
155
162
|
f" {long_name=}, {standard_name=}, units\n\n{c}\n\n{type(c.values)} {c.shape}"
|
|
156
163
|
)
|
|
157
164
|
|
|
158
|
-
def grid(self, coordinates):
|
|
165
|
+
def grid(self, coordinates, variable):
|
|
159
166
|
lat = [c for c in coordinates if c.is_lat]
|
|
160
167
|
lon = [c for c in coordinates if c.is_lon]
|
|
161
168
|
|
|
162
|
-
if len(lat)
|
|
163
|
-
|
|
169
|
+
if len(lat) == 1 and len(lon) == 1:
|
|
170
|
+
return self._lat_lon_provided(lat, lon, variable)
|
|
171
|
+
|
|
172
|
+
x = [c for c in coordinates if c.is_x]
|
|
173
|
+
y = [c for c in coordinates if c.is_y]
|
|
174
|
+
|
|
175
|
+
if len(x) == 1 and len(y) == 1:
|
|
176
|
+
return self._x_y_provided(x, y, variable)
|
|
177
|
+
|
|
178
|
+
raise NotImplementedError(f"Cannot establish grid {coordinates}")
|
|
179
|
+
|
|
180
|
+
def _check_dims(self, variable, x_or_lon, y_or_lat):
|
|
181
|
+
|
|
182
|
+
x_dims = set(x_or_lon.variable.dims)
|
|
183
|
+
y_dims = set(y_or_lat.variable.dims)
|
|
184
|
+
variable_dims = set(variable.dims)
|
|
164
185
|
|
|
165
|
-
if
|
|
166
|
-
raise
|
|
186
|
+
if not (x_dims <= variable_dims) or not (y_dims <= variable_dims):
|
|
187
|
+
raise ValueError(
|
|
188
|
+
f"Dimensions do not match {variable.name}{variable.dims} !="
|
|
189
|
+
f" {x_or_lon.name}{x_or_lon.variable.dims} and {y_or_lat.name}{y_or_lat.variable.dims}"
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
variable_dims = tuple(v for v in variable.dims if v in (x_dims | y_dims))
|
|
193
|
+
if x_dims == y_dims:
|
|
194
|
+
# It's unstructured
|
|
195
|
+
return variable_dims, True
|
|
167
196
|
|
|
197
|
+
if len(x_dims) == 1 and len(y_dims) == 1:
|
|
198
|
+
# It's a mesh
|
|
199
|
+
return variable_dims, False
|
|
200
|
+
|
|
201
|
+
raise ValueError(
|
|
202
|
+
f"Cannot establish grid for {variable.name}{variable.dims},"
|
|
203
|
+
f" {x_or_lon.name}{x_or_lon.variable.dims},"
|
|
204
|
+
f" {y_or_lat.name}{y_or_lat.variable.dims}"
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
def _lat_lon_provided(self, lat, lon, variable):
|
|
168
208
|
lat = lat[0]
|
|
169
209
|
lon = lon[0]
|
|
170
210
|
|
|
171
|
-
|
|
172
|
-
|
|
211
|
+
dim_vars, unstructured = self._check_dims(variable, lon, lat)
|
|
212
|
+
|
|
213
|
+
if (lat.name, lon.name, dim_vars) in self._cache:
|
|
214
|
+
return self._cache[(lat.name, lon.name, dim_vars)]
|
|
173
215
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
grid = MeshedGrid(lat, lon)
|
|
216
|
+
if unstructured:
|
|
217
|
+
grid = UnstructuredGrid(lat, lon, dim_vars)
|
|
177
218
|
else:
|
|
178
|
-
grid =
|
|
219
|
+
grid = MeshedGrid(lat, lon, dim_vars)
|
|
179
220
|
|
|
180
|
-
self._cache[(lat.name, lon.name)] = grid
|
|
221
|
+
self._cache[(lat.name, lon.name, dim_vars)] = grid
|
|
181
222
|
return grid
|
|
182
223
|
|
|
224
|
+
def _x_y_provided(self, x, y, variable):
    """Build a projection grid from exactly one X and one Y coordinate.

    Parameters
    ----------
    x, y : list
        One-element lists holding the X and Y coordinate objects.
    variable :
        The data variable being described; its ``attrs``, ``name`` and
        ``dims`` are read.

    Returns
    -------
    ``UnstructuredProjectionGrid`` or ``MeshProjectionGrid`` built from the
    two coordinates and the grid mapping that was found.

    Raises
    ------
    ValueError
        If the dimensions of ``x`` and ``y`` differ, or if several scalar
        variables look like a grid mapping.
    NotImplementedError
        If no grid mapping / CRS can be determined.

    The grid mapping is searched for, in order: the variable's
    ``grid_mapping`` attribute, a unique scalar dataset variable carrying a
    CRS-like attribute, the variable's ``crs`` attribute, and the dataset's
    global ``crs`` attribute.
    """
    x = x[0]
    y = y[0]

    _, unstructured = self._check_dims(variable, x, y)

    if x.variable.dims != y.variable.dims:
        raise ValueError(f"Dimensions do not match {x.name}{x.variable.dims} != {y.name}{y.variable.dims}")

    # NOTE(review): nothing in this method stores its result in the cache, so
    # this lookup only hits if another code path fills it - confirm intent.
    cache_key = (x.name, y.name)
    if cache_key in self._cache:
        return self._cache[cache_key]

    assert len(x.variable.shape) == len(y.variable.shape), (x.variable.shape, y.variable.shape)

    grid_mapping = variable.attrs.get("grid_mapping", None)

    if grid_mapping is None:
        LOG.warning(f"No 'grid_mapping' attribute provided for '{variable.name}'")
        LOG.warning("Trying to guess...")

        # Attribute names whose presence on a scalar variable suggests that
        # the variable describes a coordinate reference system.
        PROBE = {
            "prime_meridian_name",
            "reference_ellipsoid_name",
            "crs_wkt",
            "horizontal_datum_name",
            "semi_major_axis",
            "spatial_ref",
            "inverse_flattening",
            "semi_minor_axis",
            "geographic_crs_name",
            "GeoTransform",
            "grid_mapping_name",
            "longitude_of_prime_meridian",
        }
        candidate = None
        for v in self.ds.variables:
            var = self.ds[v]
            if not is_scalar(var):
                continue

            if PROBE.intersection(var.attrs.keys()):
                if candidate:
                    raise ValueError(f"Multiple candidates for 'grid_mapping': {candidate} and {v}")
                candidate = v

        if candidate:
            LOG.warning(f"Using '{candidate}' as 'grid_mapping'")
            grid_mapping = candidate
        else:
            LOG.warning("Could not find a candidate for 'grid_mapping'")

    # `variable` is already the data array (its attrs are read above), so it
    # must not be used as a key into the dataset.
    if grid_mapping is None and "crs" in variable.attrs:
        grid_mapping = variable.attrs["crs"]
        LOG.warning(f"Using CRS {grid_mapping} from variable '{variable.name}' attributes")

    if grid_mapping is None and "crs" in self.ds.attrs:
        grid_mapping = self.ds.attrs["crs"]
        LOG.warning(f"Using CRS {grid_mapping} from global attributes")

    if grid_mapping is None:
        LOG.error("Could not find a candidate for 'grid_mapping'")
        raise NotImplementedError(f"Unstructured grid {x.name} {y.name}")

    if unstructured:
        return UnstructuredProjectionGrid(x, y, grid_mapping)

    # NOTE(review): given the equal-dims check above, `_check_dims` appears to
    # always report `unstructured=True` here - confirm this branch is reachable.
    return MeshProjectionGrid(x, y, grid_mapping)
|
|
296
|
+
|
|
183
297
|
|
|
184
298
|
class DefaultCoordinateGuesser(CoordinateGuesser):
|
|
185
299
|
def __init__(self, ds):
|