anemoi-datasets 0.5.19__py3-none-any.whl → 0.5.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/compare-lam.py +401 -0
- anemoi/datasets/commands/grib-index.py +114 -0
- anemoi/datasets/create/filters/pressure_level_relative_humidity_to_specific_humidity.py +3 -1
- anemoi/datasets/create/filters/pressure_level_specific_humidity_to_relative_humidity.py +3 -1
- anemoi/datasets/create/filters/wz_to_w.py +3 -2
- anemoi/datasets/create/input/action.py +2 -0
- anemoi/datasets/create/input/result.py +1 -1
- anemoi/datasets/create/sources/anemoi_dataset.py +73 -0
- anemoi/datasets/create/sources/grib.py +7 -0
- anemoi/datasets/create/sources/grib_index.py +614 -0
- anemoi/datasets/create/sources/xarray_support/__init__.py +1 -1
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +2 -2
- anemoi/datasets/create/sources/xarray_support/flavour.py +6 -0
- anemoi/datasets/data/__init__.py +16 -0
- anemoi/datasets/data/complement.py +4 -1
- anemoi/datasets/data/dataset.py +14 -0
- anemoi/datasets/data/interpolate.py +76 -0
- anemoi/datasets/data/masked.py +77 -0
- anemoi/datasets/data/misc.py +159 -0
- anemoi/datasets/grids.py +8 -2
- {anemoi_datasets-0.5.19.dist-info → anemoi_datasets-0.5.20.dist-info}/METADATA +10 -4
- {anemoi_datasets-0.5.19.dist-info → anemoi_datasets-0.5.20.dist-info}/RECORD +27 -23
- {anemoi_datasets-0.5.19.dist-info → anemoi_datasets-0.5.20.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.19.dist-info → anemoi_datasets-0.5.20.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.19.dist-info → anemoi_datasets-0.5.20.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.19.dist-info → anemoi_datasets-0.5.20.dist-info}/top_level.txt +0 -0
|
@@ -17,6 +17,7 @@ from typing import Optional
|
|
|
17
17
|
from typing import Union
|
|
18
18
|
|
|
19
19
|
import earthkit.data as ekd
|
|
20
|
+
from anemoi.transform.flavour import RuleBasedFlavour
|
|
20
21
|
from earthkit.data import from_source
|
|
21
22
|
from earthkit.data.indexing.fieldlist import FieldArray
|
|
22
23
|
from earthkit.data.utils.patterns import Pattern
|
|
@@ -244,6 +245,7 @@ def execute(
|
|
|
244
245
|
path: Union[str, List[str]],
|
|
245
246
|
latitudes: Optional[Dict[str, Any]] = None,
|
|
246
247
|
longitudes: Optional[Dict[str, Any]] = None,
|
|
248
|
+
flavour: Optional[Union[str, Dict[str, Any]]] = None,
|
|
247
249
|
*args: Any,
|
|
248
250
|
**kwargs: Any,
|
|
249
251
|
) -> ekd.FieldList:
|
|
@@ -255,6 +257,7 @@ def execute(
|
|
|
255
257
|
path (Union[str, List[str]]): Path or list of paths to the GRIB files.
|
|
256
258
|
latitudes (Optional[Dict[str, Any]], optional): Latitude information. Defaults to None.
|
|
257
259
|
longitudes (Optional[Dict[str, Any]], optional): Longitude information. Defaults to None.
|
|
260
|
+
flavour (Optional[Union[str, Dict[str, Any]]], optional): Flavour information. Defaults to None.
|
|
258
261
|
*args (Any): Additional arguments.
|
|
259
262
|
**kwargs (Any): Additional keyword arguments.
|
|
260
263
|
|
|
@@ -264,6 +267,8 @@ def execute(
|
|
|
264
267
|
The loaded dataset.
|
|
265
268
|
"""
|
|
266
269
|
given_paths = path if isinstance(path, list) else [path]
|
|
270
|
+
if flavour is not None:
|
|
271
|
+
flavour = RuleBasedFlavour(flavour)
|
|
267
272
|
|
|
268
273
|
geography = None
|
|
269
274
|
if latitudes is not None and longitudes is not None:
|
|
@@ -282,6 +287,8 @@ def execute(
|
|
|
282
287
|
for path in _expand(paths):
|
|
283
288
|
context.trace("📁", "PATH", path)
|
|
284
289
|
s = from_source("file", path)
|
|
290
|
+
if flavour is not None:
|
|
291
|
+
s = flavour.map(s)
|
|
285
292
|
s = s.sel(valid_datetime=dates, **kwargs)
|
|
286
293
|
ds = ds + s
|
|
287
294
|
|
|
@@ -0,0 +1,614 @@
|
|
|
1
|
+
# (C) Copyright 2025 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import os
|
|
12
|
+
import sqlite3
|
|
13
|
+
from typing import Any
|
|
14
|
+
from typing import Iterator
|
|
15
|
+
from typing import List
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
import earthkit.data as ekd
|
|
19
|
+
import tqdm
|
|
20
|
+
from anemoi.transform.flavour import RuleBasedFlavour
|
|
21
|
+
from cachetools import LRUCache
|
|
22
|
+
from earthkit.data.indexing.fieldlist import FieldArray
|
|
23
|
+
|
|
24
|
+
from .legacy import legacy_source
|
|
25
|
+
|
|
26
|
+
LOG = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
KEYS1 = ("class", "type", "stream", "expver", "levtype")
|
|
29
|
+
KEYS2 = ("shortName", "paramId", "level", "step", "number", "date", "time", "valid_datetime", "levelist")
|
|
30
|
+
|
|
31
|
+
KEYS = KEYS1 + KEYS2
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class GribIndex:
|
|
35
|
+
def __init__(
|
|
36
|
+
self,
|
|
37
|
+
database: str,
|
|
38
|
+
*,
|
|
39
|
+
keys: Optional[List[str] | str] = None,
|
|
40
|
+
flavour: Optional[str] = None,
|
|
41
|
+
update: bool = False,
|
|
42
|
+
overwrite: bool = False,
|
|
43
|
+
) -> None:
|
|
44
|
+
"""Initialize the GribIndex object.
|
|
45
|
+
|
|
46
|
+
Parameters
|
|
47
|
+
----------
|
|
48
|
+
database : str
|
|
49
|
+
Path to the SQLite database file.
|
|
50
|
+
keys : Optional[List[str] | str], optional
|
|
51
|
+
List of keys or a string of keys to use for indexing, by default None.
|
|
52
|
+
flavour : Optional[str], optional
|
|
53
|
+
Flavour configuration for mapping fields, by default None.
|
|
54
|
+
update : bool, optional
|
|
55
|
+
Whether to update the database, by default False.
|
|
56
|
+
overwrite : bool, optional
|
|
57
|
+
Whether to overwrite the database if it exists, by default False.
|
|
58
|
+
"""
|
|
59
|
+
self.database = database
|
|
60
|
+
if overwrite:
|
|
61
|
+
assert update
|
|
62
|
+
if os.path.exists(database):
|
|
63
|
+
os.remove(database)
|
|
64
|
+
|
|
65
|
+
if not update:
|
|
66
|
+
if not os.path.exists(database):
|
|
67
|
+
raise FileNotFoundError(f"Database {database} does not exist")
|
|
68
|
+
|
|
69
|
+
if keys is not None:
|
|
70
|
+
if isinstance(keys, str):
|
|
71
|
+
if keys.startswith("+"):
|
|
72
|
+
keys = set(KEYS) | set(keys[1:].split(","))
|
|
73
|
+
else:
|
|
74
|
+
keys = set(",".split(keys.split(",")))
|
|
75
|
+
keys = list(keys)
|
|
76
|
+
|
|
77
|
+
self.conn = sqlite3.connect(database)
|
|
78
|
+
self.cursor = self.conn.cursor()
|
|
79
|
+
|
|
80
|
+
if flavour is not None:
|
|
81
|
+
self.flavour = RuleBasedFlavour(flavour)
|
|
82
|
+
else:
|
|
83
|
+
self.flavour = None
|
|
84
|
+
|
|
85
|
+
self.update = update
|
|
86
|
+
self.cache = None
|
|
87
|
+
self.keys = keys
|
|
88
|
+
self._columns = None
|
|
89
|
+
|
|
90
|
+
if update:
|
|
91
|
+
if self.keys is None:
|
|
92
|
+
self.keys = KEYS
|
|
93
|
+
LOG.info(f"Using keys: {sorted(self.keys)}")
|
|
94
|
+
self._create_tables()
|
|
95
|
+
else:
|
|
96
|
+
assert keys is None
|
|
97
|
+
self.keys = self._all_columns()
|
|
98
|
+
self.cache = LRUCache(maxsize=50)
|
|
99
|
+
|
|
100
|
+
self.warnings = {}
|
|
101
|
+
self.cache = {}
|
|
102
|
+
|
|
103
|
+
def _create_tables(self) -> None:
|
|
104
|
+
"""Create the necessary tables in the database."""
|
|
105
|
+
assert self.update
|
|
106
|
+
|
|
107
|
+
self.cursor.execute(
|
|
108
|
+
"""
|
|
109
|
+
CREATE TABLE IF NOT EXISTS paths (
|
|
110
|
+
id INTEGER PRIMARY KEY,
|
|
111
|
+
path TEXT not null
|
|
112
|
+
)
|
|
113
|
+
"""
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
columns = ("valid_datetime",)
|
|
117
|
+
# We don't use NULL as a default because NULL is considered a different value
|
|
118
|
+
# in UNIQUE INDEX constraints (https://www.sqlite.org/lang_createindex.html)
|
|
119
|
+
|
|
120
|
+
self.cursor.execute(
|
|
121
|
+
f"""
|
|
122
|
+
CREATE TABLE IF NOT EXISTS grib_index (
|
|
123
|
+
_id INTEGER PRIMARY KEY,
|
|
124
|
+
_path_id INTEGER not null,
|
|
125
|
+
_offset INTEGER not null,
|
|
126
|
+
_length INTEGER not null,
|
|
127
|
+
{', '.join(f"{key} TEXT not null default ''" for key in columns)},
|
|
128
|
+
FOREIGN KEY(_path_id) REFERENCES paths(id))
|
|
129
|
+
"""
|
|
130
|
+
) # ,
|
|
131
|
+
|
|
132
|
+
self.cursor.execute(
|
|
133
|
+
"""
|
|
134
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_grib_index_path_offset
|
|
135
|
+
ON grib_index (_path_id, _offset)
|
|
136
|
+
"""
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
self.cursor.execute(
|
|
140
|
+
f"""
|
|
141
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_grib_index_all_keys
|
|
142
|
+
ON grib_index ({', '.join(columns)})
|
|
143
|
+
"""
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
for key in columns:
|
|
147
|
+
self.cursor.execute(
|
|
148
|
+
f"""
|
|
149
|
+
CREATE INDEX IF NOT EXISTS idx_grib_index_{key}
|
|
150
|
+
ON grib_index ({key})
|
|
151
|
+
"""
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
self._commit()
|
|
155
|
+
|
|
156
|
+
def _commit(self) -> None:
|
|
157
|
+
"""Commit the current transaction to the database."""
|
|
158
|
+
self.conn.commit()
|
|
159
|
+
|
|
160
|
+
def _get_metadata_keys(self) -> List[str]:
|
|
161
|
+
"""Retrieve the metadata keys from the database.
|
|
162
|
+
|
|
163
|
+
Returns
|
|
164
|
+
-------
|
|
165
|
+
List[str]
|
|
166
|
+
A list of metadata keys stored in the database.
|
|
167
|
+
"""
|
|
168
|
+
self.cursor.execute("SELECT key FROM metadata_keys")
|
|
169
|
+
return [row[0] for row in self.cursor.fetchall()]
|
|
170
|
+
|
|
171
|
+
def _path_id(self, path: str) -> int:
|
|
172
|
+
"""Get the id of a path in the database.
|
|
173
|
+
|
|
174
|
+
Parameters
|
|
175
|
+
----------
|
|
176
|
+
path : str
|
|
177
|
+
The file path to retrieve or insert.
|
|
178
|
+
|
|
179
|
+
Returns
|
|
180
|
+
-------
|
|
181
|
+
int
|
|
182
|
+
The ID of the path in the database.
|
|
183
|
+
"""
|
|
184
|
+
self.cursor.execute("SELECT id FROM paths WHERE path = ?", (path,))
|
|
185
|
+
row = self.cursor.fetchone()
|
|
186
|
+
if row is None:
|
|
187
|
+
self.cursor.execute("INSERT INTO paths (path) VALUES (?)", (path,))
|
|
188
|
+
self._commit()
|
|
189
|
+
return self.cursor.lastrowid
|
|
190
|
+
return row[0]
|
|
191
|
+
|
|
192
|
+
def _add_grib(self, **kwargs: Any) -> None:
|
|
193
|
+
"""Add a GRIB record to the database.
|
|
194
|
+
|
|
195
|
+
Parameters
|
|
196
|
+
----------
|
|
197
|
+
**kwargs : Any
|
|
198
|
+
Key-value pairs representing the GRIB record fields.
|
|
199
|
+
"""
|
|
200
|
+
assert self.update
|
|
201
|
+
|
|
202
|
+
try:
|
|
203
|
+
|
|
204
|
+
self.cursor.execute(
|
|
205
|
+
f"""
|
|
206
|
+
INSERT INTO grib_index ({', '.join(kwargs.keys())})
|
|
207
|
+
VALUES ({', '.join('?' for _ in kwargs)})
|
|
208
|
+
""",
|
|
209
|
+
tuple(kwargs.values()),
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
except sqlite3.IntegrityError:
|
|
213
|
+
LOG.error(f"Error adding grib record: {kwargs}")
|
|
214
|
+
LOG.error("Record already exists")
|
|
215
|
+
LOG.info(f"Path: {self._get_path(kwargs['_path_id'])}")
|
|
216
|
+
for n in ("_path_id", "_offset", "_length"):
|
|
217
|
+
kwargs.pop(n)
|
|
218
|
+
self.cursor.execute(
|
|
219
|
+
"SELECT * FROM grib_index WHERE " + " AND ".join(f"{key} = ?" for key in kwargs.keys()),
|
|
220
|
+
tuple(kwargs.values()),
|
|
221
|
+
)
|
|
222
|
+
existing_record = self.cursor.fetchone()
|
|
223
|
+
if existing_record:
|
|
224
|
+
LOG.info(f"Existing record found: {existing_record}")
|
|
225
|
+
LOG.info(f"Path: {self._get_path(existing_record[1])}")
|
|
226
|
+
raise
|
|
227
|
+
|
|
228
|
+
def _all_columns(self) -> List[str]:
|
|
229
|
+
"""Retrieve all column names from the grib_index table.
|
|
230
|
+
|
|
231
|
+
Returns
|
|
232
|
+
-------
|
|
233
|
+
List[str]
|
|
234
|
+
A list of column names.
|
|
235
|
+
"""
|
|
236
|
+
if self._columns is not None:
|
|
237
|
+
return self._columns
|
|
238
|
+
|
|
239
|
+
self.cursor.execute("PRAGMA table_info(grib_index)")
|
|
240
|
+
columns = {row[1] for row in self.cursor.fetchall()}
|
|
241
|
+
self._columns = [col for col in columns if not col.startswith("_")]
|
|
242
|
+
return self._columns
|
|
243
|
+
|
|
244
|
+
def _ensure_columns(self, columns: List[str]) -> None:
|
|
245
|
+
"""Add missing columns to the grib_index table.
|
|
246
|
+
|
|
247
|
+
Parameters
|
|
248
|
+
----------
|
|
249
|
+
columns : List[str]
|
|
250
|
+
List of column names to ensure in the table.
|
|
251
|
+
"""
|
|
252
|
+
assert self.update
|
|
253
|
+
|
|
254
|
+
existing_columns = self._all_columns()
|
|
255
|
+
new_columns = [column for column in columns if column not in existing_columns]
|
|
256
|
+
|
|
257
|
+
if not new_columns:
|
|
258
|
+
return
|
|
259
|
+
|
|
260
|
+
self._columns = None
|
|
261
|
+
|
|
262
|
+
for column in new_columns:
|
|
263
|
+
self.cursor.execute(f"ALTER TABLE grib_index ADD COLUMN {column} TEXT not null default ''")
|
|
264
|
+
|
|
265
|
+
self.cursor.execute("""DROP INDEX IF EXISTS idx_grib_index_all_keys""")
|
|
266
|
+
all_columns = self._all_columns()
|
|
267
|
+
|
|
268
|
+
self.cursor.execute(
|
|
269
|
+
f"""
|
|
270
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_grib_index_all_keys
|
|
271
|
+
ON grib_index ({', '.join(all_columns)})
|
|
272
|
+
"""
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
for key in all_columns:
|
|
276
|
+
self.cursor.execute(
|
|
277
|
+
f"""
|
|
278
|
+
CREATE INDEX IF NOT EXISTS idx_grib_index_{key}
|
|
279
|
+
ON grib_index ({key})
|
|
280
|
+
"""
|
|
281
|
+
)
|
|
282
|
+
|
|
283
|
+
def add_grib_file(self, path: str) -> None:
|
|
284
|
+
"""Add a GRIB file to the database.
|
|
285
|
+
|
|
286
|
+
Parameters
|
|
287
|
+
----------
|
|
288
|
+
path : str
|
|
289
|
+
Path to the GRIB file to add.
|
|
290
|
+
"""
|
|
291
|
+
path_id = self._path_id(path)
|
|
292
|
+
|
|
293
|
+
fields = ekd.from_source("file", path)
|
|
294
|
+
if self.flavour is not None:
|
|
295
|
+
fields = self.flavour.map(fields)
|
|
296
|
+
|
|
297
|
+
for i, field in enumerate(tqdm.tqdm(fields, leave=False)):
|
|
298
|
+
|
|
299
|
+
keys = field.metadata(namespace="mars").copy()
|
|
300
|
+
keys.update({k: field.metadata(k, default=None) for k in self.keys})
|
|
301
|
+
|
|
302
|
+
keys.setdefault("param", keys.get("shortName", keys.get("paramId")))
|
|
303
|
+
|
|
304
|
+
keys = {k: v for k, v in keys.items() if v is not None}
|
|
305
|
+
|
|
306
|
+
if keys.get("param") in (0, "unknown"):
|
|
307
|
+
param = (
|
|
308
|
+
field.metadata("discipline", default=None),
|
|
309
|
+
field.metadata("parameterCategory", default=None),
|
|
310
|
+
field.metadata("parameterNumber", default=None),
|
|
311
|
+
)
|
|
312
|
+
if param not in self.warnings:
|
|
313
|
+
self._unknown(path, field, i, param)
|
|
314
|
+
self.warnings[param] = True
|
|
315
|
+
|
|
316
|
+
self._ensure_columns(list(keys.keys()))
|
|
317
|
+
|
|
318
|
+
self._add_grib(
|
|
319
|
+
_path_id=path_id,
|
|
320
|
+
_offset=field.metadata("offset"),
|
|
321
|
+
_length=field.metadata("totalLength"),
|
|
322
|
+
**keys,
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
self._commit()
|
|
326
|
+
|
|
327
|
+
def _paramdb(self, category: int, discipline: int) -> Optional[dict]:
|
|
328
|
+
"""Fetch parameter information from the parameter database.
|
|
329
|
+
|
|
330
|
+
Parameters
|
|
331
|
+
----------
|
|
332
|
+
category : int
|
|
333
|
+
The parameter category.
|
|
334
|
+
discipline : int
|
|
335
|
+
The parameter discipline.
|
|
336
|
+
|
|
337
|
+
Returns
|
|
338
|
+
-------
|
|
339
|
+
Optional[dict]
|
|
340
|
+
The parameter information, or None if unavailable.
|
|
341
|
+
"""
|
|
342
|
+
if (category, discipline) in self.cache:
|
|
343
|
+
return self.cache[(category, discipline)]
|
|
344
|
+
|
|
345
|
+
try:
|
|
346
|
+
import requests
|
|
347
|
+
|
|
348
|
+
r = requests.get(
|
|
349
|
+
f"https://codes.ecmwf.int/parameter-database/api/v1/param?category={category}&discipline={discipline}"
|
|
350
|
+
)
|
|
351
|
+
r.raise_for_status()
|
|
352
|
+
self.cache[(category, discipline)] = r.json()
|
|
353
|
+
return self.cache[(category, discipline)]
|
|
354
|
+
|
|
355
|
+
except Exception as e:
|
|
356
|
+
LOG.warning(f"Failed to fetch information from parameter database: {e}")
|
|
357
|
+
|
|
358
|
+
def _param_grib2_info(self, paramId: int) -> List[dict]:
|
|
359
|
+
"""Fetch GRIB2 parameter information for a given parameter ID.
|
|
360
|
+
|
|
361
|
+
Parameters
|
|
362
|
+
----------
|
|
363
|
+
paramId : int
|
|
364
|
+
The parameter ID.
|
|
365
|
+
|
|
366
|
+
Returns
|
|
367
|
+
-------
|
|
368
|
+
List[dict]
|
|
369
|
+
A list of GRIB2 parameter information.
|
|
370
|
+
"""
|
|
371
|
+
if ("grib2", paramId) in self.cache:
|
|
372
|
+
return self.cache[("grib2", paramId)]
|
|
373
|
+
|
|
374
|
+
try:
|
|
375
|
+
import requests
|
|
376
|
+
|
|
377
|
+
r = requests.get(f"https://codes.ecmwf.int/parameter-database/api/v1/param/{paramId}/grib2/")
|
|
378
|
+
r.raise_for_status()
|
|
379
|
+
self.cache[("grib2", paramId)] = r.json()
|
|
380
|
+
return self.cache[("grib2", paramId)]
|
|
381
|
+
|
|
382
|
+
except Exception as e:
|
|
383
|
+
LOG.warning(f"Failed to fetch information from parameter database: {e}")
|
|
384
|
+
return []
|
|
385
|
+
|
|
386
|
+
def _param_id_info(self, paramId: int) -> Optional[dict]:
|
|
387
|
+
"""Fetch detailed information for a given parameter ID.
|
|
388
|
+
|
|
389
|
+
Parameters
|
|
390
|
+
----------
|
|
391
|
+
paramId : int
|
|
392
|
+
The parameter ID.
|
|
393
|
+
|
|
394
|
+
Returns
|
|
395
|
+
-------
|
|
396
|
+
Optional[dict]
|
|
397
|
+
The parameter information, or None if unavailable.
|
|
398
|
+
"""
|
|
399
|
+
if ("info", paramId) in self.cache:
|
|
400
|
+
return self.cache[("info", paramId)]
|
|
401
|
+
|
|
402
|
+
try:
|
|
403
|
+
import requests
|
|
404
|
+
|
|
405
|
+
r = requests.get(f"https://codes.ecmwf.int/parameter-database/api/v1/param/{paramId}/")
|
|
406
|
+
r.raise_for_status()
|
|
407
|
+
self.cache[("info", paramId)] = r.json()
|
|
408
|
+
return self.cache[("info", paramId)]
|
|
409
|
+
|
|
410
|
+
except Exception as e:
|
|
411
|
+
LOG.warning(f"Failed to fetch information from parameter database: {e}")
|
|
412
|
+
|
|
413
|
+
return None
|
|
414
|
+
|
|
415
|
+
def _param_id_unit(self, unitId: int) -> Optional[dict]:
|
|
416
|
+
"""Fetch unit information for a given unit ID.
|
|
417
|
+
|
|
418
|
+
Parameters
|
|
419
|
+
----------
|
|
420
|
+
unitId : int
|
|
421
|
+
The unit ID.
|
|
422
|
+
|
|
423
|
+
Returns
|
|
424
|
+
-------
|
|
425
|
+
Optional[dict]
|
|
426
|
+
The unit information, or None if unavailable.
|
|
427
|
+
"""
|
|
428
|
+
if ("unit", unitId) in self.cache:
|
|
429
|
+
return self.cache[("unit", unitId)]
|
|
430
|
+
|
|
431
|
+
try:
|
|
432
|
+
import requests
|
|
433
|
+
|
|
434
|
+
r = requests.get(f"https://codes.ecmwf.int/parameter-database/api/v1/unit/{unitId}/")
|
|
435
|
+
r.raise_for_status()
|
|
436
|
+
self.cache[("unit", unitId)] = r.json()
|
|
437
|
+
return self.cache[("unit", unitId)]
|
|
438
|
+
|
|
439
|
+
except Exception as e:
|
|
440
|
+
LOG.warning(f"Failed to fetch information from parameter database: {e}")
|
|
441
|
+
|
|
442
|
+
return None
|
|
443
|
+
|
|
444
|
+
def _unknown(self, path: str, field: ekd.Field, i: int, param: tuple) -> None:
|
|
445
|
+
"""Log information about unknown parameters.
|
|
446
|
+
|
|
447
|
+
Parameters
|
|
448
|
+
----------
|
|
449
|
+
path : str
|
|
450
|
+
Path to the GRIB file.
|
|
451
|
+
field : ekd.Field
|
|
452
|
+
The GRIB field object.
|
|
453
|
+
i : int
|
|
454
|
+
The index of the field in the file.
|
|
455
|
+
param : tuple
|
|
456
|
+
The parameter tuple (discipline, category, parameterNumber).
|
|
457
|
+
"""
|
|
458
|
+
|
|
459
|
+
def _(s):
|
|
460
|
+
try:
|
|
461
|
+
return int(s)
|
|
462
|
+
except ValueError:
|
|
463
|
+
return s
|
|
464
|
+
|
|
465
|
+
LOG.warning(
|
|
466
|
+
f"Unknown param for message {i+1} in {path} at offset {int(field.metadata('offset', default=None))}"
|
|
467
|
+
)
|
|
468
|
+
LOG.warning(
|
|
469
|
+
f"shortName/paramId: {field.metadata('shortName', default=None)}/{field.metadata('paramId', default=None)}"
|
|
470
|
+
)
|
|
471
|
+
name = field.metadata("parameterName", default=None)
|
|
472
|
+
units = field.metadata("parameterUnits", default=None)
|
|
473
|
+
LOG.warning(f"Discipline/category/parameter: {param} ({name}, {units})")
|
|
474
|
+
LOG.warning(f"grib_copy -w count={i+1} {path} tmp.grib")
|
|
475
|
+
|
|
476
|
+
info = self._paramdb(discipline=param[0], category=param[1])
|
|
477
|
+
found = set()
|
|
478
|
+
if info is not None:
|
|
479
|
+
for n in tqdm.tqdm(info, desc="Scanning parameter database"):
|
|
480
|
+
|
|
481
|
+
for p in self._param_grib2_info(n["id"]):
|
|
482
|
+
|
|
483
|
+
keys = {k["name"]: _(k["value"]) for k in p["keys"]}
|
|
484
|
+
if keys.get("parameterNumber") == param[2]:
|
|
485
|
+
found.add(n["id"])
|
|
486
|
+
|
|
487
|
+
for n in found:
|
|
488
|
+
info = self._param_id_info(n)
|
|
489
|
+
if "unit_id" in info:
|
|
490
|
+
info["unit_id"] = self._param_id_unit(info["unit_id"])["name"]
|
|
491
|
+
|
|
492
|
+
LOG.info("%s", f"Possible match: {n}")
|
|
493
|
+
LOG.info("%s", f" Name: {info.get('name')}")
|
|
494
|
+
LOG.info("%s", f" Short name: {info.get('shortname')}")
|
|
495
|
+
LOG.info("%s", f" Units: {info.get('unit_id')}")
|
|
496
|
+
LOG.info("%s", f" Description: {info.get('description')}")
|
|
497
|
+
LOG.info("")
|
|
498
|
+
|
|
499
|
+
def _get_path(self, path_id: int) -> str:
|
|
500
|
+
"""Retrieve the path corresponding to a given path_id.
|
|
501
|
+
|
|
502
|
+
Parameters
|
|
503
|
+
----------
|
|
504
|
+
path_id : int
|
|
505
|
+
The ID of the path to retrieve.
|
|
506
|
+
|
|
507
|
+
Returns
|
|
508
|
+
-------
|
|
509
|
+
str
|
|
510
|
+
The path corresponding to the given path_id.
|
|
511
|
+
|
|
512
|
+
Raises
|
|
513
|
+
------
|
|
514
|
+
ValueError
|
|
515
|
+
If the path_id does not exist in the database.
|
|
516
|
+
"""
|
|
517
|
+
self.cursor.execute("SELECT path FROM paths WHERE id = ?", (path_id,))
|
|
518
|
+
row = self.cursor.fetchone()
|
|
519
|
+
if row is None:
|
|
520
|
+
raise ValueError(f"No path found for path_id {path_id}")
|
|
521
|
+
return row[0]
|
|
522
|
+
|
|
523
|
+
def retrieve(self, dates: List[Any], **kwargs: Any) -> Iterator[Any]:
|
|
524
|
+
"""Retrieve GRIB data from the database.
|
|
525
|
+
|
|
526
|
+
Parameters
|
|
527
|
+
----------
|
|
528
|
+
dates : List[Any]
|
|
529
|
+
List of dates to retrieve data for.
|
|
530
|
+
**kwargs : Any
|
|
531
|
+
Additional filtering criteria.
|
|
532
|
+
|
|
533
|
+
Returns
|
|
534
|
+
------
|
|
535
|
+
Iterator[Any]
|
|
536
|
+
The GRIB data matching the criteria.
|
|
537
|
+
"""
|
|
538
|
+
assert not self.update
|
|
539
|
+
|
|
540
|
+
dates = [d.isoformat() for d in dates]
|
|
541
|
+
|
|
542
|
+
query = """SELECT _path_id, _offset, _length
|
|
543
|
+
FROM grib_index WHERE valid_datetime IN ({})""".format(
|
|
544
|
+
", ".join("?" for _ in dates)
|
|
545
|
+
)
|
|
546
|
+
params = dates
|
|
547
|
+
|
|
548
|
+
for k, v in kwargs.items():
|
|
549
|
+
if isinstance(v, list):
|
|
550
|
+
query += f" AND {k} IN ({', '.join('?' for _ in v)})"
|
|
551
|
+
params.extend([str(_) for _ in v])
|
|
552
|
+
else:
|
|
553
|
+
query += f" AND {k} = ?"
|
|
554
|
+
params.append(str(v))
|
|
555
|
+
|
|
556
|
+
print("SELECT", query)
|
|
557
|
+
print("SELECT", params)
|
|
558
|
+
|
|
559
|
+
self.cursor.execute(query, params)
|
|
560
|
+
for path_id, offset, length in self.cursor.fetchall():
|
|
561
|
+
if path_id in self.cache:
|
|
562
|
+
file = self.cache[path_id]
|
|
563
|
+
else:
|
|
564
|
+
path = self._get_path(path_id)
|
|
565
|
+
LOG.info(f"Opening {path}")
|
|
566
|
+
self.cache[path_id] = open(path, "rb")
|
|
567
|
+
file = self.cache[path_id]
|
|
568
|
+
|
|
569
|
+
file.seek(offset)
|
|
570
|
+
data = file.read(length)
|
|
571
|
+
yield data
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
@legacy_source(__file__)
def execute(
    context: Any,
    dates: List[Any],
    indexdb: str,
    flavour: Optional[str] = None,
    **kwargs: Any,
) -> FieldArray:
    """Load GRIB fields for the given dates from a pre-built GRIB index.

    Parameters
    ----------
    context : Any
        The execution context.
    dates : List[Any]
        List of dates to retrieve data for.
    indexdb : str
        Path to the GRIB index database.
    flavour : Optional[str], optional
        Flavour configuration for mapping fields, by default None.
    **kwargs : Any
        Additional filtering criteria.

    Returns
    -------
    FieldArray
        An array of retrieved GRIB fields.
    """
    index = GribIndex(indexdb)

    if flavour is not None:
        flavour = RuleBasedFlavour(flavour)

    fields = []
    for message in index.retrieve(dates, **kwargs):
        # Each retrieved message is a raw GRIB buffer; decode it to a field.
        decoded = ekd.from_source("memory", message)[0]
        fields.append(flavour.apply(decoded) if flavour else decoded)

    return FieldArray(fields)
|
|
@@ -90,7 +90,7 @@ def load_one(
|
|
|
90
90
|
"""
|
|
91
91
|
|
|
92
92
|
"""
|
|
93
|
-
We manage the S3 client
|
|
93
|
+
We manage the S3 client ourselves, bypassing fsspec and s3fs layers, because sometimes something on the stack
|
|
94
94
|
zarr/fsspec/s3fs/boto3 (?) seem to flags files as missing when they actually are not (maybe when S3 reports some sort of
|
|
95
95
|
connection error). In that case, Zarr will silently fill the chunks that could not be downloaded with NaNs.
|
|
96
96
|
See https://github.com/pydata/xarray/issues/8842
|
|
@@ -165,8 +165,8 @@ class XarrayFieldList(FieldList):
|
|
|
165
165
|
c.is_dim = False
|
|
166
166
|
coordinates.append(c)
|
|
167
167
|
|
|
168
|
-
grid_coords: int = sum(1 for c in coordinates if c.is_grid
|
|
169
|
-
assert grid_coords <= 2
|
|
168
|
+
grid_coords: int = sum(1 for c in coordinates if c.is_grid)
|
|
169
|
+
# assert grid_coords <= 2, [c for c in coordinates if c.is_grid]
|
|
170
170
|
|
|
171
171
|
if grid_coords < 2:
|
|
172
172
|
LOG.debug("Skipping %s (not 2D): %s", variable, [(c, c.is_grid, c.is_dim) for c in coordinates])
|
|
@@ -565,6 +565,9 @@ class DefaultCoordinateGuesser(CoordinateGuesser):
|
|
|
565
565
|
Optional[LongitudeCoordinate]
|
|
566
566
|
The LongitudeCoordinate if matched, else None.
|
|
567
567
|
"""
|
|
568
|
+
|
|
569
|
+
# https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#longitude-coordinate
|
|
570
|
+
|
|
568
571
|
if attributes.standard_name == "longitude":
|
|
569
572
|
return LongitudeCoordinate(c)
|
|
570
573
|
|
|
@@ -591,6 +594,9 @@ class DefaultCoordinateGuesser(CoordinateGuesser):
|
|
|
591
594
|
Optional[LatitudeCoordinate]
|
|
592
595
|
The LatitudeCoordinate if matched, else None.
|
|
593
596
|
"""
|
|
597
|
+
|
|
598
|
+
# https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#latitude-coordinate
|
|
599
|
+
|
|
594
600
|
if attributes.standard_name == "latitude":
|
|
595
601
|
return LatitudeCoordinate(c)
|
|
596
602
|
|