axsdb-0.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- axsdb/__init__.py +27 -0
- axsdb/_version.py +8 -0
- axsdb/cli.py +75 -0
- axsdb/core.py +893 -0
- axsdb/error.py +200 -0
- axsdb/factory.py +135 -0
- axsdb/py.typed +0 -0
- axsdb/typing.py +4 -0
- axsdb/units.py +72 -0
- axsdb-0.0.2.dist-info/METADATA +30 -0
- axsdb-0.0.2.dist-info/RECORD +13 -0
- axsdb-0.0.2.dist-info/WHEEL +4 -0
- axsdb-0.0.2.dist-info/entry_points.txt +2 -0
axsdb/core.py
ADDED
@@ -0,0 +1,893 @@
from __future__ import annotations

import errno
import glob
import json
import logging
import os
import re
import textwrap
from pathlib import Path
from typing import Any, Callable, Hashable, Literal

import attrs
import numpy as np
import pandas as pd
import pint
import xarray as xr
from cachetools import LRUCache, cachedmethod

from .error import (
    DataError,
    ErrorHandlingAction,
    ErrorHandlingConfiguration,
    get_error_handling_config,
)
from .typing import PathLike
from .units import ensure_units, ureg, xarray_to_quantity

logger = logging.getLogger("axsdb")


@attrs.define(repr=False, eq=False)
class AbsorptionDatabase:
    """
    Common parent type for absorption coefficient databases.

    This class implements most of the data indexing logic common to all
    absorption coefficient databases.
    A database is composed of a set of NetCDF files compliant with the
    absorption coefficient database format specification and placed in the
    same directory. A database instance is initialized by specifying the path
    to the directory where the files are stored.

    If it exists, a ``metadata.json`` file is loaded into the :attr:`metadata`
    attribute.

    Databases are usually not initialized using the constructor, but rather
    using the class method constructors :meth:`from_directory` and
    :meth:`from_dict`.

    Parameters
    ----------
    dir_path : path-like
        Path to database root directory.

    index : DataFrame
        File index, assumed sorted by ascending wavelengths.

    spectral_coverage : DataFrame
        Dataframe that unrolls the spectral information contained in all data
        files in the database.

    metadata : dict, optional
        Dictionary that contains the database metadata.

    cache : cachetools.LRUCache, optional
        A mapping that implements an LRU caching policy.

    error_handling_config : ErrorHandlingConfiguration, optional
        Default error handling policy. If unset, a global default is used.

    Notes
    -----
    A file index, stored as the :attr:`_index` private attribute, associates
    to each file the spectral region it covers. The index is preferably loaded
    from a CSV file that contains all this information; if it is not found, the
    table is built upon database initialization and saved to the database
    directory. The indexing step requires accessing all files and may take a
    while. The file index table is used during queries to efficiently select
    the file from which data will be read. For convenience, information about
    bounds contained in the index is assembled into a spectral mesh suitable
    for querying with :func:`numpy.digitize` and stored in the :attr:`_chunks`
    dictionary.

    A spectral coverage table, stored as the :attr:`_spectral_coverage` private
    attribute, merges the spectral coordinates of all files into a consistent
    index. This table is used to provide spectral coverage information to
    higher-level components that drive the simulation. Table contents are
    preferably loaded from a CSV file; if it is not found, the table is built
    upon database initialization and saved to the database directory. This
    indexing step also requires accessing all files and may take a while.

    Database access and memory usage can be controlled through two parameters:

    * File queries are stored in an LRU cache. The initial size is set to a low
      value (8) and should be appropriate for most situations. If more cache
      control is needed, the :meth:`cache_clear`, :meth:`cache_close` and
      :meth:`cache_reset` methods can be used.
    * Datasets can be opened with an eager or lazy approach. This behaviour is
      controlled using the ``lazy`` constructor parameter. In eager mode, the
      entire file used for a query is loaded into memory. This can bring
      significant access overhead when using large files. If desired, datasets
      can instead be opened lazily, triggering disk access only for the
      specific data that are used.
    """

    #: Path to database root directory.
    _dir_path: Path = attrs.field(converter=lambda x: Path(x).absolute().resolve())

    @_dir_path.validator
    def _dir_path_validator(self, attribute, value):
        if not value.is_dir():
            raise ValueError(
                f"while validating '{attribute.name}': path '{value}' is not a "
                "directory"
            )

    #: File index, assumed sorted by ascending wavelengths.
    _index: pd.DataFrame = attrs.field(repr=False)

    @_index.validator
    def _index_validator(self, attribute, value):
        if value.empty:
            raise ValueError(f"while validating '{attribute.name}': index is empty")

        wavelengths = value["wl_min [nm]"].values
        if not np.all(wavelengths[:-1] < wavelengths[1:]):
            raise ValueError(
                f"while validating '{attribute.name}': index must be sorted by "
                "ascending wavelength values"
            )

    #: Dataframe that unrolls the spectral information contained in all data
    #: files in the database.
    _spectral_coverage: pd.DataFrame = attrs.field(repr=False)

    #: Dictionary that contains the database metadata.
    _metadata: dict = attrs.field(factory=dict, repr=False)

    #: Dictionary mapping spectral lookup mode keys ('wl' or 'wn') to arrays
    #: containing the nodes of the spectral chunk mesh, which is used to perform
    #: spectral coordinate-based file lookup.
    _chunks: dict[str, np.ndarray] = attrs.field(factory=dict, repr=False, init=False)

    #: Access mode switch: if ``True``, load data lazily; else, load data eagerly.
    lazy: bool = attrs.field(default=False, repr=False)

    #: A mapping that implements an LRU caching policy.
    _cache: LRUCache = attrs.field(factory=lambda: LRUCache(8), repr=False)

    #: Default error handling policy. If unset, the global default is used.
    _error_handling_config: ErrorHandlingConfiguration | None = attrs.field(
        default=None
    )

    @property
    def error_handling_config(self) -> ErrorHandlingConfiguration:
        """
        Default error handling policy. If unset, the global default is used.
        """
        return (
            self._error_handling_config
            if self._error_handling_config is not None
            else get_error_handling_config()
        )

    @error_handling_config.setter
    def error_handling_config(self, value: Any) -> None:
        try:
            self._error_handling_config = (
                None if value is None else ErrorHandlingConfiguration.convert(value)
            )
        except Exception as e:
            raise ValueError(
                "value cannot be converted to an ErrorHandlingConfiguration"
            ) from e

    def __attrs_post_init__(self):
        # Parse field names and units
        regex = re.compile(r"(?P<coord>.*)_(?P<minmax>min|max) \[(?P<units>.*)\]")
        quantities = {}
        for colname in self._index.columns:
            if colname == "filename":
                continue

            m = regex.match(colname)
            units = m.group("units")
            magnitude = self._index[colname].values
            quantities[f"{m.group('coord')}_{m.group('minmax')}"] = ureg.Quantity(
                magnitude, units
            )

        # Populate spectral mesh (nodes) for both wavelength and wavenumber
        # lookup modes
        self._chunks["wl"] = np.concatenate(
            (quantities["wl_min"], [quantities["wl_max"][-1]])
        )
        self._chunks["wn"] = np.concatenate(
            (quantities["wn_max"], [quantities["wn_min"][-1]])
        )

    def __repr__(self) -> str:
        with pd.option_context("display.max_columns", 4):
            result = (
                f"<{type(self).__name__}> {self._dir_path}\n"
                f"Access mode: {'lazy' if self.lazy else 'eager'}\n"
                "Index:\n"
                f"{textwrap.indent(repr(self._index), '  ')}"
            )
        return result

    @staticmethod
    def _make_index(filenames: list[PathLike]) -> pd.DataFrame:
        # Implementation is concrete class-specific
        raise NotImplementedError

    @staticmethod
    def _make_spectral_coverage(filenames: list[PathLike]) -> pd.DataFrame:
        with xr.open_dataset(filenames[0]) as ds:
            dims = set(ds.dims)
            db_type = None
            if "w" in dims:
                db_type = "mono"
            if "g" in dims:
                db_type = "ckd"

            if db_type is None:
                raise ValueError("cannot infer database type from dataset dimensions")

            wavenumber_spectral_lookup_mode = ureg(ds["w"].units).check("[length]^-1")

        index = []
        headers = ["wbound_lower [nm]", "wbound_upper [nm]"]
        rows = None

        for filename in filenames:
            filename = Path(filename)
            with xr.open_dataset(filename) as ds:
                w = xarray_to_quantity(ds["w"])

                if wavenumber_spectral_lookup_mode:  # Convert to wavelength
                    w = 1.0 / w
                w = w.m_as("nm")

                if db_type == "mono":
                    wbounds_lower = np.full((len(w),), np.nan)
                    wbounds_upper = np.full((len(w),), np.nan)
                else:
                    wbounds_lower = xarray_to_quantity(ds["wbounds"].sel(wbv="lower"))
                    wbounds_upper = xarray_to_quantity(ds["wbounds"].sel(wbv="upper"))
                    if wavenumber_spectral_lookup_mode:  # Convert to wavelength
                        wbounds_lower = 1.0 / wbounds_lower
                        wbounds_upper = 1.0 / wbounds_upper
                    wbounds_lower = wbounds_lower.m_as("nm")
                    wbounds_upper = wbounds_upper.m_as("nm")

                index.extend([(filename.name, x) for x in w])

                if rows is None:
                    rows = np.stack((wbounds_lower, wbounds_upper), axis=1)
                else:
                    rows = np.concatenate(
                        (
                            rows,
                            np.stack((wbounds_lower, wbounds_upper), axis=1),
                        ),
                        axis=0,
                    )

        index = pd.MultiIndex.from_tuples(index, names=["filename", "wavelength [nm]"])
        # Sort index by wavelength
        result = pd.DataFrame(rows, index=index, columns=headers).sort_index(level=1)
        return result

    @classmethod
    def from_directory(
        cls, dir_path: PathLike, lazy: bool = False, fix: bool = True
    ) -> AbsorptionDatabase:
        """
        Initialize an absorption coefficient database from a directory that
        contains one or several datasets.

        Parameters
        ----------
        dir_path : path-like
            Path where the database is located.

        lazy : bool, default: False
            Access mode switch: if True, load data lazily; else, load data
            eagerly.

        fix : bool, default: True
            If ``True``, attempt generating missing index files upon
            initialization. Otherwise, raise if they are missing.

        Returns
        -------
        AbsorptionDatabase

        Raises
        ------
        FileNotFoundError
            If an index file is missing and ``fix`` is ``False``.
        """
        dir_path = Path(dir_path).resolve()

        try:
            with open(os.path.join(dir_path, "metadata.json")) as f:
                metadata = json.load(f)
        except FileNotFoundError:
            metadata = {}

        filenames = glob.glob(os.path.join(dir_path, "*.nc"))

        def load_index(
            index_filename: PathLike,
            read_csv: Callable[[Path], pd.DataFrame],
            make_index: Callable[[list[PathLike]], pd.DataFrame],
            to_csv: Callable[[pd.DataFrame, Path], None],
        ):
            if index_filename.is_file():
                try:
                    df = read_csv(index_filename)
                except pd.errors.EmptyDataError as e:
                    raise DataError(
                        f"Error loading index file '{index_filename}'"
                    ) from e

            elif fix:
                logger.warning(
                    f"Could not find index file '{index_filename}', building it"
                )
                df = make_index(filenames)
                to_csv(df, index_filename)

            else:
                logger.critical(f"Could not find index file '{index_filename}'")
                raise FileNotFoundError(
                    errno.ENOENT, "Missing index file", index_filename
                )

            if df.empty:
                raise DataError(f"Index loaded from '{index_filename}' is empty")

            return df

        index_path = dir_path / "index.csv"
        logger.debug(f"Loading index from '{index_path}'")
        index = load_index(
            index_filename=index_path,
            read_csv=pd.read_csv,
            make_index=cls._make_index,
            to_csv=lambda df, filename: df.to_csv(filename, index=False),
        )
        index = index.sort_values(by="wl_min [nm]").reset_index(drop=True)

        spectral_coverage_path = dir_path / "spectral.csv"
        logger.debug(f"Loading spectral coverage table from '{spectral_coverage_path}'")
        spectral_coverage = load_index(
            index_filename=spectral_coverage_path,
            read_csv=lambda fname: pd.read_csv(fname, index_col=(0, 1)),
            make_index=cls._make_spectral_coverage,
            to_csv=lambda df, filename: df.to_csv(filename),
        )

        return cls(dir_path, index, spectral_coverage, metadata=metadata, lazy=lazy)
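
    # Usage sketch for `from_directory` (the directory path below is
    # hypothetical and must contain format-compliant NetCDF files):
    #
    #     db = CKDAbsorptionDatabase.from_directory("path/to/db", lazy=True)
    #     print(db.metadata)
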
    @classmethod
    def from_dict(cls, value: dict) -> AbsorptionDatabase:
        """
        Construct from a dictionary. The dictionary has a required entry
        ``"construct"`` that specifies the constructor that will be used to
        instantiate the database. Additional entries are keyword arguments
        passed to the selected constructor.

        Parameters
        ----------
        value : dict
            Converted value.

        Returns
        -------
        AbsorptionDatabase
        """

        raise NotImplementedError
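
    # Sketch of a `from_dict` payload: the "construct" entry names a class
    # method constructor of the concrete subclass, and the remaining entries
    # are forwarded to it as keyword arguments (the path below is
    # hypothetical):
    #
    #     db = MonoAbsorptionDatabase.from_dict(
    #         {"construct": "from_directory", "dir_path": "path/to/db", "lazy": True}
    #     )
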
    @staticmethod
    def convert(value: Any, mode: Literal["mono", "ckd"]) -> AbsorptionDatabase:
        """
        Attempt conversion of a value to an absorption database.

        Parameters
        ----------
        value
            The value for which conversion is attempted.

        mode : {"mono", "ckd"}
            Mode key routing to the desired database type.

        Returns
        -------
        MonoAbsorptionDatabase or CKDAbsorptionDatabase

        Notes
        -----
        Conversion rules are as follows:

        * If ``value`` is a string or a path, try converting using the
          :meth:`.from_directory` constructor. The returned type is consistent
          with the active mode.
        * If ``value`` is a dict, try converting using the :meth:`.from_dict`
          constructor. The returned type is consistent with the active mode.
        * Otherwise, do not convert.
        """
        if isinstance(value, (str, Path, dict)):
            cls = get_absdb_type(mode)

            if isinstance(value, (str, Path)):
                return cls.from_directory(value)

            if isinstance(value, dict):
                return cls.from_dict(value)

        return value
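
    # Conversion sketch: `convert` selects the subtype matching `mode`, then
    # dispatches on the value type (paths below are hypothetical):
    #
    #     AbsorptionDatabase.convert("path/to/db", mode="ckd")
    #     # equivalent to CKDAbsorptionDatabase.from_directory("path/to/db")
    #
    #     AbsorptionDatabase.convert(
    #         {"construct": "from_directory", "dir_path": "path/to/db"}, mode="mono"
    #     )
    #     # equivalent to MonoAbsorptionDatabase.from_dict({...})
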
    @property
    def dir_path(self) -> Path:
        """
        Database root path.
        """
        return self._dir_path

    @property
    def metadata(self) -> dict:
        """
        Database metadata.
        """
        return self._metadata

    @property
    def spectral_coverage(self) -> pd.DataFrame:
        """
        Spectral coverage table.
        """
        return self._spectral_coverage

    @cachedmethod(lambda self: self._cache)
    def load_dataset(self, fname: str) -> xr.Dataset:
        """
        Convenience method to load a dataset. This method is decorated with
        :func:`cachetools.cachedmethod`, backed by the instance's LRU cache,
        which limits the number of reload events when repeatedly querying the
        same files.

        The behaviour of this method is also affected by the ``lazy`` parameter:
        if ``lazy`` is ``False``, files are loaded eagerly with
        :func:`xarray.load_dataset`; if ``lazy`` is ``True``, files are loaded
        lazily with :func:`xarray.open_dataset`.

        Parameters
        ----------
        fname : str
            Name of the file that is to be loaded.

        Returns
        -------
        Dataset
        """
        path = self._dir_path / fname

        if self.lazy:
            logger.debug("Opening '%s'", path)
            return xr.open_dataset(path)
        else:
            logger.debug("Loading '%s'", path)
            return xr.load_dataset(path)

    def cache_clear(self) -> None:
        """
        Clear the cache.
        """
        self._cache.clear()

    def cache_close(self) -> None:
        """
        Close all cached datasets.
        """
        for value in self._cache.values():
            value.close()

    def cache_reset(self, maxsize: int) -> None:
        """
        Reset the cache with the specified maximum size.
        """
        self._cache.clear()
        self._cache = LRUCache(maxsize=maxsize)
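
    # Cache control sketch (assumes `db` is an initialized database instance):
    #
    #     db.cache_close()            # close datasets still held by the cache
    #     db.cache_reset(maxsize=32)  # swap in a larger LRU cache
    #     db.cache_clear()            # or simply drop all cached entries
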
    def lookup_filenames(self, /, **kwargs) -> list[str]:
        """
        Look up filenames in the index table from the coordinate values passed
        as keyword arguments.

        Parameters
        ----------
        wl : quantity or array-like, optional
            Wavelength (scalar or array, quantity or unitless). If passed as a
            unitless value, it is interpreted using the units of the wavelength
            chunk bounds.

        wn : quantity or array-like, optional
            Wavenumber (scalar or array, quantity or unitless). If passed as a
            unitless value, it is interpreted using the units of the wavenumber
            chunk bounds.

        Returns
        -------
        filenames : list of str
            Names of the successfully looked up files, relative to the database
            root directory.

        Raises
        ------
        ValueError
            If the requested spectral coordinate is out of bounds.

        Notes
        -----
        Depending on the specified keyword argument (``wl`` or ``wn``), the
        lookup is performed in wavelength or wavenumber mode. Both are
        equivalent.
        """
        if len(kwargs) != 1:
            raise ValueError(
                "only one of the 'wl' and 'wn' keyword arguments is allowed"
            )
        lookup_mode, values = next(iter(kwargs.items()))
        chunks = self._chunks[lookup_mode]

        # Make sure that 'values' has the right units
        values = ensure_units(np.atleast_1d(values), chunks.units)

        # Perform bound check
        out_bound = (values < chunks.min()) | (values > chunks.max())
        if np.any(out_bound):
            # TODO: handle this error better?
            raise ValueError("out-of-bound spectral coordinate value")

        indexes = np.digitize(values.m_as(chunks.units), bins=chunks.magnitude) - 1
        return list(self._index["filename"].iloc[indexes])
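
    # Lookup sketch: wavelength and wavenumber queries address the same chunk
    # mesh; unitless values take the units of the corresponding chunk bounds
    # (the file name below is hypothetical):
    #
    #     db.lookup_filenames(wl=ureg.Quantity(550.0, "nm"))  # -> ["db-0550_0600.nc"]
    #     db.lookup_filenames(wn=18182.0)  # interpreted in wavenumber chunk units
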
    def lookup_datasets(self, /, **kwargs) -> list[xr.Dataset]:
        """
        Perform a dataset lookup based on the requested spectral coordinate.
        See :meth:`lookup_filenames` for the accepted arguments.
        """
        filenames = self.lookup_filenames(**kwargs)
        return [self.load_dataset(filename) for filename in filenames]

    def eval_sigma_a_mono(
        self,
        w: pint.Quantity,
        thermoprops: xr.Dataset,
        error_handling_config: ErrorHandlingConfiguration | None = None,
    ) -> xr.DataArray:
        """
        Compute the absorption coefficient given spectral coordinates and a
        thermophysical profile (mono variant). The default implementation
        raises.

        Parameters
        ----------
        w : quantity
            The wavelength for which the absorption coefficient is evaluated.

        thermoprops : Dataset
            The thermophysical profile for which the absorption coefficient is
            evaluated.

        error_handling_config : ErrorHandlingConfiguration, optional
            The error handling policy applied if coordinates are missing, do
            not have the appropriate dimension or are out of the dataset's
            bounds. If set, this overrides the configuration set in
            :data:`error_handling_config`.

        Returns
        -------
        ~xarray.DataArray
            A data array containing the evaluated absorption coefficient as a
            function of the spectral coordinate and altitude.
        """
        raise NotImplementedError

    def eval_sigma_a_ckd(
        self,
        w: pint.Quantity,
        g: float,
        thermoprops: xr.Dataset,
        error_handling_config: ErrorHandlingConfiguration | None = None,
    ) -> xr.DataArray:
        """
        Compute the absorption coefficient given spectral coordinates and a
        thermophysical profile (CKD variant). The default implementation raises.

        Parameters
        ----------
        w : quantity
            The wavelength for which the absorption coefficient is evaluated.

        g : float
            The g-point for which the absorption coefficient is evaluated.

        thermoprops : Dataset
            The thermophysical profile for which the absorption coefficient is
            evaluated.

        error_handling_config : ErrorHandlingConfiguration, optional
            The error handling policy applied if coordinates are missing, do
            not have the appropriate dimension or are out of the dataset's
            bounds. If set, this overrides the configuration set in
            :data:`error_handling_config`.

        Returns
        -------
        ~xarray.DataArray
            A data array containing the evaluated absorption coefficient as a
            function of the spectral coordinate and altitude.
        """
        raise NotImplementedError

    @staticmethod
    def _interp_thermophysical(
        ds: xr.Dataset,
        da: xr.DataArray,
        thermoprops: xr.Dataset,
        error_handling_config: ErrorHandlingConfiguration,
    ) -> tuple[xr.DataArray, list[Hashable]]:
        # Interpolate on temperature
        bounds_error = error_handling_config.t.bounds is ErrorHandlingAction.RAISE
        fill_value = None if bounds_error else 0.0  # TODO: use 2-element tuple?
        result = da.interp(
            t=thermoprops["t"],
            kwargs={"bounds_error": bounds_error, "fill_value": fill_value},
        )

        # Interpolate on pressure
        bounds_error = error_handling_config.p.bounds is ErrorHandlingAction.RAISE
        fill_value = None if bounds_error else 0.0  # TODO: use 2-element tuple?
        result = result.interp(
            p=thermoprops["p"],
            kwargs={"bounds_error": bounds_error, "fill_value": fill_value},
        )

        # Interpolate on concentrations

        # -- List requested species concentrations
        x_ds = [coord for coord in ds.coords if coord.startswith("x_")]
        x_ds_scalar = [coord for coord in x_ds if ds[coord].size == 1]
        x_ds_array = set(x_ds) - set(x_ds_scalar)

        x_thermoprops = [dv for dv in thermoprops.data_vars if dv.startswith("x_")]
        x_missing = set(x_ds_array) - set(x_thermoprops)
        x_ds_array = x_ds_array - x_missing

        # -- Select on scalar coordinates
        result = result.isel(**{x: 0 for x in x_ds_scalar + list(x_missing)})

        # -- Interpolate on array coordinates
        bounds_error = error_handling_config.x.bounds is ErrorHandlingAction.RAISE
        fill_value = None if bounds_error else 0.0  # TODO: use 2-element tuple?
        result = result.interp(
            thermoprops[list(x_ds_array)],
            kwargs={"bounds_error": bounds_error, "fill_value": fill_value},
        )

        return result, x_ds


@attrs.define(repr=False, eq=False)
class MonoAbsorptionDatabase(AbsorptionDatabase):
    """
    Absorption coefficient database (monochromatic variant).
    """

    @staticmethod
    def _make_index(filenames) -> pd.DataFrame:
        headers = [
            "filename",
            "wn_min [cm^-1]",
            "wn_max [cm^-1]",
            "wl_min [nm]",
            "wl_max [nm]",
        ]
        rows = []

        for filename in filenames:
            filename = Path(filename)
            with xr.open_dataset(filename) as ds:
                w_u = ureg(ds["w"].units)

                if w_u.check("[length]^-1"):  # wavenumber mode
                    wn_min = float(ds["w"].min()) * w_u
                    wn_max = float(ds["w"].max()) * w_u
                    wl_min = 1.0 / wn_max
                    wl_max = 1.0 / wn_min
                elif w_u.check("[length]"):  # wavelength mode
                    wl_min = float(ds["w"].min()) * w_u
                    wl_max = float(ds["w"].max()) * w_u
                    wn_min = 1.0 / wl_max
                    wn_max = 1.0 / wl_min
                else:
                    raise ValueError(f"Cannot interpret units '{w_u}'")

            rows.append(
                [
                    filename.name,
                    wn_min.m_as("1/cm"),
                    wn_max.m_as("1/cm"),
                    wl_min.m_as("nm"),
                    wl_max.m_as("nm"),
                ]
            )

        return pd.DataFrame(rows, columns=headers).sort_values("wl_min [nm]")

    @classmethod
    def from_dict(cls, value: dict) -> MonoAbsorptionDatabase:
        # Inherit docstring
        value = value.copy()
        constructor = getattr(cls, value.pop("construct"))
        return constructor(**value)

    def eval_sigma_a_mono(
        self,
        w: pint.Quantity,
        thermoprops: xr.Dataset,
        error_handling_config: ErrorHandlingConfiguration | None = None,
    ) -> xr.DataArray:
        # Inherit docstring

        if error_handling_config is None:
            error_handling_config = self.error_handling_config

        # Lookup dataset
        ds = self.lookup_datasets(wl=w)[0]

        # Interpolate on spectral dimension
        # TODO: Optimize
        w_u = ureg(ds["w"].units)
        # Note: Support for wavenumber spectral lookup mode is suboptimal
        w_m = (1.0 / w).m_as(w_u) if w_u.check("[length]^-1") else w.m_as(w_u)
        result = ds["sigma_a"].interp(w=w_m, method="linear")

        # Interpolate on thermophysical dimensions
        result, x_ds = self._interp_thermophysical(
            ds, result, thermoprops, error_handling_config
        )

        # Drop thermophysical coordinates, ensure spectral dimension
        result = result.drop_vars(["p", "t", *x_ds])
        if "w" not in result.dims:
            result = result.expand_dims("w")

        return result.transpose("w", "z")
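
# Evaluation sketch: the `eval_sigma_a_*` methods expect a thermophysical
# profile with a "z" dimension, "t" and "p" variables, and species
# concentrations stored as "x_*" data variables (see _interp_thermophysical).
# The profile below is a minimal hypothetical example; coordinate units are
# assumed consistent with those of the database tables:
#
#     thermoprops = xr.Dataset(
#         data_vars={
#             "t": ("z", [290.0, 250.0]),
#             "p": ("z", [101325.0, 26500.0]),
#             "x_H2O": ("z", [1e-2, 1e-3]),
#         },
#         coords={"z": [0.0, 10.0]},
#     )
#     sigma_a = db.eval_sigma_a_mono(ureg.Quantity(550.0, "nm"), thermoprops)
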
@attrs.define(repr=False, eq=False)
class CKDAbsorptionDatabase(AbsorptionDatabase):
    """
    Absorption coefficient database (CKD variant).
    """

    @staticmethod
    def _make_index(filenames) -> pd.DataFrame:
        headers = [
            "filename",
            "wn_min [cm^-1]",
            "wn_max [cm^-1]",
            "wl_min [nm]",
            "wl_max [nm]",
        ]
        rows = []

        for filename in filenames:
            filename = Path(filename)
            with xr.open_dataset(filename) as ds:
                w_u = ureg(ds["w"].units)

                if w_u.check("[length]^-1"):  # wavenumber mode
                    wn_min = float(ds["wbounds"].sel(wbv="lower").min()) * w_u
                    wn_max = float(ds["wbounds"].sel(wbv="upper").max()) * w_u
                    wl_min = 1.0 / wn_max
                    wl_max = 1.0 / wn_min
                elif w_u.check("[length]"):  # wavelength mode
                    wl_min = float(ds["wbounds"].sel(wbv="lower").min()) * w_u
                    wl_max = float(ds["wbounds"].sel(wbv="upper").max()) * w_u
                    wn_min = 1.0 / wl_max
                    wn_max = 1.0 / wl_min
                else:
                    raise ValueError(f"Cannot interpret units '{w_u}'")

            rows.append(
                [
                    filename.name,
                    wn_min.m_as("1/cm"),
                    wn_max.m_as("1/cm"),
                    wl_min.m_as("nm"),
                    wl_max.m_as("nm"),
                ]
            )

        return pd.DataFrame(rows, columns=headers).sort_values("wl_min [nm]")

    @classmethod
    def from_dict(cls, value: dict) -> CKDAbsorptionDatabase:
        # Inherit docstring
        value = value.copy()
        constructor = getattr(cls, value.pop("construct"))
        return constructor(**value)

    def eval_sigma_a_ckd(
        self,
        w: pint.Quantity,
        g: float,
        thermoprops: xr.Dataset,
        error_handling_config: ErrorHandlingConfiguration | None = None,
    ) -> xr.DataArray:
        # Inherit docstring

        # TODO: Implement new bounds error handling policy. This policy is as
        # follows:
        # * Interpolation is done for an altitude range such that the pressure
        #   is higher than the lower bound of the pressure variable in the
        #   CKD table. This is implemented at a higher level (not here).
        # * The default bound error handling policy for the pressure and
        #   temperature variables is 'extrapolate'.
        # * Above the cut-off altitude, the profile is filled with zeros.
        #   Cut-off detection is implemented with pressure-based masking.

        # TODO: Use the 'assume_sorted' parameter of DataArray.interp()

        if error_handling_config is None:
            error_handling_config = self.error_handling_config

        # Lookup dataset
        ds = self.lookup_datasets(wl=w)[0]

        # Select bin
        # TODO: Optimize
        w_u = ureg(ds["w"].units)
        w_m = w.m_as(w_u)
        result = ds["sigma_a"].sel(w=w_m, method="nearest")

        # Interpolate along g
        result = result.interp(g=g).drop_vars("g")

        # Interpolate on thermophysical dimensions
        result, x_ds = self._interp_thermophysical(
            ds, result, thermoprops, error_handling_config
        )

        # Drop thermophysical coordinates, ensure spectral dimension
        result = result.drop_vars(["p", "t", *x_ds])
        if "w" not in result.dims:
            result = result.expand_dims("w")

        return result.transpose("w", "z")


def get_absdb_type(mode: Literal["mono", "ckd"]) -> type:
    """
    Get the :class:`.AbsorptionDatabase` subtype that corresponds to the
    passed mode key.

    Parameters
    ----------
    mode : {"mono", "ckd"}
        Mode key.

    Returns
    -------
    type

    Raises
    ------
    ValueError
        If the ``mode`` value is unsupported.
    """
    if mode == "mono":
        return MonoAbsorptionDatabase
    elif mode == "ckd":
        return CKDAbsorptionDatabase
    else:
        raise ValueError(f"unsupported mode {mode!r}")
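
# Mode routing sketch: `get_absdb_type` is how `AbsorptionDatabase.convert`
# picks the concrete type for the active mode:
#
#     assert get_absdb_type("mono") is MonoAbsorptionDatabase
#     assert get_absdb_type("ckd") is CKDAbsorptionDatabase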