axsdb 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
axsdb/core.py ADDED
@@ -0,0 +1,893 @@
from __future__ import annotations

import errno
import glob
import json
import logging
import os
import re
import textwrap
from pathlib import Path
from typing import Any, Callable, Hashable, Literal

import attrs
import numpy as np
import pandas as pd
import pint
import xarray as xr
from cachetools import LRUCache, cachedmethod

from .error import (
    DataError,
    ErrorHandlingAction,
    ErrorHandlingConfiguration,
    get_error_handling_config,
)
from .typing import PathLike
from .units import ensure_units, ureg, xarray_to_quantity

logger = logging.getLogger("axsdb")


@attrs.define(repr=False, eq=False)
class AbsorptionDatabase:
    """
    Common parent type for absorption coefficient databases.

    This class implements most of the data indexing logic common to all
    absorption coefficient databases.
    A database is composed of a set of NetCDF files compliant with the
    absorption coefficient database format specification and placed in the
    same directory. A database instance is initialized by specifying the path
    to the directory where the files are stored.

    If it exists, a ``metadata.json`` file is loaded into the :attr:`metadata`
    attribute.

    Databases are usually not initialized using the constructor, but rather
    using the class method constructors :meth:`from_directory` and
    :meth:`from_dict`.

    Parameters
    ----------
    dir_path : path-like
        Path to database root directory.

    index : DataFrame
        File index, assumed sorted by ascending wavelengths.

    spectral_coverage : DataFrame
        Dataframe that unrolls the spectral information contained in all data
        files in the database.

    metadata : dict, optional
        Dictionary that contains the database metadata.

    lazy : bool, default: False
        Access mode switch: if ``True``, load data lazily; else, load data
        eagerly.

    cache : cachetools.LRUCache, optional
        A mapping that implements an LRU caching policy.

    error_handling_config : ErrorHandlingConfiguration, optional
        Default error handling policy. If unset, a global default is used.

    Notes
    -----
    A file index, stored as the :attr:`_index` private attribute, associates
    to each file the spectral region it covers. The index is preferably loaded
    from a CSV file that contains all this information; if it is not found, the
    table is built upon database initialization and saved to the database
    directory. The indexing step requires accessing all files and may take a
    while. The file index table is used during queries to efficiently select
    the file from which data will be read. For convenience, information about
    bounds contained in the index is assembled into a spectral mesh suitable
    for query using :func:`numpy.digitize` and stored in the :attr:`_chunks`
    dictionary.

    A spectral coverage table, stored as the :attr:`_spectral_coverage` private
    attribute, merges the spectral coordinates of all files into a consistent
    index. This table is used to provide spectral coverage information to
    higher-level components that drive the simulation. Table contents are
    preferably loaded from a CSV file; if it is not found, the table is built
    upon database initialization and saved to the database directory. This
    indexing step also requires accessing all files and may take a while.

    Database access and memory usage can be controlled through two parameters:

    * File queries are stored in an LRU cache. The initial size is set to a low
      value (8) and should be appropriate for most situations. If more cache
      control is needed, the :meth:`cache_clear`, :meth:`cache_close` and
      :meth:`cache_reset` methods can be used.
    * Datasets can be opened with an eager or lazy approach. This behaviour is
      controlled using the ``lazy`` constructor parameter. In eager mode, the
      entire file used for a query is loaded into memory. This can bring
      significant access overhead when using large files. If desired, datasets
      can instead be opened lazily, triggering disk access only for the
      specific data that are used.
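
    Examples
    --------
    A minimal usage sketch (the directory path, spectral value and returned
    file name are illustrative, not shipped with the package):

    >>> from axsdb.core import CKDAbsorptionDatabase  # doctest: +SKIP
    >>> db = CKDAbsorptionDatabase.from_directory("path/to/db", lazy=True)
    >>> db.lookup_filenames(wl=550.0)  # unitless value, interpreted in nm
    ['example.nc']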
    """

    #: Path to database root directory.
    _dir_path: Path = attrs.field(converter=lambda x: Path(x).absolute().resolve())

    @_dir_path.validator
    def _dir_path_validator(self, attribute, value):
        if not value.is_dir():
            raise ValueError(
                f"while validating '{attribute.name}': path '{value}' is not a "
                "directory"
            )

    #: File index, assumed sorted by ascending wavelengths.
    _index: pd.DataFrame = attrs.field(repr=False)

    @_index.validator
    def _index_validator(self, attribute, value):
        if value.empty:
            raise ValueError(f"while validating '{attribute.name}': index is empty")

        wavelengths = value["wl_min [nm]"].values
        if not np.all(wavelengths[:-1] < wavelengths[1:]):
            raise ValueError(
                f"while validating '{attribute.name}': index must be sorted by "
                "ascending wavelength values"
            )

    #: Dataframe that unrolls the spectral information contained in all data
    #: files in the database.
    _spectral_coverage: pd.DataFrame = attrs.field(repr=False)

    #: Dictionary that contains the database metadata.
    _metadata: dict = attrs.field(factory=dict, repr=False)

    #: Dictionary mapping spectral lookup mode keys ('wl' or 'wn') to arrays
    #: containing the nodes of the spectral chunk mesh, which is used to perform
    #: spectral coordinate-based file lookup.
    _chunks: dict[str, np.ndarray] = attrs.field(factory=dict, repr=False, init=False)

    #: Access mode switch: if ``True``, load data lazily; else, load data eagerly.
    lazy: bool = attrs.field(default=False, repr=False)

    #: A mapping that implements an LRU caching policy.
    _cache: LRUCache = attrs.field(factory=lambda: LRUCache(8), repr=False)

    #: Default error handling policy. If unset, the global default is used.
    _error_handling_config: ErrorHandlingConfiguration | None = attrs.field(
        default=None
    )

    @property
    def error_handling_config(self) -> ErrorHandlingConfiguration:
        """
        Default error handling policy. If unset, the global default is used.
        """
        return (
            self._error_handling_config
            if self._error_handling_config is not None
            else get_error_handling_config()
        )

    @error_handling_config.setter
    def error_handling_config(self, value: Any) -> None:
        try:
            self._error_handling_config = (
                None if value is None else ErrorHandlingConfiguration.convert(value)
            )
        except Exception as e:
            raise ValueError(
                "value cannot be converted to an ErrorHandlingConfiguration"
            ) from e

    def __attrs_post_init__(self):
        # Parse field names and units
        regex = re.compile(r"(?P<coord>.*)_(?P<minmax>min|max) \[(?P<units>.*)\]")
        quantities = {}
        for colname in self._index.columns:
            if colname == "filename":
                continue

            m = regex.match(colname)
            units = m.group("units")
            magnitude = self._index[colname].values
            quantities[f"{m.group('coord')}_{m.group('minmax')}"] = ureg.Quantity(
                magnitude, units
            )

        # Populate spectral mesh (nodes) for both wavelength and wavenumber
        # lookup modes
        self._chunks["wl"] = np.concatenate(
            (quantities["wl_min"], [quantities["wl_max"][-1]])
        )
        self._chunks["wn"] = np.concatenate(
            (quantities["wn_max"], [quantities["wn_min"][-1]])
        )

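    # Illustration (not part of the runtime logic; values are hypothetical):
    # with two files covering 500-600 nm and 600-700 nm, the wavelength chunk
    # mesh assembled above is [500, 600, 700] nm. A 550 nm query then resolves
    # to a file slot with numpy.digitize:
    #
    #     chunks = np.array([500.0, 600.0, 700.0])  # nm
    #     np.digitize([550.0], bins=chunks) - 1     # -> array([0]), first file
    #
    # lookup_filenames() below applies exactly this lookup after unit coercion.
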
    def __repr__(self) -> str:
        with pd.option_context("display.max_columns", 4):
            result = (
                f"<{type(self).__name__}> {self._dir_path}\n"
                f"Access mode: {'lazy' if self.lazy else 'eager'}\n"
                "Index:\n"
                f"{textwrap.indent(repr(self._index), ' ')}"
            )
        return result

    @staticmethod
    def _make_index(filenames: list[PathLike]) -> pd.DataFrame:
        # Implementation is concrete class-specific
        raise NotImplementedError

    @staticmethod
    def _make_spectral_coverage(filenames: list[PathLike]) -> pd.DataFrame:
        with xr.open_dataset(filenames[0]) as ds:
            dims = set(ds.dims)
            db_type = None
            if "w" in dims:
                db_type = "mono"
            if "g" in dims:
                db_type = "ckd"

            if db_type is None:
                raise ValueError(
                    "cannot infer database type: dataset has neither a 'w' "
                    "nor a 'g' dimension"
                )

            wavenumber_spectral_lookup_mode = ureg(ds["w"].units).check("[length]^-1")

        index = []
        headers = ["wbound_lower [nm]", "wbound_upper [nm]"]
        rows = None

        for filename in filenames:
            filename = Path(filename)
            with xr.open_dataset(filename) as ds:
                w = xarray_to_quantity(ds["w"])

                if wavenumber_spectral_lookup_mode:  # Convert to wavelength
                    w = 1.0 / w
                w = w.m_as("nm")

                if db_type == "mono":
                    wbounds_lower = np.full((len(w),), np.nan)
                    wbounds_upper = np.full((len(w),), np.nan)
                else:
                    wbounds_lower = xarray_to_quantity(ds["wbounds"].sel(wbv="lower"))
                    wbounds_upper = xarray_to_quantity(ds["wbounds"].sel(wbv="upper"))
                    if wavenumber_spectral_lookup_mode:  # Convert to wavelength
                        wbounds_lower = 1.0 / wbounds_lower
                        wbounds_upper = 1.0 / wbounds_upper
                    wbounds_lower = wbounds_lower.m_as("nm")
                    wbounds_upper = wbounds_upper.m_as("nm")

                index.extend([(filename.name, x) for x in w])

                if rows is None:
                    rows = np.stack((wbounds_lower, wbounds_upper), axis=1)
                else:
                    rows = np.concatenate(
                        (
                            rows,
                            np.stack((wbounds_lower, wbounds_upper), axis=1),
                        ),
                        axis=0,
                    )

        index = pd.MultiIndex.from_tuples(index, names=["filename", "wavelength [nm]"])
        # Sort index by wavelength
        result = pd.DataFrame(rows, index=index, columns=headers).sort_index(level=1)
        return result

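    # The resulting coverage table is indexed by (filename, wavelength [nm])
    # and holds the bin bounds; a sketch with illustrative values:
    #
    #                                     wbound_lower [nm]  wbound_upper [nm]
    #     filename      wavelength [nm]
    #     example-a.nc  550.0                         540.0              560.0
    #     example-b.nc  650.0                         640.0              660.0
    #
    # For mono databases, the bound columns are filled with NaN.
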
    @classmethod
    def from_directory(
        cls, dir_path: PathLike, lazy: bool = False, fix: bool = True
    ) -> AbsorptionDatabase:
        """
        Initialize an absorption coefficient database from a directory that
        contains one or several datasets.

        Parameters
        ----------
        dir_path : path-like
            Path where the database is located.

        lazy : bool, default: False
            Access mode switch: if True, load data lazily; else, load data
            eagerly.

        fix : bool, default: True
            If ``True``, attempt generating missing index files upon
            initialization. Otherwise, raise if they are missing.

        Returns
        -------
        AbsorptionDatabase

        Raises
        ------
        FileNotFoundError
            If an index file is missing and ``fix`` is ``False``.
        """
        dir_path = Path(dir_path).resolve()

        try:
            with open(os.path.join(dir_path, "metadata.json")) as f:
                metadata = json.load(f)
        except FileNotFoundError:
            metadata = {}

        filenames = glob.glob(os.path.join(dir_path, "*.nc"))

        def load_index(
            index_filename: PathLike,
            read_csv: Callable[[Path], pd.DataFrame],
            make_index: Callable[[list[PathLike]], pd.DataFrame],
            to_csv: Callable[[pd.DataFrame, Path], None],
        ):
            if index_filename.is_file():
                try:
                    df = read_csv(index_filename)
                except pd.errors.EmptyDataError as e:
                    raise DataError(
                        f"Error loading index file '{index_filename}'"
                    ) from e

            elif fix:
                logger.warning(
                    f"Could not find index file '{index_filename}', building it"
                )
                df = make_index(filenames)
                to_csv(df, index_filename)

            else:
                logger.critical(f"Could not find index file '{index_filename}'")
                raise FileNotFoundError(
                    errno.ENOENT, "Missing index file", index_filename
                )

            if df.empty:
                raise DataError(f"Index loaded from '{index_filename}' is empty")

            return df

        index_path = dir_path / "index.csv"
        logger.debug(f"Loading index from '{index_path}'")
        index = load_index(
            index_filename=index_path,
            read_csv=pd.read_csv,
            make_index=cls._make_index,
            to_csv=lambda df, filename: df.to_csv(filename, index=False),
        )
        index = index.sort_values(by="wl_min [nm]").reset_index(drop=True)

        spectral_coverage_path = dir_path / "spectral.csv"
        logger.debug(f"Loading spectral coverage table from '{spectral_coverage_path}'")
        spectral_coverage = load_index(
            index_filename=spectral_coverage_path,
            read_csv=lambda path: pd.read_csv(path, index_col=(0, 1)),
            make_index=cls._make_spectral_coverage,
            to_csv=lambda df, filename: df.to_csv(filename),
        )

        return cls(dir_path, index, spectral_coverage, metadata=metadata, lazy=lazy)

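    # A database directory therefore typically looks like this (file names
    # are illustrative; index.csv and spectral.csv are regenerated when
    # missing and fix=True):
    #
    #     my_database/
    #     ├── metadata.json   (optional)
    #     ├── index.csv       (file index)
    #     ├── spectral.csv    (spectral coverage table)
    #     ├── example-a.nc
    #     └── example-b.nc
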
    @classmethod
    def from_dict(cls, value: dict) -> AbsorptionDatabase:
        """
        Construct from a dictionary. The dictionary has a required entry
        ``"construct"`` that specifies the constructor that will be used to
        instantiate the database. Additional entries are keyword arguments
        passed to the selected constructor.

        Parameters
        ----------
        value : dict
            Converted value.

        Returns
        -------
        AbsorptionDatabase
        """

        raise NotImplementedError

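    # Sketch of the expected dictionary layout, based on the subclass
    # implementations below (the directory path is illustrative):
    #
    #     {
    #         "construct": "from_directory",
    #         "dir_path": "path/to/db",
    #         "lazy": True,
    #     }
    #
    # The "construct" entry is popped and resolved with getattr(cls, ...);
    # the remaining entries are forwarded as keyword arguments.
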
    @staticmethod
    def convert(value: Any, mode: Literal["mono", "ckd"]) -> AbsorptionDatabase:
        """
        Attempt conversion of a value to an absorption database.

        Parameters
        ----------
        value
            The value for which conversion is attempted.

        mode : {"mono", "ckd"}
            Mode router to the desired database type.

        Returns
        -------
        MonoAbsorptionDatabase or CKDAbsorptionDatabase

        Notes
        -----
        Conversion rules are as follows:

        * If ``value`` is a string or a path, try converting using the
          :meth:`.from_directory` constructor. The returned type is consistent
          with the active mode.
        * If ``value`` is a dict, try converting using the :meth:`.from_dict`
          constructor. The returned type is consistent with the active mode.
        * Otherwise, do not convert.
        """
        if isinstance(value, (str, Path, dict)):
            cls = get_absdb_type(mode)

            if isinstance(value, (str, Path)):
                return cls.from_directory(value)

            if isinstance(value, dict):
                return cls.from_dict(value)

        return value

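    # Usage sketch (the path is illustrative): both calls below route to
    # CKDAbsorptionDatabase according to the conversion rules above.
    #
    #     AbsorptionDatabase.convert("path/to/db", mode="ckd")
    #     AbsorptionDatabase.convert(
    #         {"construct": "from_directory", "dir_path": "path/to/db"},
    #         mode="ckd",
    #     )
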
    @property
    def dir_path(self) -> Path:
        """
        Database root path.
        """
        return self._dir_path

    @property
    def metadata(self) -> dict:
        """
        Database metadata.
        """
        return self._metadata

    @property
    def spectral_coverage(self) -> pd.DataFrame:
        """
        Spectral coverage table.
        """
        return self._spectral_coverage

    @cachedmethod(lambda self: self._cache)
    def load_dataset(self, fname: str) -> xr.Dataset:
        """
        Convenience method to load a dataset. This method is decorated with
        :func:`cachetools.cachedmethod`, backed by the instance's LRU cache
        (8 entries by default), which limits the number of reload events when
        repeatedly querying the same files.

        The behaviour of this method is also affected by the ``lazy`` parameter:
        if ``lazy`` is ``False``, files are loaded eagerly with
        :func:`xarray.load_dataset`; if ``lazy`` is ``True``, files are loaded
        lazily with :func:`xarray.open_dataset`.

        Parameters
        ----------
        fname : str
            Name of the file that is to be loaded.

        Returns
        -------
        Dataset
        """
        path = self._dir_path / fname

        if self.lazy:
            logger.debug("Opening '%s'", path)
            return xr.open_dataset(path)
        else:
            logger.debug("Loading '%s'", path)
            return xr.load_dataset(path)

    def cache_clear(self) -> None:
        """
        Clear the cache.
        """
        self._cache.clear()

    def cache_close(self) -> None:
        """
        Close all cached datasets.
        """
        for value in self._cache.values():
            value.close()

    def cache_reset(self, maxsize: int) -> None:
        """
        Reset the cache with the specified maximum size.
        """
        self._cache.clear()
        self._cache = LRUCache(maxsize=maxsize)

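    # Cache control sketch: grow the cache for workloads that alternate
    # between many files, and release file handles when done (mostly relevant
    # in lazy mode, where open_dataset keeps files open):
    #
    #     db.cache_reset(maxsize=32)
    #     ...  # run queries
    #     db.cache_close()
    #     db.cache_clear()
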
    def lookup_filenames(self, /, **kwargs) -> list[str]:
        """
        Look up file names in the index table from the coordinate values
        passed as keyword arguments.

        Parameters
        ----------
        wl : quantity or array-like, optional
            Wavelength (scalar or array, quantity or unitless). If passed as a
            unitless value, it is interpreted using the units of the wavelength
            chunk bounds.

        wn : quantity or array-like, optional
            Wavenumber (scalar or array, quantity or unitless). If passed as a
            unitless value, it is interpreted using the units of the wavenumber
            chunk bounds.

        Returns
        -------
        filenames : list of str
            Names of the successfully looked up files, relative to the database
            root directory.

        Raises
        ------
        ValueError
            If the requested spectral coordinate is out of bounds.

        Notes
        -----
        Depending on the specified keyword argument (``wl`` or ``wn``), the
        lookup is performed in wavelength or wavenumber mode. Both are
        equivalent.
        """
        if len(kwargs) != 1:
            raise ValueError(
                "exactly one of the 'wl' and 'wn' keyword arguments must be "
                "passed"
            )
        lookup_mode, values = next(iter(kwargs.items()))
        chunks = self._chunks[lookup_mode]

        # Make sure that 'values' has the right units
        values = ensure_units(np.atleast_1d(values), chunks.units)

        # Perform bound check
        out_bound = (values < chunks.min()) | (values > chunks.max())
        if np.any(out_bound):
            # TODO: handle this error better?
            raise ValueError("out-of-bound spectral coordinate value")

        indexes = np.digitize(values.m_as(chunks.units), bins=chunks.magnitude) - 1
        return list(self._index["filename"].iloc[indexes])

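    # Lookup sketch (file names are illustrative): unitless values are
    # interpreted in the chunk mesh units, nanometres for 'wl' lookups.
    #
    #     db.lookup_filenames(wl=550.0)           # -> ['example-a.nc']
    #     db.lookup_filenames(wl=[550.0, 650.0])  # -> ['example-a.nc',
    #                                             #     'example-b.nc']
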
    def lookup_datasets(self, /, **kwargs) -> list[xr.Dataset]:
        """
        Perform a dataset lookup based on the requested spectral coordinate.
        See :meth:`lookup_filenames` for the accepted arguments.
        """
        filenames = self.lookup_filenames(**kwargs)
        return [self.load_dataset(filename) for filename in filenames]

    def eval_sigma_a_mono(
        self,
        w: pint.Quantity,
        thermoprops: xr.Dataset,
        error_handling_config: ErrorHandlingConfiguration | None = None,
    ) -> xr.DataArray:
        """
        Compute the absorption coefficient given spectral coordinates and a
        thermophysical profile (mono variant). The default implementation
        raises :class:`NotImplementedError`.

        Parameters
        ----------
        w : quantity
            The wavelength for which the absorption coefficient is evaluated.

        thermoprops : Dataset
            The thermophysical profile for which the absorption coefficient is
            evaluated.

        error_handling_config : ErrorHandlingConfiguration, optional
            The error handling policy applied if coordinates are missing, do
            not have the appropriate dimension or are out of the dataset's
            bounds. If set, this overrides the configuration set in
            :data:`error_handling_config`.

        Returns
        -------
        ~xarray.DataArray
            A data array containing the evaluated absorption coefficient as a
            function of the spectral coordinate and altitude.
        """
        raise NotImplementedError

    def eval_sigma_a_ckd(
        self,
        w: pint.Quantity,
        g: float,
        thermoprops: xr.Dataset,
        error_handling_config: ErrorHandlingConfiguration | None = None,
    ) -> xr.DataArray:
        """
        Compute the absorption coefficient given spectral coordinates and a
        thermophysical profile (CKD variant). The default implementation
        raises :class:`NotImplementedError`.

        Parameters
        ----------
        w : quantity
            The wavelength for which the absorption coefficient is evaluated.

        g : float
            The g-point for which the absorption coefficient is evaluated.

        thermoprops : Dataset
            The thermophysical profile for which the absorption coefficient is
            evaluated.

        error_handling_config : ErrorHandlingConfiguration, optional
            The error handling policy applied if coordinates are missing, do
            not have the appropriate dimension or are out of the dataset's
            bounds. If set, this overrides the configuration set in
            :data:`error_handling_config`.

        Returns
        -------
        ~xarray.DataArray
            A data array containing the evaluated absorption coefficient as a
            function of the spectral coordinate and altitude.
        """
        raise NotImplementedError

    @staticmethod
    def _interp_thermophysical(
        ds: xr.Dataset,
        da: xr.DataArray,
        thermoprops: xr.Dataset,
        error_handling_config: ErrorHandlingConfiguration,
    ) -> tuple[xr.DataArray, list[Hashable]]:
        # Interpolate on temperature
        bounds_error = error_handling_config.t.bounds is ErrorHandlingAction.RAISE
        fill_value = None if bounds_error else 0.0  # TODO: use 2-element tuple?
        result = da.interp(
            t=thermoprops["t"],
            kwargs={"bounds_error": bounds_error, "fill_value": fill_value},
        )

        # Interpolate on pressure
        bounds_error = error_handling_config.p.bounds is ErrorHandlingAction.RAISE
        fill_value = None if bounds_error else 0.0  # TODO: use 2-element tuple?
        result = result.interp(
            p=thermoprops["p"],
            kwargs={"bounds_error": bounds_error, "fill_value": fill_value},
        )

        # Interpolate on concentrations

        # -- List requested species concentrations
        x_ds = [coord for coord in ds.coords if coord.startswith("x_")]
        x_ds_scalar = [coord for coord in x_ds if ds[coord].size == 1]
        x_ds_array = set(x_ds) - set(x_ds_scalar)

        x_thermoprops = [dv for dv in thermoprops.data_vars if dv.startswith("x_")]
        x_missing = set(x_ds_array) - set(x_thermoprops)
        x_ds_array = x_ds_array - x_missing

        # -- Select on scalar coordinates
        result = result.isel(**{x: 0 for x in x_ds_scalar + list(x_missing)})

        # -- Interpolate on array coordinates
        bounds_error = error_handling_config.x.bounds is ErrorHandlingAction.RAISE
        fill_value = None if bounds_error else 0.0  # TODO: use 2-element tuple?
        result = result.interp(
            thermoprops[x_ds_array],
            kwargs={"bounds_error": bounds_error, "fill_value": fill_value},
        )

        return result, x_ds


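# Note on the interpolation policy implemented by _interp_thermophysical:
# for each of the temperature ('t'), pressure ('p') and mole fraction ('x_*')
# axes, the error handling configuration decides between raising on
# out-of-bounds coordinates (bounds action RAISE -> bounds_error=True) and
# zero-filling (fill_value=0.0). For instance, with a policy that zero-fills
# pressure bounds, altitudes whose pressure falls outside the tabulated range
# yield a zero absorption coefficient instead of an exception.

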
@attrs.define(repr=False, eq=False)
class MonoAbsorptionDatabase(AbsorptionDatabase):
    """
    Absorption coefficient database (monochromatic variant).
    """

    @staticmethod
    def _make_index(filenames) -> pd.DataFrame:
        headers = [
            "filename",
            "wn_min [cm^-1]",
            "wn_max [cm^-1]",
            "wl_min [nm]",
            "wl_max [nm]",
        ]
        rows = []

        for filename in filenames:
            filename = Path(filename)
            with xr.open_dataset(filename) as ds:
                w_u = ureg(ds["w"].units)

                if w_u.check("[length]^-1"):  # wavenumber mode
                    wn_min = float(ds["w"].min()) * w_u
                    wn_max = float(ds["w"].max()) * w_u
                    wl_min = 1.0 / wn_max
                    wl_max = 1.0 / wn_min
                elif w_u.check("[length]"):  # wavelength mode
                    wl_min = float(ds["w"].min()) * w_u
                    wl_max = float(ds["w"].max()) * w_u
                    wn_min = 1.0 / wl_max
                    wn_max = 1.0 / wl_min
                else:
                    raise ValueError(f"Cannot interpret units '{w_u}'")

                rows.append(
                    [
                        filename.name,
                        wn_min.m_as("1/cm"),
                        wn_max.m_as("1/cm"),
                        wl_min.m_as("nm"),
                        wl_max.m_as("nm"),
                    ]
                )

        return pd.DataFrame(rows, columns=headers).sort_values("wl_min [nm]")

    @classmethod
    def from_dict(cls, value: dict) -> MonoAbsorptionDatabase:
        # Inherit docstring
        value = value.copy()
        constructor = getattr(cls, value.pop("construct"))
        return constructor(**value)

    def eval_sigma_a_mono(
        self,
        w: pint.Quantity,
        thermoprops: xr.Dataset,
        error_handling_config: ErrorHandlingConfiguration | None = None,
    ) -> xr.DataArray:
        # Inherit docstring

        if error_handling_config is None:
            error_handling_config = self.error_handling_config

        # Lookup dataset
        ds = self.lookup_datasets(wl=w)[0]

        # Interpolate on spectral dimension
        # TODO: Optimize
        w_u = ureg(ds["w"].units)
        # Note: Support for wavenumber spectral lookup mode is suboptimal
        w_m = (1.0 / w).m_as(w_u) if w_u.check("[length]^-1") else w.m_as(w_u)
        result = ds["sigma_a"].interp(w=w_m, method="linear")

        # Interpolate on thermophysical dimensions
        result, x_ds = self._interp_thermophysical(
            ds, result, thermoprops, error_handling_config
        )

        # Drop thermophysical coordinates, ensure spectral dimension
        result = result.drop_vars(["p", "t", *x_ds])
        if "w" not in result.dims:
            result = result.expand_dims("w")

        return result.transpose("w", "z")


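# Expected structure of the 'thermoprops' argument, as consumed by the
# evaluation methods: an xarray.Dataset defined on an altitude dimension 'z',
# with data variables 't' (temperature), 'p' (pressure) and one 'x_<species>'
# mole fraction variable per species (e.g. 'x_H2O'; the name is illustrative).
# Species tabulated in the database but absent from the profile fall back to
# the first tabulated concentration value.

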
@attrs.define(repr=False, eq=False)
class CKDAbsorptionDatabase(AbsorptionDatabase):
    """
    Absorption coefficient database (CKD variant).
    """

    @staticmethod
    def _make_index(filenames) -> pd.DataFrame:
        headers = [
            "filename",
            "wn_min [cm^-1]",
            "wn_max [cm^-1]",
            "wl_min [nm]",
            "wl_max [nm]",
        ]
        rows = []

        for filename in filenames:
            filename = Path(filename)
            with xr.open_dataset(filename) as ds:
                w_u = ureg(ds["w"].units)

                if w_u.check("[length]^-1"):  # wavenumber mode
                    wn_min = float(ds["wbounds"].sel(wbv="lower").min()) * w_u
                    wn_max = float(ds["wbounds"].sel(wbv="upper").max()) * w_u
                    wl_min = 1.0 / wn_max
                    wl_max = 1.0 / wn_min
                elif w_u.check("[length]"):  # wavelength mode
                    wl_min = float(ds["wbounds"].sel(wbv="lower").min()) * w_u
                    wl_max = float(ds["wbounds"].sel(wbv="upper").max()) * w_u
                    wn_min = 1.0 / wl_max
                    wn_max = 1.0 / wl_min
                else:
                    raise ValueError(f"Cannot interpret units '{w_u}'")

                rows.append(
                    [
                        filename.name,
                        wn_min.m_as("1/cm"),
                        wn_max.m_as("1/cm"),
                        wl_min.m_as("nm"),
                        wl_max.m_as("nm"),
                    ]
                )

        return pd.DataFrame(rows, columns=headers).sort_values("wl_min [nm]")

    @classmethod
    def from_dict(cls, value: dict) -> CKDAbsorptionDatabase:
        # Inherit docstring
        value = value.copy()
        constructor = getattr(cls, value.pop("construct"))
        return constructor(**value)

    def eval_sigma_a_ckd(
        self,
        w: pint.Quantity,
        g: float,
        thermoprops: xr.Dataset,
        error_handling_config: ErrorHandlingConfiguration | None = None,
    ) -> xr.DataArray:
        # Inherit docstring

        # TODO: Implement new bounds error handling policy. This policy is as
        # follows:
        # * Interpolation is done for an altitude range such that the pressure
        #   is higher than the lower bound of the pressure variable in the
        #   CKD table. This is implemented at a higher level (not here).
        # * The default bound error handling policy for the pressure and
        #   temperature variables is 'extrapolate'.
        # * Above the cut-off altitude, the profile is filled with zeros.
        #   Cut-off detection is implemented with pressure-based masking.

        # TODO: Use the 'assume_sorted' parameter of DataArray.interp()

        if error_handling_config is None:
            error_handling_config = self.error_handling_config

        # Lookup dataset
        ds = self.lookup_datasets(wl=w)[0]

        # Select bin
        # TODO: Optimize
        w_u = ureg(ds["w"].units)
        w_m = w.m_as(w_u)
        result = ds["sigma_a"].sel(w=w_m, method="nearest")

        # Interpolate along g
        result = result.interp(g=g).drop_vars("g")

        # Interpolate on thermophysical dimensions
        result, x_ds = self._interp_thermophysical(
            ds, result, thermoprops, error_handling_config
        )

        # Drop thermophysical coordinates, ensure spectral dimension
        result = result.drop_vars(["p", "t", *x_ds])
        if "w" not in result.dims:
            result = result.expand_dims("w")

        return result.transpose("w", "z")


def get_absdb_type(mode: Literal["mono", "ckd"]) -> type:
    """
    Get the :class:`.AbsorptionDatabase` subtype that corresponds to the
    passed mode key.

    Parameters
    ----------
    mode : {"mono", "ckd"}
        Mode key.

    Returns
    -------
    type

    Raises
    ------
    ValueError
        If the ``mode`` value is unsupported.
    """
    if mode == "mono":
        return MonoAbsorptionDatabase
    elif mode == "ckd":
        return CKDAbsorptionDatabase
    else:
        raise ValueError(f"unsupported mode {mode!r}")
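

# Routing sketch: get_absdb_type maps a mode key to the matching subclass,
# which is how AbsorptionDatabase.convert dispatches (the path below is
# illustrative):
#
#     db_cls = get_absdb_type("mono")  # -> MonoAbsorptionDatabase
#     db = db_cls.from_directory("path/to/db", lazy=True)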