ASDCache 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {asdcache-0.2.3 → asdcache-0.2.4}/ASDCache/ASDCache.py +71 -128
- {asdcache-0.2.3 → asdcache-0.2.4}/ASDCache/__init__.py +4 -4
- {asdcache-0.2.3 → asdcache-0.2.4}/ASDCache/_version.py +2 -2
- asdcache-0.2.4/ASDCache/utils.py +66 -0
- {asdcache-0.2.3 → asdcache-0.2.4}/PKG-INFO +10 -5
- {asdcache-0.2.3 → asdcache-0.2.4}/pyproject.toml +25 -14
- {asdcache-0.2.3 → asdcache-0.2.4}/.gitignore +0 -0
- {asdcache-0.2.3 → asdcache-0.2.4}/LICENSE +0 -0
|
@@ -3,14 +3,16 @@
|
|
|
3
3
|
It contains both the [SpectraCache][(m).] and [BibCache][(m).] classes which allow you to interact with the ASD and the relevant bibliographic databases.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
import importlib.util
|
|
7
8
|
import warnings
|
|
9
|
+
import numpy as np
|
|
8
10
|
import pandas as pd
|
|
9
|
-
from requests_cache import CachedSession,
|
|
11
|
+
from requests_cache import CachedSession, CachedResponse, OriginalResponse
|
|
12
|
+
from requests import Response
|
|
10
13
|
from io import StringIO
|
|
11
14
|
from datetime import timedelta
|
|
12
15
|
import re
|
|
13
|
-
import numpy as np
|
|
14
16
|
from bs4 import BeautifulSoup
|
|
15
17
|
import sys
|
|
16
18
|
import logging
|
|
@@ -23,12 +25,10 @@ if importlib.util.find_spec("polars"):
|
|
|
23
25
|
else:
|
|
24
26
|
POLARS_AVAILABLE = False
|
|
25
27
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
stream=sys.stdout,
|
|
31
|
-
)
|
|
28
|
+
from .utils import wavenumber_to_refractive_index, extract_state_from_response
|
|
29
|
+
from ._version import version
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger("ASDCache")
|
|
32
32
|
|
|
33
33
|
ASDSchema = {
|
|
34
34
|
"element": str,
|
|
@@ -61,8 +61,6 @@ ASDSchema = {
|
|
|
61
61
|
"line_ref": str,
|
|
62
62
|
}
|
|
63
63
|
|
|
64
|
-
STATE_EXPR = r"spectra=([\w]+)\+?([IVX]+)?"
|
|
65
|
-
"""Regex pattern for extracting (element,charge) tuple for a single-state query, which uses roman numerals."""
|
|
66
64
|
SCI_EXPR = r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)"
|
|
67
65
|
"""Regex pattern for processing scientific notation"""
|
|
68
66
|
|
|
@@ -76,9 +74,9 @@ class SpectraCache:
|
|
|
76
74
|
|
|
77
75
|
The `ASDCache` instance acts as an access point to the cache, which stores responses on the local system in a SQLite database.
|
|
78
76
|
|
|
79
|
-
Data retrieval from cache is much faster (order milliseconds) than fetching from the internet (order seconds), and avoids wastefull requests to the server.
|
|
77
|
+
Data retrieval from cache is much faster (order of milliseconds) than fetching from the internet (order of seconds to minutes), and avoids wasteful requests to the server.
|
|
80
78
|
|
|
81
|
-
Cache time-to-live is
|
|
79
|
+
Cache time-to-live is two weeks by default.
|
|
82
80
|
|
|
83
81
|
Since the NIST ASD is usually updated less frequently than that, this is a compromise between having the latest data, and overall fast performance.
|
|
84
82
|
|
|
@@ -91,12 +89,12 @@ class SpectraCache:
|
|
|
91
89
|
"submit": "Retrieve Data",
|
|
92
90
|
"unit": 1,
|
|
93
91
|
"de": 0,
|
|
94
|
-
"plot_out": 0,
|
|
92
|
+
# "plot_out": 0,
|
|
95
93
|
"I_scale_type": 1,
|
|
96
94
|
"format": 3,
|
|
97
95
|
"line_out": 0,
|
|
98
|
-
"remove_js": "on",
|
|
99
|
-
"no_spaces": "on",
|
|
96
|
+
# "remove_js": "on",
|
|
97
|
+
# "no_spaces": "on",
|
|
100
98
|
"en_unit": 0,
|
|
101
99
|
"output": 0,
|
|
102
100
|
"bibrefs": 1,
|
|
@@ -124,48 +122,30 @@ class SpectraCache:
|
|
|
124
122
|
# "include_Ritz_E1": 1, # Does not appear mandatory in retrospect, see issue #1
|
|
125
123
|
}
|
|
126
124
|
"""Request parameters used by the NIST ASD form."""
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
"
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
"
|
|
143
|
-
"Ei(cm-1)",
|
|
144
|
-
"Ek(cm-1)",
|
|
145
|
-
"conf_i",
|
|
146
|
-
"term_i",
|
|
147
|
-
"J_i",
|
|
148
|
-
"conf_k",
|
|
149
|
-
"term_k",
|
|
150
|
-
"J_k",
|
|
151
|
-
"g_i",
|
|
152
|
-
"g_k",
|
|
153
|
-
"Type",
|
|
154
|
-
"tp_ref",
|
|
155
|
-
"line_ref",
|
|
156
|
-
]
|
|
157
|
-
"""Fixed order of columns for consistent schema of data."""
|
|
158
|
-
|
|
159
|
-
def __init__(self, use_polars_backend=False, cache_expiry=timedelta(weeks=1), strict_matching=True):
|
|
160
|
-
"""Initialize an instance that handles cached data lookup of the NIST ASD."""
|
|
125
|
+
|
|
126
|
+
def __init__(
|
|
127
|
+
self,
|
|
128
|
+
use_polars_backend=False,
|
|
129
|
+
cache_expiry=timedelta(weeks=2),
|
|
130
|
+
strict_matching=True,
|
|
131
|
+
cache_path: Optional[Path] = None,
|
|
132
|
+
):
|
|
133
|
+
"""Initialize an instance that handles cached data lookup of the NIST ASD.
|
|
134
|
+
|
|
135
|
+
Args:
|
|
136
|
+
use_polars_backend (bool): Flag to use polars as DataFrame backend, if available
|
|
137
|
+
cache_expiry (timedelta): Span of time beyond which an entry will be considered expired, and a refresh attempted
|
|
138
|
+
strict_matching (bool): If true, use all request parameters to hash urls for cache matching (recommended).
|
|
139
|
+
cache_path (Path, Optional): Path to a location to store the cache in
|
|
140
|
+
"""
|
|
161
141
|
self.strict_matching = strict_matching
|
|
162
142
|
self.session = CachedSession(
|
|
163
|
-
"NIST_ASD_cache",
|
|
143
|
+
"NIST_ASD_cache" if cache_path is None else cache_path,
|
|
164
144
|
use_cache_dir=True,
|
|
165
145
|
expire_after=cache_expiry,
|
|
166
146
|
stale_if_error=True,
|
|
167
147
|
filter_fn=self._check_response_success,
|
|
168
|
-
ignored_parameters=list(self.query_params.keys()) if self.strict_matching is False else
|
|
148
|
+
ignored_parameters=list(self.query_params.keys()) if self.strict_matching is False else [],
|
|
169
149
|
)
|
|
170
150
|
if (use_polars_backend) & (not POLARS_AVAILABLE):
|
|
171
151
|
warnings.warn("Cannot find `polars` as a backend, falling back to `pandas`", stacklevel=2)
|
|
@@ -209,9 +189,7 @@ class SpectraCache:
|
|
|
209
189
|
|
|
210
190
|
Note that this only works for data queries, not for bibliographic metadata by `BibCache`.
|
|
211
191
|
"""
|
|
212
|
-
return not (
|
|
213
|
-
not response.ok or response.content.startswith(b"<!DOCTYPE") or b"Error Message" in response.content
|
|
214
|
-
)
|
|
192
|
+
return not (not response.ok or response.content.startswith(b"<!DOCTYPE"))
|
|
215
193
|
|
|
216
194
|
def _get_data(self, species: str, wl_range: tuple[float, float] = (170, 1000), **kwargs) -> Response:
|
|
217
195
|
"""Retrieve raw, ASCII-formatted data from the NIST ASD with a GET request.
|
|
@@ -236,14 +214,17 @@ class SpectraCache:
|
|
|
236
214
|
response.raise_for_status()
|
|
237
215
|
# Check if response is not a HTML document instead of ASCII formatted data, indicating query error.
|
|
238
216
|
if response.content.startswith(b"<!DOCTYPE"):
|
|
239
|
-
|
|
240
|
-
|
|
217
|
+
body = BeautifulSoup(response.text, features="html.parser").body
|
|
218
|
+
reason = body.text.strip().replace("\n", " ") if body else ""
|
|
219
|
+
logger.error(
|
|
220
|
+
"NIST ASD responded with %s instead of ASCII-data for species=%s, wl_range=%s\nQuery: %s",
|
|
221
|
+
reason,
|
|
241
222
|
species,
|
|
242
223
|
wl_range,
|
|
243
224
|
response.url,
|
|
244
225
|
)
|
|
245
226
|
raise ASDQueryError(
|
|
246
|
-
f"Query for {species=} {wl_range=} did not receive ASCII-data. This means the ASD could not interpret your query. Check if your query is malformed."
|
|
227
|
+
f"Query for {species=} {wl_range=} did not receive ASCII-data. {reason=} This means the ASD could not interpret your query. Check if your query is malformed."
|
|
247
228
|
)
|
|
248
229
|
return response
|
|
249
230
|
|
|
@@ -260,7 +241,7 @@ class SpectraCache:
|
|
|
260
241
|
for elem in self.species_expr.search(u).group(1).split("%3B")
|
|
261
242
|
]
|
|
262
243
|
|
|
263
|
-
def fetch(self, species, wl_range=(170, 1000)
|
|
244
|
+
def fetch(self, species, wl_range=(170, 1000)) -> "pd.DataFrame|pl.DataFrame":
|
|
264
245
|
"""Fetch information on a species from the ASD, first checking the cache.
|
|
265
246
|
|
|
266
247
|
This supports loading multiple species in one go by using the same notation as the NIST ASD form.
|
|
@@ -273,7 +254,8 @@ class SpectraCache:
|
|
|
273
254
|
|
|
274
255
|
Both these operations will fetch data online and be stored as separate cache entries.
|
|
275
256
|
"""
|
|
276
|
-
|
|
257
|
+
# TODO: add kwargs for read-only/offline access etc.
|
|
258
|
+
response = self._get_data(species, wl_range)
|
|
277
259
|
return self.create_dataframe(response)
|
|
278
260
|
|
|
279
261
|
def create_dataframe(self, response) -> "pd.DataFrame|pl.DataFrame":
|
|
@@ -290,7 +272,7 @@ class SpectraCache:
|
|
|
290
272
|
|
|
291
273
|
Note that this conversion is only performed for lines with $200 nm < \lambda < 2000 nm$, like the ASD.
|
|
292
274
|
|
|
293
|
-
For lines outside of this range,
|
|
275
|
+
For lines outside of this range, it uses NaN values.
|
|
294
276
|
"""
|
|
295
277
|
schema = {
|
|
296
278
|
"obs_wl_vac(nm)": str,
|
|
@@ -331,25 +313,25 @@ class SpectraCache:
|
|
|
331
313
|
else:
|
|
332
314
|
df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
|
|
333
315
|
df["tp_ref"] = df.loc[:, "tp_ref"].fillna("")
|
|
334
|
-
df["obs_wl_air(nm)"] =
|
|
335
|
-
|
|
336
|
-
|
|
316
|
+
df["obs_wl_air(nm)"] = np.nan
|
|
317
|
+
air_equiv_range = df["wn(cm-1)"].between(5000, 50000) # range where air wavelength is computed.
|
|
318
|
+
df["obs_wl_air(nm)"] = df.loc[air_equiv_range, "obs_wl_vac(nm)"] / wavenumber_to_refractive_index(
|
|
319
|
+
df.loc[air_equiv_range, "wn(cm-1)"]
|
|
337
320
|
)
|
|
338
|
-
df["ritz_wl_air(nm)"] =
|
|
339
|
-
df["ritz_wl_air(nm)"] = df[
|
|
340
|
-
df[
|
|
321
|
+
df["ritz_wl_air(nm)"] = np.nan
|
|
322
|
+
df["ritz_wl_air(nm)"] = df.loc[air_equiv_range, "ritz_wl_vac(nm)"] / wavenumber_to_refractive_index(
|
|
323
|
+
df.loc[air_equiv_range, "wn(cm-1)"]
|
|
341
324
|
)
|
|
342
325
|
df = df.drop([c for c in df.columns if "Unnamed" in c], axis=1).reset_index(drop=True)
|
|
343
326
|
if "element" not in df.columns:
|
|
344
327
|
# cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
|
|
345
328
|
# As 'element' and 'sp_num' columns are only missing for single-species queries, assign as constants, not vectors.
|
|
346
|
-
element,
|
|
347
|
-
numeric: int = cls.roman_to_int(numeral)
|
|
329
|
+
element, numeric = extract_state_from_response(response)
|
|
348
330
|
df["element"] = element
|
|
349
331
|
df["sp_num"] = numeric
|
|
350
332
|
df["unc_obs_wl"] = pd.to_numeric(df["unc_obs_wl"]) if "unc_obs_wl" in df.columns else np.nan
|
|
351
333
|
df["unc_ritz_wl"] = pd.to_numeric(df["unc_ritz_wl"]) if "unc_ritz_wl" in df.columns else np.nan
|
|
352
|
-
return df.loc[:,
|
|
334
|
+
return df.loc[:, list(ASDSchema)]
|
|
353
335
|
|
|
354
336
|
@classmethod
|
|
355
337
|
def _from_polars(cls, response: Response) -> "pl.DataFrame":
|
|
@@ -359,7 +341,7 @@ class SpectraCache:
|
|
|
359
341
|
|
|
360
342
|
Note that this conversion is only performed for lines with $200 nm < \lambda < 2000 nm$, like the ASD.
|
|
361
343
|
|
|
362
|
-
For lines outside of this range,
|
|
344
|
+
For lines outside of this range, it uses NaN values.
|
|
363
345
|
"""
|
|
364
346
|
schema = {
|
|
365
347
|
"obs_wl_vac(nm)": pl.String,
|
|
@@ -401,67 +383,27 @@ class SpectraCache:
|
|
|
401
383
|
pl.col("tp_ref").replace(None, ""),
|
|
402
384
|
).drop([""])
|
|
403
385
|
# compute air wavelengths between 5000 cm-1 and 50000 cm-1
|
|
386
|
+
air_equiv_range = pl.col("wn(cm-1)").is_between(5000, 50000)
|
|
404
387
|
df = df.with_columns(
|
|
405
|
-
pl.when(
|
|
406
|
-
.then(
|
|
407
|
-
|
|
408
|
-
/ pl.col("wn(cm-1)").map_elements(cls.wn_to_n_refractive, return_dtype=pl.Float64)
|
|
409
|
-
)
|
|
410
|
-
.otherwise(pl.col("obs_wl_vac(nm)"))
|
|
411
|
-
.cast(pl.Float64)
|
|
388
|
+
pl.when(air_equiv_range)
|
|
389
|
+
.then(pl.col("obs_wl_vac(nm)") / wavenumber_to_refractive_index(pl.col("wn(cm-1)")))
|
|
390
|
+
.otherwise(np.nan)
|
|
412
391
|
.alias("obs_wl_air(nm)"),
|
|
413
|
-
pl.when(
|
|
414
|
-
.then(
|
|
415
|
-
|
|
416
|
-
/ pl.col("wn(cm-1)").map_elements(cls.wn_to_n_refractive, return_dtype=pl.Float64)
|
|
417
|
-
)
|
|
418
|
-
.otherwise(pl.col("ritz_wl_vac(nm)"))
|
|
419
|
-
.cast(pl.Float64)
|
|
392
|
+
pl.when(air_equiv_range)
|
|
393
|
+
.then(pl.col("ritz_wl_vac(nm)") / wavenumber_to_refractive_index(pl.col("wn(cm-1)")))
|
|
394
|
+
.otherwise(np.nan)
|
|
420
395
|
.alias("ritz_wl_air(nm)"),
|
|
421
396
|
)
|
|
422
397
|
if "element" not in df.columns:
|
|
423
|
-
element,
|
|
424
|
-
numeric: int = cls.roman_to_int(numeral) if numeral else 1
|
|
425
|
-
# cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
|
|
398
|
+
element, numeric = extract_state_from_response(response)
|
|
426
399
|
df = df.with_columns(pl.lit(element).alias("element"), pl.lit(numeric, dtype=pl.Int64).alias("sp_num"))
|
|
427
|
-
|
|
400
|
+
# Cast to float, or create column filled with `null` if missing.
|
|
401
|
+
exprs = [
|
|
402
|
+
(pl.col(c) if c in df.columns else pl.lit(None).alias(c)).cast(pl.Float64)
|
|
403
|
+
for c in ["unc_obs_wl", "unc_ritz_wl"]
|
|
404
|
+
]
|
|
428
405
|
df = df.with_columns(exprs)
|
|
429
|
-
return df.select(*
|
|
430
|
-
|
|
431
|
-
@staticmethod
|
|
432
|
-
def roman_to_int(roman: str) -> int:
|
|
433
|
-
"""Transform Roman numerals to integers.
|
|
434
|
-
|
|
435
|
-
Does only support numerals including up to `L`.
|
|
436
|
-
"""
|
|
437
|
-
roman_numerals = {"I": 1, "V": 5, "X": 10, "L": 50}
|
|
438
|
-
total = 0
|
|
439
|
-
previous = 0
|
|
440
|
-
for char in reversed(roman):
|
|
441
|
-
current_value = roman_numerals[char]
|
|
442
|
-
if current_value < previous:
|
|
443
|
-
total -= current_value # Subtract if the current value is less than the previous value
|
|
444
|
-
else:
|
|
445
|
-
total += current_value
|
|
446
|
-
previous = current_value
|
|
447
|
-
return total
|
|
448
|
-
|
|
449
|
-
@staticmethod
|
|
450
|
-
def wn_to_n_refractive(wavenumbers: float) -> float:
|
|
451
|
-
r"""Calculate the refractive index $n$ in air for a transition, using the 5-term Sellmeier formula used by NIST.
|
|
452
|
-
|
|
453
|
-
The used Sellmeier formula is the one from E.R. Peck and K. Reeder [J. Opt. Soc. Am. 62, 958 (1972)](http://dx.doi.org/10.1364/JOSA.62.000958).
|
|
454
|
-
|
|
455
|
-
This formula is fitted to data in the range of 185 nm to 1700 nm for air at 15 °C, 101 325 Pa pressure, with 0.033 % CO2.
|
|
456
|
-
|
|
457
|
-
This is the same formula used by the NIST ASD to calculate air wavelengths in the interval of 200 nm to 2000 nm.
|
|
458
|
-
|
|
459
|
-
See also [the ASD documentation on the topic](https://physics.nist.gov/PhysRefData/ASD/Html/lineshelp.html#Conversion%20between%20air%20and%20vacuum%20wavelengths).
|
|
460
|
-
|
|
461
|
-
Using this refractive index, air equivalent wavelengths consistent with the ASD can be calculated, without the need to query them separately.
|
|
462
|
-
"""
|
|
463
|
-
sigma = wavenumbers * 1e-4 # um^-1
|
|
464
|
-
return 1 + 1e-8 * (8060.51 + 2480990 / (132.274 - sigma**2) + 17455.7 / (39.32957 - sigma**2))
|
|
406
|
+
return df.select(*ASDSchema)
|
|
465
407
|
|
|
466
408
|
def get_all_cached(self) -> "pd.DataFrame|pl.DataFrame":
|
|
467
409
|
"""Retrieve all cached data into a single dataframe."""
|
|
@@ -530,7 +472,7 @@ class BibCache:
|
|
|
530
472
|
"""
|
|
531
473
|
is_success = (response.status_code == 200) & (b"There was a problem" not in response.content)
|
|
532
474
|
if not is_success:
|
|
533
|
-
|
|
475
|
+
logger.warning(f"Request was unsuccesful status:{response.status_code} , url:{response.url}")
|
|
534
476
|
return is_success
|
|
535
477
|
|
|
536
478
|
@classmethod
|
|
@@ -546,9 +488,10 @@ class BibCache:
|
|
|
546
488
|
comment (str): An additional comment included in the reference, can be fetched separately.
|
|
547
489
|
"""
|
|
548
490
|
if reference_code.startswith("n"):
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
491
|
+
return ("T", None, "n")
|
|
492
|
+
matched = cls.reference_expr.match(reference_code)
|
|
493
|
+
if (not reference_code.startswith("LS")) and (matched is not None):
|
|
494
|
+
db, ref, comment = matched.groups()
|
|
552
495
|
comment = comment if "LS" not in reference_code else "LS"
|
|
553
496
|
else:
|
|
554
497
|
db, ref, comment = "T", None, "LS"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
r"""`
|
|
1
|
+
r"""`ASDCache` is a package to fetch data from the NIST Atomic Spectra Database (ASD), utlizing caching for fast responses.
|
|
2
2
|
|
|
3
|
-
To make the most use out of the cache, `
|
|
3
|
+
To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
|
|
4
4
|
|
|
5
5
|
Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
|
|
6
6
|
|
|
@@ -13,12 +13,12 @@ To ensure a consistent schema of the retrieved data, lines are always retrieved
|
|
|
13
13
|
|
|
14
14
|
Wavenumbers and Ritz wavelength will be included in the response.
|
|
15
15
|
|
|
16
|
-
In the range $5000\ \mathrm{cm}^{-1}<\nu<50000\ \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][.
|
|
16
|
+
In the range $5000\ \mathrm{cm}^{-1}<\nu<50000\ \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][.utils.wavenumber_to_refractive_index]).
|
|
17
17
|
This is consistent with the approach of the ASD.
|
|
18
18
|
|
|
19
19
|
## Making use of the cache
|
|
20
20
|
|
|
21
|
-
Each response from the NIST page is cached (
|
|
21
|
+
Each response from the NIST page is cached (2 weeks by default) on the local system.
|
|
22
22
|
|
|
23
23
|
This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
|
|
24
24
|
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.2.
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 2,
|
|
21
|
+
__version__ = version = '0.2.4'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 2, 4)
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Module containing small helper utility functions for extracting and processing input from the ASD."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from requests import Response
|
|
8
|
+
|
|
9
|
+
# Value of every Roman numeral symbol understood by `roman_to_int`.
ROMAN_NUMERALS = {"I": 1, "V": 5, "X": 10, "L": 50, "C": 100, "D": 500, "M": 1000}
# The numeral group accepts every symbol in ROMAN_NUMERALS. With only `[IVX]`, a stage of XL or
# higher (e.g. `spectra=W+LXXIV`) left the group unmatched and was silently treated as stage 1.
STATE_EXPR = r"spectra=([\w]+)\+?([IVXLCDM]+)?"
"""Regex pattern for extracting (element,charge) tuple for a single-state query, which uses roman numerals."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def roman_to_int(roman: str) -> int:
    """Convert a Roman numeral string to its integer value.

    The input is upper-cased and stripped of surrounding whitespace first.
    Symbols are looked up in `ROMAN_NUMERALS`, so numerals up to "M" are supported.

    A symbol that is smaller than the symbol directly to its right is subtracted
    (e.g. the "I" in "IV"), every other symbol is added.

    An empty string yields 0; a symbol missing from `ROMAN_NUMERALS` raises a `KeyError`.
    """
    # Symbol values from right to left, so the rightmost unknown symbol is the one reported.
    values = [ROMAN_NUMERALS[symbol] for symbol in reversed(roman.upper().strip())]
    # Pair every value with its right-hand neighbour; the rightmost symbol has none (0).
    pairs = zip(values, [0, *values])
    return sum(-value if value < neighbour else value for value, neighbour in pairs)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def wavenumber_to_refractive_index(wavenumbers: float) -> float:
    r"""Return the refractive index $n$ of air for a transition at the given wavenumber (in cm-1).

    The value follows the 5-term Sellmeier formula of E.R. Peck and K. Reeder [J. Opt. Soc. Am. 62, 958 (1972)](http://dx.doi.org/10.1364/JOSA.62.000958), which is the formula used by NIST.

    That formula is fitted to data in the range of 185 nm to 1700 nm for air at 15 °C, 101 325 Pa pressure, with 0.033 % CO2.

    The NIST ASD applies the same formula to calculate air wavelengths in the interval of 200 nm to 2000 nm.

    See also [the ASD documentation on the topic](https://physics.nist.gov/PhysRefData/ASD/Html/lineshelp.html#Conversion%20between%20air%20and%20vacuum%20wavelengths).

    Dividing a vacuum wavelength by this index gives an air equivalent wavelength consistent with the ASD, without the need to query it separately.
    """
    sigma_squared = (wavenumbers * 1e-4) ** 2  # wavenumber converted to um^-1, then squared
    dispersion = 8060.51 + 2480990 / (132.274 - sigma_squared) + 17455.7 / (39.32957 - sigma_squared)
    return 1 + 1e-8 * dispersion
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def extract_state_from_response(response: "Response") -> tuple[str, int]:
    """Extract the element and ionization state from the url of a response.

    When querying only a single state, e.g. 'H I', this information will not be present as a column in data: the `element` and `sp_num` columns will not be included.

    This information is parsed from the query url instead, so it can be added.

    Since the `sp_num` column is of an integer type, the roman numerals in the url are converted to integers.

    Args:
        response (Response): The response whose `url` contains the `spectra` query parameter.

    Returns:
        The `(element, sp_num)` pair; `sp_num` is 1 when the url carries no roman numeral.

    Raises:
        ValueError: If the url contains no `spectra` parameter matching `STATE_EXPR`.
    """
    matched = re.search(STATE_EXPR, str(response.url))
    if not matched:
        # Exceptions do not interpolate logging-style `%s` arguments, so format the message here.
        raise ValueError(
            f"URL did not contain a `spectra` parameter satisfying {STATE_EXPR}; "
            "Could not identify element and sp_num"
        )
    element, numeral = matched.groups()
    # A missing numeral is treated as the first spectrum (sp_num 1).
    numeric: int = roman_to_int(numeral) if numeral else 1
    return element, numeric
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ASDCache
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: A Python module to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling
|
|
5
5
|
Project-URL: Documentation, https://antoinetue.github.io/asdcache
|
|
6
6
|
Project-URL: Source, https://github.com/AntoineTUE/asdcache
|
|
@@ -12,14 +12,16 @@ Classifier: Development Status :: 4 - Beta
|
|
|
12
12
|
Classifier: Intended Audience :: Science/Research
|
|
13
13
|
Classifier: License :: OSI Approved :: MIT License
|
|
14
14
|
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
17
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
18
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
19
|
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
21
|
Classifier: Topic :: Scientific/Engineering
|
|
20
22
|
Requires-Python: >=3.9
|
|
21
23
|
Requires-Dist: beautifulsoup4>=4.12
|
|
22
|
-
Requires-Dist: numpy>=
|
|
24
|
+
Requires-Dist: numpy>=1.20.3
|
|
23
25
|
Requires-Dist: pandas>=2.0
|
|
24
26
|
Requires-Dist: requests-cache>=1.2.0
|
|
25
27
|
Provides-Extra: docs
|
|
@@ -37,8 +39,8 @@ Requires-Dist: pygments>=2.20.0; extra == 'docs'
|
|
|
37
39
|
Requires-Dist: ruff>=0.15.13; extra == 'docs'
|
|
38
40
|
Provides-Extra: polars
|
|
39
41
|
Requires-Dist: polars[pandas]; extra == 'polars'
|
|
40
|
-
Provides-Extra: polars-
|
|
41
|
-
Requires-Dist: polars[pandas,rtcompat]; extra == 'polars-
|
|
42
|
+
Provides-Extra: polars-compat
|
|
43
|
+
Requires-Dist: polars[pandas,rtcompat]; extra == 'polars-compat'
|
|
42
44
|
Description-Content-Type: text/markdown
|
|
43
45
|
|
|
44
46
|
# ASDCache
|
|
@@ -52,6 +54,7 @@ Description-Content-Type: text/markdown
|
|
|
52
54
|
[](https://pypi.python.org/pypi/ASDCache)
|
|
53
55
|
[](https://pypi.python.org/pypi/ASDCache)
|
|
54
56
|
[](https://pypistats.org/packages/asdcache)
|
|
57
|
+
[](https://pepy.tech/projects/asdcache)
|
|
55
58
|
[](https://github.com/astral-sh/ruff)
|
|
56
59
|
[](https://github.com/pypa/hatch)
|
|
57
60
|
|
|
@@ -69,7 +72,7 @@ The main goals and benefits of `ASDCache` are:
|
|
|
69
72
|
- [x] Retrieve a consistent schema of the data that represents the 'human readable' format, but enforce strictly numeric data for important columns
|
|
70
73
|
- [ ] This removes footnotes and other annotations, be sure to check the ASD itself as well for this information.
|
|
71
74
|
- [x] Use caching to dramatically speed up data retrieval, from minutes down to milliseconds in some cases
|
|
72
|
-
- [x] Cache time-to-live is
|
|
75
|
+
- [x] Cache time-to-live is two weeks by default, meaning you still get updates to the ASD in a reasonable time frame
|
|
73
76
|
- [x] The cache time-to-live can be adjusted
|
|
74
77
|
- [x] Cache data to allow working offline, or even transferring the ASD data to an offline system.
|
|
75
78
|
- [x] The cache is only updated when a request for new data succeeds
|
|
@@ -78,6 +81,7 @@ The main goals and benefits of `ASDCache` are:
|
|
|
78
81
|
`ASDCache` is not affiliated with NIST or the NIST ASD in any way, it simply tries to help make it more accessible.
|
|
79
82
|
|
|
80
83
|
## Installing
|
|
84
|
+
|
|
81
85
|
`ASDCache` can be installed with `pip`.
|
|
82
86
|
|
|
83
87
|
```console
|
|
@@ -99,6 +103,7 @@ Installing the `polars` feature is not required, in case `polars` is already ins
|
|
|
99
103
|
Documentation for `ASDCache` is available on [this page](https://antoinetue.github.io/ASDCache).
|
|
100
104
|
|
|
101
105
|
### Example
|
|
106
|
+
|
|
102
107
|
A brief example below demonstrates how to use `SpectraCache` to query the NIST ASD for spectroscopic data for different species and plot their respective relative intensities.
|
|
103
108
|
|
|
104
109
|
Note that these relative intensities are in principle not comparable between different species or sources and merely serve as a guide.
|
|
@@ -19,17 +19,19 @@ classifiers = [
|
|
|
19
19
|
"Topic :: Scientific/Engineering",
|
|
20
20
|
"Intended Audience :: Science/Research",
|
|
21
21
|
"Operating System :: OS Independent",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
22
23
|
"Programming Language :: Python :: 3.10",
|
|
23
24
|
"Programming Language :: Python :: 3.11",
|
|
24
25
|
"Programming Language :: Python :: 3.12",
|
|
25
26
|
"Programming Language :: Python :: 3.13",
|
|
27
|
+
"Programming Language :: Python :: 3.14",
|
|
26
28
|
]
|
|
27
|
-
dependencies = ["requests_cache>=1.2.0", "pandas>=2.0","numpy>=
|
|
29
|
+
dependencies = ["requests_cache>=1.2.0", "pandas>=2.0","numpy>=1.20.3", "beautifulsoup4>=4.12"]
|
|
28
30
|
dynamic = ["version"]
|
|
29
31
|
|
|
30
32
|
[project.optional-dependencies]
|
|
31
33
|
polars = ["polars[pandas]"]
|
|
32
|
-
polars-
|
|
34
|
+
polars-compat = ["polars[rtcompat,pandas]"]
|
|
33
35
|
docs = [
|
|
34
36
|
"properdocs>=1.6.7",
|
|
35
37
|
"mkdocs-material==9.7.6",
|
|
@@ -69,7 +71,7 @@ exclude = ["/.github"]
|
|
|
69
71
|
minversion = "6.0"
|
|
70
72
|
addopts = "-ra -q --doctest-glob='*.md'"
|
|
71
73
|
testpaths = ["tests"]
|
|
72
|
-
markers = ["
|
|
74
|
+
markers = ["online: run test that retrieve data online from the ASD"]
|
|
73
75
|
|
|
74
76
|
[tool.coverage.run]
|
|
75
77
|
branch = true
|
|
@@ -158,7 +160,6 @@ dependencies = [
|
|
|
158
160
|
"pytest-recording",
|
|
159
161
|
"pytest-sugar~=1.1.1",
|
|
160
162
|
"hypothesis",
|
|
161
|
-
'virtualenv<21; python_version<"3.10"',
|
|
162
163
|
]
|
|
163
164
|
|
|
164
165
|
|
|
@@ -191,22 +192,32 @@ fix = [
|
|
|
191
192
|
|
|
192
193
|
[tool.hatch.envs.hatch-test.overrides]
|
|
193
194
|
matrix.pandas.dependencies = [
|
|
194
|
-
{ value = "pandas>=2.0
|
|
195
|
-
{ value = "
|
|
196
|
-
|
|
197
|
-
|
|
195
|
+
{ value = "pandas>=2.0", if = ["pandas-2.x"] },
|
|
196
|
+
{ value = "pandas>=3.0", if = ["pandas-3.x"] },
|
|
197
|
+
]
|
|
198
|
+
matrix.numpy.dependencies = [
|
|
199
|
+
{ value = "numpy>=2.0", if = ["numpy-2.x"] },
|
|
200
|
+
{value = "numpy<2.0", if = ["numpy-legacy"]},
|
|
198
201
|
]
|
|
199
202
|
matrix.polars.features = [
|
|
200
203
|
{ value = "polars", if = ["polars"]},
|
|
201
|
-
{ value = "polars-
|
|
204
|
+
{ value = "polars-compat", if = ["polars-compat"]},
|
|
202
205
|
]
|
|
203
206
|
|
|
204
207
|
[[tool.hatch.envs.hatch-test.matrix]]
|
|
205
208
|
python = ["3.9","3.10"]
|
|
206
|
-
|
|
207
|
-
|
|
209
|
+
numpy = ["numpy-2.x","numpy-legacy"]
|
|
210
|
+
pandas = ["pandas-2.x"]
|
|
211
|
+
polars = ["polars","polars-compat"]
|
|
212
|
+
|
|
213
|
+
[[tool.hatch.envs.hatch-test.matrix]]
|
|
214
|
+
python = ["3.11","3.12","3.13", "3.14"]
|
|
215
|
+
numpy = ["numpy-2.x","numpy-legacy"]
|
|
216
|
+
pandas = ["pandas-2.x","pandas-3.x"]
|
|
217
|
+
polars = ["polars","polars-compat"]
|
|
208
218
|
|
|
209
219
|
[[tool.hatch.envs.hatch-test.matrix]]
|
|
210
|
-
python = ["3.
|
|
211
|
-
|
|
212
|
-
|
|
220
|
+
python = ["3.13", "3.14"]
|
|
221
|
+
numpy = ["numpy-2.x"]
|
|
222
|
+
pandas = ["pandas-2.x","pandas-3.x"]
|
|
223
|
+
polars = ["polars","polars-compat"]
|
|
File without changes
|
|
File without changes
|