ASDCache 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ASDCache/ASDCache.py ADDED
@@ -0,0 +1,538 @@
1
+ r"""`ASDcache` is a module to fetch data from the NIST Atomic Spectra Database (ASD), utilizing caching for fast responses.
2
+
3
+ To make the most use out of the cache, `ASDcache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
4
+
5
+ Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
6
+ The benefit of this format is that it is more 'machine readable' than the formatted ASCII of HTML options.
7
+ This means it requires far less bespoke parsing to get rid of 'human readable' features such as repeated page column headers, or empty lines.
8
+ To ensure a consistent schema of the retrieved data, lines are always retrieved as a function of wavelength, using `vacuum wavelength`, even between 200 to 2000 nm.
9
+ Wavenumbers and Ritz wavelength will be included in the response.
10
+
11
+ In the range $5000 \mathrm{cm}^{-1}<\nu<50000 \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][ASDcache.readASD.ASDCache.wn_to_n_refractive]).
12
+ This is consistent with the approach of the ASD.
13
+
14
+ Each response from the NIST page is cached (1 week by default) on the local system.
15
+ This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
16
+ As an example: reading all spectra between 200 and 1000 nm can take over 2 minutes without using the cache, but can be as fast as 0.2 seconds using the `polars` backend.
17
+ In addition, it means that an internet connection is not required after initial data fetching.
18
+ The cached response is only updated upon successful retrieval of a new response of the NIST page.
19
+ If unable to successfully fetch new data, we fall back to a 'stale' cached response.
20
+
21
+ The cache can be shared to another system, to give offline/airgapped systems access to the same data.
22
+ To that end, the file `NIST_ASD_cache.sqlite` in the user's cache directory has to be copied over.
23
+
24
+ The standard cache directories are as follows:
25
+
26
+ === "Windows"
27
+ `%USERPROFILE%/AppData/Local`
28
+ === "Linux"
29
+ `~/.cache/http_cache/`
30
+ === "MacOS"
31
+ `/Users/user/Library/Caches/http_cache/`
32
+
33
+ Queries to the NIST ASD are hashed by the keys (or parameters) of the requests.
34
+ This means that any change to either one of these parameters, will result in a new cache entry, even if the returned data is equivalent.
35
+ """
36
+
37
+ import importlib
38
+ import warnings
39
+ import pandas as pd
40
+ from requests_cache import CachedSession, CachedResponse
41
+ from io import StringIO
42
+ from datetime import timedelta
43
+ import re
44
+ import numpy as np
45
+ from bs4 import BeautifulSoup
46
+ import sys
47
+ import logging
48
+ from typing import Any, Optional
49
+
50
# Detect the optional `polars` backend by simply trying to import it.
# `import importlib` alone does not guarantee that the `importlib.util` submodule is
# loaded, so `importlib.util.find_spec(...)` may raise AttributeError; a guarded import
# avoids that and also covers a `polars` that is discoverable but fails to import.
try:
    import polars as pl
except ImportError:
    # `True` when `polars` is installed and importable in the active environment.
    POLARS_AVAILABLE = False
else:
    POLARS_AVAILABLE = True

# NOTE(review): this configures the *root* logger at import time, which affects the host
# application; kept unchanged because users may rely on the resulting log output.
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s [%(name)s.%(funcName)s:%(lineno)d] %(message)s",
    datefmt="%d/%b/%Y %H:%M:%S",
    stream=sys.stdout,
)
63
+
64
+
65
class SpectraCache:
    """Entry point for retrieving NIST Atomic Spectra Database (ASD) line data through a local cache.

    A `SpectraCache` instance wraps a `requests_cache.CachedSession` named `NIST_ASD_cache`,
    created with `use_cache_dir=True`, so responses are stored on the local system.

    Data retrieval from cache is much faster (order milliseconds) than fetching from the internet
    (order seconds), and avoids wasteful requests to the server.

    Cache time-to-live is one week by default.
    Since the NIST ASD is usually updated less frequently than that, this is a compromise between
    having the latest data, and overall fast performance.

    Note that the same cache is shared across different class-instances, thread-safety is not guaranteed.
    """

    nist_url = "https://physics.nist.gov/cgi-bin/ASD/lines1.pl"
    species_expr = re.compile(r"spectra=([\w\+\-\%3]+)&")
    # Extracts `(element, roman numeral)` from the query URL of a single-species request.
    _single_species_expr = re.compile(r"spectra=([\w]+)\+?([IVX]+)?")
    # Captures the first plain/scientific-notation number in an annotated ASD table cell.
    _numeric_pattern = r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)"
    query_params = {
        "unit": 1,
        "de": 0,
        "plot_out": 0,
        "I_scale_type": 1,
        "format": 3,
        "line_out": 0,
        "remove_js": "on",
        "no_spaces": "on",
        "en_unit": 0,
        "output": 0,
        "bibrefs": 1,
        "show_obs_wl": 1,
        "show_calc_wl": 1,
        "show_wn": 1,
        "unc_out": 1,
        "order_out": 0,
        "show_av": 3,  # 3: wavelength in vac, 2: wavelength in air
        "tsb_value": 0,
        "A_out": 0,
        "S_out": "on",
        "f_out": "on",
        "loggf_out": "on",
        "intens_out": "on",
        "conf_out": "on",
        "term_out": "on",
        "enrg_out": "on",
        "J_out": "on",
        "g_out": "on",
        "diag_out": "on",
        "allowed_out": 1,
        "forbid_out": 1,
        "submit": "Retrieve Data",
    }
    """Request parameters used by the NIST ASD form."""
    column_order = [
        "element",
        "sp_num",
        "obs_wl_vac(nm)",
        "unc_obs_wl",
        "obs_wl_air(nm)",
        "ritz_wl_vac(nm)",
        "unc_ritz_wl",
        "ritz_wl_air(nm)",
        "wn(cm-1)",
        "intens",
        "Aki(s^-1)",
        "fik",
        "S(a.u.)",
        "log_gf",
        "Acc",
        "Ei(cm-1)",
        "Ek(cm-1)",
        "conf_i",
        "term_i",
        "J_i",
        "conf_k",
        "term_k",
        "J_k",
        "g_i",
        "g_k",
        "Type",
        "tp_ref",
        "line_ref",
    ]
    """Fixed order of columns for consistent schema of data."""

    def __init__(self, use_polars_backend=False, cache_expiry=timedelta(weeks=1), strict_matching=True):
        """Initialize an instance that handles cached data lookup of the NIST ASD.

        Args:
            use_polars_backend (bool): Return `polars` dataframes instead of `pandas` ones.
                Falls back to `pandas` (with a warning) when `polars` is not available.
            cache_expiry (timedelta): Passed to the cached session as `expire_after`.
            strict_matching (bool): When exactly `False`, the fixed form parameters in
                `query_params` are passed to the session as `ignored_parameters`.
        """
        self.strict_matching = strict_matching
        self.cache_expiry = cache_expiry
        self.session = CachedSession(
            "NIST_ASD_cache",
            use_cache_dir=True,
            expire_after=cache_expiry,
            stale_if_error=True,
            filter_fn=self._check_response_success,
            ignored_parameters=list(self.query_params.keys()) if self.strict_matching is False else None,
        )
        if use_polars_backend and not POLARS_AVAILABLE:
            warnings.warn("Cannot find `polars` as a backend, falling back to `pandas`", stacklevel=2)
            self.use_polars = False
        else:
            self.use_polars = use_polars_backend

        self.known_species = self.list_cached_species()

    @staticmethod
    def _check_response_success(response: "CachedResponse") -> bool:
        """Validate that data has been fetched successfully.

        A response counts as successful when its status code is 200 and its body does not
        contain the marker `Error Message`.
        If this check fails, the cache should not update with this response, even when marked as stale.
        """
        return response.status_code == 200 and b"Error Message" not in response.content

    def list_cached_species(self) -> list[str]:
        """List all species in the cache, based on the string of the original query URL.

        Cached URLs that do not contain a recognisable `spectra=...&` parameter are skipped.
        """
        species = []
        for url in self.session.cache.urls():
            found = self.species_expr.search(url)
            if found is None:
                continue
            # Multiple species in one query are separated by an URL-encoded `;` (`%3B`).
            species.extend(entry.replace("+", " ") for entry in found.group(1).split("%3B"))
        return species

    def fetch(self, species, wl_range=(170, 1000), **kwargs) -> "pd.DataFrame|pl.DataFrame|CachedResponse":
        """Fetch information on a species from the ASD, first checking the cache.

        This supports loading multiple species in one go by using the same notation as the NIST ASD page.

        Note however that cache keys are computed for unique options for `species` and `wl_range`.
        This means that you won't get caching benefits by using different queries.
        In other words: the cache cannot deduplicate queries such as `ASD.fetch('H', (200,1000))`
        followed by `ASD.fetch('H I', (650,660))`.
        Both these operations will fetch data online and be stored as separate cache entries.

        Args:
            species: The spectra to request, in the notation of the NIST ASD form.
            wl_range: Two wavelength bounds; the smaller is sent as `low_w`, the larger as `upp_w`.
            **kwargs: Accepted for backward compatibility; currently unused.

        Returns:
            A dataframe of the requested lines, using the backend chosen at instantiation.

        Raises:
            requests.HTTPError: If the response has an error status code.
        """
        query_params = {
            "spectra": species,
            "output_type": 0,
            "low_w": min(wl_range),
            "upp_w": max(wl_range),
            **self.query_params,
        }
        response = self.session.get(self.nist_url, params=query_params)
        response.raise_for_status()
        return self.create_dataframe(response)

    def create_dataframe(self, response) -> "pd.DataFrame|pl.DataFrame":
        """Create a dataframe from the (cached) NIST ASD response, using the chosen backend at class instantiation."""
        if self.use_polars:
            return self._from_polars(response)
        return self._from_pandas(response)

    @classmethod
    def _species_from_url(cls, url: str) -> "tuple[str, str]":
        """Extract `(element, roman numeral)` from the query URL of a single-species request.

        The numeral defaults to `"I"` when the query only names the element.

        Raises:
            ValueError: If the URL does not contain a `spectra=` parameter.
        """
        found = cls._single_species_expr.search(url)
        if found is None:
            raise ValueError(f"Cannot determine the species from url: {url}")
        element, numeral = found.groups()
        return element, "I" if numeral is None else numeral

    @classmethod
    def _from_pandas(cls, response: "CachedResponse") -> "pd.DataFrame":
        r"""Transform a (cached) NIST ASD response into a pandas DataFrame.

        Calculates the air equivalent wavelength from the vacuum wavelength using
        [wn_to_n_refractive][ASDCache.ASDCache.SpectraCache.wn_to_n_refractive].

        This conversion is only performed for lines with $5000 \leq \nu \leq 50000$ (in `wn(cm-1)`).
        For lines outside of this range, the air wavelength falls back to the vacuum wavelength.
        """
        schema = {
            "obs_wl_vac(nm)": str,
            "ritz_wl_vac(nm)": str,
            "wn(cm-1)": float,
            "intens": str,
            "Aki(s^-1)": float,
            "fik": float,
            "S(a.u.)": float,
            "log_gf": float,
            "Acc": str,
            "Ei(cm-1)": str,
            "Ek(cm-1)": str,
            "conf_i": str,
            "conf_k": str,
            "term_i": str,
            "term_k": str,
            "g_i": float,
            "g_k": float,
            "J_i": str,
            "J_k": str,
        }
        df = pd.read_csv(StringIO(response.text), sep="\t", dtype=schema)

        # These columns can carry annotations around the number; keep only the numeric part.
        for col in ("obs_wl_vac(nm)", "ritz_wl_vac(nm)", "intens", "Ei(cm-1)", "Ek(cm-1)"):
            df[col] = df[col].str.extract(cls._numeric_pattern, expand=False).astype(float)

        # Fill missing values explicitly instead of relying on `astype(str)` producing "nan".
        transition_type = df["Type"].astype(object)
        df["Type"] = transition_type.where(transition_type.notna(), "E1").astype(str)
        tp_ref = df["tp_ref"].astype(object)
        df["tp_ref"] = tp_ref.where(tp_ref.notna(), "")

        # Air wavelengths: convert only inside the supported wavenumber range and keep the
        # vacuum value everywhere else (a plain masked assignment would leave NaN there).
        in_air_range = df["wn(cm-1)"].between(5000, 50000)
        n_air = cls.wn_to_n_refractive(df.loc[in_air_range, "wn(cm-1)"])
        for kind in ("obs", "ritz"):
            vacuum = df[f"{kind}_wl_vac(nm)"]
            air = vacuum.copy()
            air.loc[in_air_range] = vacuum.loc[in_air_range] / n_air
            df[f"{kind}_wl_air(nm)"] = air

        # A trailing separator in the response yields an "Unnamed: N" column; drop it.
        df = df.drop(columns=[c for c in df.columns if "Unnamed" in c]).reset_index(drop=True)

        if "element" not in df.columns:
            # Single-species responses lack these columns; recover them from the query URL and
            # store the ionization state as an int, consistent with multi-state queries (e.g. Ar I-II).
            element, numeral = cls._species_from_url(response.url)
            df["element"] = element
            df["sp_num"] = cls.roman_to_int(numeral)

        if "unc_obs_wl" in df.columns:
            df = df.assign(unc_obs_wl=df["unc_obs_wl"].astype(float), unc_ritz_wl=df["unc_ritz_wl"].astype(float))
        else:
            df = df.assign(unc_obs_wl=np.nan, unc_ritz_wl=np.nan)
        return df.loc[:, cls.column_order]

    @classmethod
    def _from_polars(cls, response: "CachedResponse") -> "pl.DataFrame":
        r"""Transform a (cached) NIST ASD response into a polars DataFrame.

        Calculates the air equivalent wavelength from the vacuum wavelength using
        [wn_to_n_refractive][ASDCache.ASDCache.SpectraCache.wn_to_n_refractive].

        This conversion is only performed for lines with $5000 \leq \nu \leq 50000$ (in `wn(cm-1)`).
        For lines outside of this range, the air wavelength falls back to the vacuum wavelength.
        """
        schema = {
            "obs_wl_vac(nm)": pl.String,
            "ritz_wl_vac(nm)": pl.String,
            "wn(cm-1)": pl.Float64,
            "intens": pl.String,
            "Aki(s^-1)": pl.Float64,
            "fik": pl.Float64,
            "S(a.u.)": pl.Float64,
            "log_gf": pl.Float64,
            "Acc": pl.String,
            "Ei(cm-1)": pl.String,
            "Ek(cm-1)": pl.String,
            "conf_i": pl.String,
            "conf_k": pl.String,
            "term_i": pl.String,
            "term_k": pl.String,
            "g_i": pl.Float64,
            "g_k": pl.Float64,
            "J_i": pl.String,
            "J_k": pl.String,
            "": pl.String,
        }
        df = (
            pl.read_csv(
                StringIO(response.text),
                separator="\t",
                schema_overrides=schema,
                null_values="",
            )
            .with_columns(
                # Keep only the numeric part of cells that can carry annotations.
                pl.col("obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens")
                .str.extract(cls._numeric_pattern)
                .replace("", None)
                .cast(pl.Float64),
                pl.col("ritz_wl_vac(nm)").str.strip_chars('"+*').replace("", None).cast(pl.Float64),
                pl.col("S(a.u.)").cast(pl.Float64),
                pl.col("Type").replace(None, "E1"),
                pl.col("tp_ref").replace(None, ""),
            )
            # The trailing separator in the response yields a column with an empty name.
            .drop([""])
        )

        in_air_range = pl.col("wn(cm-1)").is_between(5000, 50000)
        n_air = pl.col("wn(cm-1)").map_elements(cls.wn_to_n_refractive, return_dtype=pl.Float64)
        df = df.with_columns(
            *[
                pl.when(in_air_range)
                .then(pl.col(f"{kind}_wl_vac(nm)").cast(pl.Float64) / n_air)
                .otherwise(pl.col(f"{kind}_wl_vac(nm)"))
                .cast(pl.Float64)
                .alias(f"{kind}_wl_air(nm)")
                for kind in ("obs", "ritz")
            ]
        )

        if "element" not in df.columns:
            # Single-species responses lack these columns; recover them from the query URL and
            # store the ionization state as an int, consistent with multi-state queries (e.g. Ar I-II).
            element, numeral = cls._species_from_url(response.url)
            df = df.with_columns(
                pl.lit(element).alias("element"),
                pl.lit(cls.roman_to_int(numeral), dtype=pl.Int64).alias("sp_num"),
            )

        if "unc_obs_wl" in df.columns:
            df = df.with_columns(pl.col("unc_obs_wl").cast(pl.Float64), pl.col("unc_ritz_wl").cast(pl.Float64))
        else:
            df = df.with_columns(
                pl.lit(None).cast(pl.Float64).alias("unc_obs_wl"),
                pl.lit(None).cast(pl.Float64).alias("unc_ritz_wl"),
            )

        return df.select(*cls.column_order)

    @staticmethod
    def roman_to_int(roman: str) -> int:
        """Transform Roman numerals to integers.

        Supports the symbols `I`, `V`, `X`, `L`, `C`, `D` and `M`, including subtractive
        notation such as `IV` or `XL`.

        Raises:
            KeyError: If `roman` contains a character that is not a supported symbol.
        """
        symbol_values = {"I": 1, "V": 5, "X": 10, "L": 50, "C": 100, "D": 500, "M": 1000}
        total = 0
        previous = 0
        # Walk right-to-left: a symbol smaller than its right-hand neighbour is subtracted.
        for symbol in reversed(roman):
            value = symbol_values[symbol]
            total += -value if value < previous else value
            previous = value
        return total

    @staticmethod
    def wn_to_n_refractive(wavenumbers: float) -> float:
        r"""Calculate the refractive index $n$ in air for a transition, using the 5-term Sellmeier formula used by NIST.

        The used Sellmeier formula is the one from E.R. Peck and K. Reeder [J. Opt. Soc. Am. 62, 958 (1972)](http://dx.doi.org/10.1364/JOSA.62.000958).

        This formula is fitted to data in the range of 185 nm to 1700 nm for air at 15 °C, 101 325 Pa pressure, with 0.033 % CO2.

        This is the same formula used by the NIST ASD to calculate air wavelengths in the interval of 200 nm to 2000 nm.

        See also [the ASD documentation on the topic](https://physics.nist.gov/PhysRefData/ASD/Html/lineshelp.html#Conversion%20between%20air%20and%20vacuum%20wavelengths).

        Using this refractive index, air equivalent wavelengths consistent with the ASD can be calculated, without the need to query them separately.

        Args:
            wavenumbers: Wavenumber(s) in cm^-1; only arithmetic operators are applied, so
                array-like inputs that support them (e.g. a pandas Series) work element-wise.
        """
        sigma_squared = (wavenumbers * 1e-4) ** 2  # (um^-1)^2
        return 1 + 1e-8 * (8060.51 + 2480990 / (132.274 - sigma_squared) + 17455.7 / (39.32957 - sigma_squared))

    def get_all_cached(self) -> "pd.DataFrame|pl.DataFrame":
        """Retrieve all cached data into a single, de-duplicated dataframe.

        Returns an empty dataframe with the columns of `column_order` when the cache holds no responses.
        """
        cached_frames = [self.create_dataframe(cached) for cached in self.session.cache.filter()]
        if not cached_frames:
            # Concatenating an empty list raises; return an empty frame with the fixed columns instead.
            if self.use_polars:
                return pl.DataFrame(schema=self.column_order)
            return pd.DataFrame(columns=self.column_order)
        if self.use_polars:
            return pl.concat(cached_frames).unique()
        return pd.concat(cached_frames).drop_duplicates().reset_index(drop=True)
413
+
414
+
415
class BibCache:
    r"""A class for handling lookups of bibliographic metadata from the NIST ASD.

    Supports both bibliographic reference databases curated by NIST:

    * Atomic Transition Probability Bibliographic Database: [10.18434/T46C7N](https://doi.org/10.18434/T46C7N)
    * Atomic Energy Levels and Spectral Bibliographic Database: [10.18434/T40K53](https://doi.org/10.18434/T40K53)

    References to these databases in the NIST ASD data can be looked up and will be cached.
    """

    nist_url = "https://physics.nist.gov/cgi-bin/ASBib1/get_ASBib_ref.cgi"
    # Optional database letter, optional numeric id, optional lowercase comment code.
    # Every group is optional, so this pattern matches any string (possibly with empty groups).
    reference_expr = re.compile(r"([A-Z])?([\d]+)?([a-z]+[\d]*)?")

    def __init__(self, cache_expiry=timedelta(weeks=1)):
        """Initialize an instance that handles cached retrieval of ASD bibliographic references.

        Args:
            cache_expiry (timedelta): Passed to the cached session as `expire_after`.
        """
        self.cache_expiry = cache_expiry
        self.session = CachedSession(
            "NIST_ASD_Bibliography_cache",
            use_cache_dir=True,
            expire_after=cache_expiry,
            stale_if_error=True,
            filter_fn=self._check_response_success,
            ignored_parameters=["element", "spectr_charge", "type", "ref"],
        )

    @staticmethod
    def _check_response_success(response: "CachedResponse") -> bool:
        """Validate that data has been fetched successfully.

        A response counts as successful when its status code is 200 and its body does not
        contain the marker `There was a problem`; failures are logged as a warning.
        If this check fails, the cache should not update with this response, even when marked as stale.
        """
        is_success = response.status_code == 200 and b"There was a problem" not in response.content
        if not is_success:
            logging.warning(f"Request was unsuccesful status:{response.status_code} , url:{response.url}")
        return is_success

    @classmethod
    def parse_reference_code(cls, reference_code: str) -> tuple[Optional[str], Optional[str], str]:
        r"""Parse a reference code from the NIST ASD into the constituent parts that can be used to look up references.

        Args:
            reference_code (str): A NIST ASD bibliographic reference string, such as `L13456n3`, or `T6936n`.

        Returns:
            db (str | None): A label for which bibliographic database to target; `None` when the
                code does not start with an uppercase letter.
            ref (str | None): The database ID for the reference to look up; `None` when the code
                contains no ID (e.g. a bare comment code such as `n` or `LS`).
            comment (str): An additional comment code included in the reference (empty when absent),
                can be fetched separately.
        """
        # Bare comment codes carry no database id and are mapped onto the "T" database.
        if reference_code.startswith("n"):
            return "T", None, "n"
        if reference_code.startswith("LS"):
            return "T", None, "LS"

        db, ref, comment = cls.reference_expr.match(reference_code).groups()
        # "LS" is uppercase and hence never captured by the lowercase comment group.
        if "LS" in reference_code:
            comment = "LS"
        return db, ref, "" if comment is None else comment

    def lookup(self, element: str, sp_num: int, reference_code: str) -> dict[str, Any]:
        """Look up a reference code for a given element state.

        Args:
            element (str) : The element name, e.g. `H`
            sp_num (int) : The ionization state of the element, with 1 corresponding to the atom
            reference_code (str) : The bibliographic reference code from the ASD columns `tp_ref` or `line_ref`.

        Returns:
            bib_data (dict) : A dictionary with the keys `title`, `doi`, `authors`, `text` and `url`.
                Fields that are unavailable are empty (`url` is `None` when nothing was looked up).

        Raises:
            requests.HTTPError: If a response has an error status code.
        """
        db, ref, comment = self.parse_reference_code(reference_code)
        db_label = "tp" if db == "T" else "el"
        title, doi, authors, text, url = "", "", [], "", None

        if ref is not None:
            params = {
                "db": db_label,
                "db_id": ref,
                "comment_code": "",
                "element": element,
                "spectr_charge": sp_num,
            }
            response = self.session.get(self.nist_url, params=params)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, features="html.parser")

            title_tag = soup.find("font", {"size": "+1"})
            if title_tag is not None:
                title = title_tag.text.replace("\xa0", " ").strip()
            doi_tag = soup.find("a", {"id": "ad"})
            if doi_tag is not None:
                doi = doi_tag.text.strip()
            authors = [author.text.replace("\xa0", " ").strip() for author in soup.find_all("a", {"id": "aa"})]

            # Pages without a table would otherwise crash on `None.find_all`.
            table = soup.find("table")
            if table is not None:
                text = "\n".join(tr.text.strip() for tr in table.find_all("tr")).strip()

            # The response URL contains "REDACTED" placeholders for the ignored parameters;
            # substitute the element and charge back in, in that order.
            url = (
                response.url.replace("REDACTED", f"{element}", 1).replace("REDACTED", f"{sp_num}", 1)
                + f"&comment_code={comment}"
            )

        # separately look up comments such that we benefit from the cache here as well
        if comment != "":
            comment_params = {
                "db": db_label,
                "db_id": "",
                "comment_code": comment,
                "element": "H",  # not cached
                "spectr_charge": 1,  # not cached
            }
            comment_response = self.session.get(self.nist_url, params=comment_params)
            comment_response.raise_for_status()
            comment_table = BeautifulSoup(comment_response.text, features="html.parser").table
            comment_cell = None if comment_table is None else comment_table.find("td", {"colspan": "2"})
            if comment_cell is not None:
                text += comment_cell.text
            url = (
                comment_response.url.replace("REDACTED", f"{element}", 1).replace("REDACTED", f"{sp_num}", 1)
                + f"&db_id={'' if ref is None else ref}"
            )

        return {
            "title": title,
            "doi": doi,
            "authors": authors,
            "text": text,
            "url": url,
        }
ASDCache/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ """ASDCache is a module to retrieve data from the NIST Atomic Spectra Database that uses caching for fast local access.
2
+
3
+ To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
4
+
5
+ The `SpectraCache` class acts as the entrypoint to retrieve this data.
6
+ """
7
+
8
+ from .ASDCache import SpectraCache, BibCache
9
+
10
+ __all__ = ["SpectraCache", "BibCache"]
ASDCache/_version.py ADDED
@@ -0,0 +1,16 @@
1
+ # file generated by setuptools_scm
2
+ # don't change, don't track in version control
3
+ TYPE_CHECKING = False
4
+ if TYPE_CHECKING:
5
+ from typing import Tuple, Union
6
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
7
+ else:
8
+ VERSION_TUPLE = object
9
+
10
+ version: str
11
+ __version__: str
12
+ __version_tuple__: VERSION_TUPLE
13
+ version_tuple: VERSION_TUPLE
14
+
15
+ __version__ = version = '0.2.0'
16
+ __version_tuple__ = version_tuple = (0, 2, 0)
@@ -0,0 +1,137 @@
1
+ Metadata-Version: 2.4
2
+ Name: ASDCache
3
+ Version: 0.2.0
4
+ Summary: A Python module to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling
5
+ Project-URL: Documentation, https://antoinetue.github.io/asdcache
6
+ Project-URL: Source, https://github.com/AntoineTUE/asdcache
7
+ Author-email: Antoine Salden <t.p.w.salden@tue.nl>
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: ASD,Atomic Spectra Database,NIST,atoms,spectra,spectroscopy,spectrum
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering
20
+ Requires-Python: >=3.9
21
+ Requires-Dist: bs4
22
+ Requires-Dist: numpy
23
+ Requires-Dist: pandas
24
+ Requires-Dist: requests
25
+ Requires-Dist: requests-cache
26
+ Provides-Extra: docs
27
+ Requires-Dist: black; extra == 'docs'
28
+ Requires-Dist: mkdocs; extra == 'docs'
29
+ Requires-Dist: mkdocs-autorefs; extra == 'docs'
30
+ Requires-Dist: mkdocs-gen-files; extra == 'docs'
31
+ Requires-Dist: mkdocs-git-revision-date-localized-plugin; extra == 'docs'
32
+ Requires-Dist: mkdocs-include-markdown-plugin; extra == 'docs'
33
+ Requires-Dist: mkdocs-jupyter; extra == 'docs'
34
+ Requires-Dist: mkdocs-literate-nav; extra == 'docs'
35
+ Requires-Dist: mkdocs-material; extra == 'docs'
36
+ Requires-Dist: mkdocs-section-index; extra == 'docs'
37
+ Requires-Dist: mkdocstrings; extra == 'docs'
38
+ Requires-Dist: mkdocstrings-python; extra == 'docs'
39
+ Provides-Extra: polars
40
+ Requires-Dist: polars; extra == 'polars'
41
+ Description-Content-Type: text/markdown
42
+
43
+ # ASDCache
44
+
45
+ ![ASDCache logo](./docs/assets/logo.svg)
46
+
47
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.14673488.svg)](https://doi.org/10.5281/zenodo.14673488)
48
+ [![GitHub License](https://img.shields.io/github/license/AntoineTUE/ASDCache)](https://www.github.com/AntoineTUE/ASDCache/blob/main/LICENSE)
49
+ [![GitHub Workflow Status build](https://img.shields.io/github/actions/workflow/status/AntoineTUE/ASDCache/build.yml?label=PyPI%20build)](https://pypi.python.org/pypi/ASDCache)
50
+ [![GitHub Workflow Status docs](https://img.shields.io/github/actions/workflow/status/AntoineTUE/ASDCache/documentation.yml?label=Documentation%20build)](https://antoinetue.github.io/ASDCache)
51
+ [![PyPI - Version](https://img.shields.io/pypi/v/ASDCache)](https://pypi.python.org/pypi/ASDCache)
52
+ [![PyPI - Python versions](https://img.shields.io/pypi/pyversions/ASDCache.svg)](https://pypi.python.org/pypi/ASDCache)
53
+ [![PyPI - Downloads](https://img.shields.io/pypi/dw/ASDCache)](https://pypistats.org/packages/ASDCache)
54
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
55
+ [![Hatch project](https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg)](https://github.com/pypa/hatch)
56
+
57
+ `ASDCache` is a Python project to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling.
58
+
59
+ To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields, to provide a more 'machine-useable' experience.
60
+
61
+ It also coerces most of the retrieved data to be of a strictly numeric type, which strips out footnotes and annotations, but preserves e.g. bibliographic reference labels.
62
+
63
+ You should thus still be sure to check and attribute the NIST ASD when making use of `ASDCache`!
64
+
65
+ The main goals and benefits of `ASDCache` are:
66
+
67
+ - [x] Make the data from the NIST ASD locally accessible as a Dataframe for use in analysis of spectra
68
+ - [x] Retrieve a consistent schema of the data that represents the 'human readable' format, but enforce strictly numeric data for important columns
69
+ - [ ] This removes footnotes and other annotations, be sure to check the ASD itself as well for this information.
70
+ - [x] Use caching to dramatically speed up data retrieval, from minutes down to milliseconds in some cases
71
+ - [x] Cache time-to-live is 1 week by default, meaning you still get updates to the ASD in a reasonable time frame
72
+ - [x] The cache time-to-live can be adjusted
73
+ - [x] Cache data to allow working offline, or even transferring the ASD data to an offline system.
74
+ - [x] The cache is only updated when a request for new data succeeds
75
+ - [x] Limit repeated queries for the same information, avoiding network overhead and server load.
76
+
77
+ `ASDCache` is not affiliated with NIST or the NIST ASD in any way, it simply tries to help make it more accessible.
78
+
79
+ ## Installing
80
+ `ASDCache` can be installed with `pip`.
81
+
82
+ ```console
83
+ pip install ASDCache
84
+ ```
85
+
86
+ Further optional features can be installed by specifying the `polars` or `docs` feature flag, as defined in [pyproject.toml](https://github.com/AntoineTUE/ASDCache/blob/main/pyproject.toml).
87
+
88
+ To install all dependencies to locally serve and update the documentation for instance, you can run:
89
+
90
+ ```console
91
+ pip install ASDCache[docs]
92
+ ```
93
+
94
+ Installing the `polars` feature is not required: if `polars` is already installed in the active environment, it can be used instead of `pandas` as a Dataframe backend for `ASDCache`.
95
+
96
+ ## Documentation
97
+
98
+ Documentation for `ASDCache` is available on [this page](https://antoinetue.github.io/ASDCache).
99
+
100
+ ### Example
101
+ A brief example below demonstrates how to use `SpectraCache` to query the NIST ASD for spectroscopic data for different species and plot their respective relative intensities.
102
+
103
+ Note that these relative intensities are in principle not comparable between different species or sources and merely serve as a guide.
104
+
105
+ More elaborate examples can be found in the [example section of the documentation](https://antoinetue.github.io/ASDCache/examples)
106
+
107
+ ```python
108
+ from ASDCache import SpectraCache, BibCache
109
+ import matplotlib.pyplot as plt
110
+
111
+ nist = SpectraCache()
112
+ lines_H_I = nist.fetch("H I")
113
+
114
+
115
+ plt.plot(lines_H_I['obs_wl_air(nm)'], lines_H_I['intens'], label=f"{lines_H_I['element'].unique()[0]} {lines_H_I['sp_num'].unique()[0]}")
116
+
117
+
118
+ nist.fetch("O I-III") # caches data from NIST but does not assign to a variable
119
+
120
+ # Oxygen I-III will still be plotted, each ionization state separately.
121
+ lines_all_cached = nist.get_all_cached()
122
+ for species,lines in lines_all_cached.groupby(["element","sp_num"]):
123
+ plt.plot(lines['obs_wl_air(nm)'], lines['intens'], label=f"{species[0]} {species[1]}", marker='x', ls='none')
124
+ plt.legend()
125
+ ```
126
+
127
+ ## Citing
128
+
129
+ Be sure to cite the NIST ASD when using `ASDCache` in your work, since it is the source of the data.
130
+
131
+ `ASDCache` itself can be cited using the following DOI provided via Zenodo: [10.5281/zenodo.14673488](https://doi.org/10.5281/zenodo.14673488)
132
+
133
+ See also [this page for more information](https://antoinetue.github.io/ASDCache/citing)
134
+
135
+ ## License
136
+
137
+ ASDCache is licensed under the MIT license.
@@ -0,0 +1,7 @@
1
+ ASDCache/ASDCache.py,sha256=y8u3YGLfb4PaWaw3nI4YzALoAqBmoUxgfoa34iA7q08,23392
2
+ ASDCache/__init__.py,sha256=paLOzphfIp08KMG2lLKUlDAy9ywaucszgDuMvw4rqnw,484
3
+ ASDCache/_version.py,sha256=H-qsvrxCpdhaQzyddR-yajEqI71hPxLa4KxzpP3uS1g,411
4
+ asdcache-0.2.0.dist-info/METADATA,sha256=_7TqlNsgDlHa44qRRvBaD1MDvysSQvjc2VA-IhMKBtM,7078
5
+ asdcache-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ asdcache-0.2.0.dist-info/licenses/LICENSE,sha256=rK2sbb9pYa-j_eVCPSax1rrR895GVRCCVvbo3vcDkxY,1072
7
+ asdcache-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024, Antoine Salden
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.