ASDCache 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {asdcache-0.2.2 → asdcache-0.2.3}/ASDCache/ASDCache.py +115 -110
- asdcache-0.2.3/ASDCache/__init__.py +61 -0
- asdcache-0.2.3/ASDCache/_version.py +24 -0
- {asdcache-0.2.2 → asdcache-0.2.3}/PKG-INFO +16 -16
- {asdcache-0.2.2 → asdcache-0.2.3}/pyproject.toml +55 -29
- asdcache-0.2.2/ASDCache/__init__.py +0 -10
- asdcache-0.2.2/ASDCache/_version.py +0 -21
- {asdcache-0.2.2 → asdcache-0.2.3}/.gitignore +0 -0
- {asdcache-0.2.2 → asdcache-0.2.3}/LICENSE +0 -0
|
@@ -1,43 +1,12 @@
|
|
|
1
|
-
|
|
1
|
+
"""The ASDCache module.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
|
|
6
|
-
The benefit of this format is that it is more 'machine readable' than the formatted ASCII of HTML options.
|
|
7
|
-
This means it requires far less bespoke parsing to get rid of 'human readable' features such as repeated page column headers, or empty lines.
|
|
8
|
-
To ensure a consistent schema of the retrieved data, lines are always retrieved as a function of wavelength, using `vacuum wavelength`, even between 200 to 2000 nm.
|
|
9
|
-
Wavenumbers and Ritz wavelength will be included in the response.
|
|
10
|
-
|
|
11
|
-
In the range $5000 \mathrm{cm}^{-1}<\nu<50000 \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][ASDcache.readASD.ASDCache.wn_to_n_refractive]).
|
|
12
|
-
This is consistent with the approach of the ASD.
|
|
13
|
-
|
|
14
|
-
Each response from the NIST page is cached (1 week by default) on the local system.
|
|
15
|
-
This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
|
|
16
|
-
As an example: reading all spectra between 200 and 1000 nm can take over 2 minutes without using the cache, but can be as fast as 0.2 seconds using the `polars` backend.
|
|
17
|
-
In addition, it means that an internet connection is not required after initial data fetching.
|
|
18
|
-
The cached response is only updated upon succesfull retrieval of a new response of the NIST page.
|
|
19
|
-
If unable to succesfully fetch new data, we fall back to a 'stale' cached response.
|
|
20
|
-
|
|
21
|
-
The cache can be shared to another system, to give offline/airgapped systems access to the same data.
|
|
22
|
-
To that end, the file `NIST_ASD_cache.sqlite` in the user's cache directory has to be copied over.
|
|
23
|
-
|
|
24
|
-
The standard cache directories are as follows:
|
|
25
|
-
|
|
26
|
-
=== "Windows"
|
|
27
|
-
`%USERPROFILE%/AppData/Local`
|
|
28
|
-
=== "Linux"
|
|
29
|
-
`~/.cache/http_cache/`
|
|
30
|
-
=== "MacOS"
|
|
31
|
-
`/Users/user/Library/Caches/http_cache/`
|
|
32
|
-
|
|
33
|
-
Queries to the NIST ASD are hashed by the keys (or parameters) of the requests.
|
|
34
|
-
This means that any change to either one of these parameters, will result in a new cache entry, even if the returned data is equivalent.
|
|
3
|
+
It contains both the [SpectraCache][(m).] and [BibCache][(m).] classes which allow you to interact with the ASD and the relevant bibliographic databases.
|
|
35
4
|
"""
|
|
36
5
|
|
|
37
|
-
import importlib
|
|
6
|
+
import importlib.util
|
|
38
7
|
import warnings
|
|
39
8
|
import pandas as pd
|
|
40
|
-
from requests_cache import CachedSession,
|
|
9
|
+
from requests_cache import CachedSession, Response
|
|
41
10
|
from io import StringIO
|
|
42
11
|
from datetime import timedelta
|
|
43
12
|
import re
|
|
@@ -45,7 +14,7 @@ import numpy as np
|
|
|
45
14
|
from bs4 import BeautifulSoup
|
|
46
15
|
import sys
|
|
47
16
|
import logging
|
|
48
|
-
from typing import Any, Optional
|
|
17
|
+
from typing import Any, Optional, Union
|
|
49
18
|
|
|
50
19
|
if importlib.util.find_spec("polars"):
|
|
51
20
|
POLARS_AVAILABLE = True
|
|
@@ -98,6 +67,10 @@ SCI_EXPR = r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)"
|
|
|
98
67
|
"""Regex pattern for processing scientific notation"""
|
|
99
68
|
|
|
100
69
|
|
|
70
|
+
class ASDQueryError(Exception):
|
|
71
|
+
"""Exception raised when the NIST ASD has indicated an error with a query."""
|
|
72
|
+
|
|
73
|
+
|
|
101
74
|
class SpectraCache:
|
|
102
75
|
"""A class acting as the entrypoint to retrieve data from the NIST Atomic Spectra Database that uses caching.
|
|
103
76
|
|
|
@@ -115,6 +88,7 @@ class SpectraCache:
|
|
|
115
88
|
nist_url = "https://physics.nist.gov/cgi-bin/ASD/lines1.pl"
|
|
116
89
|
species_expr = re.compile(r"spectra=([\w\+\-\%3]+)&")
|
|
117
90
|
query_params = {
|
|
91
|
+
"submit": "Retrieve Data",
|
|
118
92
|
"unit": 1,
|
|
119
93
|
"de": 0,
|
|
120
94
|
"plot_out": 0,
|
|
@@ -143,10 +117,11 @@ class SpectraCache:
|
|
|
143
117
|
"enrg_out": "on",
|
|
144
118
|
"J_out": "on",
|
|
145
119
|
"g_out": "on",
|
|
146
|
-
"diag_out": "on",
|
|
120
|
+
# "diag_out": "on", # avoid diagnostic data, it leads to multi-species queries failing; which can appear as if keys below are needed. See issue #1
|
|
147
121
|
"allowed_out": 1,
|
|
148
122
|
"forbid_out": 1,
|
|
149
|
-
"
|
|
123
|
+
# "show_diff_obs_calc": 1, # Does not appear mandatory in retrospect, see issue #1
|
|
124
|
+
# "include_Ritz_E1": 1, # Does not appear mandatory in retrospect, see issue #1
|
|
150
125
|
}
|
|
151
126
|
"""Request parameters used by the NIST ASD form."""
|
|
152
127
|
column_order = [
|
|
@@ -209,7 +184,7 @@ class SpectraCache:
|
|
|
209
184
|
"""
|
|
210
185
|
return self.session.settings.expire_after
|
|
211
186
|
|
|
212
|
-
def set_cache_expiry(self, new: timedelta = None, **kwargs):
|
|
187
|
+
def set_cache_expiry(self, new: Optional[timedelta] = None, **kwargs):
|
|
213
188
|
"""Set the cache expiry to a different interval (default: 1 week).
|
|
214
189
|
|
|
215
190
|
Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
|
|
@@ -219,12 +194,58 @@ class SpectraCache:
|
|
|
219
194
|
self.session.settings.expire_after = new
|
|
220
195
|
|
|
221
196
|
@staticmethod
|
|
222
|
-
def _check_response_success(response:
|
|
197
|
+
def _check_response_success(response: Response) -> bool:
|
|
223
198
|
"""Validate that data has been fetched successfully.
|
|
224
199
|
|
|
225
200
|
If this check fails, the cache should not update with this response, even when marked as stale.
|
|
201
|
+
|
|
202
|
+
The first obvious way to check success is if an error is indicated by the HTTP status code.
|
|
203
|
+
|
|
204
|
+
However, when a query for data is incorrect, the NIST ASD returns a HTML page indicating `<title>NIST ASD : Input Error</title>` in the `<head>` tag, or "Error Message".
|
|
205
|
+
|
|
206
|
+
A successful query would not receive HTML as a response, but raw ASCII values instead.
|
|
207
|
+
|
|
208
|
+
We can thus check for the start of a HTML document.
|
|
209
|
+
|
|
210
|
+
Note that this only works for data queries, not for bibliographic metadata by `BibCache`.
|
|
226
211
|
"""
|
|
227
|
-
return
|
|
212
|
+
return not (
|
|
213
|
+
not response.ok or response.content.startswith(b"<!DOCTYPE") or b"Error Message" in response.content
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
def _get_data(self, species: str, wl_range: tuple[float, float] = (170, 1000), **kwargs) -> Response:
|
|
217
|
+
"""Retrieve raw, ASCII-formatted data from the NIST ASD with a GET request.
|
|
218
|
+
|
|
219
|
+
To retrieve data and parse it into a DataFrame, use [fetch][..] instead.
|
|
220
|
+
|
|
221
|
+
Returns the raw response, which will be cached if it contains valid data (see [_check_response_success][..]).
|
|
222
|
+
|
|
223
|
+
If the response does not contain ASCII data, but HTML instead, an [ASDQueryError][(m).] will be raised.
|
|
224
|
+
|
|
225
|
+
It is possible to override any standard query parameter (see [query_params][..]) by passing them as kwargs.
|
|
226
|
+
"""
|
|
227
|
+
query_params = {
|
|
228
|
+
"spectra": species,
|
|
229
|
+
"output_type": 0,
|
|
230
|
+
"low_w": min(wl_range),
|
|
231
|
+
"upp_w": max(wl_range),
|
|
232
|
+
**{k: v for k, v in self.query_params.items() if k not in kwargs},
|
|
233
|
+
**{k: v for k, v in kwargs.items() if k in self.query_params},
|
|
234
|
+
}
|
|
235
|
+
response: Response = self.session.get(self.nist_url, params=query_params)
|
|
236
|
+
response.raise_for_status()
|
|
237
|
+
# Check if response is not a HTML document instead of ASCII formatted data, indicating query error.
|
|
238
|
+
if response.content.startswith(b"<!DOCTYPE"):
|
|
239
|
+
logging.error(
|
|
240
|
+
"NIST ASD responded with HTML instead of ASCII-data for species=%s, wl_range=%s\nQuery: %s",
|
|
241
|
+
species,
|
|
242
|
+
wl_range,
|
|
243
|
+
response.url,
|
|
244
|
+
)
|
|
245
|
+
raise ASDQueryError(
|
|
246
|
+
f"Query for {species=} {wl_range=} did not receive ASCII-data. This means the ASD could not interpret your query. Check if your query is malformed."
|
|
247
|
+
)
|
|
248
|
+
return response
|
|
228
249
|
|
|
229
250
|
@property
|
|
230
251
|
def cached_species(self) -> list[str]:
|
|
@@ -239,35 +260,21 @@ class SpectraCache:
|
|
|
239
260
|
for elem in self.species_expr.search(u).group(1).split("%3B")
|
|
240
261
|
]
|
|
241
262
|
|
|
242
|
-
def fetch(self, species, wl_range=(170, 1000), **kwargs) -> "pd.DataFrame|pl.DataFrame
|
|
263
|
+
def fetch(self, species, wl_range=(170, 1000), **kwargs) -> "pd.DataFrame|pl.DataFrame":
|
|
243
264
|
"""Fetch information on a species from the ASD, first checking the cache.
|
|
244
265
|
|
|
245
|
-
This supports loading multiple species in one go by using the same notation as the NIST ASD
|
|
266
|
+
This supports loading multiple species in one go by using the same notation as the NIST ASD form.
|
|
246
267
|
|
|
247
268
|
Note however that cache keys are computed for unique options for `species` and `wl_range`.
|
|
248
269
|
|
|
249
270
|
This means that you won't get caching benefits by using different queries.
|
|
250
271
|
|
|
251
|
-
In other words: the cache cannot deduplicate queries such as `ASD.fetch('H', (200,1000))` followed by `ASD.fetch('H I', (650,660))
|
|
272
|
+
In other words: the cache cannot deduplicate queries such as `ASD.fetch('H', (200,1000))` followed by `ASD.fetch('H I', (650,660))` (or vice versa).
|
|
252
273
|
|
|
253
274
|
Both these operations will fetch data online and be stored as separate cache entries.
|
|
254
275
|
"""
|
|
255
|
-
|
|
256
|
-
"spectra": species,
|
|
257
|
-
"output_type": 0,
|
|
258
|
-
"low_w": min(wl_range),
|
|
259
|
-
"upp_w": max(wl_range),
|
|
260
|
-
**self.query_params,
|
|
261
|
-
}
|
|
262
|
-
response = self.session.get(self.nist_url, params=query_params)
|
|
263
|
-
|
|
264
|
-
# if response.status_code == 200:
|
|
265
|
-
response.raise_for_status()
|
|
276
|
+
response = self._get_data(species, wl_range, **kwargs)
|
|
266
277
|
return self.create_dataframe(response)
|
|
267
|
-
# else:
|
|
268
|
-
# print(f"Error: Received status code {response.status_code}")
|
|
269
|
-
# print(response.url)
|
|
270
|
-
# return response
|
|
271
278
|
|
|
272
279
|
def create_dataframe(self, response) -> "pd.DataFrame|pl.DataFrame":
|
|
273
280
|
"""Create a dataframe from the (cached) NIST ASD response, using the chosen backend at class instantiation."""
|
|
@@ -276,7 +283,7 @@ class SpectraCache:
|
|
|
276
283
|
return self._from_pandas(response)
|
|
277
284
|
|
|
278
285
|
@classmethod
|
|
279
|
-
def _from_pandas(cls, response:
|
|
286
|
+
def _from_pandas(cls, response: Response) -> "pd.DataFrame":
|
|
280
287
|
r"""Transform a (cached) NIST ASD response into a pandas DataFrame.
|
|
281
288
|
|
|
282
289
|
Calculates the air equivalent wavelength from the vacuum wavelength using the same Sellmeier equation as the NIST ASD.
|
|
@@ -311,9 +318,18 @@ class SpectraCache:
|
|
|
311
318
|
"": str,
|
|
312
319
|
}
|
|
313
320
|
df = pd.read_csv(StringIO(response.text), sep="\t", dtype=schema)
|
|
321
|
+
# Detect if pandas uses new `StringDtype`, or legacy `object` dtype for strings.
|
|
322
|
+
# This affects NaN handling for strings.
|
|
323
|
+
# Pandas 3.0 and up use the StringDtype, while pandas 2 can opt-in to this
|
|
324
|
+
# The 'Type' column should exist, 'element' may not.
|
|
325
|
+
uses_new_string_dtype = pd.api.types.is_string_dtype(df["Type"])
|
|
314
326
|
for col in ["obs_wl_vac(nm)", "ritz_wl_vac(nm)", "intens", "Ei(cm-1)", "Ek(cm-1)"]:
|
|
315
327
|
df[col] = df.loc[:, col].str.extract(SCI_EXPR).astype(float)
|
|
316
|
-
|
|
328
|
+
# Any missing value implies line is an E1 (electric dipole) transition
|
|
329
|
+
if uses_new_string_dtype:
|
|
330
|
+
df["Type"] = df.loc[:, "Type"].fillna("E1")
|
|
331
|
+
else:
|
|
332
|
+
df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
|
|
317
333
|
df["tp_ref"] = df.loc[:, "tp_ref"].fillna("")
|
|
318
334
|
df["obs_wl_air(nm)"] = df["obs_wl_vac(nm)"]
|
|
319
335
|
df["obs_wl_air(nm)"] = df[df["wn(cm-1)"].between(5000, 50000)]["obs_wl_air(nm)"] / cls.wn_to_n_refractive(
|
|
@@ -325,17 +341,18 @@ class SpectraCache:
|
|
|
325
341
|
)
|
|
326
342
|
df = df.drop([c for c in df.columns if "Unnamed" in c], axis=1).reset_index(drop=True)
|
|
327
343
|
if "element" not in df.columns:
|
|
344
|
+
# cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
|
|
345
|
+
# As 'element' and 'sp_num' columns are only missing for single-species queries, assign as constants, not vectors.
|
|
328
346
|
element, numeral = re.search(STATE_EXPR, response.url).groups()
|
|
347
|
+
numeric: int = cls.roman_to_int(numeral)
|
|
329
348
|
df["element"] = element
|
|
330
|
-
df["sp_num"] =
|
|
331
|
-
# cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
|
|
332
|
-
df["sp_num"] = df["sp_num"].map(cls.roman_to_int)
|
|
349
|
+
df["sp_num"] = numeric
|
|
333
350
|
df["unc_obs_wl"] = pd.to_numeric(df["unc_obs_wl"]) if "unc_obs_wl" in df.columns else np.nan
|
|
334
351
|
df["unc_ritz_wl"] = pd.to_numeric(df["unc_ritz_wl"]) if "unc_ritz_wl" in df.columns else np.nan
|
|
335
352
|
return df.loc[:, cls.column_order]
|
|
336
353
|
|
|
337
354
|
@classmethod
|
|
338
|
-
def _from_polars(cls, response:
|
|
355
|
+
def _from_polars(cls, response: Response) -> "pl.DataFrame":
|
|
339
356
|
r"""Transform a (cached) NIST ASD response into a polars DataFrame.
|
|
340
357
|
|
|
341
358
|
Calculates the air equivalent wavelength from the vacuum wavelength using the same Sellmeier equation as the NIST ASD.
|
|
@@ -366,28 +383,25 @@ class SpectraCache:
|
|
|
366
383
|
"J_k": pl.String,
|
|
367
384
|
"": pl.String,
|
|
368
385
|
}
|
|
369
|
-
|
|
370
|
-
df = (
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
)
|
|
389
|
-
.drop([""])
|
|
390
|
-
).with_columns(
|
|
386
|
+
|
|
387
|
+
df = pl.read_csv(
|
|
388
|
+
StringIO(response.text),
|
|
389
|
+
separator="\t",
|
|
390
|
+
schema_overrides=schema,
|
|
391
|
+
null_values="",
|
|
392
|
+
)
|
|
393
|
+
sci_cols = ["obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens", "ritz_wl_vac(nm)"]
|
|
394
|
+
cast_to_scientific_notation = [
|
|
395
|
+
pl.col(c).str.extract(SCI_EXPR).replace("", None).cast(pl.Float64).alias(c) for c in sci_cols
|
|
396
|
+
]
|
|
397
|
+
df = df.with_columns(
|
|
398
|
+
*cast_to_scientific_notation,
|
|
399
|
+
pl.col("S(a.u.)").cast(pl.Float64),
|
|
400
|
+
pl.col("Type").replace(None, "E1"),
|
|
401
|
+
pl.col("tp_ref").replace(None, ""),
|
|
402
|
+
).drop([""])
|
|
403
|
+
# compute air wavelengths between 5000 cm-1 and 50000 cm-1
|
|
404
|
+
df = df.with_columns(
|
|
391
405
|
pl.when(pl.col("wn(cm-1)").is_between(5000, 50000))
|
|
392
406
|
.then(
|
|
393
407
|
pl.col("obs_wl_vac(nm)").cast(pl.Float64)
|
|
@@ -407,20 +421,11 @@ class SpectraCache:
|
|
|
407
421
|
)
|
|
408
422
|
if "element" not in df.columns:
|
|
409
423
|
element, numeral = re.search(STATE_EXPR, response.url).groups()
|
|
424
|
+
numeric: int = cls.roman_to_int(numeral) if numeral else 1
|
|
410
425
|
# cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
|
|
411
|
-
df = df.with_columns(
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
.cast(pl.String)
|
|
415
|
-
.alias("sp_num")
|
|
416
|
-
.map_elements(cls.roman_to_int, return_dtype=pl.Int64)
|
|
417
|
-
.first(),
|
|
418
|
-
)
|
|
419
|
-
df = df.with_columns(
|
|
420
|
-
unc_obs_wl=pl.col("unc_obs_wl") if "unc_obs_wl" in df.columns else None,
|
|
421
|
-
unc_ritz_wl=pl.col("unc_ritz_wl") if "unc_ritz_wl" in df.columns else None,
|
|
422
|
-
).with_columns(pl.col("unc_obs_wl").cast(pl.Float64), pl.col("unc_ritz_wl").cast(pl.Float64))
|
|
423
|
-
|
|
426
|
+
df = df.with_columns(pl.lit(element).alias("element"), pl.lit(numeric, dtype=pl.Int64).alias("sp_num"))
|
|
427
|
+
exprs = [pl.col(c).cast(pl.Float64) for c in ["unc_obs_wl", "unc_ritz_wl"] if c in df.columns]
|
|
428
|
+
df = df.with_columns(exprs)
|
|
424
429
|
return df.select(*cls.column_order)
|
|
425
430
|
|
|
426
431
|
@staticmethod
|
|
@@ -508,7 +513,7 @@ class BibCache:
|
|
|
508
513
|
"""
|
|
509
514
|
return self.session.settings.expire_after
|
|
510
515
|
|
|
511
|
-
def set_cache_expiry(self, new: timedelta = None, **kwargs):
|
|
516
|
+
def set_cache_expiry(self, new: Optional[timedelta] = None, **kwargs):
|
|
512
517
|
"""Set the cache expiry to a different interval (default: 1 week).
|
|
513
518
|
|
|
514
519
|
Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
|
|
@@ -518,7 +523,7 @@ class BibCache:
|
|
|
518
523
|
self.session.settings.expire_after = new
|
|
519
524
|
|
|
520
525
|
@staticmethod
|
|
521
|
-
def _check_response_success(response:
|
|
526
|
+
def _check_response_success(response: Response) -> bool:
|
|
522
527
|
"""Validate that data has been fetched successfully.
|
|
523
528
|
|
|
524
529
|
If this check fails, the cache should not update with this response, even when marked as stale.
|
|
@@ -533,12 +538,12 @@ class BibCache:
|
|
|
533
538
|
r"""Parse a reference code from the NIST ASD into the constituent parts that can be used to look up references.
|
|
534
539
|
|
|
535
540
|
Args:
|
|
536
|
-
|
|
541
|
+
reference_code (str): A NIST ASD bibliographic reference string, such as `L13456n3`, or `T6936n`.
|
|
537
542
|
|
|
538
543
|
Returns:
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
544
|
+
db (str): A label for which bibliographic database to target
|
|
545
|
+
ref (str|None): The database ID for the reference to look up
|
|
546
|
+
comment (str): An additional comment included in the reference, can be fetched separately.
|
|
542
547
|
"""
|
|
543
548
|
if reference_code.startswith("n"):
|
|
544
549
|
db, ref, comment = "T", None, "n"
|
|
@@ -553,12 +558,12 @@ class BibCache:
|
|
|
553
558
|
"""Look up a reference code for a given element state.
|
|
554
559
|
|
|
555
560
|
Args:
|
|
556
|
-
element (str)
|
|
557
|
-
sp_num (int)
|
|
558
|
-
reference_code (str)
|
|
561
|
+
element (str): The element name, e.g. `H`
|
|
562
|
+
sp_num (int): The ionization state of the element, with 1 corresponding to the atom
|
|
563
|
+
reference_code (str): The bibliographic reference code from the ASD columns `tp_ref` or `line_ref`.
|
|
559
564
|
|
|
560
565
|
Returns:
|
|
561
|
-
bib_data (dict)
|
|
566
|
+
bib_data (dict[str,Any]): A dictionary containing bibliographic metadata for the reference, if available/applicable. Contains a url to look it up.
|
|
562
567
|
"""
|
|
563
568
|
db, ref, comment = self.parse_reference_code(reference_code)
|
|
564
569
|
params = {
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
r"""`ASDcache` is a package to fetch data from the NIST Atomic Spectra Database (ASD), utilizing caching for fast responses.
|
|
2
|
+
|
|
3
|
+
To make the most use out of the cache, `ASDcache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
|
|
4
|
+
|
|
5
|
+
Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
|
|
6
|
+
|
|
7
|
+
The benefit of this format is that it is more 'machine readable' than the formatted ASCII of HTML options.
|
|
8
|
+
|
|
9
|
+
This means it requires far less bespoke parsing to get rid of 'human readable' features such as repeated page column headers, or empty lines.
|
|
10
|
+
|
|
11
|
+
## Air wavelength
|
|
12
|
+
To ensure a consistent schema of the retrieved data, lines are always retrieved as a function of wavelength, using `vacuum wavelength`, even between 200 to 2000 nm.
|
|
13
|
+
|
|
14
|
+
Wavenumbers and Ritz wavelength will be included in the response.
|
|
15
|
+
|
|
16
|
+
In the range $5000\ \mathrm{cm}^{-1}<\nu<50000\ \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][.SpectraCache.wn_to_n_refractive]).
|
|
17
|
+
This is consistent with the approach of the ASD.
|
|
18
|
+
|
|
19
|
+
## Making use of the cache
|
|
20
|
+
|
|
21
|
+
Each response from the NIST page is cached (1 week by default) on the local system.
|
|
22
|
+
|
|
23
|
+
This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
|
|
24
|
+
|
|
25
|
+
As an example: retrieving and parsing the data for all spectra between 200 and 1000 nm can take over 2 minutes without using the cache, but can be as fast as 0.2 seconds using the `polars` backend.
|
|
26
|
+
|
|
27
|
+
In addition, it means that an internet connection is not required after initial data fetching.
|
|
28
|
+
|
|
29
|
+
The cached response is only updated upon successful retrieval of a new response of the NIST page.
|
|
30
|
+
|
|
31
|
+
If unable to successfully fetch new data, we fall back to a 'stale' cached response.
|
|
32
|
+
|
|
33
|
+
The cache can be shared to another system, to give offline/airgapped systems access to the same data.
|
|
34
|
+
|
|
35
|
+
To that end, the file `NIST_ASD_cache.sqlite` in the user's cache directory has to be copied over.
|
|
36
|
+
|
|
37
|
+
### Default cache locations
|
|
38
|
+
|
|
39
|
+
The standard cache directories are as follows:
|
|
40
|
+
|
|
41
|
+
=== "Windows"
|
|
42
|
+
`%USERPROFILE%/AppData/Local`
|
|
43
|
+
=== "Linux"
|
|
44
|
+
`~/.cache/http_cache/`
|
|
45
|
+
=== "MacOS"
|
|
46
|
+
`/Users/user/Library/Caches/http_cache/`
|
|
47
|
+
|
|
48
|
+
### Cache keys and uniqueness
|
|
49
|
+
|
|
50
|
+
Queries to the NIST ASD are hashed by the keys (or parameters) of the requests.
|
|
51
|
+
|
|
52
|
+
This means that any change to either one of these parameters, will result in a new cache entry, even if the returned data is equivalent.
|
|
53
|
+
|
|
54
|
+
In other words: the cache cannot deduplicate queries such as `SpectraCache().fetch('H', (200,1000))` followed by `SpectraCache().fetch('H I', (650,660))` (or vice versa).
|
|
55
|
+
|
|
56
|
+
It is often better (and faster) to fetch a range of data beyond what you need, and then filter down the dataframe you retrieve according to your needs.
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
from .ASDCache import SpectraCache, BibCache
|
|
60
|
+
|
|
61
|
+
__all__ = ["SpectraCache", "BibCache"]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# file generated by vcs-versioning
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"__version__",
|
|
7
|
+
"__version_tuple__",
|
|
8
|
+
"version",
|
|
9
|
+
"version_tuple",
|
|
10
|
+
"__commit_id__",
|
|
11
|
+
"commit_id",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
version: str
|
|
15
|
+
__version__: str
|
|
16
|
+
__version_tuple__: tuple[int | str, ...]
|
|
17
|
+
version_tuple: tuple[int | str, ...]
|
|
18
|
+
commit_id: str | None
|
|
19
|
+
__commit_id__: str | None
|
|
20
|
+
|
|
21
|
+
__version__ = version = '0.2.3'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 2, 3)
|
|
23
|
+
|
|
24
|
+
__commit_id__ = commit_id = None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ASDCache
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: A Python module to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling
|
|
5
5
|
Project-URL: Documentation, https://antoinetue.github.io/asdcache
|
|
6
6
|
Project-URL: Source, https://github.com/AntoineTUE/asdcache
|
|
@@ -12,33 +12,33 @@ Classifier: Development Status :: 4 - Beta
|
|
|
12
12
|
Classifier: Intended Audience :: Science/Research
|
|
13
13
|
Classifier: License :: OSI Approved :: MIT License
|
|
14
14
|
Classifier: Operating System :: OS Independent
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.13
|
|
20
19
|
Classifier: Topic :: Scientific/Engineering
|
|
21
20
|
Requires-Python: >=3.9
|
|
22
|
-
Requires-Dist:
|
|
23
|
-
Requires-Dist: numpy
|
|
24
|
-
Requires-Dist: pandas
|
|
25
|
-
Requires-Dist: requests
|
|
26
|
-
Requires-Dist: requests-cache
|
|
21
|
+
Requires-Dist: beautifulsoup4>=4.12
|
|
22
|
+
Requires-Dist: numpy>=2.0
|
|
23
|
+
Requires-Dist: pandas>=2.0
|
|
24
|
+
Requires-Dist: requests-cache>=1.2.0
|
|
27
25
|
Provides-Extra: docs
|
|
28
|
-
Requires-Dist:
|
|
29
|
-
Requires-Dist: mkdocs; extra == 'docs'
|
|
26
|
+
Requires-Dist: mkdocs-api-autonav; extra == 'docs'
|
|
30
27
|
Requires-Dist: mkdocs-autorefs; extra == 'docs'
|
|
31
|
-
Requires-Dist: mkdocs-gen-files; extra == 'docs'
|
|
32
28
|
Requires-Dist: mkdocs-git-revision-date-localized-plugin; extra == 'docs'
|
|
33
29
|
Requires-Dist: mkdocs-include-markdown-plugin; extra == 'docs'
|
|
34
|
-
Requires-Dist: mkdocs-jupyter; extra == 'docs'
|
|
35
|
-
Requires-Dist: mkdocs-
|
|
36
|
-
Requires-Dist: mkdocs-material; extra == 'docs'
|
|
30
|
+
Requires-Dist: mkdocs-jupyter>=0.26.3; extra == 'docs'
|
|
31
|
+
Requires-Dist: mkdocs-material==9.7.6; extra == 'docs'
|
|
37
32
|
Requires-Dist: mkdocs-section-index; extra == 'docs'
|
|
38
33
|
Requires-Dist: mkdocstrings; extra == 'docs'
|
|
39
|
-
Requires-Dist: mkdocstrings-python; extra == 'docs'
|
|
34
|
+
Requires-Dist: mkdocstrings-python-xref>=2.1.1; extra == 'docs'
|
|
35
|
+
Requires-Dist: properdocs>=1.6.7; extra == 'docs'
|
|
36
|
+
Requires-Dist: pygments>=2.20.0; extra == 'docs'
|
|
37
|
+
Requires-Dist: ruff>=0.15.13; extra == 'docs'
|
|
40
38
|
Provides-Extra: polars
|
|
41
|
-
Requires-Dist: polars; extra == 'polars'
|
|
39
|
+
Requires-Dist: polars[pandas]; extra == 'polars'
|
|
40
|
+
Provides-Extra: polars-lts
|
|
41
|
+
Requires-Dist: polars[pandas,rtcompat]; extra == 'polars-lts'
|
|
42
42
|
Description-Content-Type: text/markdown
|
|
43
43
|
|
|
44
44
|
# ASDCache
|
|
@@ -51,7 +51,7 @@ Description-Content-Type: text/markdown
|
|
|
51
51
|
[](https://antoinetue.github.io/ASDCache)
|
|
52
52
|
[](https://pypi.python.org/pypi/ASDCache)
|
|
53
53
|
[](https://pypi.python.org/pypi/ASDCache)
|
|
54
|
-
[](https://pypistats.org/packages/asdcache)
|
|
55
55
|
[](https://github.com/astral-sh/ruff)
|
|
56
56
|
[](https://github.com/pypa/hatch)
|
|
57
57
|
|
|
@@ -19,39 +19,38 @@ classifiers = [
|
|
|
19
19
|
"Topic :: Scientific/Engineering",
|
|
20
20
|
"Intended Audience :: Science/Research",
|
|
21
21
|
"Operating System :: OS Independent",
|
|
22
|
-
"Programming Language :: Python :: 3.9",
|
|
23
22
|
"Programming Language :: Python :: 3.10",
|
|
24
23
|
"Programming Language :: Python :: 3.11",
|
|
25
24
|
"Programming Language :: Python :: 3.12",
|
|
26
25
|
"Programming Language :: Python :: 3.13",
|
|
27
26
|
]
|
|
28
|
-
dependencies = ["
|
|
27
|
+
dependencies = ["requests_cache>=1.2.0", "pandas>=2.0","numpy>=2.0", "beautifulsoup4>=4.12"]
|
|
29
28
|
dynamic = ["version"]
|
|
30
29
|
|
|
31
30
|
[project.optional-dependencies]
|
|
32
|
-
polars = ["polars"]
|
|
31
|
+
polars = ["polars[pandas]"]
|
|
32
|
+
polars-lts = ["polars[rtcompat,pandas]"]
|
|
33
33
|
docs = [
|
|
34
|
-
"
|
|
34
|
+
"properdocs>=1.6.7",
|
|
35
|
+
"mkdocs-material==9.7.6",
|
|
35
36
|
"mkdocs-autorefs",
|
|
36
|
-
"mkdocs-gen-files",
|
|
37
|
+
# "mkdocs-gen-files",
|
|
37
38
|
"mkdocs-git-revision-date-localized-plugin",
|
|
38
39
|
"mkdocs-include-markdown-plugin",
|
|
39
|
-
"mkdocs-jupyter",
|
|
40
|
-
"mkdocs-literate-nav",
|
|
41
|
-
"mkdocs-material",
|
|
40
|
+
"mkdocs-jupyter>=0.26.3",
|
|
41
|
+
# "mkdocs-literate-nav",
|
|
42
42
|
"mkdocs-section-index",
|
|
43
43
|
"mkdocstrings",
|
|
44
|
-
"mkdocstrings-python",
|
|
45
|
-
"
|
|
44
|
+
"mkdocstrings-python-xref>=2.1.1",
|
|
45
|
+
"mkdocs-api-autonav",
|
|
46
|
+
"ruff>=0.15.13",
|
|
47
|
+
"pygments>=2.20.0"
|
|
46
48
|
]
|
|
47
49
|
|
|
48
50
|
[project.urls]
|
|
49
51
|
Documentation = "https://antoinetue.github.io/asdcache"
|
|
50
52
|
Source = "https://github.com/AntoineTUE/asdcache"
|
|
51
53
|
|
|
52
|
-
[tool.hatch.metadata]
|
|
53
|
-
# direct dependency references, e.g `pip @ git+https://github.com/pypa/pip.git@master`
|
|
54
|
-
allow-direct-references = true
|
|
55
54
|
|
|
56
55
|
[tool.hatch.version]
|
|
57
56
|
source = "vcs"
|
|
@@ -110,7 +109,7 @@ extend-exclude = ["docs/assets/scripts/gen_ref_pages.py"]
|
|
|
110
109
|
|
|
111
110
|
[tool.ruff.lint]
|
|
112
111
|
select = ["E4", "E7", "E9", "F","C4", "SIM", "NPY", "PD","B","UP","D"]
|
|
113
|
-
ignore = ["
|
|
112
|
+
ignore = ["F401"]
|
|
114
113
|
|
|
115
114
|
[tool.ruff.lint.pydocstyle]
|
|
116
115
|
convention = "pep257"
|
|
@@ -138,16 +137,6 @@ dependencies = ["matplotlib", "ipython","ipykernel","pre-commit"]
|
|
|
138
137
|
installer = "uv"
|
|
139
138
|
features = ["polars"]
|
|
140
139
|
|
|
141
|
-
[tool.hatch.envs.test]
|
|
142
|
-
dependencies = [
|
|
143
|
-
"coverage[toml]>=6.2",
|
|
144
|
-
"pytest",
|
|
145
|
-
"pytest-cov",
|
|
146
|
-
"pytest-mock",
|
|
147
|
-
"pytest-recording",
|
|
148
|
-
"pytest-sugar",
|
|
149
|
-
"hypothesis",
|
|
150
|
-
]
|
|
151
140
|
|
|
152
141
|
[tool.hatch.envs.hatch-test]
|
|
153
142
|
randomize = false
|
|
@@ -155,6 +144,23 @@ parallel = false # avoid cache access conflicts
|
|
|
155
144
|
retries = 2
|
|
156
145
|
retry-delay = 1
|
|
157
146
|
features = ["polars"]
|
|
147
|
+
dependencies = [
|
|
148
|
+
"coverage-enable-subprocess==1.0",
|
|
149
|
+
'coverage[toml]>=6.2,<7.11; python_version<"3.10"',
|
|
150
|
+
'coverage[toml]~=7.11; python_version>="3.10"',
|
|
151
|
+
'pytest~=8.4; python_version<"3.10"',
|
|
152
|
+
'pytest~=9.0; python_version>="3.10"',
|
|
153
|
+
"pytest-mock~=3.12",
|
|
154
|
+
"pytest-randomly~=3.15",
|
|
155
|
+
"pytest-rerunfailures~=14.0",
|
|
156
|
+
"pytest-xdist[psutil]~=3.5",
|
|
157
|
+
'pytest-cov~=7.1.0; python_version>="3.10"',
|
|
158
|
+
"pytest-recording",
|
|
159
|
+
"pytest-sugar~=1.1.1",
|
|
160
|
+
"hypothesis",
|
|
161
|
+
'virtualenv<21; python_version<"3.10"',
|
|
162
|
+
]
|
|
163
|
+
|
|
158
164
|
|
|
159
165
|
[tool.hatch.envs.docs]
|
|
160
166
|
skip-install = true
|
|
@@ -162,13 +168,13 @@ features = ["docs"]
|
|
|
162
168
|
dependencies = ["mike"]
|
|
163
169
|
|
|
164
170
|
[tool.hatch.envs.docs.scripts]
|
|
165
|
-
serve = "
|
|
166
|
-
build = "
|
|
167
|
-
ci-build = "mike deploy --config-file mkdocs.yml
|
|
171
|
+
serve = "properdocs serve -f mkdocs.yml {args}"
|
|
172
|
+
build = "properdocs build --clean -f mkdocs.yml {args}"
|
|
173
|
+
ci-build = "mike deploy --config-file mkdocs.yml {args}"
|
|
168
174
|
|
|
169
175
|
[tool.hatch.envs.lint]
|
|
170
176
|
template = "lint"
|
|
171
|
-
dependencies = ["ruff>=0.
|
|
177
|
+
dependencies = ["ruff>=0.15.13"]
|
|
172
178
|
|
|
173
179
|
[tool.hatch.envs.lint.scripts]
|
|
174
180
|
style = [
|
|
@@ -182,5 +188,25 @@ fix = [
|
|
|
182
188
|
"style", # feedback on what is not fixable
|
|
183
189
|
]
|
|
184
190
|
|
|
191
|
+
|
|
192
|
+
[tool.hatch.envs.hatch-test.overrides]
|
|
193
|
+
matrix.pandas.dependencies = [
|
|
194
|
+
{ value = "pandas>=2.0.0", if = ["pandas-2.0"] },
|
|
195
|
+
{ value = "numpy>=2.0", if = ["pandas-2.0"] },
|
|
196
|
+
{ value = "pandas>=3.0.0", if = ["pandas-3.0"] },
|
|
197
|
+
{ value = "numpy>=2.0", if = ["pandas-3.0"] },
|
|
198
|
+
]
|
|
199
|
+
matrix.polars.features = [
|
|
200
|
+
{ value = "polars", if = ["polars"]},
|
|
201
|
+
{ value = "polars-lts", if = ["polars-lts"]},
|
|
202
|
+
]
|
|
203
|
+
|
|
204
|
+
[[tool.hatch.envs.hatch-test.matrix]]
|
|
205
|
+
python = ["3.9","3.10"]
|
|
206
|
+
pandas = ["pandas-2.0"]
|
|
207
|
+
polars = ["polars","polars-lts"]
|
|
208
|
+
|
|
185
209
|
[[tool.hatch.envs.hatch-test.matrix]]
|
|
186
|
-
python = ["3.
|
|
210
|
+
python = ["3.11","3.12","3.13"]
|
|
211
|
+
pandas = ["pandas-2.0","pandas-3.0"]
|
|
212
|
+
polars = ["polars","polars-lts"]
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
"""ASDCache is a module to retrieve data from the NIST Atomic Spectra Database that uses caching for fast local access.
|
|
2
|
-
|
|
3
|
-
To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
|
|
4
|
-
|
|
5
|
-
The `SpectraCache` class acts as the entrypoint to retrieve this data.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from .ASDCache import SpectraCache, BibCache
|
|
9
|
-
|
|
10
|
-
__all__ = ["SpectraCache", "BibCache"]
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
# file generated by setuptools-scm
|
|
2
|
-
# don't change, don't track in version control
|
|
3
|
-
|
|
4
|
-
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
|
|
5
|
-
|
|
6
|
-
TYPE_CHECKING = False
|
|
7
|
-
if TYPE_CHECKING:
|
|
8
|
-
from typing import Tuple
|
|
9
|
-
from typing import Union
|
|
10
|
-
|
|
11
|
-
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
12
|
-
else:
|
|
13
|
-
VERSION_TUPLE = object
|
|
14
|
-
|
|
15
|
-
version: str
|
|
16
|
-
__version__: str
|
|
17
|
-
__version_tuple__: VERSION_TUPLE
|
|
18
|
-
version_tuple: VERSION_TUPLE
|
|
19
|
-
|
|
20
|
-
__version__ = version = '0.2.2'
|
|
21
|
-
__version_tuple__ = version_tuple = (0, 2, 2)
|
|
File without changes
|
|
File without changes
|