ASDCache 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {asdcache-0.2.2 → asdcache-0.2.4}/ASDCache/ASDCache.py +174 -226
- asdcache-0.2.4/ASDCache/__init__.py +61 -0
- asdcache-0.2.4/ASDCache/_version.py +24 -0
- asdcache-0.2.4/ASDCache/utils.py +66 -0
- {asdcache-0.2.2 → asdcache-0.2.4}/PKG-INFO +21 -16
- {asdcache-0.2.2 → asdcache-0.2.4}/pyproject.toml +66 -29
- asdcache-0.2.2/ASDCache/__init__.py +0 -10
- asdcache-0.2.2/ASDCache/_version.py +0 -21
- {asdcache-0.2.2 → asdcache-0.2.4}/.gitignore +0 -0
- {asdcache-0.2.2 → asdcache-0.2.4}/LICENSE +0 -0
|
@@ -1,51 +1,22 @@
|
|
|
1
|
-
|
|
1
|
+
"""The ASDCache module.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
|
|
6
|
-
The benefit of this format is that it is more 'machine readable' than the formatted ASCII of HTML options.
|
|
7
|
-
This means it requires far less bespoke parsing to get rid of 'human readable' features such as repeated page column headers, or empty lines.
|
|
8
|
-
To ensure a consistent schema of the retrieved data, lines are always retrieved as a function of wavelength, using `vacuum wavelength`, even between 200 to 2000 nm.
|
|
9
|
-
Wavenumbers and Ritz wavelength will be included in the response.
|
|
10
|
-
|
|
11
|
-
In the range $5000 \mathrm{cm}^{-1}<\nu<50000 \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][ASDcache.readASD.ASDCache.wn_to_n_refractive]).
|
|
12
|
-
This is consistent with the approach of the ASD.
|
|
13
|
-
|
|
14
|
-
Each response from the NIST page is cached (1 week by default) on the local system.
|
|
15
|
-
This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
|
|
16
|
-
As an example: reading all spectra between 200 and 1000 nm can take over 2 minutes without using the cache, but can be as fast as 0.2 seconds using the `polars` backend.
|
|
17
|
-
In addition, it means that an internet connection is not required after initial data fetching.
|
|
18
|
-
The cached response is only updated upon succesfull retrieval of a new response of the NIST page.
|
|
19
|
-
If unable to succesfully fetch new data, we fall back to a 'stale' cached response.
|
|
20
|
-
|
|
21
|
-
The cache can be shared to another system, to give offline/airgapped systems access to the same data.
|
|
22
|
-
To that end, the file `NIST_ASD_cache.sqlite` in the user's cache directory has to be copied over.
|
|
23
|
-
|
|
24
|
-
The standard cache directories are as follows:
|
|
25
|
-
|
|
26
|
-
=== "Windows"
|
|
27
|
-
`%USERPROFILE%/AppData/Local`
|
|
28
|
-
=== "Linux"
|
|
29
|
-
`~/.cache/http_cache/`
|
|
30
|
-
=== "MacOS"
|
|
31
|
-
`/Users/user/Library/Caches/http_cache/`
|
|
32
|
-
|
|
33
|
-
Queries to the NIST ASD are hashed by the keys (or parameters) of the requests.
|
|
34
|
-
This means that any change to either one of these parameters, will result in a new cache entry, even if the returned data is equivalent.
|
|
3
|
+
It contains both the [SpectraCache][(m).] and [BibCache][(m).] classes which allow you to interact with the ASD and the relevant bibliographic databases.
|
|
35
4
|
"""
|
|
36
5
|
|
|
37
|
-
import
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import importlib.util
|
|
38
8
|
import warnings
|
|
9
|
+
import numpy as np
|
|
39
10
|
import pandas as pd
|
|
40
|
-
from requests_cache import CachedSession, CachedResponse
|
|
11
|
+
from requests_cache import CachedSession, CachedResponse, OriginalResponse
|
|
12
|
+
from requests import Response
|
|
41
13
|
from io import StringIO
|
|
42
14
|
from datetime import timedelta
|
|
43
15
|
import re
|
|
44
|
-
import numpy as np
|
|
45
16
|
from bs4 import BeautifulSoup
|
|
46
17
|
import sys
|
|
47
18
|
import logging
|
|
48
|
-
from typing import Any, Optional
|
|
19
|
+
from typing import Any, Optional, Union
|
|
49
20
|
|
|
50
21
|
if importlib.util.find_spec("polars"):
|
|
51
22
|
POLARS_AVAILABLE = True
|
|
@@ -54,12 +25,10 @@ if importlib.util.find_spec("polars"):
|
|
|
54
25
|
else:
|
|
55
26
|
POLARS_AVAILABLE = False
|
|
56
27
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
stream=sys.stdout,
|
|
62
|
-
)
|
|
28
|
+
from .utils import wavenumber_to_refractive_index, extract_state_from_response
|
|
29
|
+
from ._version import version
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger("ASDCache")
|
|
63
32
|
|
|
64
33
|
ASDSchema = {
|
|
65
34
|
"element": str,
|
|
@@ -92,20 +61,22 @@ ASDSchema = {
|
|
|
92
61
|
"line_ref": str,
|
|
93
62
|
}
|
|
94
63
|
|
|
95
|
-
STATE_EXPR = r"spectra=([\w]+)\+?([IVX]+)?"
|
|
96
|
-
"""Regex pattern for extracting (element,charge) tuple for a single-state query, which uses roman numerals."""
|
|
97
64
|
SCI_EXPR = r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)"
|
|
98
65
|
"""Regex pattern for processing scientific notation"""
|
|
99
66
|
|
|
100
67
|
|
|
68
|
+
class ASDQueryError(Exception):
|
|
69
|
+
"""Exception raised when the NIST ASD has indicated an error with a query."""
|
|
70
|
+
|
|
71
|
+
|
|
101
72
|
class SpectraCache:
|
|
102
73
|
"""A class acting as the entrypoint to retrieve data from the NIST Atomic Spectra Database that uses caching.
|
|
103
74
|
|
|
104
75
|
The `ASDCache` instance acts as an access point to the cache, which stores responses on the local system in a SQLite database.
|
|
105
76
|
|
|
106
|
-
Data retrieval from cache is much faster (order milliseconds) than fetching from the internet (order seconds), and avoids wastefull requests to the server.
|
|
77
|
+
Data retrieval from cache is much faster (order of milliseconds) than fetching from the internet (order of seconds to minutes), and avoids wasteful requests to the server.
|
|
107
78
|
|
|
108
|
-
Cache time-to-live is
|
|
79
|
+
Cache time-to-live is two weeks by default.
|
|
109
80
|
|
|
110
81
|
Since the NIST ASD is usually updated less frequently than that, this is a compromise between having the latest data, and overall fast performance.
|
|
111
82
|
|
|
@@ -115,14 +86,15 @@ class SpectraCache:
|
|
|
115
86
|
nist_url = "https://physics.nist.gov/cgi-bin/ASD/lines1.pl"
|
|
116
87
|
species_expr = re.compile(r"spectra=([\w\+\-\%3]+)&")
|
|
117
88
|
query_params = {
|
|
89
|
+
"submit": "Retrieve Data",
|
|
118
90
|
"unit": 1,
|
|
119
91
|
"de": 0,
|
|
120
|
-
"plot_out": 0,
|
|
92
|
+
# "plot_out": 0,
|
|
121
93
|
"I_scale_type": 1,
|
|
122
94
|
"format": 3,
|
|
123
95
|
"line_out": 0,
|
|
124
|
-
"remove_js": "on",
|
|
125
|
-
"no_spaces": "on",
|
|
96
|
+
# "remove_js": "on",
|
|
97
|
+
# "no_spaces": "on",
|
|
126
98
|
"en_unit": 0,
|
|
127
99
|
"output": 0,
|
|
128
100
|
"bibrefs": 1,
|
|
@@ -143,54 +115,37 @@ class SpectraCache:
|
|
|
143
115
|
"enrg_out": "on",
|
|
144
116
|
"J_out": "on",
|
|
145
117
|
"g_out": "on",
|
|
146
|
-
"diag_out": "on",
|
|
118
|
+
# "diag_out": "on", # avoid diagnostic data, it leads to multi-species queries failing; which can appear as if keys below are needed. See issue #1
|
|
147
119
|
"allowed_out": 1,
|
|
148
120
|
"forbid_out": 1,
|
|
149
|
-
"
|
|
121
|
+
# "show_diff_obs_calc": 1, # Does not appear mandatory in retrospect, see issue #1
|
|
122
|
+
# "include_Ritz_E1": 1, # Does not appear mandatory in retrospect, see issue #1
|
|
150
123
|
}
|
|
151
124
|
"""Request parameters used by the NIST ASD form."""
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
"
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
"
|
|
168
|
-
"Ei(cm-1)",
|
|
169
|
-
"Ek(cm-1)",
|
|
170
|
-
"conf_i",
|
|
171
|
-
"term_i",
|
|
172
|
-
"J_i",
|
|
173
|
-
"conf_k",
|
|
174
|
-
"term_k",
|
|
175
|
-
"J_k",
|
|
176
|
-
"g_i",
|
|
177
|
-
"g_k",
|
|
178
|
-
"Type",
|
|
179
|
-
"tp_ref",
|
|
180
|
-
"line_ref",
|
|
181
|
-
]
|
|
182
|
-
"""Fixed order of columns for consistent schema of data."""
|
|
183
|
-
|
|
184
|
-
def __init__(self, use_polars_backend=False, cache_expiry=timedelta(weeks=1), strict_matching=True):
|
|
185
|
-
"""Initialize an instance that handles cached data lookup of the NIST ASD."""
|
|
125
|
+
|
|
126
|
+
def __init__(
|
|
127
|
+
self,
|
|
128
|
+
use_polars_backend=False,
|
|
129
|
+
cache_expiry=timedelta(weeks=2),
|
|
130
|
+
strict_matching=True,
|
|
131
|
+
cache_path: Optional[Path] = None,
|
|
132
|
+
):
|
|
133
|
+
"""Initialize an instance that handles cached data lookup of the NIST ASD.
|
|
134
|
+
|
|
135
|
+
Args:
|
|
136
|
+
use_polars_backend (bool): Flag to use polars as DataFrame backend, if available
|
|
137
|
+
cache_expiry (timedelta): Span of time beyond which an entry will be considered expired, and a refresh attempted
|
|
138
|
+
strict_matching (bool): If true, use all request parameters to hash urls for cache matching (recommended).
|
|
139
|
+
cache_path (Path, Optional): Path to a location to store the cache in
|
|
140
|
+
"""
|
|
186
141
|
self.strict_matching = strict_matching
|
|
187
142
|
self.session = CachedSession(
|
|
188
|
-
"NIST_ASD_cache",
|
|
143
|
+
"NIST_ASD_cache" if cache_path is None else cache_path,
|
|
189
144
|
use_cache_dir=True,
|
|
190
145
|
expire_after=cache_expiry,
|
|
191
146
|
stale_if_error=True,
|
|
192
147
|
filter_fn=self._check_response_success,
|
|
193
|
-
ignored_parameters=list(self.query_params.keys()) if self.strict_matching is False else
|
|
148
|
+
ignored_parameters=list(self.query_params.keys()) if self.strict_matching is False else [],
|
|
194
149
|
)
|
|
195
150
|
if (use_polars_backend) & (not POLARS_AVAILABLE):
|
|
196
151
|
warnings.warn("Cannot find `polars` as a backend, falling back to `pandas`", stacklevel=2)
|
|
@@ -209,7 +164,7 @@ class SpectraCache:
|
|
|
209
164
|
"""
|
|
210
165
|
return self.session.settings.expire_after
|
|
211
166
|
|
|
212
|
-
def set_cache_expiry(self, new: timedelta = None, **kwargs):
|
|
167
|
+
def set_cache_expiry(self, new: Optional[timedelta] = None, **kwargs):
|
|
213
168
|
"""Set the cache expiry to a different interval (default: 2 weeks).
|
|
214
169
|
|
|
215
170
|
Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
|
|
@@ -219,12 +174,59 @@ class SpectraCache:
|
|
|
219
174
|
self.session.settings.expire_after = new
|
|
220
175
|
|
|
221
176
|
@staticmethod
|
|
222
|
-
def _check_response_success(response:
|
|
177
|
+
def _check_response_success(response: Response) -> bool:
|
|
223
178
|
"""Validate that data has been fetched successfully.
|
|
224
179
|
|
|
225
180
|
If this check fails, the cache should not update with this response, even when marked as stale.
|
|
181
|
+
|
|
182
|
+
The first obvious way to check success is if an error is indicated by the HTTP status code.
|
|
183
|
+
|
|
184
|
+
However, when a query for data is incorrect, the NIST ASD returns an HTML page indicating `<title>NIST ASD : Input Error</title>` in the `<head>` tag, or "Error Message".
|
|
185
|
+
|
|
186
|
+
A successful query would not receive HTML as a response, but raw ASCII values instead.
|
|
187
|
+
|
|
188
|
+
We can thus check for the start of an HTML document.
|
|
189
|
+
|
|
190
|
+
Note that this only works for data queries, not for bibliographic metadata by `BibCache`.
|
|
191
|
+
"""
|
|
192
|
+
return not (not response.ok or response.content.startswith(b"<!DOCTYPE"))
|
|
193
|
+
|
|
194
|
+
def _get_data(self, species: str, wl_range: tuple[float, float] = (170, 1000), **kwargs) -> Response:
|
|
195
|
+
"""Retrieve raw, ASCII-formatted data from the NIST ASD with a GET request.
|
|
196
|
+
|
|
197
|
+
To retrieve data and parse it into a DataFrame, use [fetch][..] instead.
|
|
198
|
+
|
|
199
|
+
Returns the raw response, which will be cached if it contains valid data (see [_check_response_success][..]).
|
|
200
|
+
|
|
201
|
+
If the response does not contain ASCII data, but HTML instead, an [ASDQueryError][(m).] will be raised.
|
|
202
|
+
|
|
203
|
+
It is possible to override any standard query parameter (see [query_params][..]) by passing them as kwargs.
|
|
226
204
|
"""
|
|
227
|
-
|
|
205
|
+
query_params = {
|
|
206
|
+
"spectra": species,
|
|
207
|
+
"output_type": 0,
|
|
208
|
+
"low_w": min(wl_range),
|
|
209
|
+
"upp_w": max(wl_range),
|
|
210
|
+
**{k: v for k, v in self.query_params.items() if k not in kwargs},
|
|
211
|
+
**{k: v for k, v in kwargs.items() if k in self.query_params},
|
|
212
|
+
}
|
|
213
|
+
response: Response = self.session.get(self.nist_url, params=query_params)
|
|
214
|
+
response.raise_for_status()
|
|
215
|
+
# Check whether the response is an HTML document instead of ASCII-formatted data, which indicates a query error.
|
|
216
|
+
if response.content.startswith(b"<!DOCTYPE"):
|
|
217
|
+
body = BeautifulSoup(response.text, features="html.parser").body
|
|
218
|
+
reason = body.text.strip().replace("\n", " ") if body else ""
|
|
219
|
+
logger.error(
|
|
220
|
+
"NIST ASD responded with %s instead of ASCII-data for species=%s, wl_range=%s\nQuery: %s",
|
|
221
|
+
reason,
|
|
222
|
+
species,
|
|
223
|
+
wl_range,
|
|
224
|
+
response.url,
|
|
225
|
+
)
|
|
226
|
+
raise ASDQueryError(
|
|
227
|
+
f"Query for {species=} {wl_range=} did not receive ASCII-data. {reason=} This means the ASD could not interpret your query. Check if your query is malformed."
|
|
228
|
+
)
|
|
229
|
+
return response
|
|
228
230
|
|
|
229
231
|
@property
|
|
230
232
|
def cached_species(self) -> list[str]:
|
|
@@ -239,35 +241,22 @@ class SpectraCache:
|
|
|
239
241
|
for elem in self.species_expr.search(u).group(1).split("%3B")
|
|
240
242
|
]
|
|
241
243
|
|
|
242
|
-
def fetch(self, species, wl_range=(170, 1000)
|
|
244
|
+
def fetch(self, species, wl_range=(170, 1000)) -> "pd.DataFrame|pl.DataFrame":
|
|
243
245
|
"""Fetch information on a species from the ASD, first checking the cache.
|
|
244
246
|
|
|
245
|
-
This supports loading multiple species in one go by using the same notation as the NIST ASD
|
|
247
|
+
This supports loading multiple species in one go by using the same notation as the NIST ASD form.
|
|
246
248
|
|
|
247
249
|
Note however that cache keys are computed for unique options for `species` and `wl_range`.
|
|
248
250
|
|
|
249
251
|
This means that you won't get caching benefits by using different queries.
|
|
250
252
|
|
|
251
|
-
In other words: the cache cannot deduplicate queries such as `ASD.fetch('H', (200,1000))` followed by `ASD.fetch('H I', (650,660))
|
|
253
|
+
In other words: the cache cannot deduplicate queries such as `ASD.fetch('H', (200,1000))` followed by `ASD.fetch('H I', (650,660))` (or vice versa).
|
|
252
254
|
|
|
253
255
|
Both these operations will fetch data online and be stored as separate cache entries.
|
|
254
256
|
"""
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
"output_type": 0,
|
|
258
|
-
"low_w": min(wl_range),
|
|
259
|
-
"upp_w": max(wl_range),
|
|
260
|
-
**self.query_params,
|
|
261
|
-
}
|
|
262
|
-
response = self.session.get(self.nist_url, params=query_params)
|
|
263
|
-
|
|
264
|
-
# if response.status_code == 200:
|
|
265
|
-
response.raise_for_status()
|
|
257
|
+
# TODO: add kwargs for read-only/offline access etc.
|
|
258
|
+
response = self._get_data(species, wl_range)
|
|
266
259
|
return self.create_dataframe(response)
|
|
267
|
-
# else:
|
|
268
|
-
# print(f"Error: Received status code {response.status_code}")
|
|
269
|
-
# print(response.url)
|
|
270
|
-
# return response
|
|
271
260
|
|
|
272
261
|
def create_dataframe(self, response) -> "pd.DataFrame|pl.DataFrame":
|
|
273
262
|
"""Create a dataframe from the (cached) NIST ASD response, using the chosen backend at class instantiation."""
|
|
@@ -276,14 +265,14 @@ class SpectraCache:
|
|
|
276
265
|
return self._from_pandas(response)
|
|
277
266
|
|
|
278
267
|
@classmethod
|
|
279
|
-
def _from_pandas(cls, response:
|
|
268
|
+
def _from_pandas(cls, response: Response) -> "pd.DataFrame":
|
|
280
269
|
r"""Transform a (cached) NIST ASD response into a pandas DataFrame.
|
|
281
270
|
|
|
282
271
|
Calculates the air equivalent wavelength from the vacuum wavelength using the same Sellmeier equation as the NIST ASD.
|
|
283
272
|
|
|
284
273
|
Note that this conversion is only performed for lines with $200 nm < \lambda < 2000 nm$, like the ASD.
|
|
285
274
|
|
|
286
|
-
For lines outside of this range,
|
|
275
|
+
For lines outside of this range, it uses NaN values.
|
|
287
276
|
"""
|
|
288
277
|
schema = {
|
|
289
278
|
"obs_wl_vac(nm)": str,
|
|
@@ -311,38 +300,48 @@ class SpectraCache:
|
|
|
311
300
|
"": str,
|
|
312
301
|
}
|
|
313
302
|
df = pd.read_csv(StringIO(response.text), sep="\t", dtype=schema)
|
|
303
|
+
# Detect if pandas uses new `StringDtype`, or legacy `object` dtype for strings.
|
|
304
|
+
# This affects NaN handling for strings.
|
|
305
|
+
# Pandas 3.0 and up use the StringDtype, while pandas 2 can opt-in to this
|
|
306
|
+
# The 'Type' column should exist, 'element' may not.
|
|
307
|
+
uses_new_string_dtype = pd.api.types.is_string_dtype(df["Type"])
|
|
314
308
|
for col in ["obs_wl_vac(nm)", "ritz_wl_vac(nm)", "intens", "Ei(cm-1)", "Ek(cm-1)"]:
|
|
315
309
|
df[col] = df.loc[:, col].str.extract(SCI_EXPR).astype(float)
|
|
316
|
-
|
|
310
|
+
# Any missing value implies line is an E1 (electric dipole) transition
|
|
311
|
+
if uses_new_string_dtype:
|
|
312
|
+
df["Type"] = df.loc[:, "Type"].fillna("E1")
|
|
313
|
+
else:
|
|
314
|
+
df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
|
|
317
315
|
df["tp_ref"] = df.loc[:, "tp_ref"].fillna("")
|
|
318
|
-
df["obs_wl_air(nm)"] =
|
|
319
|
-
|
|
320
|
-
|
|
316
|
+
df["obs_wl_air(nm)"] = np.nan
|
|
317
|
+
air_equiv_range = df["wn(cm-1)"].between(5000, 50000) # range where air wavelength is computed.
|
|
318
|
+
df["obs_wl_air(nm)"] = df.loc[air_equiv_range, "obs_wl_vac(nm)"] / wavenumber_to_refractive_index(
|
|
319
|
+
df.loc[air_equiv_range, "wn(cm-1)"]
|
|
321
320
|
)
|
|
322
|
-
df["ritz_wl_air(nm)"] =
|
|
323
|
-
df["ritz_wl_air(nm)"] = df[
|
|
324
|
-
df[
|
|
321
|
+
df["ritz_wl_air(nm)"] = np.nan
|
|
322
|
+
df["ritz_wl_air(nm)"] = df.loc[air_equiv_range, "ritz_wl_vac(nm)"] / wavenumber_to_refractive_index(
|
|
323
|
+
df.loc[air_equiv_range, "wn(cm-1)"]
|
|
325
324
|
)
|
|
326
325
|
df = df.drop([c for c in df.columns if "Unnamed" in c], axis=1).reset_index(drop=True)
|
|
327
326
|
if "element" not in df.columns:
|
|
328
|
-
element, numeral = re.search(STATE_EXPR, response.url).groups()
|
|
329
|
-
df["element"] = element
|
|
330
|
-
df["sp_num"] = numeral
|
|
331
327
|
# cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
|
|
332
|
-
|
|
328
|
+
# As 'element' and 'sp_num' columns are only missing for single-species queries, assign as constants, not vectors.
|
|
329
|
+
element, numeric = extract_state_from_response(response)
|
|
330
|
+
df["element"] = element
|
|
331
|
+
df["sp_num"] = numeric
|
|
333
332
|
df["unc_obs_wl"] = pd.to_numeric(df["unc_obs_wl"]) if "unc_obs_wl" in df.columns else np.nan
|
|
334
333
|
df["unc_ritz_wl"] = pd.to_numeric(df["unc_ritz_wl"]) if "unc_ritz_wl" in df.columns else np.nan
|
|
335
|
-
return df.loc[:,
|
|
334
|
+
return df.loc[:, list(ASDSchema)]
|
|
336
335
|
|
|
337
336
|
@classmethod
|
|
338
|
-
def _from_polars(cls, response:
|
|
337
|
+
def _from_polars(cls, response: Response) -> "pl.DataFrame":
|
|
339
338
|
r"""Transform a (cached) NIST ASD response into a polars DataFrame.
|
|
340
339
|
|
|
341
340
|
Calculates the air equivalent wavelength from the vacuum wavelength using the same Sellmeier equation as the NIST ASD.
|
|
342
341
|
|
|
343
342
|
Note that this conversion is only performed for lines with $200 nm < \lambda < 2000 nm$, like the ASD.
|
|
344
343
|
|
|
345
|
-
For lines outside of this range,
|
|
344
|
+
For lines outside of this range, it uses NaN values.
|
|
346
345
|
"""
|
|
347
346
|
schema = {
|
|
348
347
|
"obs_wl_vac(nm)": pl.String,
|
|
@@ -366,97 +365,45 @@ class SpectraCache:
|
|
|
366
365
|
"J_k": pl.String,
|
|
367
366
|
"": pl.String,
|
|
368
367
|
}
|
|
369
|
-
|
|
370
|
-
df = (
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
.
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
.then(
|
|
393
|
-
pl.col("obs_wl_vac(nm)").cast(pl.Float64)
|
|
394
|
-
/ pl.col("wn(cm-1)").map_elements(cls.wn_to_n_refractive, return_dtype=pl.Float64)
|
|
395
|
-
)
|
|
396
|
-
.otherwise(pl.col("obs_wl_vac(nm)"))
|
|
397
|
-
.cast(pl.Float64)
|
|
368
|
+
|
|
369
|
+
df = pl.read_csv(
|
|
370
|
+
StringIO(response.text),
|
|
371
|
+
separator="\t",
|
|
372
|
+
schema_overrides=schema,
|
|
373
|
+
null_values="",
|
|
374
|
+
)
|
|
375
|
+
sci_cols = ["obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens", "ritz_wl_vac(nm)"]
|
|
376
|
+
cast_to_scientific_notation = [
|
|
377
|
+
pl.col(c).str.extract(SCI_EXPR).replace("", None).cast(pl.Float64).alias(c) for c in sci_cols
|
|
378
|
+
]
|
|
379
|
+
df = df.with_columns(
|
|
380
|
+
*cast_to_scientific_notation,
|
|
381
|
+
pl.col("S(a.u.)").cast(pl.Float64),
|
|
382
|
+
pl.col("Type").replace(None, "E1"),
|
|
383
|
+
pl.col("tp_ref").replace(None, ""),
|
|
384
|
+
).drop([""])
|
|
385
|
+
# compute air wavelengths between 5000 cm-1 and 50000 cm-1
|
|
386
|
+
air_equiv_range = pl.col("wn(cm-1)").is_between(5000, 50000)
|
|
387
|
+
df = df.with_columns(
|
|
388
|
+
pl.when(air_equiv_range)
|
|
389
|
+
.then(pl.col("obs_wl_vac(nm)") / wavenumber_to_refractive_index(pl.col("wn(cm-1)")))
|
|
390
|
+
.otherwise(np.nan)
|
|
398
391
|
.alias("obs_wl_air(nm)"),
|
|
399
|
-
pl.when(
|
|
400
|
-
.then(
|
|
401
|
-
|
|
402
|
-
/ pl.col("wn(cm-1)").map_elements(cls.wn_to_n_refractive, return_dtype=pl.Float64)
|
|
403
|
-
)
|
|
404
|
-
.otherwise(pl.col("ritz_wl_vac(nm)"))
|
|
405
|
-
.cast(pl.Float64)
|
|
392
|
+
pl.when(air_equiv_range)
|
|
393
|
+
.then(pl.col("ritz_wl_vac(nm)") / wavenumber_to_refractive_index(pl.col("wn(cm-1)")))
|
|
394
|
+
.otherwise(np.nan)
|
|
406
395
|
.alias("ritz_wl_air(nm)"),
|
|
407
396
|
)
|
|
408
397
|
if "element" not in df.columns:
|
|
409
|
-
element,
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
)
|
|
419
|
-
df = df.with_columns(
|
|
420
|
-
unc_obs_wl=pl.col("unc_obs_wl") if "unc_obs_wl" in df.columns else None,
|
|
421
|
-
unc_ritz_wl=pl.col("unc_ritz_wl") if "unc_ritz_wl" in df.columns else None,
|
|
422
|
-
).with_columns(pl.col("unc_obs_wl").cast(pl.Float64), pl.col("unc_ritz_wl").cast(pl.Float64))
|
|
423
|
-
|
|
424
|
-
return df.select(*cls.column_order)
|
|
425
|
-
|
|
426
|
-
@staticmethod
|
|
427
|
-
def roman_to_int(roman: str) -> int:
|
|
428
|
-
"""Transform Roman numerals to integers.
|
|
429
|
-
|
|
430
|
-
Does only support numerals including up to `L`.
|
|
431
|
-
"""
|
|
432
|
-
roman_numerals = {"I": 1, "V": 5, "X": 10, "L": 50}
|
|
433
|
-
total = 0
|
|
434
|
-
previous = 0
|
|
435
|
-
for char in reversed(roman):
|
|
436
|
-
current_value = roman_numerals[char]
|
|
437
|
-
if current_value < previous:
|
|
438
|
-
total -= current_value # Subtract if the current value is less than the previous value
|
|
439
|
-
else:
|
|
440
|
-
total += current_value
|
|
441
|
-
previous = current_value
|
|
442
|
-
return total
|
|
443
|
-
|
|
444
|
-
@staticmethod
|
|
445
|
-
def wn_to_n_refractive(wavenumbers: float) -> float:
|
|
446
|
-
r"""Calculate the refractive index $n$ in air for a transition, using the 5-term Sellmeier formula used by NIST.
|
|
447
|
-
|
|
448
|
-
The used Sellmeier formula is the one from E.R. Peck and K. Reeder [J. Opt. Soc. Am. 62, 958 (1972)](http://dx.doi.org/10.1364/JOSA.62.000958).
|
|
449
|
-
|
|
450
|
-
This formula is fitted to data in the range of 185 nm to 1700 nm for air at 15 °C, 101 325 Pa pressure, with 0.033 % CO2.
|
|
451
|
-
|
|
452
|
-
This is the same formula used by the NIST ASD to calculate air wavelengths in the interval of 200 nm to 2000 nm.
|
|
453
|
-
|
|
454
|
-
See also [the ASD documentation on the topic](https://physics.nist.gov/PhysRefData/ASD/Html/lineshelp.html#Conversion%20between%20air%20and%20vacuum%20wavelengths).
|
|
455
|
-
|
|
456
|
-
Using this refractive index, air equivalent wavelengths consistent with the ASD can be calculated, without the need to query them separately.
|
|
457
|
-
"""
|
|
458
|
-
sigma = wavenumbers * 1e-4 # um^-1
|
|
459
|
-
return 1 + 1e-8 * (8060.51 + 2480990 / (132.274 - sigma**2) + 17455.7 / (39.32957 - sigma**2))
|
|
398
|
+
element, numeric = extract_state_from_response(response)
|
|
399
|
+
df = df.with_columns(pl.lit(element).alias("element"), pl.lit(numeric, dtype=pl.Int64).alias("sp_num"))
|
|
400
|
+
# Cast to float, or create column filled with `null` if missing.
|
|
401
|
+
exprs = [
|
|
402
|
+
(pl.col(c) if c in df.columns else pl.lit(None).alias(c)).cast(pl.Float64)
|
|
403
|
+
for c in ["unc_obs_wl", "unc_ritz_wl"]
|
|
404
|
+
]
|
|
405
|
+
df = df.with_columns(exprs)
|
|
406
|
+
return df.select(*ASDSchema)
|
|
460
407
|
|
|
461
408
|
def get_all_cached(self) -> "pd.DataFrame|pl.DataFrame":
|
|
462
409
|
"""Retrieve all cached data into a single dataframe."""
|
|
@@ -508,7 +455,7 @@ class BibCache:
|
|
|
508
455
|
"""
|
|
509
456
|
return self.session.settings.expire_after
|
|
510
457
|
|
|
511
|
-
def set_cache_expiry(self, new: timedelta = None, **kwargs):
|
|
458
|
+
def set_cache_expiry(self, new: Optional[timedelta] = None, **kwargs):
|
|
512
459
|
"""Set the cache expiry to a different interval (default: 1 week).
|
|
513
460
|
|
|
514
461
|
Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
|
|
@@ -518,14 +465,14 @@ class BibCache:
|
|
|
518
465
|
self.session.settings.expire_after = new
|
|
519
466
|
|
|
520
467
|
@staticmethod
|
|
521
|
-
def _check_response_success(response:
|
|
468
|
+
def _check_response_success(response: Response) -> bool:
|
|
522
469
|
"""Validate that data has been fetched successfully.
|
|
523
470
|
|
|
524
471
|
If this check fails, the cache should not update with this response, even when marked as stale.
|
|
525
472
|
"""
|
|
526
473
|
is_success = (response.status_code == 200) & (b"There was a problem" not in response.content)
|
|
527
474
|
if not is_success:
|
|
528
|
-
|
|
475
|
+
logger.warning(f"Request was unsuccesful status:{response.status_code} , url:{response.url}")
|
|
529
476
|
return is_success
|
|
530
477
|
|
|
531
478
|
@classmethod
|
|
@@ -533,17 +480,18 @@ class BibCache:
|
|
|
533
480
|
r"""Parse a reference code from the NIST ASD into the constituent parts that can be used to look up references.
|
|
534
481
|
|
|
535
482
|
Args:
|
|
536
|
-
|
|
483
|
+
reference_code (str): A NIST ASD bibliographic reference string, such as `L13456n3`, or `T6936n`.
|
|
537
484
|
|
|
538
485
|
Returns:
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
486
|
+
db (str): A label for which bibliographic database to target
|
|
487
|
+
ref (str|None): The database ID for the reference to look up
|
|
488
|
+
comment (str): An additional comment included in the reference, can be fetched separately.
|
|
542
489
|
"""
|
|
543
490
|
if reference_code.startswith("n"):
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
491
|
+
return ("T", None, "n")
|
|
492
|
+
matched = cls.reference_expr.match(reference_code)
|
|
493
|
+
if (not reference_code.startswith("LS")) and (matched is not None):
|
|
494
|
+
db, ref, comment = matched.groups()
|
|
547
495
|
comment = comment if "LS" not in reference_code else "LS"
|
|
548
496
|
else:
|
|
549
497
|
db, ref, comment = "T", None, "LS"
|
|
@@ -553,12 +501,12 @@ class BibCache:
|
|
|
553
501
|
"""Look up a reference code for a given element state.
|
|
554
502
|
|
|
555
503
|
Args:
|
|
556
|
-
element (str)
|
|
557
|
-
sp_num (int)
|
|
558
|
-
reference_code (str)
|
|
504
|
+
element (str): The element name, e.g. `H`
|
|
505
|
+
sp_num (int): The ionization state of the element, with 1 corresponding to the atom
|
|
506
|
+
reference_code (str): The bibliographic reference code from the ASD columns `tp_ref` or `line_ref`.
|
|
559
507
|
|
|
560
508
|
Returns:
|
|
561
|
-
bib_data (dict)
|
|
509
|
+
bib_data (dict[str,Any]): A dictionary containing bibliographic metadata for the reference, if available/applicable. Contains a url to look it up.
|
|
562
510
|
"""
|
|
563
511
|
db, ref, comment = self.parse_reference_code(reference_code)
|
|
564
512
|
params = {
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
r"""`ASDCache` is a package to fetch data from the NIST Atomic Spectra Database (ASD), utilizing caching for fast responses.
|
|
2
|
+
|
|
3
|
+
To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
|
|
4
|
+
|
|
5
|
+
Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
|
|
6
|
+
|
|
7
|
+
The benefit of this format is that it is more 'machine readable' than the formatted ASCII of HTML options.
|
|
8
|
+
|
|
9
|
+
This means it requires far less bespoke parsing to get rid of 'human readable' features such as repeated page column headers, or empty lines.
|
|
10
|
+
|
|
11
|
+
## Air wavelength
|
|
12
|
+
To ensure a consistent schema of the retrieved data, lines are always retrieved as a function of wavelength, using `vacuum wavelength`, even between 200 to 2000 nm.
|
|
13
|
+
|
|
14
|
+
Wavenumbers and Ritz wavelength will be included in the response.
|
|
15
|
+
|
|
16
|
+
In the range $5000\ \mathrm{cm}^{-1}<\nu<50000\ \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][.utils.wavenumber_to_refractive_index]).
|
|
17
|
+
This is consistent with the approach of the ASD.
|
|
18
|
+
|
|
19
|
+
## Making use of the cache
|
|
20
|
+
|
|
21
|
+
Each response from the NIST page is cached (2 weeks by default) on the local system.
|
|
22
|
+
|
|
23
|
+
This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
|
|
24
|
+
|
|
25
|
+
As an example: retrieving and parsing the data for all spectra between 200 and 1000 nm can take over 2 minutes without using the cache, but can be as fast as 0.2 seconds using the `polars` backend.
|
|
26
|
+
|
|
27
|
+
In addition, it means that an internet connection is not required after initial data fetching.
|
|
28
|
+
|
|
29
|
+
The cached response is only updated upon successful retrieval of a new response of the NIST page.
|
|
30
|
+
|
|
31
|
+
If unable to successfully fetch new data, we fall back to a 'stale' cached response.
|
|
32
|
+
|
|
33
|
+
The cache can be shared to another system, to give offline/airgapped systems access to the same data.
|
|
34
|
+
|
|
35
|
+
To that end, the file `NIST_ASD_cache.sqlite` in the user's cache directory has to be copied over.
|
|
36
|
+
|
|
37
|
+
### Default cache locations
|
|
38
|
+
|
|
39
|
+
The standard cache directories are as follows:
|
|
40
|
+
|
|
41
|
+
=== "Windows"
|
|
42
|
+
`%USERPROFILE%/AppData/Local`
|
|
43
|
+
=== "Linux"
|
|
44
|
+
`~/.cache/http_cache/`
|
|
45
|
+
=== "MacOS"
|
|
46
|
+
`/Users/user/Library/Caches/http_cache/`
|
|
47
|
+
|
|
48
|
+
### Cache keys and uniqueness
|
|
49
|
+
|
|
50
|
+
Queries to the NIST ASD are hashed by the keys (or parameters) of the requests.
|
|
51
|
+
|
|
52
|
+
This means that any change to either one of these parameters, will result in a new cache entry, even if the returned data is equivalent.
|
|
53
|
+
|
|
54
|
+
In other words: the cache cannot deduplicate queries such as `SpectraCache().fetch('H', (200,1000))` followed by `SpectraCache().fetch('H I', (650,660))` (or vice versa).
|
|
55
|
+
|
|
56
|
+
It is often better (and faster) to fetch a range of data beyond what you need, and then filter down the dataframe you retrieve according to your needs.
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
from .ASDCache import SpectraCache, BibCache
|
|
60
|
+
|
|
61
|
+
__all__ = ["SpectraCache", "BibCache"]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# file generated by vcs-versioning
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"__version__",
|
|
7
|
+
"__version_tuple__",
|
|
8
|
+
"version",
|
|
9
|
+
"version_tuple",
|
|
10
|
+
"__commit_id__",
|
|
11
|
+
"commit_id",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
version: str
|
|
15
|
+
__version__: str
|
|
16
|
+
__version_tuple__: tuple[int | str, ...]
|
|
17
|
+
version_tuple: tuple[int | str, ...]
|
|
18
|
+
commit_id: str | None
|
|
19
|
+
__commit_id__: str | None
|
|
20
|
+
|
|
21
|
+
__version__ = version = '0.2.4'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 2, 4)
|
|
23
|
+
|
|
24
|
+
__commit_id__ = commit_id = None
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Module containing small helper utility functions for extracting and processing input from the ASD."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from requests import Response
|
|
8
|
+
|
|
9
|
+
ROMAN_NUMERALS = {"I": 1, "V": 5, "X": 10, "L": 50, "C": 100, "D": 500, "M": 1000}
|
|
10
|
+
STATE_EXPR = r"spectra=([\w]+)\+?([IVX]+)?"
|
|
11
|
+
"""Regex pattern for extracting (element,charge) tuple for a single-state query, which uses roman numerals."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def roman_to_int(roman: str) -> int:
    """Convert a Roman numeral string to its integer value.

    The input is upper-cased and stripped of surrounding whitespace before parsing.
    Symbols are looked up in `ROMAN_NUMERALS`, so numerals up to "M" are supported;
    any other character raises a `KeyError`.
    """
    # Walk the numeral from right to left, so each symbol can be compared to its right-hand neighbour.
    values = [ROMAN_NUMERALS[symbol] for symbol in reversed(roman.upper().strip())]
    result = 0
    right_neighbour = 0
    for value in values:
        # A smaller symbol in front of a larger one is subtractive (e.g. the "I" in "IV").
        result += -value if value < right_neighbour else value
        right_neighbour = value
    return result
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def wavenumber_to_refractive_index(wavenumbers: float) -> float:
    r"""Return the refractive index $n$ of air for a transition at the given wavenumber.

    The value is computed with the five-parameter Sellmeier formula of E.R. Peck and K. Reeder
    [J. Opt. Soc. Am. 62, 958 (1972)](http://dx.doi.org/10.1364/JOSA.62.000958), which was fitted to data
    in the range of 185 nm to 1700 nm for air at 15 °C, 101 325 Pa pressure, with 0.033 % CO2.

    The NIST ASD uses this same formula to calculate air wavelengths in the interval of 200 nm to 2000 nm,
    see [the ASD documentation on the topic](https://physics.nist.gov/PhysRefData/ASD/Html/lineshelp.html#Conversion%20between%20air%20and%20vacuum%20wavelengths).

    With this refractive index, air equivalent wavelengths consistent with the ASD can be derived locally,
    so they do not have to be queried separately.
    """
    # The formula is expressed in um^-1; the 1e-4 factor presumably converts from cm^-1 (verify against callers).
    sigma = wavenumbers * 1e-4
    sigma_squared = sigma**2
    first_resonance = 2480990 / (132.274 - sigma_squared)
    second_resonance = 17455.7 / (39.32957 - sigma_squared)
    # The fitted expression yields (n - 1) * 1e8.
    refractivity = 8060.51 + first_resonance + second_resonance
    return 1 + 1e-8 * refractivity
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def extract_state_from_response(response: "Response") -> tuple[str, int]:
    """Extract the element and ionization state from the url of a response.

    When querying only a single state, e.g. 'H I', this information will not be present as a column in data:
    the `element` and `sp_num` columns will not be included.

    This information is parsed from the query url instead, so it can be added.

    Since the `sp_num` column is of an integer type, the roman numerals in the url are converted to integers.
    If the url holds an element but no numeral, the returned `sp_num` defaults to 1.

    Raises a `ValueError` when the url does not contain a `spectra` parameter matching `STATE_EXPR`.
    """
    matched = re.search(STATE_EXPR, str(response.url))
    if not matched:
        # Exceptions do not apply %-style formatting to their arguments (unlike logging calls),
        # so the pattern has to be interpolated into the message explicitly.
        raise ValueError(
            f"URL did not contain a `spectra` parameter satisfying {STATE_EXPR}; "
            "Could not identify element and sp_num"
        )
    element, numeral = matched.groups()
    # The numeral group is optional in the pattern; fall back to 1 when it is absent.
    numeric: int = roman_to_int(numeral) if numeral else 1
    return element, numeric
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ASDCache
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: A Python module to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling
|
|
5
5
|
Project-URL: Documentation, https://antoinetue.github.io/asdcache
|
|
6
6
|
Project-URL: Source, https://github.com/AntoineTUE/asdcache
|
|
@@ -17,28 +17,30 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
21
|
Classifier: Topic :: Scientific/Engineering
|
|
21
22
|
Requires-Python: >=3.9
|
|
22
|
-
Requires-Dist:
|
|
23
|
-
Requires-Dist: numpy
|
|
24
|
-
Requires-Dist: pandas
|
|
25
|
-
Requires-Dist: requests
|
|
26
|
-
Requires-Dist: requests-cache
|
|
23
|
+
Requires-Dist: beautifulsoup4>=4.12
|
|
24
|
+
Requires-Dist: numpy>=1.20.3
|
|
25
|
+
Requires-Dist: pandas>=2.0
|
|
26
|
+
Requires-Dist: requests-cache>=1.2.0
|
|
27
27
|
Provides-Extra: docs
|
|
28
|
-
Requires-Dist:
|
|
29
|
-
Requires-Dist: mkdocs; extra == 'docs'
|
|
28
|
+
Requires-Dist: mkdocs-api-autonav; extra == 'docs'
|
|
30
29
|
Requires-Dist: mkdocs-autorefs; extra == 'docs'
|
|
31
|
-
Requires-Dist: mkdocs-gen-files; extra == 'docs'
|
|
32
30
|
Requires-Dist: mkdocs-git-revision-date-localized-plugin; extra == 'docs'
|
|
33
31
|
Requires-Dist: mkdocs-include-markdown-plugin; extra == 'docs'
|
|
34
|
-
Requires-Dist: mkdocs-jupyter; extra == 'docs'
|
|
35
|
-
Requires-Dist: mkdocs-
|
|
36
|
-
Requires-Dist: mkdocs-material; extra == 'docs'
|
|
32
|
+
Requires-Dist: mkdocs-jupyter>=0.26.3; extra == 'docs'
|
|
33
|
+
Requires-Dist: mkdocs-material==9.7.6; extra == 'docs'
|
|
37
34
|
Requires-Dist: mkdocs-section-index; extra == 'docs'
|
|
38
35
|
Requires-Dist: mkdocstrings; extra == 'docs'
|
|
39
|
-
Requires-Dist: mkdocstrings-python; extra == 'docs'
|
|
36
|
+
Requires-Dist: mkdocstrings-python-xref>=2.1.1; extra == 'docs'
|
|
37
|
+
Requires-Dist: properdocs>=1.6.7; extra == 'docs'
|
|
38
|
+
Requires-Dist: pygments>=2.20.0; extra == 'docs'
|
|
39
|
+
Requires-Dist: ruff>=0.15.13; extra == 'docs'
|
|
40
40
|
Provides-Extra: polars
|
|
41
|
-
Requires-Dist: polars; extra == 'polars'
|
|
41
|
+
Requires-Dist: polars[pandas]; extra == 'polars'
|
|
42
|
+
Provides-Extra: polars-compat
|
|
43
|
+
Requires-Dist: polars[pandas,rtcompat]; extra == 'polars-compat'
|
|
42
44
|
Description-Content-Type: text/markdown
|
|
43
45
|
|
|
44
46
|
# ASDCache
|
|
@@ -51,7 +53,8 @@ Description-Content-Type: text/markdown
|
|
|
51
53
|
[](https://antoinetue.github.io/ASDCache)
|
|
52
54
|
[](https://pypi.python.org/pypi/ASDCache)
|
|
53
55
|
[](https://pypi.python.org/pypi/ASDCache)
|
|
54
|
-
[](https://pypistats.org/packages/asdcache)
|
|
57
|
+
[](https://pepy.tech/projects/asdcache)
|
|
55
58
|
[](https://github.com/astral-sh/ruff)
|
|
56
59
|
[](https://github.com/pypa/hatch)
|
|
57
60
|
|
|
@@ -69,7 +72,7 @@ The main goals and benefits of `ASDCache` are:
|
|
|
69
72
|
- [x] Retrieve a consistent schema of the data that represents the 'human readable' format, but enforce strictly numeric data for important columns
|
|
70
73
|
- [ ] This removes footnotes and other annotations, be sure to check the ASD itself as well for this information.
|
|
71
74
|
- [x] Use caching to dramatically speed up data retrieval, from minutes down to milliseconds in some cases
|
|
72
|
-
- [x] Cache time-to-live is
|
|
75
|
+
- [x] Cache time-to-live is two weeks by default, meaning you still get updates to the ASD in a reasonable time frame
|
|
73
76
|
- [x] The cache time-to-live can be adjusted
|
|
74
77
|
- [x] Cache data to allow working offline, or even transferring the ASD data to an offline system.
|
|
75
78
|
- [x] The cache is only updated when a request for new data succeeds
|
|
@@ -78,6 +81,7 @@ The main goals and benefits of `ASDCache` are:
|
|
|
78
81
|
`ASDCache` is not affiliated with NIST or the NIST ASD in any way, it simply tries to help make it more accessible.
|
|
79
82
|
|
|
80
83
|
## Installing
|
|
84
|
+
|
|
81
85
|
`ASDCache` can be installed with `pip`.
|
|
82
86
|
|
|
83
87
|
```console
|
|
@@ -99,6 +103,7 @@ Installing the `polars` feature is not required, in case `polars` is already ins
|
|
|
99
103
|
Documentation for `ASDCache` is available on [this page](https://antoinetue.github.io/ASDCache).
|
|
100
104
|
|
|
101
105
|
### Example
|
|
106
|
+
|
|
102
107
|
A brief example below demonstrates how to use `SpectraCache` to query the NIST ASD for spectroscopic data for different species and plot their respective relative intensities.
|
|
103
108
|
|
|
104
109
|
Note that these relative intensities are in principle not comparable between different species or sources and merely serve as a guide.
|
|
@@ -24,34 +24,35 @@ classifiers = [
|
|
|
24
24
|
"Programming Language :: Python :: 3.11",
|
|
25
25
|
"Programming Language :: Python :: 3.12",
|
|
26
26
|
"Programming Language :: Python :: 3.13",
|
|
27
|
+
"Programming Language :: Python :: 3.14",
|
|
27
28
|
]
|
|
28
|
-
dependencies = ["
|
|
29
|
+
dependencies = ["requests_cache>=1.2.0", "pandas>=2.0","numpy>=1.20.3", "beautifulsoup4>=4.12"]
|
|
29
30
|
dynamic = ["version"]
|
|
30
31
|
|
|
31
32
|
[project.optional-dependencies]
|
|
32
|
-
polars = ["polars"]
|
|
33
|
+
polars = ["polars[pandas]"]
|
|
34
|
+
polars-compat = ["polars[rtcompat,pandas]"]
|
|
33
35
|
docs = [
|
|
34
|
-
"
|
|
36
|
+
"properdocs>=1.6.7",
|
|
37
|
+
"mkdocs-material==9.7.6",
|
|
35
38
|
"mkdocs-autorefs",
|
|
36
|
-
"mkdocs-gen-files",
|
|
39
|
+
# "mkdocs-gen-files",
|
|
37
40
|
"mkdocs-git-revision-date-localized-plugin",
|
|
38
41
|
"mkdocs-include-markdown-plugin",
|
|
39
|
-
"mkdocs-jupyter",
|
|
40
|
-
"mkdocs-literate-nav",
|
|
41
|
-
"mkdocs-material",
|
|
42
|
+
"mkdocs-jupyter>=0.26.3",
|
|
43
|
+
# "mkdocs-literate-nav",
|
|
42
44
|
"mkdocs-section-index",
|
|
43
45
|
"mkdocstrings",
|
|
44
|
-
"mkdocstrings-python",
|
|
45
|
-
"
|
|
46
|
+
"mkdocstrings-python-xref>=2.1.1",
|
|
47
|
+
"mkdocs-api-autonav",
|
|
48
|
+
"ruff>=0.15.13",
|
|
49
|
+
"pygments>=2.20.0"
|
|
46
50
|
]
|
|
47
51
|
|
|
48
52
|
[project.urls]
|
|
49
53
|
Documentation = "https://antoinetue.github.io/asdcache"
|
|
50
54
|
Source = "https://github.com/AntoineTUE/asdcache"
|
|
51
55
|
|
|
52
|
-
[tool.hatch.metadata]
|
|
53
|
-
# direct dependency references, e.g `pip @ git+https://github.com/pypa/pip.git@master`
|
|
54
|
-
allow-direct-references = true
|
|
55
56
|
|
|
56
57
|
[tool.hatch.version]
|
|
57
58
|
source = "vcs"
|
|
@@ -70,7 +71,7 @@ exclude = ["/.github"]
|
|
|
70
71
|
minversion = "6.0"
|
|
71
72
|
addopts = "-ra -q --doctest-glob='*.md'"
|
|
72
73
|
testpaths = ["tests"]
|
|
73
|
-
markers = ["
|
|
74
|
+
markers = ["online: run test that retrieve data online from the ASD"]
|
|
74
75
|
|
|
75
76
|
[tool.coverage.run]
|
|
76
77
|
branch = true
|
|
@@ -110,7 +111,7 @@ extend-exclude = ["docs/assets/scripts/gen_ref_pages.py"]
|
|
|
110
111
|
|
|
111
112
|
[tool.ruff.lint]
|
|
112
113
|
select = ["E4", "E7", "E9", "F","C4", "SIM", "NPY", "PD","B","UP","D"]
|
|
113
|
-
ignore = ["
|
|
114
|
+
ignore = ["F401"]
|
|
114
115
|
|
|
115
116
|
[tool.ruff.lint.pydocstyle]
|
|
116
117
|
convention = "pep257"
|
|
@@ -138,16 +139,6 @@ dependencies = ["matplotlib", "ipython","ipykernel","pre-commit"]
|
|
|
138
139
|
installer = "uv"
|
|
139
140
|
features = ["polars"]
|
|
140
141
|
|
|
141
|
-
[tool.hatch.envs.test]
|
|
142
|
-
dependencies = [
|
|
143
|
-
"coverage[toml]>=6.2",
|
|
144
|
-
"pytest",
|
|
145
|
-
"pytest-cov",
|
|
146
|
-
"pytest-mock",
|
|
147
|
-
"pytest-recording",
|
|
148
|
-
"pytest-sugar",
|
|
149
|
-
"hypothesis",
|
|
150
|
-
]
|
|
151
142
|
|
|
152
143
|
[tool.hatch.envs.hatch-test]
|
|
153
144
|
randomize = false
|
|
@@ -155,6 +146,22 @@ parallel = false # avoid cache access conflicts
|
|
|
155
146
|
retries = 2
|
|
156
147
|
retry-delay = 1
|
|
157
148
|
features = ["polars"]
|
|
149
|
+
dependencies = [
|
|
150
|
+
"coverage-enable-subprocess==1.0",
|
|
151
|
+
'coverage[toml]>=6.2,<7.11; python_version<"3.10"',
|
|
152
|
+
'coverage[toml]~=7.11; python_version>="3.10"',
|
|
153
|
+
'pytest~=8.4; python_version<"3.10"',
|
|
154
|
+
'pytest~=9.0; python_version>="3.10"',
|
|
155
|
+
"pytest-mock~=3.12",
|
|
156
|
+
"pytest-randomly~=3.15",
|
|
157
|
+
"pytest-rerunfailures~=14.0",
|
|
158
|
+
"pytest-xdist[psutil]~=3.5",
|
|
159
|
+
'pytest-cov~=7.1.0; python_version>="3.10"',
|
|
160
|
+
"pytest-recording",
|
|
161
|
+
"pytest-sugar~=1.1.1",
|
|
162
|
+
"hypothesis",
|
|
163
|
+
]
|
|
164
|
+
|
|
158
165
|
|
|
159
166
|
[tool.hatch.envs.docs]
|
|
160
167
|
skip-install = true
|
|
@@ -162,13 +169,13 @@ features = ["docs"]
|
|
|
162
169
|
dependencies = ["mike"]
|
|
163
170
|
|
|
164
171
|
[tool.hatch.envs.docs.scripts]
|
|
165
|
-
serve = "
|
|
166
|
-
build = "
|
|
167
|
-
ci-build = "mike deploy --config-file mkdocs.yml
|
|
172
|
+
serve = "properdocs serve -f mkdocs.yml {args}"
|
|
173
|
+
build = "properdocs build --clean -f mkdocs.yml {args}"
|
|
174
|
+
ci-build = "mike deploy --config-file mkdocs.yml {args}"
|
|
168
175
|
|
|
169
176
|
[tool.hatch.envs.lint]
|
|
170
177
|
template = "lint"
|
|
171
|
-
dependencies = ["ruff>=0.
|
|
178
|
+
dependencies = ["ruff>=0.15.13"]
|
|
172
179
|
|
|
173
180
|
[tool.hatch.envs.lint.scripts]
|
|
174
181
|
style = [
|
|
@@ -182,5 +189,35 @@ fix = [
|
|
|
182
189
|
"style", # feedback on what is not fixable
|
|
183
190
|
]
|
|
184
191
|
|
|
192
|
+
|
|
193
|
+
[tool.hatch.envs.hatch-test.overrides]
|
|
194
|
+
matrix.pandas.dependencies = [
|
|
195
|
+
{ value = "pandas>=2.0", if = ["pandas-2.x"] },
|
|
196
|
+
{ value = "pandas>=3.0", if = ["pandas-3.x"] },
|
|
197
|
+
]
|
|
198
|
+
matrix.numpy.dependencies = [
|
|
199
|
+
{ value = "numpy>=2.0", if = ["numpy-2.x"] },
|
|
200
|
+
{value = "numpy<2.0", if = ["numpy-legacy"]},
|
|
201
|
+
]
|
|
202
|
+
matrix.polars.features = [
|
|
203
|
+
{ value = "polars", if = ["polars"]},
|
|
204
|
+
{ value = "polars-compat", if = ["polars-compat"]},
|
|
205
|
+
]
|
|
206
|
+
|
|
207
|
+
[[tool.hatch.envs.hatch-test.matrix]]
|
|
208
|
+
python = ["3.9","3.10"]
|
|
209
|
+
numpy = ["numpy-2.x","numpy-legacy"]
|
|
210
|
+
pandas = ["pandas-2.x"]
|
|
211
|
+
polars = ["polars","polars-compat"]
|
|
212
|
+
|
|
213
|
+
[[tool.hatch.envs.hatch-test.matrix]]
|
|
214
|
+
python = ["3.11","3.12","3.13", "3.14"]
|
|
215
|
+
numpy = ["numpy-2.x","numpy-legacy"]
|
|
216
|
+
pandas = ["pandas-2.x","pandas-3.x"]
|
|
217
|
+
polars = ["polars","polars-compat"]
|
|
218
|
+
|
|
185
219
|
[[tool.hatch.envs.hatch-test.matrix]]
|
|
186
|
-
python = ["3.
|
|
220
|
+
python = ["3.13", "3.14"]
|
|
221
|
+
numpy = ["numpy-2.x"]
|
|
222
|
+
pandas = ["pandas-2.x","pandas-3.x"]
|
|
223
|
+
polars = ["polars","polars-compat"]
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
"""ASDCache is a module to retrieve data from the NIST Atomic Spectra Database that uses caching for fast local access.
|
|
2
|
-
|
|
3
|
-
To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
|
|
4
|
-
|
|
5
|
-
The `SpectraCache` class acts as the entrypoint to retrieve this data.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from .ASDCache import SpectraCache, BibCache
|
|
9
|
-
|
|
10
|
-
__all__ = ["SpectraCache", "BibCache"]
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
# file generated by setuptools-scm
|
|
2
|
-
# don't change, don't track in version control
|
|
3
|
-
|
|
4
|
-
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
|
|
5
|
-
|
|
6
|
-
TYPE_CHECKING = False
|
|
7
|
-
if TYPE_CHECKING:
|
|
8
|
-
from typing import Tuple
|
|
9
|
-
from typing import Union
|
|
10
|
-
|
|
11
|
-
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
12
|
-
else:
|
|
13
|
-
VERSION_TUPLE = object
|
|
14
|
-
|
|
15
|
-
version: str
|
|
16
|
-
__version__: str
|
|
17
|
-
__version_tuple__: VERSION_TUPLE
|
|
18
|
-
version_tuple: VERSION_TUPLE
|
|
19
|
-
|
|
20
|
-
__version__ = version = '0.2.2'
|
|
21
|
-
__version_tuple__ = version_tuple = (0, 2, 2)
|
|
File without changes
|
|
File without changes
|