ASDCache 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {asdcache-0.2.0 → asdcache-0.2.2}/ASDCache/ASDCache.py +102 -23
- {asdcache-0.2.0 → asdcache-0.2.2}/ASDCache/_version.py +9 -4
- {asdcache-0.2.0 → asdcache-0.2.2}/PKG-INFO +2 -1
- {asdcache-0.2.0 → asdcache-0.2.2}/pyproject.toml +4 -3
- {asdcache-0.2.0 → asdcache-0.2.2}/.gitignore +0 -0
- {asdcache-0.2.0 → asdcache-0.2.2}/ASDCache/__init__.py +0 -0
- {asdcache-0.2.0 → asdcache-0.2.2}/LICENSE +0 -0
|
@@ -61,6 +61,42 @@ logging.basicConfig(
|
|
|
61
61
|
stream=sys.stdout,
|
|
62
62
|
)
|
|
63
63
|
|
|
64
|
+
ASDSchema = {
|
|
65
|
+
"element": str,
|
|
66
|
+
"sp_num": int,
|
|
67
|
+
"obs_wl_vac(nm)": float,
|
|
68
|
+
"unc_obs_wl": float,
|
|
69
|
+
"obs_wl_air(nm)": float,
|
|
70
|
+
"ritz_wl_vac(nm)": float,
|
|
71
|
+
"unc_ritz_wl": float,
|
|
72
|
+
"ritz_wl_air(nm)": float,
|
|
73
|
+
"wn(cm-1)": float,
|
|
74
|
+
"intens": float,
|
|
75
|
+
"Aki(s^-1)": float,
|
|
76
|
+
"fik": float,
|
|
77
|
+
"S(a.u.)": float,
|
|
78
|
+
"log_gf": float,
|
|
79
|
+
"Acc": str,
|
|
80
|
+
"Ei(cm-1)": float,
|
|
81
|
+
"Ek(cm-1)": float,
|
|
82
|
+
"conf_i": str,
|
|
83
|
+
"term_i": str,
|
|
84
|
+
"J_i": str,
|
|
85
|
+
"conf_k": str,
|
|
86
|
+
"term_k": str,
|
|
87
|
+
"J_k": str,
|
|
88
|
+
"g_i": float,
|
|
89
|
+
"g_k": float,
|
|
90
|
+
"Type": str,
|
|
91
|
+
"tp_ref": str,
|
|
92
|
+
"line_ref": str,
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
STATE_EXPR = r"spectra=([\w]+)\+?([IVX]+)?"
|
|
96
|
+
"""Regex pattern for extracting (element,charge) tuple for a single-state query, which uses roman numerals."""
|
|
97
|
+
SCI_EXPR = r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)"
|
|
98
|
+
"""Regex pattern for processing scientific notation"""
|
|
99
|
+
|
|
64
100
|
|
|
65
101
|
class SpectraCache:
|
|
66
102
|
"""A class acting as the entrypoint to retrieve data from the NIST Atomic Spectra Database that uses caching.
|
|
@@ -148,7 +184,6 @@ class SpectraCache:
|
|
|
148
184
|
def __init__(self, use_polars_backend=False, cache_expiry=timedelta(weeks=1), strict_matching=True):
|
|
149
185
|
"""Initialize an instance that handles cached data lookup of the NIST ASD."""
|
|
150
186
|
self.strict_matching = strict_matching
|
|
151
|
-
self.cache_expiry = cache_expiry
|
|
152
187
|
self.session = CachedSession(
|
|
153
188
|
"NIST_ASD_cache",
|
|
154
189
|
use_cache_dir=True,
|
|
@@ -165,6 +200,24 @@ class SpectraCache:
|
|
|
165
200
|
|
|
166
201
|
self.known_species = self.list_cached_species()
|
|
167
202
|
|
|
203
|
+
@property
|
|
204
|
+
def cache_expiry(self) -> timedelta:
|
|
205
|
+
"""The cache expiry time.
|
|
206
|
+
|
|
207
|
+
Queries that are older than this time are considered stale and marked for updating, by querying the NIST ASD.
|
|
208
|
+
In case the query for new data fails, the stale, cached response will still be parsed.
|
|
209
|
+
"""
|
|
210
|
+
return self.session.settings.expire_after
|
|
211
|
+
|
|
212
|
+
def set_cache_expiry(self, new: timedelta = None, **kwargs):
|
|
213
|
+
"""Set the cache expiry to a different interval (default: 1 week).
|
|
214
|
+
|
|
215
|
+
Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
|
|
216
|
+
"""
|
|
217
|
+
if new is None:
|
|
218
|
+
new = timedelta(**kwargs)
|
|
219
|
+
self.session.settings.expire_after = new
|
|
220
|
+
|
|
168
221
|
@staticmethod
|
|
169
222
|
def _check_response_success(response: "CachedResponse") -> bool:
|
|
170
223
|
"""Validate that data has been fetched successfully.
|
|
@@ -173,6 +226,11 @@ class SpectraCache:
|
|
|
173
226
|
"""
|
|
174
227
|
return (response.status_code == 200) & (b"Error Message" not in response.content)
|
|
175
228
|
|
|
229
|
+
@property
|
|
230
|
+
def cached_species(self) -> list[str]:
|
|
231
|
+
"""A list of all cached species."""
|
|
232
|
+
return self.list_cached_species()
|
|
233
|
+
|
|
176
234
|
def list_cached_species(self) -> list[str]:
|
|
177
235
|
"""List all species in the cache, based on the string of the original query URL."""
|
|
178
236
|
return [
|
|
@@ -247,11 +305,14 @@ class SpectraCache:
|
|
|
247
305
|
"g_k": float,
|
|
248
306
|
"J_i": str,
|
|
249
307
|
"J_k": str,
|
|
308
|
+
"Type": str,
|
|
309
|
+
"tp_ref": str,
|
|
310
|
+
"line_ref": str,
|
|
250
311
|
"": str,
|
|
251
312
|
}
|
|
252
313
|
df = pd.read_csv(StringIO(response.text), sep="\t", dtype=schema)
|
|
253
314
|
for col in ["obs_wl_vac(nm)", "ritz_wl_vac(nm)", "intens", "Ei(cm-1)", "Ek(cm-1)"]:
|
|
254
|
-
df[col] = df.loc[:, col].str.extract(
|
|
315
|
+
df[col] = df.loc[:, col].str.extract(SCI_EXPR).astype(float)
|
|
255
316
|
df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
|
|
256
317
|
df["tp_ref"] = df.loc[:, "tp_ref"].fillna("")
|
|
257
318
|
df["obs_wl_air(nm)"] = df["obs_wl_vac(nm)"]
|
|
@@ -264,17 +325,13 @@ class SpectraCache:
|
|
|
264
325
|
)
|
|
265
326
|
df = df.drop([c for c in df.columns if "Unnamed" in c], axis=1).reset_index(drop=True)
|
|
266
327
|
if "element" not in df.columns:
|
|
267
|
-
|
|
268
|
-
element, numeral = expr.search(response.url).groups()
|
|
328
|
+
element, numeral = re.search(STATE_EXPR, response.url).groups()
|
|
269
329
|
df["element"] = element
|
|
270
330
|
df["sp_num"] = numeral
|
|
271
331
|
# cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
|
|
272
332
|
df["sp_num"] = df["sp_num"].map(cls.roman_to_int)
|
|
273
|
-
df = (
|
|
274
|
-
|
|
275
|
-
if "unc_obs_wl" in df.columns
|
|
276
|
-
else df.assign(unc_obs_wl=np.nan, unc_ritz_wl=np.nan)
|
|
277
|
-
)
|
|
333
|
+
df["unc_obs_wl"] = pd.to_numeric(df["unc_obs_wl"]) if "unc_obs_wl" in df.columns else np.nan
|
|
334
|
+
df["unc_ritz_wl"] = pd.to_numeric(df["unc_ritz_wl"]) if "unc_ritz_wl" in df.columns else np.nan
|
|
278
335
|
return df.loc[:, cls.column_order]
|
|
279
336
|
|
|
280
337
|
@classmethod
|
|
@@ -320,7 +377,7 @@ class SpectraCache:
|
|
|
320
377
|
.with_columns(
|
|
321
378
|
pl.col("obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens")
|
|
322
379
|
# .str.strip_chars(annotation_chars_to_strip).str.replace("†", "", literal=True)
|
|
323
|
-
.str.extract(
|
|
380
|
+
.str.extract(SCI_EXPR)
|
|
324
381
|
# .str.extract(r"([+-]?\d*\.?\d+e[+-]?\d+)")
|
|
325
382
|
.replace("", None)
|
|
326
383
|
.cast(pl.Float64),
|
|
@@ -349,23 +406,20 @@ class SpectraCache:
|
|
|
349
406
|
.alias("ritz_wl_air(nm)"),
|
|
350
407
|
)
|
|
351
408
|
if "element" not in df.columns:
|
|
352
|
-
|
|
353
|
-
element, numeral = expr.search(response.url).groups()
|
|
409
|
+
element, numeral = re.search(STATE_EXPR, response.url).groups()
|
|
354
410
|
# cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
|
|
355
411
|
df = df.with_columns(
|
|
356
412
|
pl.lit(element).alias("element"),
|
|
357
413
|
pl.lit("I" if numeral is None else numeral)
|
|
358
414
|
.cast(pl.String)
|
|
359
415
|
.alias("sp_num")
|
|
360
|
-
.map_elements(cls.roman_to_int, return_dtype=pl.Int64)
|
|
416
|
+
.map_elements(cls.roman_to_int, return_dtype=pl.Int64)
|
|
417
|
+
.first(),
|
|
361
418
|
)
|
|
362
|
-
df = (
|
|
363
|
-
|
|
364
|
-
if "
|
|
365
|
-
|
|
366
|
-
pl.lit(None).cast(pl.Float64).alias("unc_obs_wl"), pl.lit(None).cast(pl.Float64).alias("unc_ritz_wl")
|
|
367
|
-
)
|
|
368
|
-
)
|
|
419
|
+
df = df.with_columns(
|
|
420
|
+
unc_obs_wl=pl.col("unc_obs_wl") if "unc_obs_wl" in df.columns else None,
|
|
421
|
+
unc_ritz_wl=pl.col("unc_ritz_wl") if "unc_ritz_wl" in df.columns else None,
|
|
422
|
+
).with_columns(pl.col("unc_obs_wl").cast(pl.Float64), pl.col("unc_ritz_wl").cast(pl.Float64))
|
|
369
423
|
|
|
370
424
|
return df.select(*cls.column_order)
|
|
371
425
|
|
|
@@ -408,8 +462,16 @@ class SpectraCache:
|
|
|
408
462
|
"""Retrieve all cached data into a single dataframe."""
|
|
409
463
|
cached_frames = [self.create_dataframe(cached) for cached in self.session.cache.filter()]
|
|
410
464
|
if self.use_polars:
|
|
411
|
-
return
|
|
412
|
-
|
|
465
|
+
return (
|
|
466
|
+
pl.concat(cached_frames).unique()
|
|
467
|
+
if len(cached_frames) > 0
|
|
468
|
+
else pl.DataFrame({k: [] for k in ASDSchema}, schema=ASDSchema)
|
|
469
|
+
)
|
|
470
|
+
return (
|
|
471
|
+
pd.concat(cached_frames).drop_duplicates().reset_index(drop=True)
|
|
472
|
+
if len(cached_frames) > 0
|
|
473
|
+
else pd.DataFrame({k: pd.Series(dtype=v) for k, v in ASDSchema.items()})
|
|
474
|
+
)
|
|
413
475
|
|
|
414
476
|
|
|
415
477
|
class BibCache:
|
|
@@ -428,7 +490,6 @@ class BibCache:
|
|
|
428
490
|
|
|
429
491
|
def __init__(self, cache_expiry=timedelta(weeks=1)):
|
|
430
492
|
"""Initialize an instance that handles cached retrieval of ASD bibliographic references."""
|
|
431
|
-
self.cache_expiry = cache_expiry
|
|
432
493
|
self.session = CachedSession(
|
|
433
494
|
"NIST_ASD_Bibliography_cache",
|
|
434
495
|
use_cache_dir=True,
|
|
@@ -438,6 +499,24 @@ class BibCache:
|
|
|
438
499
|
ignored_parameters=["element", "spectr_charge", "type", "ref"],
|
|
439
500
|
)
|
|
440
501
|
|
|
502
|
+
@property
|
|
503
|
+
def cache_expiry(self) -> timedelta:
|
|
504
|
+
"""The cache expiry time.
|
|
505
|
+
|
|
506
|
+
Queries that are older than this time are considered stale and marked for updating, by querying the NIST ASD.
|
|
507
|
+
In case the query for new data fails, the stale, cached response will still be parsed.
|
|
508
|
+
"""
|
|
509
|
+
return self.session.settings.expire_after
|
|
510
|
+
|
|
511
|
+
def set_cache_expiry(self, new: timedelta = None, **kwargs):
|
|
512
|
+
"""Set the cache expiry to a different interval (default: 1 week).
|
|
513
|
+
|
|
514
|
+
Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
|
|
515
|
+
"""
|
|
516
|
+
if new is None:
|
|
517
|
+
new = timedelta(**kwargs)
|
|
518
|
+
self.session.settings.expire_after = new
|
|
519
|
+
|
|
441
520
|
@staticmethod
|
|
442
521
|
def _check_response_success(response: "CachedResponse") -> bool:
|
|
443
522
|
"""Validate that data has been fetched successfully.
|
|
@@ -1,8 +1,13 @@
|
|
|
1
|
-
# file generated by
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
|
|
5
|
+
|
|
3
6
|
TYPE_CHECKING = False
|
|
4
7
|
if TYPE_CHECKING:
|
|
5
|
-
from typing import Tuple
|
|
8
|
+
from typing import Tuple
|
|
9
|
+
from typing import Union
|
|
10
|
+
|
|
6
11
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
7
12
|
else:
|
|
8
13
|
VERSION_TUPLE = object
|
|
@@ -12,5 +17,5 @@ __version__: str
|
|
|
12
17
|
__version_tuple__: VERSION_TUPLE
|
|
13
18
|
version_tuple: VERSION_TUPLE
|
|
14
19
|
|
|
15
|
-
__version__ = version = '0.2.
|
|
16
|
-
__version_tuple__ = version_tuple = (0, 2,
|
|
20
|
+
__version__ = version = '0.2.2'
|
|
21
|
+
__version_tuple__ = version_tuple = (0, 2, 2)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ASDCache
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: A Python module to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling
|
|
5
5
|
Project-URL: Documentation, https://antoinetue.github.io/asdcache
|
|
6
6
|
Project-URL: Source, https://github.com/AntoineTUE/asdcache
|
|
@@ -16,6 +16,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
20
|
Classifier: Topic :: Scientific/Engineering
|
|
20
21
|
Requires-Python: >=3.9
|
|
21
22
|
Requires-Dist: bs4
|
|
@@ -23,6 +23,7 @@ classifiers = [
|
|
|
23
23
|
"Programming Language :: Python :: 3.10",
|
|
24
24
|
"Programming Language :: Python :: 3.11",
|
|
25
25
|
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Programming Language :: Python :: 3.13",
|
|
26
27
|
]
|
|
27
28
|
dependencies = ["requests","requests_cache", "pandas","numpy", "bs4"]
|
|
28
29
|
dynamic = ["version"]
|
|
@@ -131,9 +132,9 @@ fragments = [
|
|
|
131
132
|
cache-keys = [{ git = true }]
|
|
132
133
|
|
|
133
134
|
[tool.hatch.envs.default]
|
|
134
|
-
python = "3.
|
|
135
|
+
python = "3.12"
|
|
135
136
|
post-install-commands = ["pre-commit install"]
|
|
136
|
-
dependencies = ["matplotlib", "ipython","ipykernel","
|
|
137
|
+
dependencies = ["matplotlib", "ipython","ipykernel","pre-commit"]
|
|
137
138
|
installer = "uv"
|
|
138
139
|
features = ["polars"]
|
|
139
140
|
|
|
@@ -152,7 +153,7 @@ dependencies = [
|
|
|
152
153
|
randomize = false
|
|
153
154
|
parallel = false # avoid cache access conflicts
|
|
154
155
|
retries = 2
|
|
155
|
-
retry-delay =
|
|
156
|
+
retry-delay = 1
|
|
156
157
|
features = ["polars"]
|
|
157
158
|
|
|
158
159
|
[tool.hatch.envs.docs]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|