ASDCache 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -61,6 +61,42 @@ logging.basicConfig(
61
61
  stream=sys.stdout,
62
62
  )
63
63
 
64
+ ASDSchema = {
65
+ "element": str,
66
+ "sp_num": int,
67
+ "obs_wl_vac(nm)": float,
68
+ "unc_obs_wl": float,
69
+ "obs_wl_air(nm)": float,
70
+ "ritz_wl_vac(nm)": float,
71
+ "unc_ritz_wl": float,
72
+ "ritz_wl_air(nm)": float,
73
+ "wn(cm-1)": float,
74
+ "intens": float,
75
+ "Aki(s^-1)": float,
76
+ "fik": float,
77
+ "S(a.u.)": float,
78
+ "log_gf": float,
79
+ "Acc": str,
80
+ "Ei(cm-1)": float,
81
+ "Ek(cm-1)": float,
82
+ "conf_i": str,
83
+ "term_i": str,
84
+ "J_i": str,
85
+ "conf_k": str,
86
+ "term_k": str,
87
+ "J_k": str,
88
+ "g_i": float,
89
+ "g_k": float,
90
+ "Type": str,
91
+ "tp_ref": str,
92
+ "line_ref": str,
93
+ }
94
+
95
+ STATE_EXPR = r"spectra=([\w]+)\+?([IVX]+)?"
96
+ """Regex pattern for extracting (element,charge) tuple for a single-state query, which uses roman numerals."""
97
+ SCI_EXPR = r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)"
98
+ """Regex pattern for processing scientific notation"""
99
+
64
100
 
65
101
  class SpectraCache:
66
102
  """A class acting as the entrypoint to retrieve data from the NIST Atomic Spectra Database that uses caching.
@@ -148,7 +184,6 @@ class SpectraCache:
148
184
  def __init__(self, use_polars_backend=False, cache_expiry=timedelta(weeks=1), strict_matching=True):
149
185
  """Initialize an instance that handles cached data lookup of the NIST ASD."""
150
186
  self.strict_matching = strict_matching
151
- self.cache_expiry = cache_expiry
152
187
  self.session = CachedSession(
153
188
  "NIST_ASD_cache",
154
189
  use_cache_dir=True,
@@ -165,6 +200,24 @@ class SpectraCache:
165
200
 
166
201
  self.known_species = self.list_cached_species()
167
202
 
203
+ @property
204
+ def cache_expiry(self) -> timedelta:
205
+ """The cache expiry time.
206
+
207
+ Queries that are older than this time are considered stale and marked for updating by querying the NIST ASD.
208
+ In case the query for new data fails, the stale, cached response will still be parsed.
209
+ """
210
+ return self.session.settings.expire_after
211
+
212
+ def set_cache_expiry(self, new: timedelta = None, **kwargs):
213
+ """Set the cache expiry to a different interval (default: 1 week).
214
+
215
+ Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
216
+ """
217
+ if new is None:
218
+ new = timedelta(**kwargs)
219
+ self.session.settings.expire_after = new
220
+
168
221
  @staticmethod
169
222
  def _check_response_success(response: "CachedResponse") -> bool:
170
223
  """Validate that data has been fetched succesfully.
@@ -173,6 +226,11 @@ class SpectraCache:
173
226
  """
174
227
  return (response.status_code == 200) & (b"Error Message" not in response.content)
175
228
 
229
+ @property
230
+ def cached_species(self) -> list[str]:
231
+ """A list of all cached species."""
232
+ return self.list_cached_species()
233
+
176
234
  def list_cached_species(self) -> list[str]:
177
235
  """List all species in the cache, based on the string of the original query URL."""
178
236
  return [
@@ -247,11 +305,14 @@ class SpectraCache:
247
305
  "g_k": float,
248
306
  "J_i": str,
249
307
  "J_k": str,
308
+ "Type": str,
309
+ "tp_ref": str,
310
+ "line_ref": str,
250
311
  "": str,
251
312
  }
252
313
  df = pd.read_csv(StringIO(response.text), sep="\t", dtype=schema)
253
314
  for col in ["obs_wl_vac(nm)", "ritz_wl_vac(nm)", "intens", "Ei(cm-1)", "Ek(cm-1)"]:
254
- df[col] = df.loc[:, col].str.extract(r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)").astype(float)
315
+ df[col] = df.loc[:, col].str.extract(SCI_EXPR).astype(float)
255
316
  df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
256
317
  df["tp_ref"] = df.loc[:, "tp_ref"].fillna("")
257
318
  df["obs_wl_air(nm)"] = df["obs_wl_vac(nm)"]
@@ -264,17 +325,13 @@ class SpectraCache:
264
325
  )
265
326
  df = df.drop([c for c in df.columns if "Unnamed" in c], axis=1).reset_index(drop=True)
266
327
  if "element" not in df.columns:
267
- expr = re.compile(r"spectra=([\w]+)\+?([IVX]+)?")
268
- element, numeral = expr.search(response.url).groups()
328
+ element, numeral = re.search(STATE_EXPR, response.url).groups()
269
329
  df["element"] = element
270
330
  df["sp_num"] = numeral
271
331
  # cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
272
332
  df["sp_num"] = df["sp_num"].map(cls.roman_to_int)
273
- df = (
274
- df.assign(unc_obs_wl=df["unc_obs_wl"].astype(float), unc_ritz_wl=df["unc_ritz_wl"].astype(float))
275
- if "unc_obs_wl" in df.columns
276
- else df.assign(unc_obs_wl=np.nan, unc_ritz_wl=np.nan)
277
- )
333
+ df["unc_obs_wl"] = pd.to_numeric(df["unc_obs_wl"]) if "unc_obs_wl" in df.columns else np.nan
334
+ df["unc_ritz_wl"] = pd.to_numeric(df["unc_ritz_wl"]) if "unc_ritz_wl" in df.columns else np.nan
278
335
  return df.loc[:, cls.column_order]
279
336
 
280
337
  @classmethod
@@ -320,7 +377,7 @@ class SpectraCache:
320
377
  .with_columns(
321
378
  pl.col("obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens")
322
379
  # .str.strip_chars(annotation_chars_to_strip).str.replace("†", "", literal=True)
323
- .str.extract(r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)")
380
+ .str.extract(SCI_EXPR)
324
381
  # .str.extract(r"([+-]?\d*\.?\d+e[+-]?\d+)")
325
382
  .replace("", None)
326
383
  .cast(pl.Float64),
@@ -349,23 +406,20 @@ class SpectraCache:
349
406
  .alias("ritz_wl_air(nm)"),
350
407
  )
351
408
  if "element" not in df.columns:
352
- expr = re.compile(r"spectra=([\w]+)\+?([IVX]+)?")
353
- element, numeral = expr.search(response.url).groups()
409
+ element, numeral = re.search(STATE_EXPR, response.url).groups()
354
410
  # cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
355
411
  df = df.with_columns(
356
412
  pl.lit(element).alias("element"),
357
413
  pl.lit("I" if numeral is None else numeral)
358
414
  .cast(pl.String)
359
415
  .alias("sp_num")
360
- .map_elements(cls.roman_to_int, return_dtype=pl.Int64),
416
+ .map_elements(cls.roman_to_int, return_dtype=pl.Int64)
417
+ .first(),
361
418
  )
362
- df = (
363
- df.with_columns(pl.col("unc_obs_wl").cast(pl.Float64), pl.col("unc_ritz_wl").cast(pl.Float64))
364
- if "unc_obs_wl" in df.columns
365
- else df.with_columns(
366
- pl.lit(None).cast(pl.Float64).alias("unc_obs_wl"), pl.lit(None).cast(pl.Float64).alias("unc_ritz_wl")
367
- )
368
- )
419
+ df = df.with_columns(
420
+ unc_obs_wl=pl.col("unc_obs_wl") if "unc_obs_wl" in df.columns else None,
421
+ unc_ritz_wl=pl.col("unc_ritz_wl") if "unc_ritz_wl" in df.columns else None,
422
+ ).with_columns(pl.col("unc_obs_wl").cast(pl.Float64), pl.col("unc_ritz_wl").cast(pl.Float64))
369
423
 
370
424
  return df.select(*cls.column_order)
371
425
 
@@ -408,8 +462,16 @@ class SpectraCache:
408
462
  """Retrieve all cached data into a single dataframe."""
409
463
  cached_frames = [self.create_dataframe(cached) for cached in self.session.cache.filter()]
410
464
  if self.use_polars:
411
- return pl.concat(cached_frames).unique()
412
- return pd.concat(cached_frames).drop_duplicates().reset_index(drop=True)
465
+ return (
466
+ pl.concat(cached_frames).unique()
467
+ if len(cached_frames) > 0
468
+ else pl.DataFrame({k: [] for k in ASDSchema}, schema=ASDSchema)
469
+ )
470
+ return (
471
+ pd.concat(cached_frames).drop_duplicates().reset_index(drop=True)
472
+ if len(cached_frames) > 0
473
+ else pd.DataFrame({k: pd.Series(dtype=v) for k, v in ASDSchema.items()})
474
+ )
413
475
 
414
476
 
415
477
  class BibCache:
@@ -428,7 +490,6 @@ class BibCache:
428
490
 
429
491
  def __init__(self, cache_expiry=timedelta(weeks=1)):
430
492
  """Initialize an instance that handles cached retrieval of ASD bibliographic references."""
431
- self.cache_expiry = cache_expiry
432
493
  self.session = CachedSession(
433
494
  "NIST_ASD_Bibliography_cache",
434
495
  use_cache_dir=True,
@@ -438,6 +499,24 @@ class BibCache:
438
499
  ignored_parameters=["element", "spectr_charge", "type", "ref"],
439
500
  )
440
501
 
502
+ @property
503
+ def cache_expiry(self) -> timedelta:
504
+ """The cache expiry time.
505
+
506
+ Queries that are older than this time are considered stale and marked for updating by querying the NIST ASD.
507
+ In case the query for new data fails, the stale, cached response will still be parsed.
508
+ """
509
+ return self.session.settings.expire_after
510
+
511
+ def set_cache_expiry(self, new: timedelta = None, **kwargs):
512
+ """Set the cache expiry to a different interval (default: 1 week).
513
+
514
+ Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
515
+ """
516
+ if new is None:
517
+ new = timedelta(**kwargs)
518
+ self.session.settings.expire_after = new
519
+
441
520
  @staticmethod
442
521
  def _check_response_success(response: "CachedResponse") -> bool:
443
522
  """Validate that data has been fetched succesfully.
@@ -1,8 +1,13 @@
1
- # file generated by setuptools_scm
1
+ # file generated by setuptools-scm
2
2
  # don't change, don't track in version control
3
+
4
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
+
3
6
  TYPE_CHECKING = False
4
7
  if TYPE_CHECKING:
5
- from typing import Tuple, Union
8
+ from typing import Tuple
9
+ from typing import Union
10
+
6
11
  VERSION_TUPLE = Tuple[Union[int, str], ...]
7
12
  else:
8
13
  VERSION_TUPLE = object
@@ -12,5 +17,5 @@ __version__: str
12
17
  __version_tuple__: VERSION_TUPLE
13
18
  version_tuple: VERSION_TUPLE
14
19
 
15
- __version__ = version = '0.2.0'
16
- __version_tuple__ = version_tuple = (0, 2, 0)
20
+ __version__ = version = '0.2.2'
21
+ __version_tuple__ = version_tuple = (0, 2, 2)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ASDCache
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: A Python module to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling
5
5
  Project-URL: Documentation, https://antoinetue.github.io/asdcache
6
6
  Project-URL: Source, https://github.com/AntoineTUE/asdcache
@@ -16,6 +16,7 @@ Classifier: Programming Language :: Python :: 3.9
16
16
  Classifier: Programming Language :: Python :: 3.10
17
17
  Classifier: Programming Language :: Python :: 3.11
18
18
  Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
19
20
  Classifier: Topic :: Scientific/Engineering
20
21
  Requires-Python: >=3.9
21
22
  Requires-Dist: bs4
@@ -23,6 +23,7 @@ classifiers = [
23
23
  "Programming Language :: Python :: 3.10",
24
24
  "Programming Language :: Python :: 3.11",
25
25
  "Programming Language :: Python :: 3.12",
26
+ "Programming Language :: Python :: 3.13",
26
27
  ]
27
28
  dependencies = ["requests","requests_cache", "pandas","numpy", "bs4"]
28
29
  dynamic = ["version"]
@@ -131,9 +132,9 @@ fragments = [
131
132
  cache-keys = [{ git = true }]
132
133
 
133
134
  [tool.hatch.envs.default]
134
- python = "3.9"
135
+ python = "3.12"
135
136
  post-install-commands = ["pre-commit install"]
136
- dependencies = ["matplotlib", "ipython","ipykernel","ruff"]
137
+ dependencies = ["matplotlib", "ipython","ipykernel","pre-commit"]
137
138
  installer = "uv"
138
139
  features = ["polars"]
139
140
 
@@ -152,7 +153,7 @@ dependencies = [
152
153
  randomize = false
153
154
  parallel = false # avoid cache access conflicts
154
155
  retries = 2
155
- retry-delay = 2
156
+ retry-delay = 1
156
157
  features = ["polars"]
157
158
 
158
159
  [tool.hatch.envs.docs]
File without changes
File without changes
File without changes