ASDCache 0.2.0__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,43 +1,12 @@
1
- r"""`ASDcache` is a module to fetch data from the NIST Atomic Spectra Database (ASD), utlizing caching for fast responses.
1
+ """The ASDCache module.
2
2
 
3
- To make the most use out of the cache, `ASDcache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
4
-
5
- Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
6
- The benefit of this format is that it is more 'machine readable' than the formatted ASCII of HTML options.
7
- This means it requires far less bespoke parsing to get rid of 'human readable' features such as repeated page column headers, or empty lines.
8
- To ensure a consistent schema of the retrieved data, lines are always retrieved as a function of wavelength, using `vacuum wavelength`, even between 200 to 2000 nm.
9
- Wavenumbers and Ritz wavelength will be included in the response.
10
-
11
- In the range $5000 \mathrm{cm}^{-1}<\nu<50000 \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][ASDcache.readASD.ASDCache.wn_to_n_refractive]).
12
- This is consistent with the approach of the ASD.
13
-
14
- Each response from the NIST page is cached (1 week by default) on the local system.
15
- This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
16
- As an example: reading all spectra between 200 and 1000 nm can take over 2 minutes without using the cache, but can be as fast as 0.2 seconds using the `polars` backend.
17
- In addition, it means that an internet connection is not required after initial data fetching.
18
- The cached response is only updated upon succesfull retrieval of a new response of the NIST page.
19
- If unable to succesfully fetch new data, we fall back to a 'stale' cached response.
20
-
21
- The cache can be shared to another system, to give offline/airgapped systems access to the same data.
22
- To that end, the file `NIST_ASD_cache.sqlite` in the user's cache directory has to be copied over.
23
-
24
- The standard cache directories are as follows:
25
-
26
- === "Windows"
27
- `%USERPROFILE%/AppData/Local`
28
- === "Linux"
29
- `~/.cache/http_cache/`
30
- === "MacOS"
31
- `/Users/user/Library/Caches/http_cache/`
32
-
33
- Queries to the NIST ASD are hashed by the keys (or parameters) of the requests.
34
- This means that any change to either one of these parameters, will result in a new cache entry, even if the returned data is equivalent.
3
+ It contains both the [SpectraCache][(m).] and [BibCache][(m).] classes which allow you to interact with the ASD and the relevant bibliographic databases.
35
4
  """
36
5
 
37
- import importlib
6
+ import importlib.util
38
7
  import warnings
39
8
  import pandas as pd
40
- from requests_cache import CachedSession, CachedResponse
9
+ from requests_cache import CachedSession, Response
41
10
  from io import StringIO
42
11
  from datetime import timedelta
43
12
  import re
@@ -45,7 +14,7 @@ import numpy as np
45
14
  from bs4 import BeautifulSoup
46
15
  import sys
47
16
  import logging
48
- from typing import Any, Optional
17
+ from typing import Any, Optional, Union
49
18
 
50
19
  if importlib.util.find_spec("polars"):
51
20
  POLARS_AVAILABLE = True
@@ -61,6 +30,46 @@ logging.basicConfig(
61
30
  stream=sys.stdout,
62
31
  )
63
32
 
33
+ ASDSchema = {
34
+ "element": str,
35
+ "sp_num": int,
36
+ "obs_wl_vac(nm)": float,
37
+ "unc_obs_wl": float,
38
+ "obs_wl_air(nm)": float,
39
+ "ritz_wl_vac(nm)": float,
40
+ "unc_ritz_wl": float,
41
+ "ritz_wl_air(nm)": float,
42
+ "wn(cm-1)": float,
43
+ "intens": float,
44
+ "Aki(s^-1)": float,
45
+ "fik": float,
46
+ "S(a.u.)": float,
47
+ "log_gf": float,
48
+ "Acc": str,
49
+ "Ei(cm-1)": float,
50
+ "Ek(cm-1)": float,
51
+ "conf_i": str,
52
+ "term_i": str,
53
+ "J_i": str,
54
+ "conf_k": str,
55
+ "term_k": str,
56
+ "J_k": str,
57
+ "g_i": float,
58
+ "g_k": float,
59
+ "Type": str,
60
+ "tp_ref": str,
61
+ "line_ref": str,
62
+ }
63
+
64
+ STATE_EXPR = r"spectra=([\w]+)\+?([IVX]+)?"
65
+ """Regex pattern for extracting (element,charge) tuple for a single-state query, which uses roman numerals."""
66
+ SCI_EXPR = r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)"
67
+ """Regex pattern for processing scientific notation"""
68
+
69
+
70
+ class ASDQueryError(Exception):
71
+ """Exception raised when the NIST ASD has indicated an error with a query."""
72
+
64
73
 
65
74
  class SpectraCache:
66
75
  """A class acting as the entrypoint to retrieve data from the NIST Atomic Spectra Database that uses caching.
@@ -79,6 +88,7 @@ class SpectraCache:
79
88
  nist_url = "https://physics.nist.gov/cgi-bin/ASD/lines1.pl"
80
89
  species_expr = re.compile(r"spectra=([\w\+\-\%3]+)&")
81
90
  query_params = {
91
+ "submit": "Retrieve Data",
82
92
  "unit": 1,
83
93
  "de": 0,
84
94
  "plot_out": 0,
@@ -107,10 +117,11 @@ class SpectraCache:
107
117
  "enrg_out": "on",
108
118
  "J_out": "on",
109
119
  "g_out": "on",
110
- "diag_out": "on",
120
+ # "diag_out": "on", # avoid diagnostic data, it leads to multi-species queries failing; which can appear as if keys below are needed. See issue #1
111
121
  "allowed_out": 1,
112
122
  "forbid_out": 1,
113
- "submit": "Retrieve Data",
123
+ # "show_diff_obs_calc": 1, # Does not appear mandatory in retrospect, see issue #1
124
+ # "include_Ritz_E1": 1, # Does not appear mandatory in retrospect, see issue #1
114
125
  }
115
126
  """Request parameters used by the NIST ASD form."""
116
127
  column_order = [
@@ -148,7 +159,6 @@ class SpectraCache:
148
159
  def __init__(self, use_polars_backend=False, cache_expiry=timedelta(weeks=1), strict_matching=True):
149
160
  """Initialize an instance that handles cached data lookup of the NIST ASD."""
150
161
  self.strict_matching = strict_matching
151
- self.cache_expiry = cache_expiry
152
162
  self.session = CachedSession(
153
163
  "NIST_ASD_cache",
154
164
  use_cache_dir=True,
@@ -165,13 +175,82 @@ class SpectraCache:
165
175
 
166
176
  self.known_species = self.list_cached_species()
167
177
 
178
+ @property
179
+ def cache_expiry(self) -> timedelta:
180
+ """The cache expiry time.
181
+
182
+ Queries that are older than this time are considered stale and marked for updating, by querying the NIST ASD.
183
+ In case the query for new data fails, the stale, cached response will still be parsed.
184
+ """
185
+ return self.session.settings.expire_after
186
+
187
+ def set_cache_expiry(self, new: Optional[timedelta] = None, **kwargs):
188
+ """Set the cache expiry to a different interval (default: 1 week).
189
+
190
+ Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
191
+ """
192
+ if new is None:
193
+ new = timedelta(**kwargs)
194
+ self.session.settings.expire_after = new
195
+
168
196
  @staticmethod
169
- def _check_response_success(response: "CachedResponse") -> bool:
197
+ def _check_response_success(response: Response) -> bool:
170
198
  """Validate that data has been fetched succesfully.
171
199
 
172
200
  If this check fails, the cache should not update with this response, even when marked as stale.
201
+
202
+ The first obvious way to check success is if an error is indicated by the HTTP status code.
203
+
204
+ However, when a query for data is incorrect, the NIST ASD returns a HTML page indicating `<title>NIST ASD : Input Error</title>` in the `<head>` tag, or "Error Message".
205
+
206
+ A successful query would not receive HTML as a response, but raw ASCII values instead.
207
+
208
+ We can thus check for the start of an HTML document.
209
+
210
+ Note that this only works for data queries, not for bibliographic metadata by `BibCache`.
173
211
  """
174
- return (response.status_code == 200) & (b"Error Message" not in response.content)
212
+ return not (
213
+ not response.ok or response.content.startswith(b"<!DOCTYPE") or b"Error Message" in response.content
214
+ )
215
+
216
+ def _get_data(self, species: str, wl_range: tuple[float, float] = (170, 1000), **kwargs) -> Response:
217
+ """Retrieve raw, ASCII-formatted data from the NIST ASD with a GET request.
218
+
219
+ To retrieve data and parse it into a DataFrame, use [fetch][..] instead.
220
+
221
+ Returns the raw response, which will be cached if it contains valid data (see [_check_response_success][..]).
222
+
223
+ If the response does not contain ASCII data, but HTML instead, an [ASDQueryError][(m).] will be raised.
224
+
225
+ It is possible to override any standard query parameter (see [query_params][..]) by passing them as kwargs.
226
+ """
227
+ query_params = {
228
+ "spectra": species,
229
+ "output_type": 0,
230
+ "low_w": min(wl_range),
231
+ "upp_w": max(wl_range),
232
+ **{k: v for k, v in self.query_params.items() if k not in kwargs},
233
+ **{k: v for k, v in kwargs.items() if k in self.query_params},
234
+ }
235
+ response: Response = self.session.get(self.nist_url, params=query_params)
236
+ response.raise_for_status()
237
+ # Check whether the response is an HTML document instead of ASCII-formatted data, which indicates a query error.
238
+ if response.content.startswith(b"<!DOCTYPE"):
239
+ logging.error(
240
+ "NIST ASD responded with HTML instead of ASCII-data for species=%s, wl_range=%s\nQuery: %s",
241
+ species,
242
+ wl_range,
243
+ response.url,
244
+ )
245
+ raise ASDQueryError(
246
+ f"Query for {species=} {wl_range=} did not receive ASCII-data. This means the ASD could not interpret your query. Check if your query is malformed."
247
+ )
248
+ return response
249
+
250
+ @property
251
+ def cached_species(self) -> list[str]:
252
+ """A list of all cached species."""
253
+ return self.list_cached_species()
175
254
 
176
255
  def list_cached_species(self) -> list[str]:
177
256
  """List all species in the cache, based on the string of the original query URL."""
@@ -181,35 +260,21 @@ class SpectraCache:
181
260
  for elem in self.species_expr.search(u).group(1).split("%3B")
182
261
  ]
183
262
 
184
- def fetch(self, species, wl_range=(170, 1000), **kwargs) -> "pd.DataFrame|pl.DataFrame|CachedResponse":
263
+ def fetch(self, species, wl_range=(170, 1000), **kwargs) -> "pd.DataFrame|pl.DataFrame":
185
264
  """Fetch information on a species from the ASD, first checking the cache.
186
265
 
187
- This supports loading multiple species in one go by using the same notation as the NIST ASD page.
266
+ This supports loading multiple species in one go by using the same notation as the NIST ASD form.
188
267
 
189
268
  Note however that cache keys are computed for unique options for `species` and `wl_range`.
190
269
 
191
270
  This means that you won't get caching benefits by using different queries.
192
271
 
193
- In other words: the cache cannot deduplicate queries such as `ASD.fetch('H', (200,1000))` followed by `ASD.fetch('H I', (650,660))`.
272
+ In other words: the cache cannot deduplicate queries such as `ASD.fetch('H', (200,1000))` followed by `ASD.fetch('H I', (650,660))` (or vice versa).
194
273
 
195
274
  Both these operations will fetch data online and be stored as separate cache entries.
196
275
  """
197
- query_params = {
198
- "spectra": species,
199
- "output_type": 0,
200
- "low_w": min(wl_range),
201
- "upp_w": max(wl_range),
202
- **self.query_params,
203
- }
204
- response = self.session.get(self.nist_url, params=query_params)
205
-
206
- # if response.status_code == 200:
207
- response.raise_for_status()
276
+ response = self._get_data(species, wl_range, **kwargs)
208
277
  return self.create_dataframe(response)
209
- # else:
210
- # print(f"Error: Received status code {response.status_code}")
211
- # print(response.url)
212
- # return response
213
278
 
214
279
  def create_dataframe(self, response) -> "pd.DataFrame|pl.DataFrame":
215
280
  """Create a dataframe from the (cached) NIST ASD response, using the chosen backend at class instantiation."""
@@ -218,7 +283,7 @@ class SpectraCache:
218
283
  return self._from_pandas(response)
219
284
 
220
285
  @classmethod
221
- def _from_pandas(cls, response: "CachedResponse") -> "pd.DataFrame":
286
+ def _from_pandas(cls, response: Response) -> "pd.DataFrame":
222
287
  r"""Transform a (cached) NIST ASD response into a pandas DataFrame.
223
288
 
224
289
  Calculates the air equivalent wavelength from the vacuum wavelength using the same Sellmeier equation as the NIST ASD.
@@ -247,12 +312,24 @@ class SpectraCache:
247
312
  "g_k": float,
248
313
  "J_i": str,
249
314
  "J_k": str,
315
+ "Type": str,
316
+ "tp_ref": str,
317
+ "line_ref": str,
250
318
  "": str,
251
319
  }
252
320
  df = pd.read_csv(StringIO(response.text), sep="\t", dtype=schema)
321
+ # Detect if pandas uses new `StringDtype`, or legacy `object` dtype for strings.
322
+ # This affects NaN handling for strings.
323
+ # Pandas 3.0 and up use the StringDtype, while pandas 2 can opt-in to this
324
+ # The 'Type' column should exist, 'element' may not.
325
+ uses_new_string_dtype = pd.api.types.is_string_dtype(df["Type"])
253
326
  for col in ["obs_wl_vac(nm)", "ritz_wl_vac(nm)", "intens", "Ei(cm-1)", "Ek(cm-1)"]:
254
- df[col] = df.loc[:, col].str.extract(r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)").astype(float)
255
- df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
327
+ df[col] = df.loc[:, col].str.extract(SCI_EXPR).astype(float)
328
+ # Any missing value implies line is an E1 (electric dipole) transition
329
+ if uses_new_string_dtype:
330
+ df["Type"] = df.loc[:, "Type"].fillna("E1")
331
+ else:
332
+ df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
256
333
  df["tp_ref"] = df.loc[:, "tp_ref"].fillna("")
257
334
  df["obs_wl_air(nm)"] = df["obs_wl_vac(nm)"]
258
335
  df["obs_wl_air(nm)"] = df[df["wn(cm-1)"].between(5000, 50000)]["obs_wl_air(nm)"] / cls.wn_to_n_refractive(
@@ -264,21 +341,18 @@ class SpectraCache:
264
341
  )
265
342
  df = df.drop([c for c in df.columns if "Unnamed" in c], axis=1).reset_index(drop=True)
266
343
  if "element" not in df.columns:
267
- expr = re.compile(r"spectra=([\w]+)\+?([IVX]+)?")
268
- element, numeral = expr.search(response.url).groups()
269
- df["element"] = element
270
- df["sp_num"] = numeral
271
344
  # cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
272
- df["sp_num"] = df["sp_num"].map(cls.roman_to_int)
273
- df = (
274
- df.assign(unc_obs_wl=df["unc_obs_wl"].astype(float), unc_ritz_wl=df["unc_ritz_wl"].astype(float))
275
- if "unc_obs_wl" in df.columns
276
- else df.assign(unc_obs_wl=np.nan, unc_ritz_wl=np.nan)
277
- )
345
+ # As 'element' and 'sp_num' columns are only missing for single-species queries, assign as constants, not vectors.
346
+ element, numeral = re.search(STATE_EXPR, response.url).groups()
347
+ numeric: int = cls.roman_to_int(numeral)
348
+ df["element"] = element
349
+ df["sp_num"] = numeric
350
+ df["unc_obs_wl"] = pd.to_numeric(df["unc_obs_wl"]) if "unc_obs_wl" in df.columns else np.nan
351
+ df["unc_ritz_wl"] = pd.to_numeric(df["unc_ritz_wl"]) if "unc_ritz_wl" in df.columns else np.nan
278
352
  return df.loc[:, cls.column_order]
279
353
 
280
354
  @classmethod
281
- def _from_polars(cls, response: "CachedResponse") -> "pl.DataFrame":
355
+ def _from_polars(cls, response: Response) -> "pl.DataFrame":
282
356
  r"""Transform a (cached) NIST ASD response into a polars DataFrame.
283
357
 
284
358
  Calculates the air equivalent wavelength from the vacuum wavelength using the same Sellmeier equation as the NIST ASD.
@@ -309,28 +383,25 @@ class SpectraCache:
309
383
  "J_k": pl.String,
310
384
  "": pl.String,
311
385
  }
312
- # annotation_chars_to_strip = "(?i)()[]?*w,bGhilmprsq:+xzgacHd "
313
- df = (
314
- pl.read_csv(
315
- StringIO(response.text),
316
- separator="\t",
317
- schema_overrides=schema,
318
- null_values="",
319
- )
320
- .with_columns(
321
- pl.col("obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens")
322
- # .str.strip_chars(annotation_chars_to_strip).str.replace("&dagger;", "", literal=True)
323
- .str.extract(r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)")
324
- # .str.extract(r"([+-]?\d*\.?\d+e[+-]?\d+)")
325
- .replace("", None)
326
- .cast(pl.Float64),
327
- pl.col("ritz_wl_vac(nm)").str.strip_chars('"+*').replace("", None).cast(pl.Float64),
328
- pl.col("S(a.u.)").cast(pl.Float64),
329
- pl.col("Type").replace(None, "E1"),
330
- pl.col("tp_ref").replace(None, ""),
331
- )
332
- .drop([""])
333
- ).with_columns(
386
+
387
+ df = pl.read_csv(
388
+ StringIO(response.text),
389
+ separator="\t",
390
+ schema_overrides=schema,
391
+ null_values="",
392
+ )
393
+ sci_cols = ["obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens", "ritz_wl_vac(nm)"]
394
+ cast_to_scientific_notation = [
395
+ pl.col(c).str.extract(SCI_EXPR).replace("", None).cast(pl.Float64).alias(c) for c in sci_cols
396
+ ]
397
+ df = df.with_columns(
398
+ *cast_to_scientific_notation,
399
+ pl.col("S(a.u.)").cast(pl.Float64),
400
+ pl.col("Type").replace(None, "E1"),
401
+ pl.col("tp_ref").replace(None, ""),
402
+ ).drop([""])
403
+ # compute air wavelengths between 5000 cm-1 and 50000 cm-1
404
+ df = df.with_columns(
334
405
  pl.when(pl.col("wn(cm-1)").is_between(5000, 50000))
335
406
  .then(
336
407
  pl.col("obs_wl_vac(nm)").cast(pl.Float64)
@@ -349,24 +420,12 @@ class SpectraCache:
349
420
  .alias("ritz_wl_air(nm)"),
350
421
  )
351
422
  if "element" not in df.columns:
352
- expr = re.compile(r"spectra=([\w]+)\+?([IVX]+)?")
353
- element, numeral = expr.search(response.url).groups()
423
+ element, numeral = re.search(STATE_EXPR, response.url).groups()
424
+ numeric: int = cls.roman_to_int(numeral) if numeral else 1
354
425
  # cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
355
- df = df.with_columns(
356
- pl.lit(element).alias("element"),
357
- pl.lit("I" if numeral is None else numeral)
358
- .cast(pl.String)
359
- .alias("sp_num")
360
- .map_elements(cls.roman_to_int, return_dtype=pl.Int64),
361
- )
362
- df = (
363
- df.with_columns(pl.col("unc_obs_wl").cast(pl.Float64), pl.col("unc_ritz_wl").cast(pl.Float64))
364
- if "unc_obs_wl" in df.columns
365
- else df.with_columns(
366
- pl.lit(None).cast(pl.Float64).alias("unc_obs_wl"), pl.lit(None).cast(pl.Float64).alias("unc_ritz_wl")
367
- )
368
- )
369
-
426
+ df = df.with_columns(pl.lit(element).alias("element"), pl.lit(numeric, dtype=pl.Int64).alias("sp_num"))
427
+ exprs = [pl.col(c).cast(pl.Float64) for c in ["unc_obs_wl", "unc_ritz_wl"] if c in df.columns]
428
+ df = df.with_columns(exprs)
370
429
  return df.select(*cls.column_order)
371
430
 
372
431
  @staticmethod
@@ -408,8 +467,16 @@ class SpectraCache:
408
467
  """Retrieve all cached data into a single dataframe."""
409
468
  cached_frames = [self.create_dataframe(cached) for cached in self.session.cache.filter()]
410
469
  if self.use_polars:
411
- return pl.concat(cached_frames).unique()
412
- return pd.concat(cached_frames).drop_duplicates().reset_index(drop=True)
470
+ return (
471
+ pl.concat(cached_frames).unique()
472
+ if len(cached_frames) > 0
473
+ else pl.DataFrame({k: [] for k in ASDSchema}, schema=ASDSchema)
474
+ )
475
+ return (
476
+ pd.concat(cached_frames).drop_duplicates().reset_index(drop=True)
477
+ if len(cached_frames) > 0
478
+ else pd.DataFrame({k: pd.Series(dtype=v) for k, v in ASDSchema.items()})
479
+ )
413
480
 
414
481
 
415
482
  class BibCache:
@@ -428,7 +495,6 @@ class BibCache:
428
495
 
429
496
  def __init__(self, cache_expiry=timedelta(weeks=1)):
430
497
  """Initialize an instance that handles cached retrieval of ASD bibliographic references."""
431
- self.cache_expiry = cache_expiry
432
498
  self.session = CachedSession(
433
499
  "NIST_ASD_Bibliography_cache",
434
500
  use_cache_dir=True,
@@ -438,8 +504,26 @@ class BibCache:
438
504
  ignored_parameters=["element", "spectr_charge", "type", "ref"],
439
505
  )
440
506
 
507
+ @property
508
+ def cache_expiry(self) -> timedelta:
509
+ """The cache expiry time.
510
+
511
+ Queries that are older than this time are considered stale and marked for updating, by querying the NIST ASD.
512
+ In case the query for new data fails, the stale, cached response will still be parsed.
513
+ """
514
+ return self.session.settings.expire_after
515
+
516
+ def set_cache_expiry(self, new: Optional[timedelta] = None, **kwargs):
517
+ """Set the cache expiry to a different interval (default: 1 week).
518
+
519
+ Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
520
+ """
521
+ if new is None:
522
+ new = timedelta(**kwargs)
523
+ self.session.settings.expire_after = new
524
+
441
525
  @staticmethod
442
- def _check_response_success(response: "CachedResponse") -> bool:
526
+ def _check_response_success(response: Response) -> bool:
443
527
  """Validate that data has been fetched succesfully.
444
528
 
445
529
  If this check fails, the cache should not update with this response, even when marked as stale.
@@ -454,12 +538,12 @@ class BibCache:
454
538
  r"""Parse a reference code from the NIST ASD into the constituent parts that can be used to look up references.
455
539
 
456
540
  Args:
457
- * reference_code (str): A NIST ASD bibliographic reference string, such as `L13456n3`, or `T6936n`.
541
+ reference_code (str): A NIST ASD bibliographic reference string, such as `L13456n3`, or `T6936n`.
458
542
 
459
543
  Returns:
460
- * db (str) : A label for which bibliographic database to target
461
- * ref (str) : The database ID for the reference to look up
462
- * comment (str) : An additional comment included in the reference, can be fetched separately.
544
+ db (str): A label for which bibliographic database to target
545
+ ref (str|None): The database ID for the reference to look up
546
+ comment (str): An additional comment included in the reference, can be fetched separately.
463
547
  """
464
548
  if reference_code.startswith("n"):
465
549
  db, ref, comment = "T", None, "n"
@@ -474,12 +558,12 @@ class BibCache:
474
558
  """Look up a reference code for a given element state.
475
559
 
476
560
  Args:
477
- element (str) : The element name, e.g. `H`
478
- sp_num (int) : The ionization state of the element, with 1 corresponding to the atom
479
- reference_code (str) : The bibliographic reference code from the ASD columns `tp_ref` or `line_ref`.
561
+ element (str): The element name, e.g. `H`
562
+ sp_num (int): The ionization state of the element, with 1 corresponding to the atom
563
+ reference_code (str): The bibliographic reference code from the ASD columns `tp_ref` or `line_ref`.
480
564
 
481
565
  Returns:
482
- bib_data (dict) : A dictionary containing bibliographic metadata for the reference, if available/applicable. Contains a url to look it up.
566
+ bib_data (dict[str,Any]): A dictionary containing bibliographic metadata for the reference, if available/applicable. Contains a url to look it up.
483
567
  """
484
568
  db, ref, comment = self.parse_reference_code(reference_code)
485
569
  params = {
@@ -0,0 +1,61 @@
1
+ r"""`ASDcache` is a package to fetch data from the NIST Atomic Spectra Database (ASD), utilizing caching for fast responses.
2
+
3
+ To make the most use out of the cache, `ASDcache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
4
+
5
+ Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
6
+
7
+ The benefit of this format is that it is more 'machine readable' than the formatted ASCII of HTML options.
8
+
9
+ This means it requires far less bespoke parsing to get rid of 'human readable' features such as repeated page column headers, or empty lines.
10
+
11
+ ## Air wavelength
12
+ To ensure a consistent schema of the retrieved data, lines are always retrieved as a function of wavelength, using `vacuum wavelength`, even between 200 to 2000 nm.
13
+
14
+ Wavenumbers and Ritz wavelength will be included in the response.
15
+
16
+ In the range $5000\ \mathrm{cm}^{-1}<\nu<50000\ \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][.SpectraCache.wn_to_n_refractive]).
17
+ This is consistent with the approach of the ASD.
18
+
19
+ ## Making use of the cache
20
+
21
+ Each response from the NIST page is cached (1 week by default) on the local system.
22
+
23
+ This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
24
+
25
+ As an example: retrieving and parsing the data for all spectra between 200 and 1000 nm can take over 2 minutes without using the cache, but can be as fast as 0.2 seconds using the `polars` backend.
26
+
27
+ In addition, it means that an internet connection is not required after initial data fetching.
28
+
29
+ The cached response is only updated upon successful retrieval of a new response of the NIST page.
30
+
31
+ If unable to successfully fetch new data, we fall back to a 'stale' cached response.
32
+
33
+ The cache can be shared to another system, to give offline/airgapped systems access to the same data.
34
+
35
+ To that end, the file `NIST_ASD_cache.sqlite` in the user's cache directory has to be copied over.
36
+
37
+ ### Default cache locations
38
+
39
+ The standard cache directories are as follows:
40
+
41
+ === "Windows"
42
+ `%USERPROFILE%/AppData/Local`
43
+ === "Linux"
44
+ `~/.cache/http_cache/`
45
+ === "MacOS"
46
+ `/Users/user/Library/Caches/http_cache/`
47
+
48
+ ### Cache keys and uniqueness
49
+
50
+ Queries to the NIST ASD are hashed by the keys (or parameters) of the requests.
51
+
52
+ This means that any change to either one of these parameters, will result in a new cache entry, even if the returned data is equivalent.
53
+
54
+ In other words: the cache cannot deduplicate queries such as `SpectraCache().fetch('H', (200,1000))` followed by `SpectraCache().fetch('H I', (650,660))` (or vice versa).
55
+
56
+ It is often better (and faster) to fetch a range of data beyond what you need, and then filter down the dataframe you retrieve according to your needs.
57
+ """
58
+
59
+ from .ASDCache import SpectraCache, BibCache
60
+
61
+ __all__ = ["SpectraCache", "BibCache"]
@@ -0,0 +1,24 @@
1
+ # file generated by vcs-versioning
2
+ # don't change, don't track in version control
3
+ from __future__ import annotations
4
+
5
+ __all__ = [
6
+ "__version__",
7
+ "__version_tuple__",
8
+ "version",
9
+ "version_tuple",
10
+ "__commit_id__",
11
+ "commit_id",
12
+ ]
13
+
14
+ version: str
15
+ __version__: str
16
+ __version_tuple__: tuple[int | str, ...]
17
+ version_tuple: tuple[int | str, ...]
18
+ commit_id: str | None
19
+ __commit_id__: str | None
20
+
21
+ __version__ = version = '0.2.3'
22
+ __version_tuple__ = version_tuple = (0, 2, 3)
23
+
24
+ __commit_id__ = commit_id = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ASDCache
3
- Version: 0.2.0
3
+ Version: 0.2.3
4
4
  Summary: A Python module to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling
5
5
  Project-URL: Documentation, https://antoinetue.github.io/asdcache
6
6
  Project-URL: Source, https://github.com/AntoineTUE/asdcache
@@ -12,32 +12,33 @@ Classifier: Development Status :: 4 - Beta
12
12
  Classifier: Intended Audience :: Science/Research
13
13
  Classifier: License :: OSI Approved :: MIT License
14
14
  Classifier: Operating System :: OS Independent
15
- Classifier: Programming Language :: Python :: 3.9
16
15
  Classifier: Programming Language :: Python :: 3.10
17
16
  Classifier: Programming Language :: Python :: 3.11
18
17
  Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
19
  Classifier: Topic :: Scientific/Engineering
20
20
  Requires-Python: >=3.9
21
- Requires-Dist: bs4
22
- Requires-Dist: numpy
23
- Requires-Dist: pandas
24
- Requires-Dist: requests
25
- Requires-Dist: requests-cache
21
+ Requires-Dist: beautifulsoup4>=4.12
22
+ Requires-Dist: numpy>=2.0
23
+ Requires-Dist: pandas>=2.0
24
+ Requires-Dist: requests-cache>=1.2.0
26
25
  Provides-Extra: docs
27
- Requires-Dist: black; extra == 'docs'
28
- Requires-Dist: mkdocs; extra == 'docs'
26
+ Requires-Dist: mkdocs-api-autonav; extra == 'docs'
29
27
  Requires-Dist: mkdocs-autorefs; extra == 'docs'
30
- Requires-Dist: mkdocs-gen-files; extra == 'docs'
31
28
  Requires-Dist: mkdocs-git-revision-date-localized-plugin; extra == 'docs'
32
29
  Requires-Dist: mkdocs-include-markdown-plugin; extra == 'docs'
33
- Requires-Dist: mkdocs-jupyter; extra == 'docs'
34
- Requires-Dist: mkdocs-literate-nav; extra == 'docs'
35
- Requires-Dist: mkdocs-material; extra == 'docs'
30
+ Requires-Dist: mkdocs-jupyter>=0.26.3; extra == 'docs'
31
+ Requires-Dist: mkdocs-material==9.7.6; extra == 'docs'
36
32
  Requires-Dist: mkdocs-section-index; extra == 'docs'
37
33
  Requires-Dist: mkdocstrings; extra == 'docs'
38
- Requires-Dist: mkdocstrings-python; extra == 'docs'
34
+ Requires-Dist: mkdocstrings-python-xref>=2.1.1; extra == 'docs'
35
+ Requires-Dist: properdocs>=1.6.7; extra == 'docs'
36
+ Requires-Dist: pygments>=2.20.0; extra == 'docs'
37
+ Requires-Dist: ruff>=0.15.13; extra == 'docs'
39
38
  Provides-Extra: polars
40
- Requires-Dist: polars; extra == 'polars'
39
+ Requires-Dist: polars[pandas]; extra == 'polars'
40
+ Provides-Extra: polars-lts
41
+ Requires-Dist: polars[pandas,rtcompat]; extra == 'polars-lts'
41
42
  Description-Content-Type: text/markdown
42
43
 
43
44
  # ASDCache
@@ -50,7 +51,7 @@ Description-Content-Type: text/markdown
50
51
  [![GitHub Workflow Status docs](https://img.shields.io/github/actions/workflow/status/AntoineTUE/ASDCache/documentation.yml?label=Documentation%20build)](https://antoinetue.github.io/ASDCache)
51
52
  [![PyPI - Version](https://img.shields.io/pypi/v/ASDCache)](https://pypi.python.org/pypi/ASDCache)
52
53
  [![PyPI - Python versions](https://img.shields.io/pypi/pyversions/ASDCache.svg)](https://pypi.python.org/pypi/ASDCache)
53
- [![PyPI - Downloads](https://img.shields.io/pypi/dw/ASDCache)](https://pypistats.org/packages/ASDCache)
54
+ [![PyPI - Downloads](https://img.shields.io/pypi/dm/ASDCache)](https://pypistats.org/packages/asdcache)
54
55
  [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
55
56
  [![Hatch project](https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg)](https://github.com/pypa/hatch)
56
57
 
@@ -19,38 +19,38 @@ classifiers = [
19
19
  "Topic :: Scientific/Engineering",
20
20
  "Intended Audience :: Science/Research",
21
21
  "Operating System :: OS Independent",
22
- "Programming Language :: Python :: 3.9",
23
22
  "Programming Language :: Python :: 3.10",
24
23
  "Programming Language :: Python :: 3.11",
25
24
  "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
26
  ]
27
- dependencies = ["requests","requests_cache", "pandas","numpy", "bs4"]
27
+ dependencies = ["requests_cache>=1.2.0", "pandas>=2.0","numpy>=2.0", "beautifulsoup4>=4.12"]
28
28
  dynamic = ["version"]
29
29
 
30
30
  [project.optional-dependencies]
31
- polars = ["polars"]
31
+ polars = ["polars[pandas]"]
32
+ polars-lts = ["polars[rtcompat,pandas]"]
32
33
  docs = [
33
- "mkdocs",
34
+ "properdocs>=1.6.7",
35
+ "mkdocs-material==9.7.6",
34
36
  "mkdocs-autorefs",
35
- "mkdocs-gen-files",
37
+ # "mkdocs-gen-files",
36
38
  "mkdocs-git-revision-date-localized-plugin",
37
39
  "mkdocs-include-markdown-plugin",
38
- "mkdocs-jupyter",
39
- "mkdocs-literate-nav",
40
- "mkdocs-material",
40
+ "mkdocs-jupyter>=0.26.3",
41
+ # "mkdocs-literate-nav",
41
42
  "mkdocs-section-index",
42
43
  "mkdocstrings",
43
- "mkdocstrings-python",
44
- "black"
44
+ "mkdocstrings-python-xref>=2.1.1",
45
+ "mkdocs-api-autonav",
46
+ "ruff>=0.15.13",
47
+ "pygments>=2.20.0"
45
48
  ]
46
49
 
47
50
  [project.urls]
48
51
  Documentation = "https://antoinetue.github.io/asdcache"
49
52
  Source = "https://github.com/AntoineTUE/asdcache"
50
53
 
51
- [tool.hatch.metadata]
52
- # direct dependency references, e.g `pip @ git+https://github.com/pypa/pip.git@master`
53
- allow-direct-references = true
54
54
 
55
55
  [tool.hatch.version]
56
56
  source = "vcs"
@@ -109,7 +109,7 @@ extend-exclude = ["docs/assets/scripts/gen_ref_pages.py"]
109
109
 
110
110
  [tool.ruff.lint]
111
111
  select = ["E4", "E7", "E9", "F","C4", "SIM", "NPY", "PD","B","UP","D"]
112
- ignore = ["PD901","F401"]
112
+ ignore = ["F401"]
113
113
 
114
114
  [tool.ruff.lint.pydocstyle]
115
115
  convention = "pep257"
@@ -131,29 +131,36 @@ fragments = [
131
131
  cache-keys = [{ git = true }]
132
132
 
133
133
  [tool.hatch.envs.default]
134
- python = "3.9"
134
+ python = "3.12"
135
135
  post-install-commands = ["pre-commit install"]
136
- dependencies = ["matplotlib", "ipython","ipykernel","ruff"]
136
+ dependencies = ["matplotlib", "ipython","ipykernel","pre-commit"]
137
137
  installer = "uv"
138
138
  features = ["polars"]
139
139
 
140
- [tool.hatch.envs.test]
141
- dependencies = [
142
- "coverage[toml]>=6.2",
143
- "pytest",
144
- "pytest-cov",
145
- "pytest-mock",
146
- "pytest-recording",
147
- "pytest-sugar",
148
- "hypothesis",
149
- ]
150
140
 
151
141
  [tool.hatch.envs.hatch-test]
152
142
  randomize = false
153
143
  parallel = false # avoid cache access conflicts
154
144
  retries = 2
155
- retry-delay = 2
145
+ retry-delay = 1
156
146
  features = ["polars"]
147
+ dependencies = [
148
+ "coverage-enable-subprocess==1.0",
149
+ 'coverage[toml]>=6.2,<7.11; python_version<"3.10"',
150
+ 'coverage[toml]~=7.11; python_version>="3.10"',
151
+ 'pytest~=8.4; python_version<"3.10"',
152
+ 'pytest~=9.0; python_version>="3.10"',
153
+ "pytest-mock~=3.12",
154
+ "pytest-randomly~=3.15",
155
+ "pytest-rerunfailures~=14.0",
156
+ "pytest-xdist[psutil]~=3.5",
157
+ 'pytest-cov~=7.1.0; python_version>="3.10"',
158
+ "pytest-recording",
159
+ "pytest-sugar~=1.1.1",
160
+ "hypothesis",
161
+ 'virtualenv<21; python_version<"3.10"',
162
+ ]
163
+
157
164
 
158
165
  [tool.hatch.envs.docs]
159
166
  skip-install = true
@@ -161,13 +168,13 @@ features = ["docs"]
161
168
  dependencies = ["mike"]
162
169
 
163
170
  [tool.hatch.envs.docs.scripts]
164
- serve = "mkdocs serve -f mkdocs.yml {args}"
165
- build = "mkdocs build --clean -f mkdocs.yml {args}"
166
- ci-build = "mike deploy --config-file mkdocs.yml --update-aliases {args}"
171
+ serve = "properdocs serve -f mkdocs.yml {args}"
172
+ build = "properdocs build --clean -f mkdocs.yml {args}"
173
+ ci-build = "mike deploy --config-file mkdocs.yml {args}"
167
174
 
168
175
  [tool.hatch.envs.lint]
169
176
  template = "lint"
170
- dependencies = ["ruff>=0.7.0"]
177
+ dependencies = ["ruff>=0.15.13"]
171
178
 
172
179
  [tool.hatch.envs.lint.scripts]
173
180
  style = [
@@ -181,5 +188,25 @@ fix = [
181
188
  "style", # feedback on what is not fixable
182
189
  ]
183
190
 
191
+
192
+ [tool.hatch.envs.hatch-test.overrides]
193
+ matrix.pandas.dependencies = [
194
+ { value = "pandas>=2.0.0", if = ["pandas-2.0"] },
195
+ { value = "numpy>=2.0", if = ["pandas-2.0"] },
196
+ { value = "pandas>=3.0.0", if = ["pandas-3.0"] },
197
+ { value = "numpy>=2.0", if = ["pandas-3.0"] },
198
+ ]
199
+ matrix.polars.features = [
200
+ { value = "polars", if = ["polars"]},
201
+ { value = "polars-lts", if = ["polars-lts"]},
202
+ ]
203
+
204
+ [[tool.hatch.envs.hatch-test.matrix]]
205
+ python = ["3.9","3.10"]
206
+ pandas = ["pandas-2.0"]
207
+ polars = ["polars","polars-lts"]
208
+
184
209
  [[tool.hatch.envs.hatch-test.matrix]]
185
- python = ["3.9", "3.10", "3.11", "3.12","3.13"]
210
+ python = ["3.11","3.12","3.13"]
211
+ pandas = ["pandas-2.0","pandas-3.0"]
212
+ polars = ["polars","polars-lts"]
@@ -1,10 +0,0 @@
1
- """ASDCache is a module to retrieve data from the NIST Atomic Spectra Database that uses caching for fast local access.
2
-
3
- To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
4
-
5
- The `SpectraCache` class acts as the entrypoint to retrieve this data.
6
- """
7
-
8
- from .ASDCache import SpectraCache, BibCache
9
-
10
- __all__ = ["SpectraCache", "BibCache"]
@@ -1,16 +0,0 @@
1
- # file generated by setuptools_scm
2
- # don't change, don't track in version control
3
- TYPE_CHECKING = False
4
- if TYPE_CHECKING:
5
- from typing import Tuple, Union
6
- VERSION_TUPLE = Tuple[Union[int, str], ...]
7
- else:
8
- VERSION_TUPLE = object
9
-
10
- version: str
11
- __version__: str
12
- __version_tuple__: VERSION_TUPLE
13
- version_tuple: VERSION_TUPLE
14
-
15
- __version__ = version = '0.2.0'
16
- __version_tuple__ = version_tuple = (0, 2, 0)
File without changes
File without changes