ASDCache 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,51 +1,22 @@
1
- r"""`ASDcache` is a module to fetch data from the NIST Atomic Spectra Database (ASD), utlizing caching for fast responses.
1
+ """The ASDCache module.
2
2
 
3
- To make the most use out of the cache, `ASDcache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
4
-
5
- Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
6
- The benefit of this format is that it is more 'machine readable' than the formatted ASCII of HTML options.
7
- This means it requires far less bespoke parsing to get rid of 'human readable' features such as repeated page column headers, or empty lines.
8
- To ensure a consistent schema of the retrieved data, lines are always retrieved as a function of wavelength, using `vacuum wavelength`, even between 200 to 2000 nm.
9
- Wavenumbers and Ritz wavelength will be included in the response.
10
-
11
- In the range $5000 \mathrm{cm}^{-1}<\nu<50000 \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][ASDcache.readASD.ASDCache.wn_to_n_refractive]).
12
- This is consistent with the approach of the ASD.
13
-
14
- Each response from the NIST page is cached (1 week by default) on the local system.
15
- This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
16
- As an example: reading all spectra between 200 and 1000 nm can take over 2 minutes without using the cache, but can be as fast as 0.2 seconds using the `polars` backend.
17
- In addition, it means that an internet connection is not required after initial data fetching.
18
- The cached response is only updated upon succesfull retrieval of a new response of the NIST page.
19
- If unable to succesfully fetch new data, we fall back to a 'stale' cached response.
20
-
21
- The cache can be shared to another system, to give offline/airgapped systems access to the same data.
22
- To that end, the file `NIST_ASD_cache.sqlite` in the user's cache directory has to be copied over.
23
-
24
- The standard cache directories are as follows:
25
-
26
- === "Windows"
27
- `%USERPROFILE%/AppData/Local`
28
- === "Linux"
29
- `~/.cache/http_cache/`
30
- === "MacOS"
31
- `/Users/user/Library/Caches/http_cache/`
32
-
33
- Queries to the NIST ASD are hashed by the keys (or parameters) of the requests.
34
- This means that any change to either one of these parameters, will result in a new cache entry, even if the returned data is equivalent.
3
+ It contains both the [SpectraCache][(m).] and [BibCache][(m).] classes which allow you to interact with the ASD and the relevant bibliographic databases.
35
4
  """
36
5
 
37
- import importlib
6
+ from pathlib import Path
7
+ import importlib.util
38
8
  import warnings
9
+ import numpy as np
39
10
  import pandas as pd
40
- from requests_cache import CachedSession, CachedResponse
11
+ from requests_cache import CachedSession, CachedResponse, OriginalResponse
12
+ from requests import Response
41
13
  from io import StringIO
42
14
  from datetime import timedelta
43
15
  import re
44
- import numpy as np
45
16
  from bs4 import BeautifulSoup
46
17
  import sys
47
18
  import logging
48
- from typing import Any, Optional
19
+ from typing import Any, Optional, Union
49
20
 
50
21
  if importlib.util.find_spec("polars"):
51
22
  POLARS_AVAILABLE = True
@@ -54,12 +25,10 @@ if importlib.util.find_spec("polars"):
54
25
  else:
55
26
  POLARS_AVAILABLE = False
56
27
 
57
- logging.basicConfig(
58
- level=logging.INFO,
59
- format="[%(asctime)s] %(levelname)s [%(name)s.%(funcName)s:%(lineno)d] %(message)s",
60
- datefmt="%d/%b/%Y %H:%M:%S",
61
- stream=sys.stdout,
62
- )
28
+ from .utils import wavenumber_to_refractive_index, extract_state_from_response
29
+ from ._version import version
30
+
31
+ logger = logging.getLogger("ASDCache")
63
32
 
64
33
  ASDSchema = {
65
34
  "element": str,
@@ -92,20 +61,22 @@ ASDSchema = {
92
61
  "line_ref": str,
93
62
  }
94
63
 
95
- STATE_EXPR = r"spectra=([\w]+)\+?([IVX]+)?"
96
- """Regex pattern for extracting (element,charge) tuple for a single-state query, which uses roman numerals."""
97
64
  SCI_EXPR = r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)"
98
65
  """Regex pattern for processing scientific notation"""
99
66
 
100
67
 
68
+ class ASDQueryError(Exception):
69
+ """Exception raised when the NIST ASD has indicated an error with a query."""
70
+
71
+
101
72
  class SpectraCache:
102
73
  """A class acting as the entrypoint to retrieve data from the NIST Atomic Spectra Database that uses caching.
103
74
 
104
75
  The `SpectraCache` instance acts as an access point to the cache, which stores responses on the local system in a SQLite database.
105
76
 
106
- Data retrieval from cache is much faster (order milliseconds) than fetching from the internet (order seconds), and avoids wastefull requests to the server.
77
+ Data retrieval from cache is much faster (order of milliseconds) than fetching from the internet (order of seconds to minutes), and avoids wasteful requests to the server.
107
78
 
108
- Cache time-to-live is one week by default.
79
+ Cache time-to-live is two weeks by default.
109
80
 
110
81
  Since the NIST ASD is usually updated less frequently than that, this is a compromise between having the latest data, and overall fast performance.
111
82
 
@@ -115,14 +86,15 @@ class SpectraCache:
115
86
  nist_url = "https://physics.nist.gov/cgi-bin/ASD/lines1.pl"
116
87
  species_expr = re.compile(r"spectra=([\w\+\-\%3]+)&")
117
88
  query_params = {
89
+ "submit": "Retrieve Data",
118
90
  "unit": 1,
119
91
  "de": 0,
120
- "plot_out": 0,
92
+ # "plot_out": 0,
121
93
  "I_scale_type": 1,
122
94
  "format": 3,
123
95
  "line_out": 0,
124
- "remove_js": "on",
125
- "no_spaces": "on",
96
+ # "remove_js": "on",
97
+ # "no_spaces": "on",
126
98
  "en_unit": 0,
127
99
  "output": 0,
128
100
  "bibrefs": 1,
@@ -143,54 +115,37 @@ class SpectraCache:
143
115
  "enrg_out": "on",
144
116
  "J_out": "on",
145
117
  "g_out": "on",
146
- "diag_out": "on",
118
+ # "diag_out": "on", # avoid diagnostic data, it leads to multi-species queries failing; which can appear as if keys below are needed. See issue #1
147
119
  "allowed_out": 1,
148
120
  "forbid_out": 1,
149
- "submit": "Retrieve Data",
121
+ # "show_diff_obs_calc": 1, # Does not appear mandatory in retrospect, see issue #1
122
+ # "include_Ritz_E1": 1, # Does not appear mandatory in retrospect, see issue #1
150
123
  }
151
124
  """Request parameters used by the NIST ASD form."""
152
- column_order = [
153
- "element",
154
- "sp_num",
155
- "obs_wl_vac(nm)",
156
- "unc_obs_wl",
157
- "obs_wl_air(nm)",
158
- "ritz_wl_vac(nm)",
159
- "unc_ritz_wl",
160
- "ritz_wl_air(nm)",
161
- "wn(cm-1)",
162
- "intens",
163
- "Aki(s^-1)",
164
- "fik",
165
- "S(a.u.)",
166
- "log_gf",
167
- "Acc",
168
- "Ei(cm-1)",
169
- "Ek(cm-1)",
170
- "conf_i",
171
- "term_i",
172
- "J_i",
173
- "conf_k",
174
- "term_k",
175
- "J_k",
176
- "g_i",
177
- "g_k",
178
- "Type",
179
- "tp_ref",
180
- "line_ref",
181
- ]
182
- """Fixed order of columns for consistent schema of data."""
183
-
184
- def __init__(self, use_polars_backend=False, cache_expiry=timedelta(weeks=1), strict_matching=True):
185
- """Initialize an instance that handles cached data lookup of the NIST ASD."""
125
+
126
+ def __init__(
127
+ self,
128
+ use_polars_backend=False,
129
+ cache_expiry=timedelta(weeks=2),
130
+ strict_matching=True,
131
+ cache_path: Optional[Path] = None,
132
+ ):
133
+ """Initialize an instance that handles cached data lookup of the NIST ASD.
134
+
135
+ Args:
136
+ use_polars_backend (bool): Flag to use polars as DataFrame backend, if available
137
+ cache_expiry (timedelta): Span of time beyond which an entry will be considered expired, and a refresh attempted
138
+ strict_matching (bool): If true, use all request parameters to hash urls for cache matching (recommended).
139
+ cache_path (Path, Optional): Path to a location to store the cache in
140
+ """
186
141
  self.strict_matching = strict_matching
187
142
  self.session = CachedSession(
188
- "NIST_ASD_cache",
143
+ "NIST_ASD_cache" if cache_path is None else cache_path,
189
144
  use_cache_dir=True,
190
145
  expire_after=cache_expiry,
191
146
  stale_if_error=True,
192
147
  filter_fn=self._check_response_success,
193
- ignored_parameters=list(self.query_params.keys()) if self.strict_matching is False else None,
148
+ ignored_parameters=list(self.query_params.keys()) if self.strict_matching is False else [],
194
149
  )
195
150
  if (use_polars_backend) & (not POLARS_AVAILABLE):
196
151
  warnings.warn("Cannot find `polars` as a backend, falling back to `pandas`", stacklevel=2)
@@ -209,7 +164,7 @@ class SpectraCache:
209
164
  """
210
165
  return self.session.settings.expire_after
211
166
 
212
- def set_cache_expiry(self, new: timedelta = None, **kwargs):
167
+ def set_cache_expiry(self, new: Optional[timedelta] = None, **kwargs):
213
168
  """Set the cache expiry to a different interval (default: 2 weeks).
214
169
 
215
170
  Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
@@ -219,12 +174,59 @@ class SpectraCache:
219
174
  self.session.settings.expire_after = new
220
175
 
221
176
  @staticmethod
222
- def _check_response_success(response: "CachedResponse") -> bool:
177
+ def _check_response_success(response: Response) -> bool:
223
178
  """Validate that data has been fetched successfully.
224
179
 
225
180
  If this check fails, the cache should not update with this response, even when marked as stale.
181
+
182
+ The first obvious way to check success is if an error is indicated by the HTTP status code.
183
+
184
+ However, when a query for data is incorrect, the NIST ASD returns an HTML page indicating `<title>NIST ASD : Input Error</title>` in the `<head>` tag, or "Error Message".
185
+
186
+ A successful query would not receive HTML as a response, but raw ASCII values instead.
187
+
188
+ We can thus check for the start of an HTML document.
189
+
190
+ Note that this only works for data queries, not for bibliographic metadata by `BibCache`.
191
+ """
192
+ return not (not response.ok or response.content.startswith(b"<!DOCTYPE"))
193
+
194
+ def _get_data(self, species: str, wl_range: tuple[float, float] = (170, 1000), **kwargs) -> Response:
195
+ """Retrieve raw, ASCII-formatted data from the NIST ASD with a GET request.
196
+
197
+ To retrieve data and parse it into a DataFrame, use [fetch][..] instead.
198
+
199
+ Returns the raw response, which will be cached if it contains valid data (see [_check_response_success][..]).
200
+
201
+ If the response does not contain ASCII data, but HTML instead, an [ASDQueryError][(m).] will be raised.
202
+
203
+ It is possible to override any standard query parameter (see [query_params][..]) by passing them as kwargs.
226
204
  """
227
- return (response.status_code == 200) & (b"Error Message" not in response.content)
205
+ query_params = {
206
+ "spectra": species,
207
+ "output_type": 0,
208
+ "low_w": min(wl_range),
209
+ "upp_w": max(wl_range),
210
+ **{k: v for k, v in self.query_params.items() if k not in kwargs},
211
+ **{k: v for k, v in kwargs.items() if k in self.query_params},
212
+ }
213
+ response: Response = self.session.get(self.nist_url, params=query_params)
214
+ response.raise_for_status()
215
+ # Check whether the response is an HTML document instead of ASCII-formatted data, which indicates a query error.
216
+ if response.content.startswith(b"<!DOCTYPE"):
217
+ body = BeautifulSoup(response.text, features="html.parser").body
218
+ reason = body.text.strip().replace("\n", " ") if body else ""
219
+ logger.error(
220
+ "NIST ASD responded with %s instead of ASCII-data for species=%s, wl_range=%s\nQuery: %s",
221
+ reason,
222
+ species,
223
+ wl_range,
224
+ response.url,
225
+ )
226
+ raise ASDQueryError(
227
+ f"Query for {species=} {wl_range=} did not receive ASCII-data. {reason=} This means the ASD could not interpret your query. Check if your query is malformed."
228
+ )
229
+ return response
228
230
 
229
231
  @property
230
232
  def cached_species(self) -> list[str]:
@@ -239,35 +241,22 @@ class SpectraCache:
239
241
  for elem in self.species_expr.search(u).group(1).split("%3B")
240
242
  ]
241
243
 
242
- def fetch(self, species, wl_range=(170, 1000), **kwargs) -> "pd.DataFrame|pl.DataFrame|CachedResponse":
244
+ def fetch(self, species, wl_range=(170, 1000)) -> "pd.DataFrame|pl.DataFrame":
243
245
  """Fetch information on a species from the ASD, first checking the cache.
244
246
 
245
- This supports loading multiple species in one go by using the same notation as the NIST ASD page.
247
+ This supports loading multiple species in one go by using the same notation as the NIST ASD form.
246
248
 
247
249
  Note however that cache keys are computed for unique options for `species` and `wl_range`.
248
250
 
249
251
  This means that you won't get caching benefits by using different queries.
250
252
 
251
- In other words: the cache cannot deduplicate queries such as `ASD.fetch('H', (200,1000))` followed by `ASD.fetch('H I', (650,660))`.
253
+ In other words: the cache cannot deduplicate queries such as `ASD.fetch('H', (200,1000))` followed by `ASD.fetch('H I', (650,660))` (or vice versa).
252
254
 
253
255
  Both these operations will fetch data online and be stored as separate cache entries.
254
256
  """
255
- query_params = {
256
- "spectra": species,
257
- "output_type": 0,
258
- "low_w": min(wl_range),
259
- "upp_w": max(wl_range),
260
- **self.query_params,
261
- }
262
- response = self.session.get(self.nist_url, params=query_params)
263
-
264
- # if response.status_code == 200:
265
- response.raise_for_status()
257
+ # TODO: add kwargs for read-only/offline access etc.
258
+ response = self._get_data(species, wl_range)
266
259
  return self.create_dataframe(response)
267
- # else:
268
- # print(f"Error: Received status code {response.status_code}")
269
- # print(response.url)
270
- # return response
271
260
 
272
261
  def create_dataframe(self, response) -> "pd.DataFrame|pl.DataFrame":
273
262
  """Create a dataframe from the (cached) NIST ASD response, using the chosen backend at class instantiation."""
@@ -276,14 +265,14 @@ class SpectraCache:
276
265
  return self._from_pandas(response)
277
266
 
278
267
  @classmethod
279
- def _from_pandas(cls, response: "CachedResponse") -> "pd.DataFrame":
268
+ def _from_pandas(cls, response: Response) -> "pd.DataFrame":
280
269
  r"""Transform a (cached) NIST ASD response into a pandas DataFrame.
281
270
 
282
271
  Calculates the air equivalent wavelength from the vacuum wavelength using the same Sellmeier equation as the NIST ASD.
283
272
 
284
273
  Note that this conversion is only performed for lines with $200 nm < \lambda < 2000 nm$, like the ASD.
285
274
 
286
- For lines outside of this range, the conversion falls back to their vacuum wavelength.
275
+ For lines outside of this range, it uses NaN values.
287
276
  """
288
277
  schema = {
289
278
  "obs_wl_vac(nm)": str,
@@ -311,38 +300,48 @@ class SpectraCache:
311
300
  "": str,
312
301
  }
313
302
  df = pd.read_csv(StringIO(response.text), sep="\t", dtype=schema)
303
+ # Detect if pandas uses new `StringDtype`, or legacy `object` dtype for strings.
304
+ # This affects NaN handling for strings.
305
+ # Pandas 3.0 and up use the StringDtype, while pandas 2 can opt-in to this
306
+ # The 'Type' column should exist, 'element' may not.
307
+ uses_new_string_dtype = pd.api.types.is_string_dtype(df["Type"])
314
308
  for col in ["obs_wl_vac(nm)", "ritz_wl_vac(nm)", "intens", "Ei(cm-1)", "Ek(cm-1)"]:
315
309
  df[col] = df.loc[:, col].str.extract(SCI_EXPR).astype(float)
316
- df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
310
+ # Any missing value implies line is an E1 (electric dipole) transition
311
+ if uses_new_string_dtype:
312
+ df["Type"] = df.loc[:, "Type"].fillna("E1")
313
+ else:
314
+ df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
317
315
  df["tp_ref"] = df.loc[:, "tp_ref"].fillna("")
318
- df["obs_wl_air(nm)"] = df["obs_wl_vac(nm)"]
319
- df["obs_wl_air(nm)"] = df[df["wn(cm-1)"].between(5000, 50000)]["obs_wl_air(nm)"] / cls.wn_to_n_refractive(
320
- df[df["wn(cm-1)"].between(5000, 50000)]["wn(cm-1)"]
316
+ df["obs_wl_air(nm)"] = np.nan
317
+ air_equiv_range = df["wn(cm-1)"].between(5000, 50000) # range where air wavelength is computed.
318
+ df["obs_wl_air(nm)"] = df.loc[air_equiv_range, "obs_wl_vac(nm)"] / wavenumber_to_refractive_index(
319
+ df.loc[air_equiv_range, "wn(cm-1)"]
321
320
  )
322
- df["ritz_wl_air(nm)"] = df["ritz_wl_vac(nm)"]
323
- df["ritz_wl_air(nm)"] = df[df["wn(cm-1)"].between(5000, 50000)]["ritz_wl_air(nm)"] / cls.wn_to_n_refractive(
324
- df[df["wn(cm-1)"].between(5000, 50000)]["wn(cm-1)"]
321
+ df["ritz_wl_air(nm)"] = np.nan
322
+ df["ritz_wl_air(nm)"] = df.loc[air_equiv_range, "ritz_wl_vac(nm)"] / wavenumber_to_refractive_index(
323
+ df.loc[air_equiv_range, "wn(cm-1)"]
325
324
  )
326
325
  df = df.drop([c for c in df.columns if "Unnamed" in c], axis=1).reset_index(drop=True)
327
326
  if "element" not in df.columns:
328
- element, numeral = re.search(STATE_EXPR, response.url).groups()
329
- df["element"] = element
330
- df["sp_num"] = numeral
331
327
  # cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
332
- df["sp_num"] = df["sp_num"].map(cls.roman_to_int)
328
+ # As 'element' and 'sp_num' columns are only missing for single-species queries, assign as constants, not vectors.
329
+ element, numeric = extract_state_from_response(response)
330
+ df["element"] = element
331
+ df["sp_num"] = numeric
333
332
  df["unc_obs_wl"] = pd.to_numeric(df["unc_obs_wl"]) if "unc_obs_wl" in df.columns else np.nan
334
333
  df["unc_ritz_wl"] = pd.to_numeric(df["unc_ritz_wl"]) if "unc_ritz_wl" in df.columns else np.nan
335
- return df.loc[:, cls.column_order]
334
+ return df.loc[:, list(ASDSchema)]
336
335
 
337
336
  @classmethod
338
- def _from_polars(cls, response: "CachedResponse") -> "pl.DataFrame":
337
+ def _from_polars(cls, response: Response) -> "pl.DataFrame":
339
338
  r"""Transform a (cached) NIST ASD response into a polars DataFrame.
340
339
 
341
340
  Calculates the air equivalent wavelength from the vacuum wavelength using the same Sellmeier equation as the NIST ASD.
342
341
 
343
342
  Note that this conversion is only performed for lines with $200 nm < \lambda < 2000 nm$, like the ASD.
344
343
 
345
- For lines outside of this range, the conversion falls back to their vacuum wavelength.
344
+ For lines outside of this range, it uses NaN values.
346
345
  """
347
346
  schema = {
348
347
  "obs_wl_vac(nm)": pl.String,
@@ -366,97 +365,45 @@ class SpectraCache:
366
365
  "J_k": pl.String,
367
366
  "": pl.String,
368
367
  }
369
- # annotation_chars_to_strip = "(?i)()[]?*w,bGhilmprsq:+xzgacHd "
370
- df = (
371
- pl.read_csv(
372
- StringIO(response.text),
373
- separator="\t",
374
- schema_overrides=schema,
375
- null_values="",
376
- )
377
- .with_columns(
378
- pl.col("obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens")
379
- # .str.strip_chars(annotation_chars_to_strip).str.replace("&dagger;", "", literal=True)
380
- .str.extract(SCI_EXPR)
381
- # .str.extract(r"([+-]?\d*\.?\d+e[+-]?\d+)")
382
- .replace("", None)
383
- .cast(pl.Float64),
384
- pl.col("ritz_wl_vac(nm)").str.strip_chars('"+*').replace("", None).cast(pl.Float64),
385
- pl.col("S(a.u.)").cast(pl.Float64),
386
- pl.col("Type").replace(None, "E1"),
387
- pl.col("tp_ref").replace(None, ""),
388
- )
389
- .drop([""])
390
- ).with_columns(
391
- pl.when(pl.col("wn(cm-1)").is_between(5000, 50000))
392
- .then(
393
- pl.col("obs_wl_vac(nm)").cast(pl.Float64)
394
- / pl.col("wn(cm-1)").map_elements(cls.wn_to_n_refractive, return_dtype=pl.Float64)
395
- )
396
- .otherwise(pl.col("obs_wl_vac(nm)"))
397
- .cast(pl.Float64)
368
+
369
+ df = pl.read_csv(
370
+ StringIO(response.text),
371
+ separator="\t",
372
+ schema_overrides=schema,
373
+ null_values="",
374
+ )
375
+ sci_cols = ["obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens", "ritz_wl_vac(nm)"]
376
+ cast_to_scientific_notation = [
377
+ pl.col(c).str.extract(SCI_EXPR).replace("", None).cast(pl.Float64).alias(c) for c in sci_cols
378
+ ]
379
+ df = df.with_columns(
380
+ *cast_to_scientific_notation,
381
+ pl.col("S(a.u.)").cast(pl.Float64),
382
+ pl.col("Type").replace(None, "E1"),
383
+ pl.col("tp_ref").replace(None, ""),
384
+ ).drop([""])
385
+ # compute air wavelengths between 5000 cm-1 and 50000 cm-1
386
+ air_equiv_range = pl.col("wn(cm-1)").is_between(5000, 50000)
387
+ df = df.with_columns(
388
+ pl.when(air_equiv_range)
389
+ .then(pl.col("obs_wl_vac(nm)") / wavenumber_to_refractive_index(pl.col("wn(cm-1)")))
390
+ .otherwise(np.nan)
398
391
  .alias("obs_wl_air(nm)"),
399
- pl.when(pl.col("wn(cm-1)").is_between(5000, 50000))
400
- .then(
401
- pl.col("ritz_wl_vac(nm)").cast(pl.Float64)
402
- / pl.col("wn(cm-1)").map_elements(cls.wn_to_n_refractive, return_dtype=pl.Float64)
403
- )
404
- .otherwise(pl.col("ritz_wl_vac(nm)"))
405
- .cast(pl.Float64)
392
+ pl.when(air_equiv_range)
393
+ .then(pl.col("ritz_wl_vac(nm)") / wavenumber_to_refractive_index(pl.col("wn(cm-1)")))
394
+ .otherwise(np.nan)
406
395
  .alias("ritz_wl_air(nm)"),
407
396
  )
408
397
  if "element" not in df.columns:
409
- element, numeral = re.search(STATE_EXPR, response.url).groups()
410
- # cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
411
- df = df.with_columns(
412
- pl.lit(element).alias("element"),
413
- pl.lit("I" if numeral is None else numeral)
414
- .cast(pl.String)
415
- .alias("sp_num")
416
- .map_elements(cls.roman_to_int, return_dtype=pl.Int64)
417
- .first(),
418
- )
419
- df = df.with_columns(
420
- unc_obs_wl=pl.col("unc_obs_wl") if "unc_obs_wl" in df.columns else None,
421
- unc_ritz_wl=pl.col("unc_ritz_wl") if "unc_ritz_wl" in df.columns else None,
422
- ).with_columns(pl.col("unc_obs_wl").cast(pl.Float64), pl.col("unc_ritz_wl").cast(pl.Float64))
423
-
424
- return df.select(*cls.column_order)
425
-
426
- @staticmethod
427
- def roman_to_int(roman: str) -> int:
428
- """Transform Roman numerals to integers.
429
-
430
- Does only support numerals including up to `L`.
431
- """
432
- roman_numerals = {"I": 1, "V": 5, "X": 10, "L": 50}
433
- total = 0
434
- previous = 0
435
- for char in reversed(roman):
436
- current_value = roman_numerals[char]
437
- if current_value < previous:
438
- total -= current_value # Subtract if the current value is less than the previous value
439
- else:
440
- total += current_value
441
- previous = current_value
442
- return total
443
-
444
- @staticmethod
445
- def wn_to_n_refractive(wavenumbers: float) -> float:
446
- r"""Calculate the refractive index $n$ in air for a transition, using the 5-term Sellmeier formula used by NIST.
447
-
448
- The used Sellmeier formula is the one from E.R. Peck and K. Reeder [J. Opt. Soc. Am. 62, 958 (1972)](http://dx.doi.org/10.1364/JOSA.62.000958).
449
-
450
- This formula is fitted to data in the range of 185 nm to 1700 nm for air at 15 °C, 101 325 Pa pressure, with 0.033 % CO2.
451
-
452
- This is the same formula used by the NIST ASD to calculate air wavelengths in the interval of 200 nm to 2000 nm.
453
-
454
- See also [the ASD documentation on the topic](https://physics.nist.gov/PhysRefData/ASD/Html/lineshelp.html#Conversion%20between%20air%20and%20vacuum%20wavelengths).
455
-
456
- Using this refractive index, air equivalent wavelengths consistent with the ASD can be calculated, without the need to query them separately.
457
- """
458
- sigma = wavenumbers * 1e-4 # um^-1
459
- return 1 + 1e-8 * (8060.51 + 2480990 / (132.274 - sigma**2) + 17455.7 / (39.32957 - sigma**2))
398
+ element, numeric = extract_state_from_response(response)
399
+ df = df.with_columns(pl.lit(element).alias("element"), pl.lit(numeric, dtype=pl.Int64).alias("sp_num"))
400
+ # Cast to float, or create column filled with `null` if missing.
401
+ exprs = [
402
+ (pl.col(c) if c in df.columns else pl.lit(None).alias(c)).cast(pl.Float64)
403
+ for c in ["unc_obs_wl", "unc_ritz_wl"]
404
+ ]
405
+ df = df.with_columns(exprs)
406
+ return df.select(*ASDSchema)
460
407
 
461
408
  def get_all_cached(self) -> "pd.DataFrame|pl.DataFrame":
462
409
  """Retrieve all cached data into a single dataframe."""
@@ -508,7 +455,7 @@ class BibCache:
508
455
  """
509
456
  return self.session.settings.expire_after
510
457
 
511
- def set_cache_expiry(self, new: timedelta = None, **kwargs):
458
+ def set_cache_expiry(self, new: Optional[timedelta] = None, **kwargs):
512
459
  """Set the cache expiry to a different interval (default: 1 week).
513
460
 
514
461
  Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
@@ -518,14 +465,14 @@ class BibCache:
518
465
  self.session.settings.expire_after = new
519
466
 
520
467
  @staticmethod
521
- def _check_response_success(response: "CachedResponse") -> bool:
468
+ def _check_response_success(response: Response) -> bool:
522
469
  """Validate that data has been fetched successfully.
523
470
 
524
471
  If this check fails, the cache should not update with this response, even when marked as stale.
525
472
  """
526
473
  is_success = (response.status_code == 200) & (b"There was a problem" not in response.content)
527
474
  if not is_success:
528
- logging.warning(f"Request was unsuccesful status:{response.status_code} , url:{response.url}")
475
+ logger.warning(f"Request was unsuccesful status:{response.status_code} , url:{response.url}")
529
476
  return is_success
530
477
 
531
478
  @classmethod
@@ -533,17 +480,18 @@ class BibCache:
533
480
  r"""Parse a reference code from the NIST ASD into the constituent parts that can be used to look up references.
534
481
 
535
482
  Args:
536
- * reference_code (str): A NIST ASD bibliographic reference string, such as `L13456n3`, or `T6936n`.
483
+ reference_code (str): A NIST ASD bibliographic reference string, such as `L13456n3`, or `T6936n`.
537
484
 
538
485
  Returns:
539
- * db (str) : A label for which bibliographic database to target
540
- * ref (str) : The database ID for the reference to look up
541
- * comment (str) : An additional comment included in the reference, can be fetched separately.
486
+ db (str): A label for which bibliographic database to target
487
+ ref (str|None): The database ID for the reference to look up
488
+ comment (str): An additional comment included in the reference, can be fetched separately.
542
489
  """
543
490
  if reference_code.startswith("n"):
544
- db, ref, comment = "T", None, "n"
545
- elif (not reference_code.startswith("LS")) & (cls.reference_expr.match(reference_code) is not None):
546
- db, ref, comment = cls.reference_expr.match(reference_code).groups()
491
+ return ("T", None, "n")
492
+ matched = cls.reference_expr.match(reference_code)
493
+ if (not reference_code.startswith("LS")) and (matched is not None):
494
+ db, ref, comment = matched.groups()
547
495
  comment = comment if "LS" not in reference_code else "LS"
548
496
  else:
549
497
  db, ref, comment = "T", None, "LS"
@@ -553,12 +501,12 @@ class BibCache:
553
501
  """Look up a reference code for a given element state.
554
502
 
555
503
  Args:
556
- element (str) : The element name, e.g. `H`
557
- sp_num (int) : The ionization state of the element, with 1 corresponding to the atom
558
- reference_code (str) : The bibliographic reference code from the ASD columns `tp_ref` or `line_ref`.
504
+ element (str): The element name, e.g. `H`
505
+ sp_num (int): The ionization state of the element, with 1 corresponding to the atom
506
+ reference_code (str): The bibliographic reference code from the ASD columns `tp_ref` or `line_ref`.
559
507
 
560
508
  Returns:
561
- bib_data (dict) : A dictionary containing bibliographic metadata for the reference, if available/applicable. Contains a url to look it up.
509
+ bib_data (dict[str,Any]): A dictionary containing bibliographic metadata for the reference, if available/applicable. Contains a url to look it up.
562
510
  """
563
511
  db, ref, comment = self.parse_reference_code(reference_code)
564
512
  params = {
@@ -0,0 +1,61 @@
1
+ r"""`ASDCache` is a package to fetch data from the NIST Atomic Spectra Database (ASD), utilizing caching for fast responses.
2
+
3
+ To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
4
+
5
+ Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
6
+
7
+ The benefit of this format is that it is more 'machine readable' than the formatted ASCII of HTML options.
8
+
9
+ This means it requires far less bespoke parsing to get rid of 'human readable' features such as repeated page column headers, or empty lines.
10
+
11
+ ## Air wavelength
12
+ To ensure a consistent schema of the retrieved data, lines are always retrieved as a function of wavelength, using `vacuum wavelength`, even between 200 to 2000 nm.
13
+
14
+ Wavenumbers and Ritz wavelength will be included in the response.
15
+
16
+ In the range $5000\ \mathrm{cm}^{-1}<\nu<50000\ \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][.utils.wavenumber_to_refractive_index]).
17
+ This is consistent with the approach of the ASD.
18
+
19
+ ## Making use of the cache
20
+
21
+ Each response from the NIST page is cached (2 weeks by default) on the local system.
22
+
23
+ This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
24
+
25
+ As an example: retrieving and parsing the data for all spectra between 200 and 1000 nm can take over 2 minutes without using the cache, but can be as fast as 0.2 seconds using the `polars` backend.
26
+
27
+ In addition, it means that an internet connection is not required after initial data fetching.
28
+
29
+ The cached response is only updated upon successful retrieval of a new response of the NIST page.
30
+
31
+ If unable to successfully fetch new data, we fall back to a 'stale' cached response.
32
+
33
+ The cache can be shared to another system, to give offline/airgapped systems access to the same data.
34
+
35
+ To that end, the file `NIST_ASD_cache.sqlite` in the user's cache directory has to be copied over.
36
+
37
+ ### Default cache locations
38
+
39
+ The standard cache directories are as follows:
40
+
41
+ === "Windows"
42
+ `%USERPROFILE%/AppData/Local`
43
+ === "Linux"
44
+ `~/.cache/http_cache/`
45
+ === "MacOS"
46
+ `/Users/user/Library/Caches/http_cache/`
47
+
48
+ ### Cache keys and uniqueness
49
+
50
+ Queries to the NIST ASD are hashed by the keys (or parameters) of the requests.
51
+
52
+ This means that any change to any one of these parameters will result in a new cache entry, even if the returned data is equivalent.
53
+
54
+ In other words: the cache cannot deduplicate queries such as `SpectraCache().fetch('H', (200,1000))` followed by `SpectraCache().fetch('H I', (650,660))` (or vice versa).
55
+
56
+ It is often better (and faster) to fetch a range of data beyond what you need, and then filter down the dataframe you retrieve according to your needs.
57
+ """
58
+
59
+ from .ASDCache import SpectraCache, BibCache
60
+
61
+ __all__ = ["SpectraCache", "BibCache"]
@@ -0,0 +1,24 @@
1
+ # file generated by vcs-versioning
2
+ # don't change, don't track in version control
3
+ from __future__ import annotations
4
+
5
+ __all__ = [
6
+ "__version__",
7
+ "__version_tuple__",
8
+ "version",
9
+ "version_tuple",
10
+ "__commit_id__",
11
+ "commit_id",
12
+ ]
13
+
14
+ version: str
15
+ __version__: str
16
+ __version_tuple__: tuple[int | str, ...]
17
+ version_tuple: tuple[int | str, ...]
18
+ commit_id: str | None
19
+ __commit_id__: str | None
20
+
21
+ __version__ = version = '0.2.4'
22
+ __version_tuple__ = version_tuple = (0, 2, 4)
23
+
24
+ __commit_id__ = commit_id = None
@@ -0,0 +1,66 @@
1
+ """Module containing small helper utility functions for extracting and processing input from the ASD."""
2
+
3
+ import re
4
+ from typing import TYPE_CHECKING
5
+
6
+ if TYPE_CHECKING:
7
+ from requests import Response
8
+
9
# Integer value of each individual Roman numeral symbol.
ROMAN_NUMERALS = {"I": 1, "V": 5, "X": 10, "L": 50, "C": 100, "D": 500, "M": 1000}
STATE_EXPR = r"spectra=([\w]+)\+?([IVX]+)?"
"""Regex pattern for extracting (element,charge) tuple for a single-state query, which uses roman numerals."""


def roman_to_int(roman: str) -> int:
    """Convert a Roman numeral string into the integer it represents.

    The input is treated case-insensitively and surrounding whitespace is ignored.
    Every symbol up to and including "M" is recognised; an empty string yields 0.
    """
    # Symbol values ordered from the rightmost symbol to the leftmost one.
    values = [ROMAN_NUMERALS[symbol] for symbol in reversed(roman.upper().strip())]
    # Pair every symbol with its right-hand neighbour (0 for the rightmost symbol).
    right_neighbours = [0, *values[:-1]]
    # A symbol smaller than its right-hand neighbour is subtractive, e.g. the "I" in "IV".
    return sum(-value if value < neighbour else value for value, neighbour in zip(values, right_neighbours))
30
+
31
+
32
def wavenumber_to_refractive_index(wavenumbers: float) -> float:
    r"""Return the refractive index $n$ of air for a transition at the given wavenumber.

    The value follows the 5-term Sellmeier formula of E.R. Peck and K. Reeder
    [J. Opt. Soc. Am. 62, 958 (1972)](http://dx.doi.org/10.1364/JOSA.62.000958), which is also the formula
    the NIST ASD uses to calculate air wavelengths in the interval of 200 nm to 2000 nm.

    The formula is fitted to data in the range of 185 nm to 1700 nm for air at 15 °C, 101 325 Pa pressure,
    with 0.033 % CO2.

    See also [the ASD documentation on the topic](https://physics.nist.gov/PhysRefData/ASD/Html/lineshelp.html#Conversion%20between%20air%20and%20vacuum%20wavelengths).

    With this refractive index, air equivalent wavelengths consistent with the ASD can be computed locally,
    so they do not have to be queried separately.
    """
    sigma_squared = (wavenumbers * 1e-4) ** 2  # wavenumber scaled by 1e-4 (um^-1), squared
    first_term = 2480990 / (132.274 - sigma_squared)
    second_term = 17455.7 / (39.32957 - sigma_squared)
    return 1 + 1e-8 * (8060.51 + first_term + second_term)
47
+
48
+
49
def extract_state_from_response(response: "Response") -> tuple[str, int]:
    """Extract the element and ionization state from the url of a response.

    When querying only a single state, e.g. 'H I', this information will not be present as a column in data:
    the `element` and `sp_num` columns will not be included.

    This information is parsed from the query url instead, so it can be added.

    Since the `sp_num` column is of an integer type, the roman numerals in the url are converted to integers.
    When the url contains no numeral after the element, `sp_num` defaults to 1.

    Raises a `ValueError` when the url has no `spectra` parameter matching `STATE_EXPR`.
    """
    matched = re.search(STATE_EXPR, str(response.url))
    if not matched:
        # Exceptions do not interpolate logging-style arguments, so the message is formatted before raising.
        raise ValueError(
            f"URL did not contain a `spectra` parameter satisfying {STATE_EXPR}; "
            "Could not identify element and sp_num"
        )
    element, numeral = matched.groups()
    # The numeral group is optional in the pattern; a missing numeral maps to 1.
    numeric: int = roman_to_int(numeral) if numeral else 1
    return element, numeric
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ASDCache
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: A Python module to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling
5
5
  Project-URL: Documentation, https://antoinetue.github.io/asdcache
6
6
  Project-URL: Source, https://github.com/AntoineTUE/asdcache
@@ -17,28 +17,30 @@ Classifier: Programming Language :: Python :: 3.10
17
17
  Classifier: Programming Language :: Python :: 3.11
18
18
  Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Programming Language :: Python :: 3.14
20
21
  Classifier: Topic :: Scientific/Engineering
21
22
  Requires-Python: >=3.9
22
- Requires-Dist: bs4
23
- Requires-Dist: numpy
24
- Requires-Dist: pandas
25
- Requires-Dist: requests
26
- Requires-Dist: requests-cache
23
+ Requires-Dist: beautifulsoup4>=4.12
24
+ Requires-Dist: numpy>=1.20.3
25
+ Requires-Dist: pandas>=2.0
26
+ Requires-Dist: requests-cache>=1.2.0
27
27
  Provides-Extra: docs
28
- Requires-Dist: black; extra == 'docs'
29
- Requires-Dist: mkdocs; extra == 'docs'
28
+ Requires-Dist: mkdocs-api-autonav; extra == 'docs'
30
29
  Requires-Dist: mkdocs-autorefs; extra == 'docs'
31
- Requires-Dist: mkdocs-gen-files; extra == 'docs'
32
30
  Requires-Dist: mkdocs-git-revision-date-localized-plugin; extra == 'docs'
33
31
  Requires-Dist: mkdocs-include-markdown-plugin; extra == 'docs'
34
- Requires-Dist: mkdocs-jupyter; extra == 'docs'
35
- Requires-Dist: mkdocs-literate-nav; extra == 'docs'
36
- Requires-Dist: mkdocs-material; extra == 'docs'
32
+ Requires-Dist: mkdocs-jupyter>=0.26.3; extra == 'docs'
33
+ Requires-Dist: mkdocs-material==9.7.6; extra == 'docs'
37
34
  Requires-Dist: mkdocs-section-index; extra == 'docs'
38
35
  Requires-Dist: mkdocstrings; extra == 'docs'
39
- Requires-Dist: mkdocstrings-python; extra == 'docs'
36
+ Requires-Dist: mkdocstrings-python-xref>=2.1.1; extra == 'docs'
37
+ Requires-Dist: properdocs>=1.6.7; extra == 'docs'
38
+ Requires-Dist: pygments>=2.20.0; extra == 'docs'
39
+ Requires-Dist: ruff>=0.15.13; extra == 'docs'
40
40
  Provides-Extra: polars
41
- Requires-Dist: polars; extra == 'polars'
41
+ Requires-Dist: polars[pandas]; extra == 'polars'
42
+ Provides-Extra: polars-compat
43
+ Requires-Dist: polars[pandas,rtcompat]; extra == 'polars-compat'
42
44
  Description-Content-Type: text/markdown
43
45
 
44
46
  # ASDCache
@@ -51,7 +53,8 @@ Description-Content-Type: text/markdown
51
53
  [![GitHub Workflow Status docs](https://img.shields.io/github/actions/workflow/status/AntoineTUE/ASDCache/documentation.yml?label=Documentation%20build)](https://antoinetue.github.io/ASDCache)
52
54
  [![PyPI - Version](https://img.shields.io/pypi/v/ASDCache)](https://pypi.python.org/pypi/ASDCache)
53
55
  [![PyPI - Python versions](https://img.shields.io/pypi/pyversions/ASDCache.svg)](https://pypi.python.org/pypi/ASDCache)
54
- [![PyPI - Downloads](https://img.shields.io/pypi/dw/ASDCache)](https://pypistats.org/packages/ASDCache)
56
+ [![PyPI - Downloads](https://img.shields.io/pypi/dm/ASDCache)](https://pypistats.org/packages/asdcache)
57
+ [![Pepy Total Downloads](https://img.shields.io/pepy/dt/asdcache)](https://pepy.tech/projects/asdcache)
55
58
  [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
56
59
  [![Hatch project](https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg)](https://github.com/pypa/hatch)
57
60
 
@@ -69,7 +72,7 @@ The main goals and benefits of `ASDCache` are:
69
72
  - [x] Retrieve a consistent schema of the data that represents the 'human readable' format, but enforce strictly numeric data for important columns
70
73
  - [ ] This removes footnotes and other annotations, be sure to check the ASD itself as well for this information.
71
74
  - [x] Use caching to dramatically speed up data retrieval, from minutes down to milliseconds in some cases
72
- - [x] Cache time-to-live is 1 week by default, meaning you still get updates to the ASD in a reasonable time frame
75
+ - [x] Cache time-to-live is two weeks by default, meaning you still get updates to the ASD in a reasonable time frame
73
76
  - [x] The cache time-to-live can be adjusted
74
77
  - [x] Cache data to allow working offline, or even transferring the ASD data to an offline system.
75
78
  - [x] The cache is only updated when a request for new data succeeds
@@ -78,6 +81,7 @@ The main goals and benefits of `ASDCache` are:
78
81
  `ASDCache` is not affiliated with NIST or the NIST ASD in any way, it simply tries to help make it more accessible.
79
82
 
80
83
  ## Installing
84
+
81
85
  `ASDCache` can be installed with `pip`.
82
86
 
83
87
  ```console
@@ -99,6 +103,7 @@ Installing the `polars` feature is not required, in case `polars` is already ins
99
103
  Documentation for `ASDCache` is available on [this page](https://antoinetue.github.io/ASDCache).
100
104
 
101
105
  ### Example
106
+
102
107
  A brief example below demonstrates how to use `SpectraCache` to query the NIST ASD for spectroscopic data for different species and plot their respective relative intensities.
103
108
 
104
109
  Note that these relative intensities are in principle not comparable between different species or sources and merely serve as a guide.
@@ -24,34 +24,35 @@ classifiers = [
24
24
  "Programming Language :: Python :: 3.11",
25
25
  "Programming Language :: Python :: 3.12",
26
26
  "Programming Language :: Python :: 3.13",
27
+ "Programming Language :: Python :: 3.14",
27
28
  ]
28
- dependencies = ["requests","requests_cache", "pandas","numpy", "bs4"]
29
+ dependencies = ["requests_cache>=1.2.0", "pandas>=2.0","numpy>=1.20.3", "beautifulsoup4>=4.12"]
29
30
  dynamic = ["version"]
30
31
 
31
32
  [project.optional-dependencies]
32
- polars = ["polars"]
33
+ polars = ["polars[pandas]"]
34
+ polars-compat = ["polars[rtcompat,pandas]"]
33
35
  docs = [
34
- "mkdocs",
36
+ "properdocs>=1.6.7",
37
+ "mkdocs-material==9.7.6",
35
38
  "mkdocs-autorefs",
36
- "mkdocs-gen-files",
39
+ # "mkdocs-gen-files",
37
40
  "mkdocs-git-revision-date-localized-plugin",
38
41
  "mkdocs-include-markdown-plugin",
39
- "mkdocs-jupyter",
40
- "mkdocs-literate-nav",
41
- "mkdocs-material",
42
+ "mkdocs-jupyter>=0.26.3",
43
+ # "mkdocs-literate-nav",
42
44
  "mkdocs-section-index",
43
45
  "mkdocstrings",
44
- "mkdocstrings-python",
45
- "black"
46
+ "mkdocstrings-python-xref>=2.1.1",
47
+ "mkdocs-api-autonav",
48
+ "ruff>=0.15.13",
49
+ "pygments>=2.20.0"
46
50
  ]
47
51
 
48
52
  [project.urls]
49
53
  Documentation = "https://antoinetue.github.io/asdcache"
50
54
  Source = "https://github.com/AntoineTUE/asdcache"
51
55
 
52
- [tool.hatch.metadata]
53
- # direct dependency references, e.g `pip @ git+https://github.com/pypa/pip.git@master`
54
- allow-direct-references = true
55
56
 
56
57
  [tool.hatch.version]
57
58
  source = "vcs"
@@ -70,7 +71,7 @@ exclude = ["/.github"]
70
71
  minversion = "6.0"
71
72
  addopts = "-ra -q --doctest-glob='*.md'"
72
73
  testpaths = ["tests"]
73
- markers = ["full: test using the full NIST ASD"]
74
+ markers = ["online: run test that retrieve data online from the ASD"]
74
75
 
75
76
  [tool.coverage.run]
76
77
  branch = true
@@ -110,7 +111,7 @@ extend-exclude = ["docs/assets/scripts/gen_ref_pages.py"]
110
111
 
111
112
  [tool.ruff.lint]
112
113
  select = ["E4", "E7", "E9", "F","C4", "SIM", "NPY", "PD","B","UP","D"]
113
- ignore = ["PD901","F401"]
114
+ ignore = ["F401"]
114
115
 
115
116
  [tool.ruff.lint.pydocstyle]
116
117
  convention = "pep257"
@@ -138,16 +139,6 @@ dependencies = ["matplotlib", "ipython","ipykernel","pre-commit"]
138
139
  installer = "uv"
139
140
  features = ["polars"]
140
141
 
141
- [tool.hatch.envs.test]
142
- dependencies = [
143
- "coverage[toml]>=6.2",
144
- "pytest",
145
- "pytest-cov",
146
- "pytest-mock",
147
- "pytest-recording",
148
- "pytest-sugar",
149
- "hypothesis",
150
- ]
151
142
 
152
143
  [tool.hatch.envs.hatch-test]
153
144
  randomize = false
@@ -155,6 +146,22 @@ parallel = false # avoid cache access conflicts
155
146
  retries = 2
156
147
  retry-delay = 1
157
148
  features = ["polars"]
149
+ dependencies = [
150
+ "coverage-enable-subprocess==1.0",
151
+ 'coverage[toml]>=6.2,<7.11; python_version<"3.10"',
152
+ 'coverage[toml]~=7.11; python_version>="3.10"',
153
+ 'pytest~=8.4; python_version<"3.10"',
154
+ 'pytest~=9.0; python_version>="3.10"',
155
+ "pytest-mock~=3.12",
156
+ "pytest-randomly~=3.15",
157
+ "pytest-rerunfailures~=14.0",
158
+ "pytest-xdist[psutil]~=3.5",
159
+ 'pytest-cov~=7.1.0; python_version>="3.10"',
160
+ "pytest-recording",
161
+ "pytest-sugar~=1.1.1",
162
+ "hypothesis",
163
+ ]
164
+
158
165
 
159
166
  [tool.hatch.envs.docs]
160
167
  skip-install = true
@@ -162,13 +169,13 @@ features = ["docs"]
162
169
  dependencies = ["mike"]
163
170
 
164
171
  [tool.hatch.envs.docs.scripts]
165
- serve = "mkdocs serve -f mkdocs.yml {args}"
166
- build = "mkdocs build --clean -f mkdocs.yml {args}"
167
- ci-build = "mike deploy --config-file mkdocs.yml --update-aliases {args}"
172
+ serve = "properdocs serve -f mkdocs.yml {args}"
173
+ build = "properdocs build --clean -f mkdocs.yml {args}"
174
+ ci-build = "mike deploy --config-file mkdocs.yml {args}"
168
175
 
169
176
  [tool.hatch.envs.lint]
170
177
  template = "lint"
171
- dependencies = ["ruff>=0.7.0"]
178
+ dependencies = ["ruff>=0.15.13"]
172
179
 
173
180
  [tool.hatch.envs.lint.scripts]
174
181
  style = [
@@ -182,5 +189,35 @@ fix = [
182
189
  "style", # feedback on what is not fixable
183
190
  ]
184
191
 
192
+
193
+ [tool.hatch.envs.hatch-test.overrides]
194
+ matrix.pandas.dependencies = [
195
+ { value = "pandas>=2.0", if = ["pandas-2.x"] },
196
+ { value = "pandas>=3.0", if = ["pandas-3.x"] },
197
+ ]
198
+ matrix.numpy.dependencies = [
199
+ { value = "numpy>=2.0", if = ["numpy-2.x"] },
200
+ {value = "numpy<2.0", if = ["numpy-legacy"]},
201
+ ]
202
+ matrix.polars.features = [
203
+ { value = "polars", if = ["polars"]},
204
+ { value = "polars-compat", if = ["polars-compat"]},
205
+ ]
206
+
207
+ [[tool.hatch.envs.hatch-test.matrix]]
208
+ python = ["3.9","3.10"]
209
+ numpy = ["numpy-2.x","numpy-legacy"]
210
+ pandas = ["pandas-2.x"]
211
+ polars = ["polars","polars-compat"]
212
+
213
+ [[tool.hatch.envs.hatch-test.matrix]]
214
+ python = ["3.11","3.12","3.13", "3.14"]
215
+ numpy = ["numpy-2.x","numpy-legacy"]
216
+ pandas = ["pandas-2.x","pandas-3.x"]
217
+ polars = ["polars","polars-compat"]
218
+
185
219
  [[tool.hatch.envs.hatch-test.matrix]]
186
- python = ["3.9", "3.10", "3.11", "3.12","3.13"]
220
+ python = ["3.13", "3.14"]
221
+ numpy = ["numpy-2.x"]
222
+ pandas = ["pandas-2.x","pandas-3.x"]
223
+ polars = ["polars","polars-compat"]
@@ -1,10 +0,0 @@
1
- """ASDCache is a module to retrieve data from the NIST Atomic Spectra Database that uses caching for fast local access.
2
-
3
- To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
4
-
5
- The `SpectraCache` class acts as the entrypoint to retrieve this data.
6
- """
7
-
8
- from .ASDCache import SpectraCache, BibCache
9
-
10
- __all__ = ["SpectraCache", "BibCache"]
@@ -1,21 +0,0 @@
1
- # file generated by setuptools-scm
2
- # don't change, don't track in version control
3
-
4
- __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
-
6
- TYPE_CHECKING = False
7
- if TYPE_CHECKING:
8
- from typing import Tuple
9
- from typing import Union
10
-
11
- VERSION_TUPLE = Tuple[Union[int, str], ...]
12
- else:
13
- VERSION_TUPLE = object
14
-
15
- version: str
16
- __version__: str
17
- __version_tuple__: VERSION_TUPLE
18
- version_tuple: VERSION_TUPLE
19
-
20
- __version__ = version = '0.2.2'
21
- __version_tuple__ = version_tuple = (0, 2, 2)
File without changes
File without changes