PyPI - ASDCache - Versions diffs - 0.2.0__tar.gz → 0.2.3__tar.gz - Mend

ASDCache 0.2.0tar.gz → 0.2.3tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

{asdcache-0.2.0 → asdcache-0.2.3}/ASDCache/ASDCache.py +208 -124
asdcache-0.2.3/ASDCache/__init__.py +61 -0
asdcache-0.2.3/ASDCache/_version.py +24 -0
{asdcache-0.2.0 → asdcache-0.2.3}/PKG-INFO +17 -16
{asdcache-0.2.0 → asdcache-0.2.3}/pyproject.toml +59 -32
asdcache-0.2.0/ASDCache/__init__.py +0 -10
asdcache-0.2.0/ASDCache/_version.py +0 -16
{asdcache-0.2.0 → asdcache-0.2.3}/.gitignore +0 -0
{asdcache-0.2.0 → asdcache-0.2.3}/LICENSE +0 -0

{asdcache-0.2.0 → asdcache-0.2.3}/ASDCache/ASDCache.py RENAMED Viewed

@@ -1,43 +1,12 @@
-r"""`ASDcache` is a module to fetch data from the  NIST Atomic Spectra Database (ASD), utlizing caching for fast responses.
+"""The ASDCache module.
-To make the most use out of the cache, `ASDcache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
-Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
-The benefit of this format is that it is more 'machine readable' than the formatted ASCII of HTML options.
-This means it requires far less bespoke parsing to get rid of 'human readable' features such as repeated page column headers, or empty lines.
-To ensure a consistent schema of the retrieved data, lines are always retrieved as a function of wavelength, using `vacuum wavelength`, even between 200 to 2000 nm.
-Wavenumbers and Ritz wavelength will be included in the response.
-In the range $5000 \mathrm{cm}^{-1}<\nu<50000 \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][ASDcache.readASD.ASDCache.wn_to_n_refractive]).
-This is consistent with the approach of the ASD.
-Each response from the NIST page is cached (1 week by default) on the local system.
-This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
-As an example: reading all spectra between 200 and 1000 nm can take over 2 minutes without using the cache, but can be as fast as 0.2 seconds using the `polars` backend.
-In addition, it means that an internet connection is not required after initial data fetching.
-The cached response is only updated upon succesfull retrieval of a new response of the NIST page.
-If unable to succesfully fetch new data, we fall back to a 'stale' cached response.
-The cache can be shared to another system, to give offline/airgapped systems access to the same data.
-To that end, the file `NIST_ASD_cache.sqlite` in the user's cache directory has to be copied over.
-The standard cache directories are as follows:
-=== "Windows"
-    `%USERPROFILE%/AppData/Local`
-=== "Linux"
-    `~/.cache/http_cache/`
-=== "MacOS"
-    `/Users/user/Library/Caches/http_cache/`
-Queries to the NIST ASD are hashed by the keys (or parameters) of the requests.
-This means that any change to either one of these parameters, will result in a new cache entry, even if the returned data is equivalent.
+It contains both the [SpectraCache][(m).] and [BibCache][(m).] classes which allow you to interact with the ASD and the relevant bibliographic databases.
 """
-import importlib
+import importlib.util
 import warnings
 import pandas as pd
-from requests_cache import CachedSession, CachedResponse
+from requests_cache import CachedSession, Response
 from io import StringIO
 from datetime import timedelta
 import re
@@ -45,7 +14,7 @@ import numpy as np
 from bs4 import BeautifulSoup
 import sys
 import logging
-from typing import Any, Optional
+from typing import Any, Optional, Union
 if importlib.util.find_spec("polars"):
     POLARS_AVAILABLE = True
@@ -61,6 +30,46 @@ logging.basicConfig(
     stream=sys.stdout,
 )
+ASDSchema = {
+    "element": str,
+    "sp_num": int,
+    "obs_wl_vac(nm)": float,
+    "unc_obs_wl": float,
+    "obs_wl_air(nm)": float,
+    "ritz_wl_vac(nm)": float,
+    "unc_ritz_wl": float,
+    "ritz_wl_air(nm)": float,
+    "wn(cm-1)": float,
+    "intens": float,
+    "Aki(s^-1)": float,
+    "fik": float,
+    "S(a.u.)": float,
+    "log_gf": float,
+    "Acc": str,
+    "Ei(cm-1)": float,
+    "Ek(cm-1)": float,
+    "conf_i": str,
+    "term_i": str,
+    "J_i": str,
+    "conf_k": str,
+    "term_k": str,
+    "J_k": str,
+    "g_i": float,
+    "g_k": float,
+    "Type": str,
+    "tp_ref": str,
+    "line_ref": str,
+}
+STATE_EXPR = r"spectra=([\w]+)\+?([IVX]+)?"
+"""Regex pattern for extracting (element,charge) tuple for a single-state query, which uses roman numerals."""
+SCI_EXPR = r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)"
+"""Regex pattern for processing scientific notation"""
+class ASDQueryError(Exception):
+    """Exception raised when the NIST ASD has indicated an error with a query."""
 class SpectraCache:
     """A class acting as the entrypoint to retrieve data from the NIST Atomic Spectra Database that uses caching.
@@ -79,6 +88,7 @@ class SpectraCache:
     nist_url = "https://physics.nist.gov/cgi-bin/ASD/lines1.pl"
     species_expr = re.compile(r"spectra=([\w\+\-\%3]+)&")
     query_params = {
+        "submit": "Retrieve Data",
         "unit": 1,
         "de": 0,
         "plot_out": 0,
@@ -107,10 +117,11 @@ class SpectraCache:
         "enrg_out": "on",
         "J_out": "on",
         "g_out": "on",
-        "diag_out": "on",
+        # "diag_out": "on",  # avoid diagnostic data, it leads to multi-species queries failing; which can appear as if keys below are needed. See issue #1
         "allowed_out": 1,
         "forbid_out": 1,
-        "submit": "Retrieve Data",
+        # "show_diff_obs_calc": 1, # Does not appear mandatory in retrospect,  see issue #1
+        # "include_Ritz_E1": 1, # Does not appear mandatory in retrospect,  see issue #1
     }
     """Request parameters used by the NIST ASD form."""
     column_order = [
@@ -148,7 +159,6 @@ class SpectraCache:
     def __init__(self, use_polars_backend=False, cache_expiry=timedelta(weeks=1), strict_matching=True):
         """Initialize an instance that handles cached data lookup of the NIST ASD."""
         self.strict_matching = strict_matching
-        self.cache_expiry = cache_expiry
         self.session = CachedSession(
             "NIST_ASD_cache",
             use_cache_dir=True,
@@ -165,13 +175,82 @@ class SpectraCache:
         self.known_species = self.list_cached_species()
+    @property
+    def cache_expiry(self) -> timedelta:
+        """The cache expiry time.
+        Queries that are older than this time are considered stale and marked for updating, by querying the NIST ASD.
+        In case the query for new data fails, the stale, cached response will still be parsed.
+        """
+        return self.session.settings.expire_after
+    def set_cache_expiry(self, new: Optional[timedelta] = None, **kwargs):
+        """Set the cache expiry to a different interval (default: 1 week).
+        Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
+        """
+        if new is None:
+            new = timedelta(**kwargs)
+        self.session.settings.expire_after = new
     @staticmethod
-    def _check_response_success(response: "CachedResponse") -> bool:
+    def _check_response_success(response: Response) -> bool:
         """Validate that data has been fetched succesfully.
         If this check fails, the cache should not update with this response, even when marked as stale.
+        The first obvious way to check success is if an error is indicated by the HTTP status code.
+        However, when a query for data is incorrect, the NIST ASD returns an HTML page indicating `<title>NIST ASD : Input Error</title>` in the `<head>` tag, or "Error Message".
+        A successful query would not receive HTML as a response, but raw ASCII values instead.
+        We can thus check for the start of an HTML document.
+        Note that this only works for data queries, not for bibliographic metadata by `BibCache`.
         """
-        return (response.status_code == 200) & (b"Error Message" not in response.content)
+        return not (
+            not response.ok or response.content.startswith(b"<!DOCTYPE") or b"Error Message" in response.content
+        )
+    def _get_data(self, species: str, wl_range: tuple[float, float] = (170, 1000), **kwargs) -> Response:
+        """Retrieve raw, ASCII-formatted data from the NIST ASD with a GET request.
+        To retrieve data and parse it into a DataFrame, use [fetch][..] instead.
+        Returns the raw response, which will be cached if it contains valid data (see [_check_response_success][..]).
+        If the response does not contain ASCII data, but HTML instead, an [ASDQueryError][(m).] will be raised.
+        It is possible to override any standard query parameter (see [query_params][..]) by passing them as kwargs.
+        """
+        query_params = {
+            "spectra": species,
+            "output_type": 0,
+            "low_w": min(wl_range),
+            "upp_w": max(wl_range),
+            **{k: v for k, v in self.query_params.items() if k not in kwargs},
+            **{k: v for k, v in kwargs.items() if k in self.query_params},
+        }
+        response: Response = self.session.get(self.nist_url, params=query_params)
+        response.raise_for_status()
+        # Check if response is not a HTML document instead of ASCII formatted data, indicating query error.
+        if response.content.startswith(b"<!DOCTYPE"):
+            logging.error(
+                "NIST ASD responded with HTML instead of ASCII-data for species=%s, wl_range=%s\nQuery: %s",
+                species,
+                wl_range,
+                response.url,
+            )
+            raise ASDQueryError(
+                f"Query for {species=} {wl_range=} did not receive ASCII-data. This means the ASD could not interpret your query. Check if your query is malformed."
+            )
+        return response
+    @property
+    def cached_species(self) -> list[str]:
+        """A list of all cached species."""
+        return self.list_cached_species()
     def list_cached_species(self) -> list[str]:
         """List all species in the cache, based on the string of the original query URL."""
@@ -181,35 +260,21 @@ class SpectraCache:
             for elem in self.species_expr.search(u).group(1).split("%3B")
         ]
-    def fetch(self, species, wl_range=(170, 1000), **kwargs) -> "pd.DataFrame|pl.DataFrame|CachedResponse":
+    def fetch(self, species, wl_range=(170, 1000), **kwargs) -> "pd.DataFrame|pl.DataFrame":
         """Fetch information on a species from the ASD, first checking the cache.
-        This supports loading multiple species in one go by using the same notation as the NIST ASD page.
+        This supports loading multiple species in one go by using the same notation as the NIST ASD form.
         Note however that cache keys are computed for unique options for `species` and `wl_range`.
         This means that you won't get caching benefits by using different queries.
-        In other words: the cache cannot deduplicate queries such as `ASD.fetch('H', (200,1000))` followed by `ASD.fetch('H I', (650,660))`.
+        In other words: the cache cannot deduplicate queries such as `ASD.fetch('H', (200,1000))` followed by `ASD.fetch('H I', (650,660))` (or vice versa).
         Both these operations will fetch data online and be stored as separate cache entries.
         """
-        query_params = {
-            "spectra": species,
-            "output_type": 0,
-            "low_w": min(wl_range),
-            "upp_w": max(wl_range),
-            **self.query_params,
-        }
-        response = self.session.get(self.nist_url, params=query_params)
-        # if response.status_code == 200:
-        response.raise_for_status()
+        response = self._get_data(species, wl_range, **kwargs)
         return self.create_dataframe(response)
-        # else:
-        #     print(f"Error: Received status code {response.status_code}")
-        #     print(response.url)
-        #     return response
     def create_dataframe(self, response) -> "pd.DataFrame|pl.DataFrame":
         """Create a dataframe from the (cached) NIST ASD response, using the chosen backend at class instantiation."""
@@ -218,7 +283,7 @@ class SpectraCache:
         return self._from_pandas(response)
     @classmethod
-    def _from_pandas(cls, response: "CachedResponse") -> "pd.DataFrame":
+    def _from_pandas(cls, response: Response) -> "pd.DataFrame":
         r"""Transform a (cached) NIST ASD response into a pandas DataFrame.
         Calculates the air equivalent wavelength from the vacuum wavelength using the same Sellmeier equation as the NIST ASD.
@@ -247,12 +312,24 @@ class SpectraCache:
             "g_k": float,
             "J_i": str,
             "J_k": str,
+            "Type": str,
+            "tp_ref": str,
+            "line_ref": str,
             "": str,
         }
         df = pd.read_csv(StringIO(response.text), sep="\t", dtype=schema)
+        # Detect if pandas uses new `StringDtype`, or legacy `object` dtype for strings.
+        # This affects NaN handling for strings.
+        # Pandas 3.0 and up use the StringDtype, while pandas 2 can opt-in to this
+        # The 'Type' column should exist, 'element' may not.
+        uses_new_string_dtype = pd.api.types.is_string_dtype(df["Type"])
         for col in ["obs_wl_vac(nm)", "ritz_wl_vac(nm)", "intens", "Ei(cm-1)", "Ek(cm-1)"]:
-            df[col] = df.loc[:, col].str.extract(r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)").astype(float)
-        df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
+            df[col] = df.loc[:, col].str.extract(SCI_EXPR).astype(float)
+        # Any missing value implies line is an E1 (electric dipole) transition
+        if uses_new_string_dtype:
+            df["Type"] = df.loc[:, "Type"].fillna("E1")
+        else:
+            df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
         df["tp_ref"] = df.loc[:, "tp_ref"].fillna("")
         df["obs_wl_air(nm)"] = df["obs_wl_vac(nm)"]
         df["obs_wl_air(nm)"] = df[df["wn(cm-1)"].between(5000, 50000)]["obs_wl_air(nm)"] / cls.wn_to_n_refractive(
@@ -264,21 +341,18 @@ class SpectraCache:
         )
         df = df.drop([c for c in df.columns if "Unnamed" in c], axis=1).reset_index(drop=True)
         if "element" not in df.columns:
-            expr = re.compile(r"spectra=([\w]+)\+?([IVX]+)?")
-            element, numeral = expr.search(response.url).groups()
-            df["element"] = element
-            df["sp_num"] = numeral
             # cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
-            df["sp_num"] = df["sp_num"].map(cls.roman_to_int)
-        df = (
-            df.assign(unc_obs_wl=df["unc_obs_wl"].astype(float), unc_ritz_wl=df["unc_ritz_wl"].astype(float))
-            if "unc_obs_wl" in df.columns
-            else df.assign(unc_obs_wl=np.nan, unc_ritz_wl=np.nan)
-        )
+            # As 'element' and 'sp_num' columns are only missing for single-species queries, assign as constants, not vectors.
+            element, numeral = re.search(STATE_EXPR, response.url).groups()
+            numeric: int = cls.roman_to_int(numeral)
+            df["element"] = element
+            df["sp_num"] = numeric
+        df["unc_obs_wl"] = pd.to_numeric(df["unc_obs_wl"]) if "unc_obs_wl" in df.columns else np.nan
+        df["unc_ritz_wl"] = pd.to_numeric(df["unc_ritz_wl"]) if "unc_ritz_wl" in df.columns else np.nan
         return df.loc[:, cls.column_order]
     @classmethod
-    def _from_polars(cls, response: "CachedResponse") -> "pl.DataFrame":
+    def _from_polars(cls, response: Response) -> "pl.DataFrame":
         r"""Transform a (cached) NIST ASD response into a polars DataFrame.
         Calculates the air equivalent wavelength from the vacuum wavelength using the same Sellmeier equation as the NIST ASD.
@@ -309,28 +383,25 @@ class SpectraCache:
             "J_k": pl.String,
             "": pl.String,
         }
-        # annotation_chars_to_strip = "(?i)()[]?*w,bGhilmprsq:+xzgacHd "
-        df = (
-            pl.read_csv(
-                StringIO(response.text),
-                separator="\t",
-                schema_overrides=schema,
-                null_values="",
-            )
-            .with_columns(
-                pl.col("obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens")
-                # .str.strip_chars(annotation_chars_to_strip).str.replace("&dagger;", "", literal=True)
-                .str.extract(r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)")
-                # .str.extract(r"([+-]?\d*\.?\d+e[+-]?\d+)")
-                .replace("", None)
-                .cast(pl.Float64),
-                pl.col("ritz_wl_vac(nm)").str.strip_chars('"+*').replace("", None).cast(pl.Float64),
-                pl.col("S(a.u.)").cast(pl.Float64),
-                pl.col("Type").replace(None, "E1"),
-                pl.col("tp_ref").replace(None, ""),
-            )
-            .drop([""])
-        ).with_columns(
+        df = pl.read_csv(
+            StringIO(response.text),
+            separator="\t",
+            schema_overrides=schema,
+            null_values="",
+        )
+        sci_cols = ["obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens", "ritz_wl_vac(nm)"]
+        cast_to_scientific_notation = [
+            pl.col(c).str.extract(SCI_EXPR).replace("", None).cast(pl.Float64).alias(c) for c in sci_cols
+        ]
+        df = df.with_columns(
+            *cast_to_scientific_notation,
+            pl.col("S(a.u.)").cast(pl.Float64),
+            pl.col("Type").replace(None, "E1"),
+            pl.col("tp_ref").replace(None, ""),
+        ).drop([""])
+        # compute air wavelengths between 5000 cm-1 and 50000 cm-1
+        df = df.with_columns(
             pl.when(pl.col("wn(cm-1)").is_between(5000, 50000))
             .then(
                 pl.col("obs_wl_vac(nm)").cast(pl.Float64)
@@ -349,24 +420,12 @@ class SpectraCache:
             .alias("ritz_wl_air(nm)"),
         )
         if "element" not in df.columns:
-            expr = re.compile(r"spectra=([\w]+)\+?([IVX]+)?")
-            element, numeral = expr.search(response.url).groups()
+            element, numeral = re.search(STATE_EXPR, response.url).groups()
+            numeric: int = cls.roman_to_int(numeral) if numeral else 1
             # cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
-            df = df.with_columns(
-                pl.lit(element).alias("element"),
-                pl.lit("I" if numeral is None else numeral)
-                .cast(pl.String)
-                .alias("sp_num")
-                .map_elements(cls.roman_to_int, return_dtype=pl.Int64),
-            )
-        df = (
-            df.with_columns(pl.col("unc_obs_wl").cast(pl.Float64), pl.col("unc_ritz_wl").cast(pl.Float64))
-            if "unc_obs_wl" in df.columns
-            else df.with_columns(
-                pl.lit(None).cast(pl.Float64).alias("unc_obs_wl"), pl.lit(None).cast(pl.Float64).alias("unc_ritz_wl")
-            )
-        )
+            df = df.with_columns(pl.lit(element).alias("element"), pl.lit(numeric, dtype=pl.Int64).alias("sp_num"))
+        exprs = [pl.col(c).cast(pl.Float64) for c in ["unc_obs_wl", "unc_ritz_wl"] if c in df.columns]
+        df = df.with_columns(exprs)
         return df.select(*cls.column_order)
     @staticmethod
@@ -408,8 +467,16 @@ class SpectraCache:
         """Retrieve all cached data into a single dataframe."""
         cached_frames = [self.create_dataframe(cached) for cached in self.session.cache.filter()]
         if self.use_polars:
-            return pl.concat(cached_frames).unique()
-        return pd.concat(cached_frames).drop_duplicates().reset_index(drop=True)
+            return (
+                pl.concat(cached_frames).unique()
+                if len(cached_frames) > 0
+                else pl.DataFrame({k: [] for k in ASDSchema}, schema=ASDSchema)
+            )
+        return (
+            pd.concat(cached_frames).drop_duplicates().reset_index(drop=True)
+            if len(cached_frames) > 0
+            else pd.DataFrame({k: pd.Series(dtype=v) for k, v in ASDSchema.items()})
+        )
 class BibCache:
@@ -428,7 +495,6 @@ class BibCache:
     def __init__(self, cache_expiry=timedelta(weeks=1)):
         """Initialize an instance that handles cached retrieval of ASD bibliographic references."""
-        self.cache_expiry = cache_expiry
         self.session = CachedSession(
             "NIST_ASD_Bibliography_cache",
             use_cache_dir=True,
@@ -438,8 +504,26 @@ class BibCache:
             ignored_parameters=["element", "spectr_charge", "type", "ref"],
         )
+    @property
+    def cache_expiry(self) -> timedelta:
+        """The cache expiry time.
+        Queries that are older than this time are considered stale and marked for updating, by querying the NIST ASD.
+        In case the query for new data fails, the stale, cached response will still be parsed.
+        """
+        return self.session.settings.expire_after
+    def set_cache_expiry(self, new: Optional[timedelta] = None, **kwargs):
+        """Set the cache expiry to a different interval (default: 1 week).
+        Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
+        """
+        if new is None:
+            new = timedelta(**kwargs)
+        self.session.settings.expire_after = new
     @staticmethod
-    def _check_response_success(response: "CachedResponse") -> bool:
+    def _check_response_success(response: Response) -> bool:
         """Validate that data has been fetched succesfully.
         If this check fails, the cache should not update with this response, even when marked as stale.
@@ -454,12 +538,12 @@ class BibCache:
         r"""Parse a reference code from the NIST ASD into the constituent parts that can be used to look up references.
         Args:
-            * reference_code (str): A NIST ASD bibliographic reference string, such as `L13456n3`, or `T6936n`.
+            reference_code (str): A NIST ASD bibliographic reference string, such as `L13456n3`, or `T6936n`.
         Returns:
-            * db    (str)   :   A label for which bibliographic database to target
-            * ref   (str)   :   The database ID for the reference to look up
-            * comment (str) :   An additional comment included in the reference, can be fetched separately.
+            db (str):   A label for which bibliographic database to target
+            ref (str|None):   The database ID for the reference to look up
+            comment (str):   An additional comment included in the reference, can be fetched separately.
         """
         if reference_code.startswith("n"):
             db, ref, comment = "T", None, "n"
@@ -474,12 +558,12 @@ class BibCache:
         """Look up a reference code for a given element state.
         Args:
-            element (str)           :   The element name, e.g. `H`
-            sp_num (int)            :   The ionization state of the element, with 1 corresponding to the atom
-            reference_code (str)    :   The bibliographic reference code from the ASD columns `tp_ref` or `line_ref`.
+            element (str):   The element name, e.g. `H`
+            sp_num (int):   The ionization state of the element, with 1 corresponding to the atom
+            reference_code (str):   The bibliographic reference code from the ASD columns `tp_ref` or `line_ref`.
         Returns:
-            bib_data (dict)         : A dictionary containing bibliographic metadata for the reference, if available/applicable. Contains a url to look it up.
+            bib_data (dict[str,Any]): A dictionary containing bibliographic metadata for the reference, if available/applicable. Contains a url to look it up.
         """
         db, ref, comment = self.parse_reference_code(reference_code)
         params = {

asdcache-0.2.3/ASDCache/__init__.py ADDED Viewed

@@ -0,0 +1,61 @@
+r"""`ASDcache` is a package to fetch data from the NIST Atomic Spectra Database (ASD), utilizing caching for fast responses.
+To make the most use out of the cache, `ASDcache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
+Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
+The benefit of this format is that it is more 'machine readable' than the formatted ASCII of HTML options.
+This means it requires far less bespoke parsing to get rid of 'human readable' features such as repeated page column headers, or empty lines.
+## Air wavelength
+To ensure a consistent schema of the retrieved data, lines are always retrieved as a function of wavelength, using `vacuum wavelength`, even between 200 to 2000 nm.
+Wavenumbers and Ritz wavelength will be included in the response.
+In the range $5000\ \mathrm{cm}^{-1}<\nu<50000\ \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][.SpectraCache.wn_to_n_refractive]).
+This is consistent with the approach of the ASD.
+## Making use of the cache
+Each response from the NIST page is cached (1 week by default) on the local system.
+This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
+As an example: retrieving and parsing the data for all spectra between 200 and 1000 nm can take over 2 minutes without using the cache, but can be as fast as 0.2 seconds using the `polars` backend.
+In addition, it means that an internet connection is not required after initial data fetching.
+The cached response is only updated upon successful retrieval of a new response of the NIST page.
+If unable to successfully fetch new data, we fall back to a 'stale' cached response.
+The cache can be shared to another system, to give offline/airgapped systems access to the same data.
+To that end, the file `NIST_ASD_cache.sqlite` in the user's cache directory has to be copied over.
+### Default cache locations
+The standard cache directories are as follows:
+=== "Windows"
+    `%USERPROFILE%/AppData/Local`
+=== "Linux"
+    `~/.cache/http_cache/`
+=== "MacOS"
+    `/Users/user/Library/Caches/http_cache/`
+### Cache keys and uniqueness
+Queries to the NIST ASD are hashed by the keys (or parameters) of the requests.
+This means that any change to either one of these parameters, will result in a new cache entry, even if the returned data is equivalent.
+In other words: the cache cannot deduplicate queries such as `SpectraCache().fetch('H', (200,1000))` followed by `SpectraCache().fetch('H I', (650,660))` (or vice versa).
+It is often better (and faster) to fetch a range of data beyond what you need, and then filter down the dataframe you retrieve according to your needs.
+"""
+from .ASDCache import SpectraCache, BibCache
+__all__ = ["SpectraCache", "BibCache"]

asdcache-0.2.3/ASDCache/_version.py ADDED Viewed

@@ -0,0 +1,24 @@
+# file generated by vcs-versioning
+# don't change, don't track in version control
+from __future__ import annotations
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
+version: str
+__version__: str
+__version_tuple__: tuple[int | str, ...]
+version_tuple: tuple[int | str, ...]
+commit_id: str | None
+__commit_id__: str | None
+__version__ = version = '0.2.3'
+__version_tuple__ = version_tuple = (0, 2, 3)
+__commit_id__ = commit_id = None

{asdcache-0.2.0 → asdcache-0.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ASDCache
-Version: 0.2.0
+Version: 0.2.3
 Summary: A Python module to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling
 Project-URL: Documentation, https://antoinetue.github.io/asdcache
 Project-URL: Source, https://github.com/AntoineTUE/asdcache
@@ -12,32 +12,33 @@ Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering
 Requires-Python: >=3.9
-Requires-Dist: bs4
-Requires-Dist: numpy
-Requires-Dist: pandas
-Requires-Dist: requests
-Requires-Dist: requests-cache
+Requires-Dist: beautifulsoup4>=4.12
+Requires-Dist: numpy>=2.0
+Requires-Dist: pandas>=2.0
+Requires-Dist: requests-cache>=1.2.0
 Provides-Extra: docs
-Requires-Dist: black; extra == 'docs'
-Requires-Dist: mkdocs; extra == 'docs'
+Requires-Dist: mkdocs-api-autonav; extra == 'docs'
 Requires-Dist: mkdocs-autorefs; extra == 'docs'
-Requires-Dist: mkdocs-gen-files; extra == 'docs'
 Requires-Dist: mkdocs-git-revision-date-localized-plugin; extra == 'docs'
 Requires-Dist: mkdocs-include-markdown-plugin; extra == 'docs'
-Requires-Dist: mkdocs-jupyter; extra == 'docs'
-Requires-Dist: mkdocs-literate-nav; extra == 'docs'
-Requires-Dist: mkdocs-material; extra == 'docs'
+Requires-Dist: mkdocs-jupyter>=0.26.3; extra == 'docs'
+Requires-Dist: mkdocs-material==9.7.6; extra == 'docs'
 Requires-Dist: mkdocs-section-index; extra == 'docs'
 Requires-Dist: mkdocstrings; extra == 'docs'
-Requires-Dist: mkdocstrings-python; extra == 'docs'
+Requires-Dist: mkdocstrings-python-xref>=2.1.1; extra == 'docs'
+Requires-Dist: properdocs>=1.6.7; extra == 'docs'
+Requires-Dist: pygments>=2.20.0; extra == 'docs'
+Requires-Dist: ruff>=0.15.13; extra == 'docs'
 Provides-Extra: polars
-Requires-Dist: polars; extra == 'polars'
+Requires-Dist: polars[pandas]; extra == 'polars'
+Provides-Extra: polars-lts
+Requires-Dist: polars[pandas,rtcompat]; extra == 'polars-lts'
 Description-Content-Type: text/markdown
 # ASDCache
@@ -50,7 +51,7 @@ Description-Content-Type: text/markdown
 [![GitHub Workflow Status docs](https://img.shields.io/github/actions/workflow/status/AntoineTUE/ASDCache/documentation.yml?label=Documentation%20build)](https://antoinetue.github.io/ASDCache)
 [![PyPI - Version](https://img.shields.io/pypi/v/ASDCache)](https://pypi.python.org/pypi/ASDCache)
 [![PyPI - Python versions](https://img.shields.io/pypi/pyversions/ASDCache.svg)](https://pypi.python.org/pypi/ASDCache)
-[![PyPI - Downloads](https://img.shields.io/pypi/dw/ASDCache)](https://pypistats.org/packages/ASDCache)
+[![PyPI - Downloads](https://img.shields.io/pypi/dm/ASDCache)](https://pypistats.org/packages/asdcache)
 [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
 [![Hatch project](https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg)](https://github.com/pypa/hatch)

{asdcache-0.2.0 → asdcache-0.2.3}/pyproject.toml RENAMED Viewed

@@ -19,38 +19,38 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "Intended Audience :: Science/Research",
     "Operating System :: OS Independent",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
 ]
-dependencies = ["requests","requests_cache", "pandas","numpy", "bs4"]
+dependencies = ["requests_cache>=1.2.0", "pandas>=2.0","numpy>=2.0", "beautifulsoup4>=4.12"]
 dynamic = ["version"]
 [project.optional-dependencies]
-polars = ["polars"]
+polars = ["polars[pandas]"]
+polars-lts = ["polars[rtcompat,pandas]"]
 docs = [
-    "mkdocs",
+    "properdocs>=1.6.7",
+    "mkdocs-material==9.7.6",
     "mkdocs-autorefs",
-    "mkdocs-gen-files",
+    # "mkdocs-gen-files",
     "mkdocs-git-revision-date-localized-plugin",
     "mkdocs-include-markdown-plugin",
-    "mkdocs-jupyter",
-    "mkdocs-literate-nav",
-    "mkdocs-material",
+    "mkdocs-jupyter>=0.26.3",
+    # "mkdocs-literate-nav",
     "mkdocs-section-index",
     "mkdocstrings",
-    "mkdocstrings-python",
-    "black"
+    "mkdocstrings-python-xref>=2.1.1",
+    "mkdocs-api-autonav",
+    "ruff>=0.15.13",
+    "pygments>=2.20.0"
 ]
 [project.urls]
 Documentation = "https://antoinetue.github.io/asdcache"
 Source = "https://github.com/AntoineTUE/asdcache"
-[tool.hatch.metadata]
-# direct dependency references, e.g `pip @ git+https://github.com/pypa/pip.git@master`
-allow-direct-references = true
 [tool.hatch.version]
 source = "vcs"
@@ -109,7 +109,7 @@ extend-exclude = ["docs/assets/scripts/gen_ref_pages.py"]
 [tool.ruff.lint]
 select = ["E4", "E7", "E9", "F","C4", "SIM", "NPY", "PD","B","UP","D"]
-ignore = ["PD901","F401"]
+ignore = ["F401"]
 [tool.ruff.lint.pydocstyle]
 convention = "pep257"
@@ -131,29 +131,36 @@ fragments = [
 cache-keys = [{ git = true }]
 [tool.hatch.envs.default]
-python = "3.9"
+python = "3.12"
 post-install-commands = ["pre-commit install"]
-dependencies = ["matplotlib", "ipython","ipykernel","ruff"]
+dependencies = ["matplotlib", "ipython","ipykernel","pre-commit"]
 installer = "uv"
 features = ["polars"]
-[tool.hatch.envs.test]
-dependencies = [
-    "coverage[toml]>=6.2",
-    "pytest",
-    "pytest-cov",
-    "pytest-mock",
-    "pytest-recording",
-    "pytest-sugar",
-    "hypothesis",
-]
 [tool.hatch.envs.hatch-test]
 randomize = false
 parallel = false # avoid cache access conflicts
 retries = 2
-retry-delay = 2
+retry-delay = 1
 features = ["polars"]
+dependencies = [
+    "coverage-enable-subprocess==1.0",
+    'coverage[toml]>=6.2,<7.11; python_version<"3.10"',
+    'coverage[toml]~=7.11; python_version>="3.10"',
+    'pytest~=8.4; python_version<"3.10"',
+    'pytest~=9.0; python_version>="3.10"',
+    "pytest-mock~=3.12",
+    "pytest-randomly~=3.15",
+    "pytest-rerunfailures~=14.0",
+    "pytest-xdist[psutil]~=3.5",
+    'pytest-cov~=7.1.0; python_version>="3.10"',
+    "pytest-recording",
+    "pytest-sugar~=1.1.1",
+    "hypothesis",
+    'virtualenv<21; python_version<"3.10"',
+]
 [tool.hatch.envs.docs]
 skip-install = true
@@ -161,13 +168,13 @@ features = ["docs"]
 dependencies = ["mike"]
 [tool.hatch.envs.docs.scripts]
-serve = "mkdocs serve -f mkdocs.yml {args}"
-build = "mkdocs build --clean -f mkdocs.yml {args}"
-ci-build = "mike deploy --config-file mkdocs.yml --update-aliases {args}"
+serve = "properdocs serve -f mkdocs.yml {args}"
+build = "properdocs build --clean -f mkdocs.yml {args}"
+ci-build = "mike deploy --config-file mkdocs.yml {args}"
 [tool.hatch.envs.lint]
 template = "lint"
-dependencies = ["ruff>=0.7.0"]
+dependencies = ["ruff>=0.15.13"]
 [tool.hatch.envs.lint.scripts]
 style = [
@@ -181,5 +188,25 @@ fix = [
     "style",  # feedback on what is not fixable
 ]
+[tool.hatch.envs.hatch-test.overrides]
+matrix.pandas.dependencies = [
+    { value = "pandas>=2.0.0", if = ["pandas-2.0"] },
+    { value = "numpy>=2.0", if = ["pandas-2.0"] },
+    { value = "pandas>=3.0.0", if = ["pandas-3.0"] },
+    { value = "numpy>=2.0", if = ["pandas-3.0"] },
+]
+matrix.polars.features = [
+    { value = "polars", if = ["polars"]},
+    { value = "polars-lts", if = ["polars-lts"]},
+]
+[[tool.hatch.envs.hatch-test.matrix]]
+python = ["3.9","3.10"]
+pandas = ["pandas-2.0"]
+polars = ["polars","polars-lts"]
 [[tool.hatch.envs.hatch-test.matrix]]
-python = ["3.9", "3.10", "3.11", "3.12","3.13"]
+python = ["3.11","3.12","3.13"]
+pandas = ["pandas-2.0","pandas-3.0"]
+polars = ["polars","polars-lts"]

asdcache-0.2.0/ASDCache/__init__.py DELETED Viewed

@@ -1,10 +0,0 @@
-"""ASDCache is a module to retrieve data from the NIST Atomic Spectra Database that uses caching for fast local access.
-To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
-The `SpectraCache` class acts as the entrypoint to retrieve this data.
-"""
-from .ASDCache import SpectraCache, BibCache
-__all__ = ["SpectraCache", "BibCache"]

asdcache-0.2.0/ASDCache/_version.py DELETED Viewed

@@ -1,16 +0,0 @@
-# file generated by setuptools_scm
-# don't change, don't track in version control
-TYPE_CHECKING = False
-if TYPE_CHECKING:
-    from typing import Tuple, Union
-    VERSION_TUPLE = Tuple[Union[int, str], ...]
-else:
-    VERSION_TUPLE = object
-version: str
-__version__: str
-__version_tuple__: VERSION_TUPLE
-version_tuple: VERSION_TUPLE
-__version__ = version = '0.2.0'
-__version_tuple__ = version_tuple = (0, 2, 0)

{asdcache-0.2.0 → asdcache-0.2.3}/.gitignore RENAMED Viewed

File without changes

{asdcache-0.2.0 → asdcache-0.2.3}/LICENSE RENAMED Viewed

File without changes

ASDCache 0.2.0__tar.gz → 0.2.3__tar.gz

ASDCache 0.2.0__tar.gz → 0.2.3__tar.gz