PyPI - ASDCache - Versions diffs - 0.2.0__tar.gz - Mend

ASDCache 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

asdcache-0.2.0/.gitignore +115 -0
asdcache-0.2.0/ASDCache/ASDCache.py +538 -0
asdcache-0.2.0/ASDCache/__init__.py +10 -0
asdcache-0.2.0/ASDCache/_version.py +16 -0
asdcache-0.2.0/LICENSE +21 -0
asdcache-0.2.0/PKG-INFO +137 -0
asdcache-0.2.0/pyproject.toml +185 -0

asdcache-0.2.0/.gitignore ADDED Viewed

@@ -0,0 +1,115 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# pyenv
+.python-version
+# celery beat schedule file
+celerybeat-schedule
+# SageMath parsed files
+*.sage.py
+# dotenv
+.env
+# virtualenv
+.venv
+venv/
+ENV/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+# IDE settings
+.vscode/
+.idea/
+# Version file generated by hatch-vcs
+src/ASDcache/_version.py
+# MacOS files
+.DS_Store
+dev.ipynb
+.ruff_cache

asdcache-0.2.0/ASDCache/ASDCache.py ADDED Viewed

@@ -0,0 +1,538 @@
+r"""`ASDCache` is a module to fetch data from the NIST Atomic Spectra Database (ASD), utilizing caching for fast responses.
+To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
+Data is initially fetched from the online published NIST page, using the tab-separated ASCII output format.
+The benefit of this format is that it is more 'machine readable' than the formatted ASCII of HTML options.
+This means it requires far less bespoke parsing to get rid of 'human readable' features such as repeated page column headers, or empty lines.
+To ensure a consistent schema of the retrieved data, lines are always retrieved as a function of wavelength, using `vacuum wavelength`, even between 200 to 2000 nm.
+Wavenumbers and Ritz wavelength will be included in the response.
+In the range $5000 \mathrm{cm}^{-1}<\nu<50000 \mathrm{cm}^{-1}$ the air equivalent observed and Ritz wavelengths are calculated using the same Sellmeier equation as the NIST ASD (see [here][ASDcache.readASD.ASDCache.wn_to_n_refractive]).
+This is consistent with the approach of the ASD.
+Each response from the NIST page is cached (1 week by default) on the local system.
+This makes it much faster to load the same data, even across different script runs and/or user programs/sessions.
+As an example: reading all spectra between 200 and 1000 nm can take over 2 minutes without using the cache, but can be as fast as 0.2 seconds using the `polars` backend.
+In addition, it means that an internet connection is not required after initial data fetching.
+The cached response is only updated upon successful retrieval of a new response of the NIST page.
+If unable to successfully fetch new data, we fall back to a 'stale' cached response.
+The cache can be shared to another system, to give offline/airgapped systems access to the same data.
+To that end, the file `NIST_ASD_cache.sqlite` in the user's cache directory has to be copied over.
+The standard cache directories are as follows:
+=== "Windows"
+    `%USERPROFILE%/AppData/Local`
+=== "Linux"
+    `~/.cache/http_cache/`
+=== "MacOS"
+    `/Users/user/Library/Caches/http_cache/`
+Queries to the NIST ASD are hashed by the keys (or parameters) of the requests.
+This means that any change to either one of these parameters, will result in a new cache entry, even if the returned data is equivalent.
+"""
+import importlib
+import warnings
+import pandas as pd
+from requests_cache import CachedSession, CachedResponse
+from io import StringIO
+from datetime import timedelta
+import re
+import numpy as np
+from bs4 import BeautifulSoup
+import sys
+import logging
+from typing import Any, Optional
+if importlib.util.find_spec("polars"):
+    POLARS_AVAILABLE = True
+    """Check if `polars` is installed and available in the active environments"""
+    import polars as pl
+else:
+    POLARS_AVAILABLE = False
+logging.basicConfig(
+    level=logging.INFO,
+    format="[%(asctime)s] %(levelname)s [%(name)s.%(funcName)s:%(lineno)d] %(message)s",
+    datefmt="%d/%b/%Y %H:%M:%S",
+    stream=sys.stdout,
+)
+class SpectraCache:
+    """A class acting as the entrypoint to retrieve data from the NIST Atomic Spectra Database that uses caching.
+    The `SpectraCache` instance acts as an access point to the cache, which stores responses on the local system in a SQLite database.
+    Data retrieval from cache is much faster (order milliseconds) than fetching from the internet (order seconds), and avoids wasteful requests to the server.
+    Cache time-to-live is one week by default.
+    Since the NIST ASD is usually updated less frequently than that, this is a compromise between having the latest data, and overall fast performance.
+    Note that the same cache is shared across different class-instances, thread-safety is not guaranteed.
+    """
+    nist_url = "https://physics.nist.gov/cgi-bin/ASD/lines1.pl"
+    species_expr = re.compile(r"spectra=([\w\+\-\%3]+)&")
+    query_params = {
+        "unit": 1,
+        "de": 0,
+        "plot_out": 0,
+        "I_scale_type": 1,
+        "format": 3,
+        "line_out": 0,
+        "remove_js": "on",
+        "no_spaces": "on",
+        "en_unit": 0,
+        "output": 0,
+        "bibrefs": 1,
+        "show_obs_wl": 1,
+        "show_calc_wl": 1,
+        "show_wn": 1,
+        "unc_out": 1,
+        "order_out": 0,
+        "show_av": 3,  # 3: wavelength in vac, 2: wavelength in air
+        "tsb_value": 0,
+        "A_out": 0,
+        "S_out": "on",
+        "f_out": "on",
+        "loggf_out": "on",
+        "intens_out": "on",
+        "conf_out": "on",
+        "term_out": "on",
+        "enrg_out": "on",
+        "J_out": "on",
+        "g_out": "on",
+        "diag_out": "on",
+        "allowed_out": 1,
+        "forbid_out": 1,
+        "submit": "Retrieve Data",
+    }
+    """Request parameters used by the NIST ASD form."""
+    column_order = [
+        "element",
+        "sp_num",
+        "obs_wl_vac(nm)",
+        "unc_obs_wl",
+        "obs_wl_air(nm)",
+        "ritz_wl_vac(nm)",
+        "unc_ritz_wl",
+        "ritz_wl_air(nm)",
+        "wn(cm-1)",
+        "intens",
+        "Aki(s^-1)",
+        "fik",
+        "S(a.u.)",
+        "log_gf",
+        "Acc",
+        "Ei(cm-1)",
+        "Ek(cm-1)",
+        "conf_i",
+        "term_i",
+        "J_i",
+        "conf_k",
+        "term_k",
+        "J_k",
+        "g_i",
+        "g_k",
+        "Type",
+        "tp_ref",
+        "line_ref",
+    ]
+    """Fixed order of columns for consistent schema of data."""
+    def __init__(self, use_polars_backend=False, cache_expiry=timedelta(weeks=1), strict_matching=True):
+        """Initialize an instance that handles cached data lookup of the NIST ASD."""
+        self.strict_matching = strict_matching
+        self.cache_expiry = cache_expiry
+        self.session = CachedSession(
+            "NIST_ASD_cache",
+            use_cache_dir=True,
+            expire_after=cache_expiry,
+            stale_if_error=True,
+            filter_fn=self._check_response_success,
+            ignored_parameters=list(self.query_params.keys()) if self.strict_matching is False else None,
+        )
+        if (use_polars_backend) & (not POLARS_AVAILABLE):
+            warnings.warn("Cannot find `polars` as a backend, falling back to `pandas`", stacklevel=2)
+            self.use_polars = False
+        else:
+            self.use_polars = use_polars_backend
+        self.known_species = self.list_cached_species()
+    @staticmethod
+    def _check_response_success(response: "CachedResponse") -> bool:
+        """Validate that data has been fetched successfully.
+        If this check fails, the cache should not update with this response, even when marked as stale.
+        """
+        return (response.status_code == 200) & (b"Error Message" not in response.content)
+    def list_cached_species(self) -> list[str]:
+        """List all species in the cache, based on the string of the original query URL."""
+        return [
+            elem.replace("+", " ")
+            for u in self.session.cache.urls()
+            for elem in self.species_expr.search(u).group(1).split("%3B")
+        ]
+    def fetch(self, species, wl_range=(170, 1000), **kwargs) -> "pd.DataFrame|pl.DataFrame|CachedResponse":
+        """Fetch information on a species from the ASD, first checking the cache.
+        This supports loading multiple species in one go by using the same notation as the NIST ASD page.
+        Note however that cache keys are computed for unique options for `species` and `wl_range`.
+        This means that you won't get caching benefits by using different queries.
+        In other words: the cache cannot deduplicate queries such as `ASD.fetch('H', (200,1000))` followed by `ASD.fetch('H I', (650,660))`.
+        Both these operations will fetch data online and be stored as separate cache entries.
+        """
+        query_params = {
+            "spectra": species,
+            "output_type": 0,
+            "low_w": min(wl_range),
+            "upp_w": max(wl_range),
+            **self.query_params,
+        }
+        response = self.session.get(self.nist_url, params=query_params)
+        # if response.status_code == 200:
+        response.raise_for_status()
+        return self.create_dataframe(response)
+        # else:
+        #     print(f"Error: Received status code {response.status_code}")
+        #     print(response.url)
+        #     return response
+    def create_dataframe(self, response) -> "pd.DataFrame|pl.DataFrame":
+        """Create a dataframe from the (cached) NIST ASD response, using the chosen backend at class instantiation."""
+        if self.use_polars:
+            return self._from_polars(response)
+        return self._from_pandas(response)
+    @classmethod
+    def _from_pandas(cls, response: "CachedResponse") -> "pd.DataFrame":
+        r"""Transform a (cached) NIST ASD response into a pandas DataFrame.
+        Calculates the air equivalent wavelength from the vacuum wavelength using the same Sellmeier equation as the NIST ASD.
+        Note that this conversion is only performed for lines with $200 nm < \lambda < 2000 nm$, like the ASD.
+        For lines outside of this range, the conversion falls back to their vacuum wavelength.
+        """
+        schema = {
+            "obs_wl_vac(nm)": str,
+            "ritz_wl_vac(nm)": str,
+            "wn(cm-1)": float,
+            "intens": str,
+            "Aki(s^-1)": float,
+            "fik": float,
+            "S(a.u.)": float,
+            "log_gf": float,
+            "Acc": str,
+            "Ei(cm-1)": str,
+            "Ek(cm-1)": str,
+            "conf_i": str,
+            "conf_k": str,
+            "term_i": str,
+            "term_k": str,
+            "g_i": float,
+            "g_k": float,
+            "J_i": str,
+            "J_k": str,
+            "": str,
+        }
+        df = pd.read_csv(StringIO(response.text), sep="\t", dtype=schema)
+        for col in ["obs_wl_vac(nm)", "ritz_wl_vac(nm)", "intens", "Ei(cm-1)", "Ek(cm-1)"]:
+            df[col] = df.loc[:, col].str.extract(r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)").astype(float)
+        df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
+        df["tp_ref"] = df.loc[:, "tp_ref"].fillna("")
+        df["obs_wl_air(nm)"] = df["obs_wl_vac(nm)"]
+        df["obs_wl_air(nm)"] = df[df["wn(cm-1)"].between(5000, 50000)]["obs_wl_air(nm)"] / cls.wn_to_n_refractive(
+            df[df["wn(cm-1)"].between(5000, 50000)]["wn(cm-1)"]
+        )
+        df["ritz_wl_air(nm)"] = df["ritz_wl_vac(nm)"]
+        df["ritz_wl_air(nm)"] = df[df["wn(cm-1)"].between(5000, 50000)]["ritz_wl_air(nm)"] / cls.wn_to_n_refractive(
+            df[df["wn(cm-1)"].between(5000, 50000)]["wn(cm-1)"]
+        )
+        df = df.drop([c for c in df.columns if "Unnamed" in c], axis=1).reset_index(drop=True)
+        if "element" not in df.columns:
+            expr = re.compile(r"spectra=([\w]+)\+?([IVX]+)?")
+            element, numeral = expr.search(response.url).groups()
+            df["element"] = element
+            df["sp_num"] = numeral
+            # cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
+            df["sp_num"] = df["sp_num"].map(cls.roman_to_int)
+        df = (
+            df.assign(unc_obs_wl=df["unc_obs_wl"].astype(float), unc_ritz_wl=df["unc_ritz_wl"].astype(float))
+            if "unc_obs_wl" in df.columns
+            else df.assign(unc_obs_wl=np.nan, unc_ritz_wl=np.nan)
+        )
+        return df.loc[:, cls.column_order]
+    @classmethod
+    def _from_polars(cls, response: "CachedResponse") -> "pl.DataFrame":
+        r"""Transform a (cached) NIST ASD response into a polars DataFrame.
+        Calculates the air equivalent wavelength from the vacuum wavelength using the same Sellmeier equation as the NIST ASD.
+        Note that this conversion is only performed for lines with $200 nm < \lambda < 2000 nm$, like the ASD.
+        For lines outside of this range, the conversion falls back to their vacuum wavelength.
+        """
+        schema = {
+            "obs_wl_vac(nm)": pl.String,
+            "ritz_wl_vac(nm)": pl.String,
+            "wn(cm-1)": pl.Float64,
+            "intens": pl.String,
+            "Aki(s^-1)": pl.Float64,
+            "fik": pl.Float64,
+            "S(a.u.)": pl.Float64,
+            "log_gf": pl.Float64,
+            "Acc": pl.String,
+            "Ei(cm-1)": pl.String,
+            "Ek(cm-1)": pl.String,
+            "conf_i": pl.String,
+            "conf_k": pl.String,
+            "term_i": pl.String,
+            "term_k": pl.String,
+            "g_i": pl.Float64,
+            "g_k": pl.Float64,
+            "J_i": pl.String,
+            "J_k": pl.String,
+            "": pl.String,
+        }
+        # annotation_chars_to_strip = "(?i)()[]?*w,bGhilmprsq:+xzgacHd "
+        df = (
+            pl.read_csv(
+                StringIO(response.text),
+                separator="\t",
+                schema_overrides=schema,
+                null_values="",
+            )
+            .with_columns(
+                pl.col("obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens")
+                # .str.strip_chars(annotation_chars_to_strip).str.replace("&dagger;", "", literal=True)
+                .str.extract(r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)")
+                # .str.extract(r"([+-]?\d*\.?\d+e[+-]?\d+)")
+                .replace("", None)
+                .cast(pl.Float64),
+                pl.col("ritz_wl_vac(nm)").str.strip_chars('"+*').replace("", None).cast(pl.Float64),
+                pl.col("S(a.u.)").cast(pl.Float64),
+                pl.col("Type").replace(None, "E1"),
+                pl.col("tp_ref").replace(None, ""),
+            )
+            .drop([""])
+        ).with_columns(
+            pl.when(pl.col("wn(cm-1)").is_between(5000, 50000))
+            .then(
+                pl.col("obs_wl_vac(nm)").cast(pl.Float64)
+                / pl.col("wn(cm-1)").map_elements(cls.wn_to_n_refractive, return_dtype=pl.Float64)
+            )
+            .otherwise(pl.col("obs_wl_vac(nm)"))
+            .cast(pl.Float64)
+            .alias("obs_wl_air(nm)"),
+            pl.when(pl.col("wn(cm-1)").is_between(5000, 50000))
+            .then(
+                pl.col("ritz_wl_vac(nm)").cast(pl.Float64)
+                / pl.col("wn(cm-1)").map_elements(cls.wn_to_n_refractive, return_dtype=pl.Float64)
+            )
+            .otherwise(pl.col("ritz_wl_vac(nm)"))
+            .cast(pl.Float64)
+            .alias("ritz_wl_air(nm)"),
+        )
+        if "element" not in df.columns:
+            expr = re.compile(r"spectra=([\w]+)\+?([IVX]+)?")
+            element, numeral = expr.search(response.url).groups()
+            # cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
+            df = df.with_columns(
+                pl.lit(element).alias("element"),
+                pl.lit("I" if numeral is None else numeral)
+                .cast(pl.String)
+                .alias("sp_num")
+                .map_elements(cls.roman_to_int, return_dtype=pl.Int64),
+            )
+        df = (
+            df.with_columns(pl.col("unc_obs_wl").cast(pl.Float64), pl.col("unc_ritz_wl").cast(pl.Float64))
+            if "unc_obs_wl" in df.columns
+            else df.with_columns(
+                pl.lit(None).cast(pl.Float64).alias("unc_obs_wl"), pl.lit(None).cast(pl.Float64).alias("unc_ritz_wl")
+            )
+        )
+        return df.select(*cls.column_order)
+    @staticmethod
+    def roman_to_int(roman: str) -> int:
+        """Transform Roman numerals to integers.
+        Only supports numerals up to and including `L`.
+        """
+        roman_numerals = {"I": 1, "V": 5, "X": 10, "L": 50}
+        total = 0
+        previous = 0
+        for char in reversed(roman):
+            current_value = roman_numerals[char]
+            if current_value < previous:
+                total -= current_value  # Subtract if the current value is less than the previous value
+            else:
+                total += current_value
+            previous = current_value
+        return total
+    @staticmethod
+    def wn_to_n_refractive(wavenumbers: float) -> float:
+        r"""Calculate the refractive index $n$ in air for a transition, using the 5-term Sellmeier formula used by NIST.
+        The used Sellmeier formula is the one from E.R. Peck and K. Reeder [J. Opt. Soc. Am. 62, 958 (1972)](http://dx.doi.org/10.1364/JOSA.62.000958).
+        This formula is fitted to data in the range of 185 nm to 1700 nm for  air at 15 °C, 101 325 Pa pressure, with 0.033 % CO2.
+        This is the same formula used by the NIST ASD to calculate air wavelengths in the interval of 200 nm to 2000 nm.
+        See also [the ASD documentation on the topic](https://physics.nist.gov/PhysRefData/ASD/Html/lineshelp.html#Conversion%20between%20air%20and%20vacuum%20wavelengths).
+        Using this refractive index, air equivalent wavelengths consistent with the ASD can be calculated, without the need to query them separately.
+        """
+        sigma = wavenumbers * 1e-4  # um^-1
+        return 1 + 1e-8 * (8060.51 + 2480990 / (132.274 - sigma**2) + 17455.7 / (39.32957 - sigma**2))
+    def get_all_cached(self) -> "pd.DataFrame|pl.DataFrame":
+        """Retrieve all cached data into a single dataframe."""
+        cached_frames = [self.create_dataframe(cached) for cached in self.session.cache.filter()]
+        if self.use_polars:
+            return pl.concat(cached_frames).unique()
+        return pd.concat(cached_frames).drop_duplicates().reset_index(drop=True)
+class BibCache:
+    r"""A class for handling lookups of bibliographic metadata from the NIST ASD.
+    Supports both bibliographic reference databases curated by NIST:
+        * Atomic Transition Probability Bibliographic Database: [10.18434/T46C7N](https://doi.org/10.18434/T46C7N)
+        * Atomic Energy Levels and Spectral Bibliographic Database: [10.18434/T40K53](https://doi.org/10.18434/T40K53)
+    References to these databases in the NIST ASD data can be looked up and will be cached.
+    """
+    nist_url = "https://physics.nist.gov/cgi-bin/ASBib1/get_ASBib_ref.cgi"
+    reference_expr = re.compile(r"([A-Z])?([\d]+)?([a-z]+[\d]*)?")
+    def __init__(self, cache_expiry=timedelta(weeks=1)):
+        """Initialize an instance that handles cached retrieval of ASD bibliographic references."""
+        self.cache_expiry = cache_expiry
+        self.session = CachedSession(
+            "NIST_ASD_Bibliography_cache",
+            use_cache_dir=True,
+            expire_after=cache_expiry,
+            stale_if_error=True,
+            filter_fn=self._check_response_success,
+            ignored_parameters=["element", "spectr_charge", "type", "ref"],
+        )
+    @staticmethod
+    def _check_response_success(response: "CachedResponse") -> bool:
+        """Validate that data has been fetched successfully.
+        If this check fails, the cache should not update with this response, even when marked as stale.
+        """
+        is_success = (response.status_code == 200) & (b"There was a problem" not in response.content)
+        if not is_success:
+            logging.warning(f"Request was unsuccesful status:{response.status_code} , url:{response.url}")
+        return is_success
+    @classmethod
+    def parse_reference_code(cls, reference_code: str) -> tuple[str, Optional[str], str]:
+        r"""Parse a reference code from the NIST ASD into the constituent parts that can be used to look up references.
+        Args:
+            * reference_code (str): A NIST ASD bibliographic reference string, such as `L13456n3`, or `T6936n`.
+        Returns:
+            * db    (str)   :   A label for which bibliographic database to target
+            * ref   (str)   :   The database ID for the reference to look up
+            * comment (str) :   An additional comment included in the reference, can be fetched separately.
+        """
+        if reference_code.startswith("n"):
+            db, ref, comment = "T", None, "n"
+        elif (not reference_code.startswith("LS")) & (cls.reference_expr.match(reference_code) is not None):
+            db, ref, comment = cls.reference_expr.match(reference_code).groups()
+            comment = comment if "LS" not in reference_code else "LS"
+        else:
+            db, ref, comment = "T", None, "LS"
+        return db, ref, comment if comment is not None else ""
+    def lookup(self, element: str, sp_num: int, reference_code: str) -> dict[str, Any]:
+        """Look up a reference code for a given element state.
+        Args:
+            element (str)           :   The element name, e.g. `H`
+            sp_num (int)            :   The ionization state of the element, with 1 corresponding to the atom
+            reference_code (str)    :   The bibliographic reference code from the ASD columns `tp_ref` or `line_ref`.
+        Returns:
+            bib_data (dict)         : A dictionary containing bibliographic metadata for the reference, if available/applicable. Contains a url to look it up.
+        """
+        db, ref, comment = self.parse_reference_code(reference_code)
+        params = {
+            "db": "tp" if db == "T" else "el",
+            "db_id": ref,
+            "comment_code": "",
+            "element": element,
+            "spectr_charge": sp_num,
+        }
+        if ref is not None:
+            response = self.session.get(self.nist_url, params=params)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.text, features="html.parser")
+            title = soup.find("font", {"size": "+1"})
+            doi = soup.find("a", {"id": "ad"})
+            authors = soup.find_all("a", {"id": "aa"})
+            title = "" if title is None else title.text.replace("\xa0", " ").strip()
+            doi = "" if doi is None else doi.text.strip()
+            authors = authors if authors == [] else [author.text.replace("\xa0", " ").strip() for author in authors]
+            text = "\n".join([tr.text.strip() for tr in soup.find("table").find_all("tr")]).strip()
+            url = (
+                response.url.replace("REDACTED", f"{element}", 1).replace("REDACTED", f"{sp_num}", 1)
+                + f"&comment_code={comment}"
+            )
+        else:
+            title = ""
+            doi = ""
+            authors = []
+            text = ""
+            url = None
+        # separately look up comments such that we benefit from the cache here as well
+        if comment != "":
+            comment_params = {
+                "db": "tp" if db == "T" else "el",
+                "db_id": "",
+                "comment_code": comment,
+                "element": "H",  # not cached
+                "spectr_charge": 1,  # not cached
+            }
+            comment_response = self.session.get(self.nist_url, params=comment_params)
+            comment_response.raise_for_status()
+            text += BeautifulSoup(comment_response.text, features="html.parser").table.find("td", {"colspan": "2"}).text
+            url = (
+                comment_response.url.replace("REDACTED", f"{element}", 1).replace("REDACTED", f"{sp_num}", 1)
+                + f"&db_id={'' if ref is None else ref}"
+            )
+        bib_data = {
+            "title": title,
+            "doi": doi,
+            "authors": authors,
+            "text": text,
+            "url": url,
+        }
+        return bib_data

asdcache-0.2.0/ASDCache/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""ASDCache is a module to retrieve data from the NIST Atomic Spectra Database that uses caching for fast local access.
+To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields.
+The `SpectraCache` class acts as the entrypoint to retrieve this data.
+"""
+from .ASDCache import SpectraCache, BibCache
+__all__ = ["SpectraCache", "BibCache"]

asdcache-0.2.0/ASDCache/_version.py ADDED Viewed

@@ -0,0 +1,16 @@
+# file generated by setuptools_scm
+# don't change, don't track in version control
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple, Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+__version__ = version = '0.2.0'
+__version_tuple__ = version_tuple = (0, 2, 0)

asdcache-0.2.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024, Antoine Salden
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

asdcache-0.2.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,137 @@
+Metadata-Version: 2.4
+Name: ASDCache
+Version: 0.2.0
+Summary: A Python module to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling
+Project-URL: Documentation, https://antoinetue.github.io/asdcache
+Project-URL: Source, https://github.com/AntoineTUE/asdcache
+Author-email: Antoine Salden <t.p.w.salden@tue.nl>
+License: MIT
+License-File: LICENSE
+Keywords: ASD,Atomic Spectra Database,NIST,atoms,spectra,spectroscopy,spectrum
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering
+Requires-Python: >=3.9
+Requires-Dist: bs4
+Requires-Dist: numpy
+Requires-Dist: pandas
+Requires-Dist: requests
+Requires-Dist: requests-cache
+Provides-Extra: docs
+Requires-Dist: black; extra == 'docs'
+Requires-Dist: mkdocs; extra == 'docs'
+Requires-Dist: mkdocs-autorefs; extra == 'docs'
+Requires-Dist: mkdocs-gen-files; extra == 'docs'
+Requires-Dist: mkdocs-git-revision-date-localized-plugin; extra == 'docs'
+Requires-Dist: mkdocs-include-markdown-plugin; extra == 'docs'
+Requires-Dist: mkdocs-jupyter; extra == 'docs'
+Requires-Dist: mkdocs-literate-nav; extra == 'docs'
+Requires-Dist: mkdocs-material; extra == 'docs'
+Requires-Dist: mkdocs-section-index; extra == 'docs'
+Requires-Dist: mkdocstrings; extra == 'docs'
+Requires-Dist: mkdocstrings-python; extra == 'docs'
+Provides-Extra: polars
+Requires-Dist: polars; extra == 'polars'
+Description-Content-Type: text/markdown
+# ASDCache
+![ASDCache logo](./docs/assets/logo.svg)
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.14673488.svg)](https://doi.org/10.5281/zenodo.14673488)
+[![GitHub License](https://img.shields.io/github/license/AntoineTUE/ASDCache)](https://www.github.com/AntoineTUE/ASDCache/blob/main/LICENSE)
+[![GitHub Workflow Status build](https://img.shields.io/github/actions/workflow/status/AntoineTUE/ASDCache/build.yml?label=PyPI%20build)](https://pypi.python.org/pypi/ASDCache)
+[![GitHub Workflow Status docs](https://img.shields.io/github/actions/workflow/status/AntoineTUE/ASDCache/documentation.yml?label=Documentation%20build)](https://antoinetue.github.io/ASDCache)
+[![PyPI - Version](https://img.shields.io/pypi/v/ASDCache)](https://pypi.python.org/pypi/ASDCache)
+[![PyPI - Python versions](https://img.shields.io/pypi/pyversions/ASDCache.svg)](https://pypi.python.org/pypi/ASDCache)
+[![PyPI - Downloads](https://img.shields.io/pypi/dw/ASDCache)](https://pypistats.org/packages/ASDCache)
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
+[![Hatch project](https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg)](https://github.com/pypa/hatch)
+`ASDCache` is a Python project to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling.
+To make the most use out of the cache, `ASDCache` is opinionated in the information it retrieves from the ASD; it always requests the same schema of information and locally computes additional fields, to provide a more 'machine-useable' experience.
+It also coerces most of the retrieved data to be of a strictly numeric type, which strips out footnotes and annotations, but preserves e.g. bibliographic reference labels.
+You should thus still be sure to check and attribute the NIST ASD when making use of `ASDCache`!
+The main goals and benefits of `ASDCache` are:
+- [x] Make the data from the NIST ASD locally accessible as a Dataframe for use in analysis of spectra
+- [x] Retrieve a consistent schema of the data that represents the 'human readable' format, but enforce strictly numeric data for important columns
+    - [ ] This removes footnotes and other annotations, be sure to check the ASD itself as well for this information.
+- [x]  Use caching to dramatically speed up data retrieval, from minutes down to milliseconds in some cases
+    - [x] Cache time-to-live is 1 week by default, meaning you still get updates to the ASD in a reasonable time frame
+    - [x] The cache time-to-live can be adjusted
+- [x]  Cache data to allow working offline, or even transferring the ASD data to an offline system.
+    - [x] The cache is only updated when a request for new data succeeds
+- [x] Limit repeated queries for the same information, avoiding network overhead and server load.
+`ASDCache` is not affiliated with NIST or the NIST ASD in any way, it simply tries to help make it more accessible.
+## Installing
+`ASDCache` can be installed with `pip`.
+```console
+pip install ASDCache
+```
+Further optional features can be installed by specifying the `polars` or `docs` feature flag, as defined in [pyproject.toml](https://github.com/AntoineTUE/ASDCache/blob/main/pyproject.toml).
+To install all dependencies to locally serve and update the documentation for instance, you can run:
+```console
+pip install ASDCache[docs]
+```
+Installing the `polars` feature is not required: if `polars` is already installed in the active environment, it can be used instead of `pandas` as the Dataframe backend for `ASDCache`.
+## Documentation
+Documentation for `ASDCache` is available on [this page](https://antoinetue.github.io/ASDCache).
+### Example
+A brief example below demonstrates how to use `SpectraCache` to query the NIST ASD for spectroscopic data for different species and plot their respective relative intensities.
+Note that these relative intensities are in principle not comparable between different species or sources and merely serve as a guide.
+More elaborate examples can be found in the [example section of the documentation](https://antoinetue.github.io/ASDCache/examples)
+```python
+from ASDCache import SpectraCache, BibCache
+import matplotlib.pyplot as plt
+nist = SpectraCache()
+lines_H_I = nist.fetch("H I")
+plt.plot(lines_H_I['obs_wl_air(nm)'], lines_H_I['intens'], label=f"{lines_H_I['element'].unique()[0]} {lines_H_I['sp_num'].unique()[0]}")
+nist.fetch("O I-III") # caches data from NIST but does not assign to a variable
+# Oxygen I-III will still be plotted, each ionization state separately.
+lines_all_cached = nist.get_all_cached()
+for species,lines in lines_all_cached.groupby(["element","sp_num"]):
+    plt.plot(lines['obs_wl_air(nm)'], lines['intens'], label=f"{species[0]} {species[1]}", marker='x', ls='none')
+plt.legend()
+```
+## Citing
+Be sure to cite the NIST ASD when using `ASDCache` in your work, since it is the source of the data.
+`ASDCache` itself can be cited using the following DOI provided via Zenodo: [10.5281/zenodo.14673488](https://doi.org/10.5281/zenodo.14673488)
+See also [this page for more information](https://antoinetue.github.io/ASDCache/citing)
+## License
+ASDCache is licensed under the MIT license.

asdcache-0.2.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,185 @@
+[build-system]
+requires = ["hatchling", "hatch-vcs", "hatch-fancy-pypi-readme"]
+build-backend = "hatchling.build"
+[project]
+name = "ASDCache"
+description = "A Python module to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling"
+readme = "README.md"
+requires-python = ">=3.9"
+license = { text = "MIT"}
+keywords = ["NIST", "ASD","Atomic Spectra Database", "spectra","spectrum","spectroscopy","atoms"]
+authors = [
+    { name = "Antoine Salden", email = "t.p.w.salden@tue.nl" },
+]
+classifiers = [
+    "License :: OSI Approved :: MIT License",
+    "Development Status :: 4 - Beta",
+    "Topic :: Scientific/Engineering",
+    "Intended Audience :: Science/Research",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+dependencies = ["requests","requests_cache", "pandas","numpy", "bs4"]
+dynamic = ["version"]
+[project.optional-dependencies]
+polars = ["polars"]
+docs = [
+    "mkdocs",
+    "mkdocs-autorefs",
+    "mkdocs-gen-files",
+    "mkdocs-git-revision-date-localized-plugin",
+    "mkdocs-include-markdown-plugin",
+    "mkdocs-jupyter",
+    "mkdocs-literate-nav",
+    "mkdocs-material",
+    "mkdocs-section-index",
+    "mkdocstrings",
+    "mkdocstrings-python",
+    "black"
+]
+[project.urls]
+Documentation = "https://antoinetue.github.io/asdcache"
+Source = "https://github.com/AntoineTUE/asdcache"
+[tool.hatch.metadata]
+# direct dependency references, e.g. `pip @ git+https://github.com/pypa/pip.git@master`
+allow-direct-references = true
+[tool.hatch.version]
+source = "vcs"
+fallback-version = "0.0.1dev"
+[tool.hatch.build.hooks.vcs]
+version-file = "src/ASDCache/_version.py"
+[tool.hatch.build]
+packages = ["src/ASDCache"]
+[tool.hatch.build.targets.sdist]
+exclude = ["/.github"]
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "-ra -q --doctest-glob='*.md'"
+testpaths = ["tests"]
+markers = ["full: test using the full NIST ASD"]
+[tool.coverage.run]
+branch = true
+source = ["src/ASDCache"]
+omit = ["_version.py"]
+[tool.coverage.paths]
+source = [
+    "src/",
+    "*/site-packages/",
+]
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "if self\\.debug",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "if 0:",
+    "if __name__ == .__main__.:",
+    "if TYPE_CHECKING:",
+]
+[tool.ruff]
+target-version = "py39"
+line-length = 120
+indent-width = 4
+include = [
+    "src/**/*.py",
+    "src/**/*.pyi",
+    "tests/**/*.py",
+    "tests/**/*.pyi",
+    "docs/**/*.ipynb"
+]
+extend-exclude = ["docs/assets/scripts/gen_ref_pages.py"]
+[tool.ruff.lint]
+select = ["E4", "E7", "E9", "F","C4", "SIM", "NPY", "PD","B","UP","D"]
+ignore = ["PD901","F401"]
+[tool.ruff.lint.pydocstyle]
+convention = "pep257"
+[tool.ruff.lint.isort]
+known-first-party = ["ASDCache"]
+[tool.ruff.lint.per-file-ignores]
+"examples/*" = ["T201"]
+"tests/**/*" = ["PLR2004", "S101", "TID252","D","F401"]
+[tool.hatch.metadata.hooks.fancy-pypi-readme]
+content-type = "text/markdown"
+fragments = [
+    { path = "README.md" }
+]
+[tool.uv]
+cache-keys = [{ git = true }]
+[tool.hatch.envs.default]
+python = "3.9"
+post-install-commands = ["pre-commit install"]
+dependencies = ["matplotlib", "ipython","ipykernel","ruff"]
+installer = "uv"
+features = ["polars"]
+[tool.hatch.envs.test]
+dependencies = [
+    "coverage[toml]>=6.2",
+    "pytest",
+    "pytest-cov",
+    "pytest-mock",
+    "pytest-recording",
+    "pytest-sugar",
+    "hypothesis",
+]
+[tool.hatch.envs.hatch-test]
+randomize = false
+parallel = false # avoid cache access conflicts
+retries = 2
+retry-delay = 2
+features = ["polars"]
+[tool.hatch.envs.docs]
+skip-install = true
+features = ["docs"]
+dependencies = ["mike"]
+[tool.hatch.envs.docs.scripts]
+serve = "mkdocs serve -f mkdocs.yml {args}"
+build = "mkdocs build --clean -f mkdocs.yml {args}"
+ci-build = "mike deploy --config-file mkdocs.yml --update-aliases {args}"
+[tool.hatch.envs.lint]
+template = "lint"
+dependencies = ["ruff>=0.7.0"]
+[tool.hatch.envs.lint.scripts]
+style = [
+    "echo \"VERSION: `ruff --version`\"",
+    "ruff check {args:.}",
+    "ruff format --check {args:.}",
+]
+fix = [
+    "ruff format {args:.}",
+    "ruff check --fix {args:.}",
+    "style",  # feedback on what is not fixable
+]
+[[tool.hatch.envs.hatch-test.matrix]]
+python = ["3.9", "3.10", "3.11", "3.12","3.13"]