PyPI - ASDCache - Versions diffs - 0.2.0__tar.gz → 0.2.2__tar.gz - Mend

ASDCache 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

{asdcache-0.2.0 → asdcache-0.2.2}/ASDCache/ASDCache.py RENAMED Viewed

@@ -61,6 +61,42 @@ logging.basicConfig(
     stream=sys.stdout,
 )
+ASDSchema = {
+    "element": str,
+    "sp_num": int,
+    "obs_wl_vac(nm)": float,
+    "unc_obs_wl": float,
+    "obs_wl_air(nm)": float,
+    "ritz_wl_vac(nm)": float,
+    "unc_ritz_wl": float,
+    "ritz_wl_air(nm)": float,
+    "wn(cm-1)": float,
+    "intens": float,
+    "Aki(s^-1)": float,
+    "fik": float,
+    "S(a.u.)": float,
+    "log_gf": float,
+    "Acc": str,
+    "Ei(cm-1)": float,
+    "Ek(cm-1)": float,
+    "conf_i": str,
+    "term_i": str,
+    "J_i": str,
+    "conf_k": str,
+    "term_k": str,
+    "J_k": str,
+    "g_i": float,
+    "g_k": float,
+    "Type": str,
+    "tp_ref": str,
+    "line_ref": str,
+}
+STATE_EXPR = r"spectra=([\w]+)\+?([IVX]+)?"
+"""Regex pattern for extracting (element,charge) tuple for a single-state query, which uses roman numerals."""
+SCI_EXPR = r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)"
+"""Regex pattern for processing scientific notation"""
 class SpectraCache:
     """A class acting as the entrypoint to retrieve data from the NIST Atomic Spectra Database that uses caching.
@@ -148,7 +184,6 @@ class SpectraCache:
     def __init__(self, use_polars_backend=False, cache_expiry=timedelta(weeks=1), strict_matching=True):
         """Initialize an instance that handles cached data lookup of the NIST ASD."""
         self.strict_matching = strict_matching
-        self.cache_expiry = cache_expiry
         self.session = CachedSession(
             "NIST_ASD_cache",
             use_cache_dir=True,
@@ -165,6 +200,24 @@ class SpectraCache:
         self.known_species = self.list_cached_species()
+    @property
+    def cache_expiry(self) -> timedelta:
+        """The cache expiry time.
+        Queries that are older than this time are considered stale and marked for updating, by querying the NIST ASD.
+        In case the query for new data fails, the stale, cached response will still be parsed.
+        """
+        return self.session.settings.expire_after
+    def set_cache_expiry(self, new: timedelta = None, **kwargs):
+        """Set the cache expiry to a different interval (default: 1 week).
+        Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
+        """
+        if new is None:
+            new = timedelta(**kwargs)
+        self.session.settings.expire_after = new
     @staticmethod
     def _check_response_success(response: "CachedResponse") -> bool:
         """Validate that data has been fetched successfully.
@@ -173,6 +226,11 @@ class SpectraCache:
         """
         return (response.status_code == 200) & (b"Error Message" not in response.content)
+    @property
+    def cached_species(self) -> list[str]:
+        """A list of all cached species."""
+        return self.list_cached_species()
     def list_cached_species(self) -> list[str]:
         """List all species in the cache, based on the string of the original query URL."""
         return [
@@ -247,11 +305,14 @@ class SpectraCache:
             "g_k": float,
             "J_i": str,
             "J_k": str,
+            "Type": str,
+            "tp_ref": str,
+            "line_ref": str,
             "": str,
         }
         df = pd.read_csv(StringIO(response.text), sep="\t", dtype=schema)
         for col in ["obs_wl_vac(nm)", "ritz_wl_vac(nm)", "intens", "Ei(cm-1)", "Ek(cm-1)"]:
-            df[col] = df.loc[:, col].str.extract(r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)").astype(float)
+            df[col] = df.loc[:, col].str.extract(SCI_EXPR).astype(float)
         df["Type"] = df.loc[:, "Type"].astype(str).replace("nan", "E1")
         df["tp_ref"] = df.loc[:, "tp_ref"].fillna("")
         df["obs_wl_air(nm)"] = df["obs_wl_vac(nm)"]
@@ -264,17 +325,13 @@ class SpectraCache:
         )
         df = df.drop([c for c in df.columns if "Unnamed" in c], axis=1).reset_index(drop=True)
         if "element" not in df.columns:
-            expr = re.compile(r"spectra=([\w]+)\+?([IVX]+)?")
-            element, numeral = expr.search(response.url).groups()
+            element, numeral = re.search(STATE_EXPR, response.url).groups()
             df["element"] = element
             df["sp_num"] = numeral
             # cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
             df["sp_num"] = df["sp_num"].map(cls.roman_to_int)
-        df = (
-            df.assign(unc_obs_wl=df["unc_obs_wl"].astype(float), unc_ritz_wl=df["unc_ritz_wl"].astype(float))
-            if "unc_obs_wl" in df.columns
-            else df.assign(unc_obs_wl=np.nan, unc_ritz_wl=np.nan)
-        )
+        df["unc_obs_wl"] = pd.to_numeric(df["unc_obs_wl"]) if "unc_obs_wl" in df.columns else np.nan
+        df["unc_ritz_wl"] = pd.to_numeric(df["unc_ritz_wl"]) if "unc_ritz_wl" in df.columns else np.nan
         return df.loc[:, cls.column_order]
     @classmethod
@@ -320,7 +377,7 @@ class SpectraCache:
             .with_columns(
                 pl.col("obs_wl_vac(nm)", "Ei(cm-1)", "Ek(cm-1)", "intens")
                 # .str.strip_chars(annotation_chars_to_strip).str.replace("&dagger;", "", literal=True)
-                .str.extract(r"([+-]?\d*\.?\d+(?:[eE][+-]?\d+)?)")
+                .str.extract(SCI_EXPR)
                 # .str.extract(r"([+-]?\d*\.?\d+e[+-]?\d+)")
                 .replace("", None)
                 .cast(pl.Float64),
@@ -349,23 +406,20 @@ class SpectraCache:
             .alias("ritz_wl_air(nm)"),
         )
         if "element" not in df.columns:
-            expr = re.compile(r"spectra=([\w]+)\+?([IVX]+)?")
-            element, numeral = expr.search(response.url).groups()
+            element, numeral = re.search(STATE_EXPR, response.url).groups()
             # cast roman numerals to int for consistency with queries with multiple ionization states, e.g. Ar I vs Ar I-II
             df = df.with_columns(
                 pl.lit(element).alias("element"),
                 pl.lit("I" if numeral is None else numeral)
                 .cast(pl.String)
                 .alias("sp_num")
-                .map_elements(cls.roman_to_int, return_dtype=pl.Int64),
+                .map_elements(cls.roman_to_int, return_dtype=pl.Int64)
+                .first(),
             )
-        df = (
-            df.with_columns(pl.col("unc_obs_wl").cast(pl.Float64), pl.col("unc_ritz_wl").cast(pl.Float64))
-            if "unc_obs_wl" in df.columns
-            else df.with_columns(
-                pl.lit(None).cast(pl.Float64).alias("unc_obs_wl"), pl.lit(None).cast(pl.Float64).alias("unc_ritz_wl")
-            )
-        )
+        df = df.with_columns(
+            unc_obs_wl=pl.col("unc_obs_wl") if "unc_obs_wl" in df.columns else None,
+            unc_ritz_wl=pl.col("unc_ritz_wl") if "unc_ritz_wl" in df.columns else None,
+        ).with_columns(pl.col("unc_obs_wl").cast(pl.Float64), pl.col("unc_ritz_wl").cast(pl.Float64))
         return df.select(*cls.column_order)
@@ -408,8 +462,16 @@ class SpectraCache:
         """Retrieve all cached data into a single dataframe."""
         cached_frames = [self.create_dataframe(cached) for cached in self.session.cache.filter()]
         if self.use_polars:
-            return pl.concat(cached_frames).unique()
-        return pd.concat(cached_frames).drop_duplicates().reset_index(drop=True)
+            return (
+                pl.concat(cached_frames).unique()
+                if len(cached_frames) > 0
+                else pl.DataFrame({k: [] for k in ASDSchema}, schema=ASDSchema)
+            )
+        return (
+            pd.concat(cached_frames).drop_duplicates().reset_index(drop=True)
+            if len(cached_frames) > 0
+            else pd.DataFrame({k: pd.Series(dtype=v) for k, v in ASDSchema.items()})
+        )
 class BibCache:
@@ -428,7 +490,6 @@ class BibCache:
     def __init__(self, cache_expiry=timedelta(weeks=1)):
         """Initialize an instance that handles cached retrieval of ASD bibliographic references."""
-        self.cache_expiry = cache_expiry
         self.session = CachedSession(
             "NIST_ASD_Bibliography_cache",
             use_cache_dir=True,
@@ -438,6 +499,24 @@ class BibCache:
             ignored_parameters=["element", "spectr_charge", "type", "ref"],
         )
+    @property
+    def cache_expiry(self) -> timedelta:
+        """The cache expiry time.
+        Queries that are older than this time are considered stale and marked for updating, by querying the NIST ASD.
+        In case the query for new data fails, the stale, cached response will still be parsed.
+        """
+        return self.session.settings.expire_after
+    def set_cache_expiry(self, new: timedelta = None, **kwargs):
+        """Set the cache expiry to a different interval (default: 1 week).
+        Can be done by either passing in a `timedelta` object, or valid keyword arguments for `timedelta` itself.
+        """
+        if new is None:
+            new = timedelta(**kwargs)
+        self.session.settings.expire_after = new
     @staticmethod
     def _check_response_success(response: "CachedResponse") -> bool:
         """Validate that data has been fetched successfully.

{asdcache-0.2.0 → asdcache-0.2.2}/ASDCache/_version.py RENAMED Viewed

@@ -1,8 +1,13 @@
-# file generated by setuptools_scm
+# file generated by setuptools-scm
 # don't change, don't track in version control
+__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
 TYPE_CHECKING = False
 if TYPE_CHECKING:
-    from typing import Tuple, Union
+    from typing import Tuple
+    from typing import Union
     VERSION_TUPLE = Tuple[Union[int, str], ...]
 else:
     VERSION_TUPLE = object
@@ -12,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.2.0'
-__version_tuple__ = version_tuple = (0, 2, 0)
+__version__ = version = '0.2.2'
+__version_tuple__ = version_tuple = (0, 2, 2)

{asdcache-0.2.0 → asdcache-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ASDCache
-Version: 0.2.0
+Version: 0.2.2
 Summary: A Python module to retrieve data from the NIST Atomic Spectra Database (ASD), using caching for fast, efficient data handling
 Project-URL: Documentation, https://antoinetue.github.io/asdcache
 Project-URL: Source, https://github.com/AntoineTUE/asdcache
@@ -16,6 +16,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering
 Requires-Python: >=3.9
 Requires-Dist: bs4

{asdcache-0.2.0 → asdcache-0.2.2}/pyproject.toml RENAMED Viewed

@@ -23,6 +23,7 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
 ]
 dependencies = ["requests","requests_cache", "pandas","numpy", "bs4"]
 dynamic = ["version"]
@@ -131,9 +132,9 @@ fragments = [
 cache-keys = [{ git = true }]
 [tool.hatch.envs.default]
-python = "3.9"
+python = "3.12"
 post-install-commands = ["pre-commit install"]
-dependencies = ["matplotlib", "ipython","ipykernel","ruff"]
+dependencies = ["matplotlib", "ipython","ipykernel","pre-commit"]
 installer = "uv"
 features = ["polars"]
@@ -152,7 +153,7 @@ dependencies = [
 randomize = false
 parallel = false # avoid cache access conflicts
 retries = 2
-retry-delay = 2
+retry-delay = 1
 features = ["polars"]
 [tool.hatch.envs.docs]

{asdcache-0.2.0 → asdcache-0.2.2}/.gitignore RENAMED Viewed

File without changes

{asdcache-0.2.0 → asdcache-0.2.2}/ASDCache/__init__.py RENAMED Viewed

File without changes

{asdcache-0.2.0 → asdcache-0.2.2}/LICENSE RENAMED Viewed

File without changes

ASDCache 0.2.0__tar.gz → 0.2.2__tar.gz

ASDCache 0.2.0__tar.gz → 0.2.2__tar.gz