PyPI - astro-otter - Versions diffs - 0.2.0__py3-none-any.whl → 0.3.2__py3-none-any.whl - Mend

astro-otter 0.2.0 (py3-none-any.whl) → 0.3.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of astro-otter might be problematic. Click here for more details.

Files changed (11)

{astro_otter-0.2.0.dist-info → astro_otter-0.3.2.dist-info}/METADATA +5 -9
astro_otter-0.3.2.dist-info/RECORD +18 -0
{astro_otter-0.2.0.dist-info → astro_otter-0.3.2.dist-info}/WHEEL +1 -1
otter/_version.py +1 -1
otter/io/otter.py +96 -24
otter/io/transient.py +253 -66
otter/schema.py +7 -1
otter/util.py +27 -1
astro_otter-0.2.0.dist-info/RECORD +0 -18
{astro_otter-0.2.0.dist-info → astro_otter-0.3.2.dist-info}/licenses/LICENSE +0 -0
{astro_otter-0.2.0.dist-info → astro_otter-0.3.2.dist-info}/top_level.txt +0 -0

{astro_otter-0.2.0.dist-info → astro_otter-0.3.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: astro-otter
-Version: 0.2.0
+Version: 0.3.2
 Author-email: Noah Franz <nfranz@arizona.edu>
 License: MIT License
@@ -36,7 +36,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Development Status :: 2 - Pre-Alpha
-Requires-Python: <3.12,>=3.10
+Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy<2,>=1.20
@@ -45,6 +45,7 @@ Requires-Dist: pandas
 Requires-Dist: synphot
 Requires-Dist: typing-extensions
 Requires-Dist: pyarango
+Requires-Dist: tabulate
 Requires-Dist: matplotlib
 Requires-Dist: plotly
 Requires-Dist: astroquery
@@ -117,6 +118,7 @@ python3 -m pip install astro-otter
    ```
    git clone https://github.com/astro-otter/otter.git $OTTER_ROOT/otter
    git clone https://github.com/astro-otter/otterdb.git $OTTER_ROOT/otterdb
+   git clone https://github.com/astro-otter/otter-web.git $OTTER_ROOT/otter-web
    ```
 3. Install the NASA ADS Python API by following the instructions at https://ads.readthedocs.io/en/latest/#getting-started
 4. Install otter, the API for this database. From
@@ -125,13 +127,7 @@ python3 -m pip install astro-otter
    cd $OTTER_ROOT/otter
    python -m pip install -e .
    ```
-5. Process the data to build the local "database" (although it is really just a directory).
-   Then, you can build the "database" by running the
-   following commands:
-   ```
-   cd $OTTER_ROOT/otter/scripts/
-   python3 gen_summary_table.py --otterroot $OTTER_ROOT
-   ```
+5. Process the data to build the local copy of the database. Follow the instructions in the otterdb repo README.
 6. Easily access the data using the Otter code! In python:
   ```
   import os

astro_otter-0.3.2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,18 @@
+astro_otter-0.3.2.dist-info/licenses/LICENSE,sha256=s9IPE8A3CAMEaZpDhj4eaorpmfLYGB0mIGphq301PUY,1067
+otter/__init__.py,sha256=pvX-TN7nLVmvKpkDi89Zxe-jMfHNiVMD3zsd_bPEK9Y,535
+otter/_version.py,sha256=PzgYWJOir7OHMXvL-_jK7jE3NKIV62yn_oi4sJrmsM8,76
+otter/exceptions.py,sha256=3lQF4AXVTfs9VRsVePQoIrXnramsPZbUL5crvf1s9Ng,1702
+otter/schema.py,sha256=lOn-9FX4EuVYkRY97j64yGQM9fsTdKUCeYHqkJ8-kN0,10790
+otter/util.py,sha256=Ve_3iHmTcdcS_X2zzBg07WQTlWPbx4CBAZf4Gye65Og,22831
+otter/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+otter/io/data_finder.py,sha256=v3jZCOhvysHUQG1FOwHdeJ-psTT-MsdO_GZalBpMBGc,38218
+otter/io/host.py,sha256=xv_SznZuvMoMVsZLqlcmlOyaqKCMZqlTQ_gkN4VBSTw,7139
+otter/io/otter.py,sha256=lJStat9oefnQCT4rHrI39Lq0Of-uWl2Dsq7A8Gsy4SY,49892
+otter/io/transient.py,sha256=ANYnqWN1bJuefkTQqyPdt95V33DaQxUOOG_PPU86l3A,48832
+otter/plotter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+otter/plotter/otter_plotter.py,sha256=OQhuLgnMSzgtAjJF8SYBuQOyYcu7Pr0uia5P0G_7z5Q,2144
+otter/plotter/plotter.py,sha256=z09NwQVJS2tuwH3sv95DZv8xogjvf-7Gvj6iWCEx-gQ,9635
+astro_otter-0.3.2.dist-info/METADATA,sha256=DO4CxmtLPoiqkVTQeFpOVXsPvu4bocl9es_VLZfsZLw,7073
+astro_otter-0.3.2.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+astro_otter-0.3.2.dist-info/top_level.txt,sha256=Wth72sCwBRUk3KZGknSKvLQDMFuJk6qiaAavMDOdG5k,6
+astro_otter-0.3.2.dist-info/RECORD,,

{astro_otter-0.2.0.dist-info → astro_otter-0.3.2.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.0)
+Generator: setuptools (80.8.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

otter/_version.py CHANGED Viewed

@@ -2,4 +2,4 @@
 Just define the package version in one place
 """
-__version__ = "0.2.0"
+__version__ = "0.3.2"

otter/io/otter.py CHANGED Viewed

@@ -3,11 +3,12 @@ This is the primary class for user interaction with the catalog
 """
 from __future__ import annotations
+from typing import Optional
 import os
 import json
 import glob
-from warnings import warn
 from copy import deepcopy
+import logging
 from pyArango.connection import Connection
 from pyArango.database import Database
@@ -22,7 +23,7 @@ from astropy import units as u
 from .transient import Transient
 from ..exceptions import FailedQueryError, OtterLimitationError, TransientMergeError
-from ..util import bibcode_to_hrn, freq_to_obstype, freq_to_band
+from ..util import bibcode_to_hrn, freq_to_obstype, freq_to_band, _DuplicateFilter
 import warnings
@@ -30,6 +31,8 @@ warnings.simplefilter("once", RuntimeWarning)
 warnings.simplefilter("once", UserWarning)
 warnings.simplefilter("once", u.UnitsWarning)
+logger = logging.getLogger(__name__)
 def _np_encoder(object):
     """
@@ -44,23 +47,34 @@ class Otter(Database):
     This is the primary class for users to access the otter backend database
     Args:
+        url (str): The url where the database api endpoints are located
+        username (str): The username to log into the database with
+        password (str): The password to log into the database with
+        gen_summary (bool): Generate a local summary table, this should generally be
+                            left as False!
         datadir (str): Path to the data directory with the otter data. If not provided
                        will default to a ".otter" directory in the CWD where you call
                        this class from.
         debug (bool): If we should just debug and not do anything serious.
+    Returns:
+        An Otter object that is connected to the otter database
     """
     def __init__(
         self,
         url: str = "http://127.0.0.1:8529",
-        username: str = "user-guest",
-        password: str = "",
+        username: str = os.environ.get("ARANGO_USER_USERNAME", "user-guest"),
+        password: str = os.environ.get("ARANGO_USER_PASSWORD", ""),
         gen_summary: bool = False,
         datadir: str = None,
         debug: bool = False,
         **kwargs,
     ) -> None:
+        print("Attempting to login with the following credentials:")
+        print(f"username: {username}")
+        print(f"password: {password}")
         # save inputs
         if datadir is None:
             self.CWD = os.path.dirname(os.path.abspath("__FILE__"))
@@ -79,7 +93,7 @@ class Otter(Database):
             try:
                 os.makedirs(self.DATADIR)
             except FileExistsError:
-                warn(
+                logger.warning(
                     "Directory was created between the if statement and trying "
                     + "to create the directory!"
                 )
@@ -93,7 +107,8 @@ class Otter(Database):
         Get the metadata of the objects matching the arguments
         Args:
-            **kwargs : Arguments to pass to Otter.query()
+            **kwargs : Arguments to pass to Otter.query(). See that documentation with
+                       `help(otter.Otter.query)`.
         Return:
            The metadata for the transients that match the arguments. Will be an astropy
            Table by default, if raw=True will be a dictionary.
@@ -146,9 +161,9 @@ class Otter(Database):
         unit conversion for you!
         Args:
-            flux_units (astropy.unit.Unit): Either a valid string to convert
+            flux_unit (astropy.unit.Unit): Either a valid string to convert
                                             or an astropy.unit.Unit
-            date_units (astropy.unit.Unit): Either a valid string to convert to a date
+            date_unit (astropy.unit.Unit): Either a valid string to convert to a date
                                             or an astropy.unit.Unit
             return_type (str): Either 'astropy' or 'pandas'. If astropy, returns an
                                astropy Table. If pandas, returns a pandas DataFrame.
@@ -159,6 +174,8 @@ class Otter(Database):
             keep_raw (bool): If True, keep the raw flux/date/freq/wave associated with
                              the dataset. Else, just keep the converted data. Default
                              is False.
+            wave_unit (str): The astropy wavelength unit to return with
+            freq_unit (str): The astropy frequency unit to return with`
             **kwargs : Arguments to pass to Otter.query(). Can be::
                        names (list[str]): A list of names to get the metadata for
@@ -181,6 +198,9 @@ class Otter(Database):
             FailedQueryError: When the query returns no results
             IOError: if one of your inputs is incorrect
         """
+        warn_filt = _DuplicateFilter()
+        logger.addFilter(warn_filt)
         queryres = self.query(hasphot=True, **kwargs)
         dicts = []
@@ -239,6 +259,7 @@ class Otter(Database):
             else:
                 fullphot = fullphot[keys_to_keep]
+        logger.removeFilter(warn_filt)
         if return_type == "astropy":
             return Table.from_pandas(fullphot)
         elif return_type == "pandas":
@@ -252,6 +273,9 @@ class Otter(Database):
         Args:
             filename (str): The path to the OTTER JSON file to load
+        Returns:
+            dictionary with the otter JSON file contents
         """
         # read in files from summary
@@ -270,6 +294,8 @@ class Otter(Database):
         refs: list[str] = None,
         hasphot: bool = False,
         hasspec: bool = False,
+        spec_classed: bool = False,
+        unambiguous: bool = False,
         classification: str = None,
         class_confidence_threshold: float = 0,
         query_private=False,
@@ -280,7 +306,7 @@ class Otter(Database):
         WARNING! This does not do any conversions for you!
         This is how it differs from the `get_meta` method. Users should prefer to use
-        `get_meta`, `getPhot`, and `getSpec` independently because it is a better
+        `get_meta`, and `get_phot` independently because it is a better
         workflow and can return the data in an astropy table with everything in the
         same units.
@@ -293,10 +319,17 @@ class Otter(Database):
             refs (list[str]): A list of ads bibcodes to match to. Will only return
                               metadata for transients that have this as a reference.
             hasphot (bool): if True, only returns transients which have photometry.
-            hasspec (bool): if True, only return transients that have spectra.
+            hasspec (bool): NOT IMPLEMENTED! Will return False for all targets!
+            spec_classed (bool): If True, only returns transients that have been
+                                 specotroscopically classified/confirmed
+            unambiguous (bool): If True, only returns transients that only have a single
+                                published classification in OTTER. If classifications
+                                disagree for a transient, it will be filtered out.
             classification (str): A classification string to search for
             class_confidence_threshold (float): classification confidence cutoff for
                                                 query, between 0 and 1. Default is 0.
+            query_private (bool): Set to True if you would like to also query the
+                                  dataset located at whatever you set datadir to
         Return:
            Get all of the raw (unconverted!) data for objects that match the criteria.
@@ -310,9 +343,15 @@ class Otter(Database):
         if hasspec is True:
             query_filters += "FILTER 'spectra' IN ATTRIBUTES(transient)\n"
+        if spec_classed:
+            query_filters += "FILTER transient.classification.spec_classed >= 1"
+        if unambiguous:
+            query_filters += "FILTER transient.classification.unambiguous"
         if classification is not None:
             query_filters += f"""
-            FOR subdoc IN transient.classification
+            FOR subdoc IN transient.classification.value
                 FILTER subdoc.confidence > TO_NUMBER({class_confidence_threshold})
                 FILTER subdoc.object_class LIKE '%{classification}%'
             """
@@ -573,7 +612,7 @@ class Otter(Database):
     def upload(self, json_data, collection="vetting", testing=False) -> Document:
         """
-        Upload json_data to collection
+        Upload json_data to collection WITHOUT deduplication!
         Args:
             json_data [dict] : A dictionary of the json data to upload to Otter
@@ -793,7 +832,7 @@ class Otter(Database):
     def from_csvs(
         metafile: str,
         photfile: str = None,
-        local_outpath: str = "private_otter_data",
+        local_outpath: Optional[str] = None,
         db: Otter = None,
     ) -> Otter:
         """
@@ -815,9 +854,37 @@ class Otter(Database):
         """
         # read in the metadata and photometry file
         meta = pd.read_csv(metafile)
+        meta.columns = meta.columns.str.strip()  # clean up the col names
         phot = None
+        required_phot_cols = [
+            "name",
+            "date",
+            "date_format",
+            "filter",
+            "filter_eff",
+            "filter_eff_units",
+            "flux",
+            "flux_err",
+            "flux_unit",
+        ]
         if photfile is not None:
-            phot = pd.read_csv(photfile)
+            phot_unclean = pd.read_csv(photfile)
+            phot_unclean.columns = phot_unclean.columns.str.strip()  # cleanup colnames
+            phot = phot_unclean.dropna(subset=required_phot_cols)
+            if len(phot) != len(phot_unclean):
+                logger.warning("""
+                Filtered out rows with nan in the photometry file! Make sure you
+                expect this behaviour!
+                """)
+            if "bibcode" not in phot:
+                phot["bibcode"] = "private"
+                logger.warning("""
+                Setting the bibcode column to the special keyword 'private'!
+                """)
             # we need to generate columns of wave_eff and freq_eff
             wave_eff = []
@@ -842,7 +909,7 @@ class Otter(Database):
             phot["band_eff_freq_unit"] = str(freq_eff_unit)
         if not os.path.exists(local_outpath):
-            os.mkdir(local_outpath)
+            os.makedirs(local_outpath)
         # drop duplicated names in meta and keep the first
         meta = meta.drop_duplicates(subset="name", keep="first")
@@ -883,7 +950,7 @@ class Otter(Database):
                     ra_units=tde.ra_unit[0],
                     dec_units=tde.dec_unit[0],
                     reference=[tde.coord_bibcode[0]],
-                    coordinate_type="equitorial",
+                    coordinate_type="equatorial",
                 )
             ]
@@ -932,13 +999,18 @@ class Otter(Database):
             ### Classification information that is in the csvs
             # classification
             if "classification" in tde:
-                json["classification"] = [
-                    dict(
-                        object_class=tde.classification[0],
-                        confidence=1,  # we know this is at least an tde
-                        reference=[tde.classification_bibcode[0]],
-                    )
-                ]
+                class_flag = 0
+                if "classification_flag" in tde:
+                    class_flag = tde.classification_flag[0]
+                json["classification"] = dict(
+                    value=[
+                        dict(
+                            object_class=tde.classification[0],
+                            confidence=class_flag,
+                            reference=[tde.classification_bibcode[0]],
+                        )
+                    ]
+                )
             # discovery date
             # print(tde)
@@ -1236,7 +1308,7 @@ class Otter(Database):
         if db is None:
             db = Otter(datadir=local_outpath)
         else:
-            db.datadir = local_outpath
+            db.DATADIR = local_outpath
         # always save this document as a new one
         db.save(all_jsons)

otter/io/transient.py CHANGED Viewed

@@ -24,11 +24,9 @@ from ..exceptions import (
     OtterLimitationError,
     TransientMergeError,
 )
-from ..util import XRAY_AREAS
+from ..util import XRAY_AREAS, _KNOWN_CLASS_ROOTS, _DuplicateFilter
 from .host import Host
-warnings.simplefilter("once", RuntimeWarning)
-warnings.simplefilter("once", UserWarning)
 np.seterr(divide="ignore")
 logger = logging.getLogger(__name__)
@@ -289,7 +287,7 @@ class Transient(MutableMapping):
                     raise TransientMergeError(f"{key} was not expected! Can not merge!")
                 else:
                     # Throw a warning and only keep the old stuff
-                    warnings.warn(
+                    logger.warning(
                         f"{key} was not expected! Only keeping the old information!"
                     )
                     out[key] = deepcopy(self[key])
@@ -323,17 +321,17 @@ class Transient(MutableMapping):
         else:
             # run some checks
             if "photometry" in keys:
-                warnings.warn("Not returing the photometry!")
+                logger.warning("Not returing the photometry!")
                 _ = keys.pop("photometry")
             if "spectra" in keys:
-                warnings.warn("Not returning the spectra!")
+                logger.warning("Not returning the spectra!")
                 _ = keys.pop("spectra")
             curr_keys = self.keys()
             for key in keys:
                 if key not in curr_keys:
                     keys.remove(key)
-                    warnings.warn(
+                    logger.warning(
                         f"Not returning {key} because it is not in this transient!"
                     )
@@ -352,7 +350,7 @@ class Transient(MutableMapping):
         """
         # now we can generate the SkyCoord
-        f = "df['coordinate_type'] == 'equitorial'"
+        f = "df['coordinate_type'] == 'equatorial'"
         coord_dict = self._get_default("coordinate", filt=f)
         coordin = self._reformat_coordinate(coord_dict)
         coord = SkyCoord(**coordin).transform_to(coord_format)
@@ -407,7 +405,7 @@ class Transient(MutableMapping):
             and a list of the bibcodes corresponding to that classification. Or, None
             if there is no classification.
         """
-        default = self._get_default("classification")
+        default = self._get_default("classification/value")
         if default is None:
             return default
         return default.object_class, default.confidence, default.reference
@@ -421,7 +419,7 @@ class Transient(MutableMapping):
         The BLAST result will always be the last value in the returned list.
         Args:
-            max_hosts [int] : The maximum number of hosts to return
+            max_hosts [int] : The maximum number of hosts to return, default is 3
             **kwargs : keyword arguments to be passed to getGHOST
         Returns:
@@ -437,7 +435,7 @@ class Transient(MutableMapping):
         # then try BLAST
         if search:
-            logger.warn(
+            logger.warning(
                 "Trying to find a host with BLAST/astro-ghost. Note\
                  that this won't work for older targets! See https://blast.scimma.org"
             )
@@ -489,7 +487,7 @@ class Transient(MutableMapping):
         """
         coordin = None
         if "ra" in item and "dec" in item:
-            # this is an equitorial coordinate
+            # this is an equatorial coordinate
             coordin = {
                 "ra": item["ra"],
                 "dec": item["dec"],
@@ -511,7 +509,6 @@ class Transient(MutableMapping):
         date_unit: u.Unit = "MJD",
         freq_unit: u.Unit = "GHz",
         wave_unit: u.Unit = "nm",
-        by: str = "raw",
         obs_type: str = None,
     ) -> pd.DataFrame:
         """
@@ -529,10 +526,6 @@ class Transient(MutableMapping):
             wave_unit (astropy.unit.Unit): The astropy unit or string representation of
                                            an astropy unit to convert and return the
                                            wavelength as.
-            by (str): Either 'raw' or 'value'. 'raw' is the default and is highly
-                      recommended! If 'value' is used it may skip some photometry.
-                      See the schema definition to understand this keyword completely
-                      before using it.
             obs_type (str): "radio", "xray", or "uvoir". If provided, it only returns
                             data taken within that range of wavelengths/frequencies.
                             Default is None which will return all of the data.
@@ -540,14 +533,17 @@ class Transient(MutableMapping):
         Returns:
             A pandas DataFrame of the cleaned up photometry in the requested units
         """
+        warn_filt = _DuplicateFilter()
+        logger.addFilter(warn_filt)
         # these imports need to be here for some reason
         # otherwise the code breaks
         from synphot.units import VEGAMAG, convert_flux
         from synphot.spectrum import SourceSpectrum
-        # check inputs
-        if by not in {"value", "raw"}:
-            raise IOError("Please choose either value or raw!")
+        # variable so this warning only displays a single time each time this
+        # function is called
+        source_map_warning = True
         # turn the photometry key into a pandas dataframe
         if "photometry" not in self:
@@ -594,12 +590,82 @@ class Transient(MutableMapping):
         # merge the photometry with the filter information
         df = c.merge(filters, on="filter_key")
-        # make sure 'by' is in df
-        if by not in df:
-            if by == "value":
-                by = "raw"
-            else:
-                by = "value"
+        # drop irrelevant obs_types before continuing
+        if obs_type is not None:
+            valid_obs_types = {"radio", "uvoir", "xray"}
+            if obs_type not in valid_obs_types:
+                raise IOError("Please provide a valid obs_type")
+            df = df[df.obs_type == obs_type]
+        # add some mockup columns if they don't exist
+        if "value" not in df:
+            df["value"] = np.nan
+            df["value_err"] = np.nan
+            df["value_units"] = "NaN"
+        # fix some bad units that are old and no longer recognized by astropy
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore")
+            df.raw_units = df.raw_units.str.replace("ergs", "erg")
+            df.raw_units = ["mag(AB)" if uu == "AB" else uu for uu in df.raw_units]
+            df.value_units = df.value_units.str.replace("ergs", "erg")
+            df.value_units = ["mag(AB)" if uu == "AB" else uu for uu in df.value_units]
+        # merge the raw and value keywords based on the requested flux_units
+        # first take everything that just has `raw` and not `value`
+        df_raw_only = df[df.value.isna()]
+        remaining = df[df.value.notna()]
+        if len(remaining) == 0:
+            df_raw = df_raw_only
+            df_value = []  # this tricks the code later
+        else:
+            # then take the remaining rows and figure out if we want the raw or value
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore")
+                flux_unit_astropy = u.Unit(flux_unit)
+                val_unit_filt = np.array(
+                    [
+                        u.Unit(uu).is_equivalent(flux_unit_astropy)
+                        for uu in remaining.value_units
+                    ]
+                )
+            df_value = remaining[val_unit_filt]
+            df_raw_and_value = remaining[~val_unit_filt]
+            # then merge the raw dataframes
+            df_raw = pd.concat([df_raw_only, df_raw_and_value], axis=0)
+        # then add columns to these dataframes to convert stuff later
+        df_raw = df_raw.assign(
+            _flux=df_raw["raw"].values,
+            _flux_units=df_raw["raw_units"].values,
+            _flux_err=(
+                df_raw["raw_err"].values
+                if "raw_err" in df_raw
+                else [np.nan] * len(df_raw)
+            ),
+        )
+        if len(df_value) == 0:
+            df = df_raw
+        else:
+            df_value = df_value.assign(
+                _flux=df_value["value"].values,
+                _flux_units=df_value["value_units"].values,
+                _flux_err=(
+                    df_value["value_err"].values
+                    if "value_err" in df_value
+                    else [np.nan] * len(df_value)
+                ),
+            )
+            # then merge df_value and df_raw back into one df
+            df = pd.concat([df_raw, df_value], axis=0)
+        # then, for the rest of the code to work, set the "by" variables to _flux
+        by = "_flux"
         # skip rows where 'by' is nan
         df = df[df[by].notna()]
@@ -612,12 +678,21 @@ class Transient(MutableMapping):
         # the TDE lightcurves for this systematic effect. "
         df = df[df[by].astype(float) > 0]
-        # drop irrelevant obs_types before continuing
-        if obs_type is not None:
-            valid_obs_types = {"radio", "uvoir", "xray"}
-            if obs_type not in valid_obs_types:
-                raise IOError("Please provide a valid obs_type")
-            df = df[df.obs_type == obs_type]
+        # filter out anything that has _flux_units == "ct" because we can't convert that
+        try:
+            # this is a test case to see if we can convert ct -> flux_unit
+            convert_flux(
+                [1 * u.nm, 2 * u.nm], 1 * u.ct, u.Unit(flux_unit), area=1 * u.m**2
+            )
+        except u.UnitsError:
+            bad_units = df[df._flux_units == "ct"]
+            if len(bad_units) > 0:
+                logger.warning(
+                    f"""Removing {len(bad_units)} photometry points from
+                    {self.default_name} because we can't convert them from ct ->
+                    {flux_unit}"""
+                )
+            df = df[df._flux_units != "ct"]
         # convert the ads bibcodes to a string of human readable sources here
         def mappedrefs(row):
@@ -629,7 +704,10 @@ class Transient(MutableMapping):
         try:
             df["human_readable_refs"] = df.apply(mappedrefs, axis=1)
         except Exception as exc:
-            warnings.warn(f"Unable to apply the source mapping because {exc}")
+            if source_map_warning:
+                source_map_warning = False
+                logger.warning(f"Unable to apply the source mapping because {exc}")
             df["human_readable_refs"] = df.reference
         # Figure out what columns are good to groupby in the photometry
@@ -662,8 +740,16 @@ class Transient(MutableMapping):
             try:
                 if isvegamag:
                     astropy_units = VEGAMAG
+                elif unit == "AB":
+                    # In astropy "AB" is a magnitude SYSTEM not unit and while
+                    # u.Unit("AB") will succeed without error, it will not produce
+                    # the expected result!
+                    # We can assume here that this unit really means astropy's "mag(AB)"
+                    astropy_units = u.Unit("mag(AB)")
                 else:
-                    astropy_units = u.Unit(unit)
+                    with warnings.catch_warnings():
+                        warnings.simplefilter("ignore")
+                        astropy_units = u.Unit(unit)
             except ValueError:
                 # this means there is something likely slightly off in the input unit
@@ -688,10 +774,12 @@ class Transient(MutableMapping):
                 indata_err = np.zeros(len(data))
             # convert to an astropy quantity
-            q = indata * u.Unit(astropy_units)
-            q_err = indata_err * u.Unit(
-                astropy_units
-            )  # assume error and values have the same unit
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore")
+                q = indata * u.Unit(astropy_units)
+                q_err = indata_err * u.Unit(
+                    astropy_units
+                )  # assume error and values have the same unit
             # get and save the effective wavelength
             # because of cleaning we did to the filter dataframe above wave_eff
@@ -700,8 +788,10 @@ class Transient(MutableMapping):
                 raise ValueError("Flushing out the effective wavelength array failed!")
             zz = zip(data["wave_eff"], data["wave_units"])
-            wave_eff = u.Quantity([vv * u.Unit(uu) for vv, uu in zz], wave_unit)
-            freq_eff = wave_eff.to(freq_unit, equivalencies=u.spectral())
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore")
+                wave_eff = u.Quantity([vv * u.Unit(uu) for vv, uu in zz], wave_unit)
+                freq_eff = wave_eff.to(freq_unit, equivalencies=u.spectral())
             data["converted_wave"] = wave_eff.value
             data["converted_wave_unit"] = wave_unit
@@ -727,10 +817,12 @@ class Transient(MutableMapping):
                 # we also need to make this wave_min and wave_max
                 # instead of just the effective wavelength like for radio and uvoir
                 zz = zip(data["wave_min"], data["wave_max"], data["wave_units"])
-                wave_eff = u.Quantity(
-                    [np.array([m, M]) * u.Unit(uu) for m, M, uu in zz],
-                    u.Unit(wave_unit),
-                )
+                with warnings.catch_warnings():
+                    warnings.filterwarnings("ignore")
+                    wave_eff = u.Quantity(
+                        [np.array([m, M]) * u.Unit(uu) for m, M, uu in zz],
+                        u.Unit(wave_unit),
+                    )
             else:
                 area = None
@@ -744,13 +836,15 @@ class Transient(MutableMapping):
                 flux, flux_err = [], []
                 for wave, xray_point, xray_point_err in zip(wave_eff, q, q_err):
-                    f_val = convert_flux(
-                        wave,
-                        xray_point,
-                        u.Unit(flux_unit),
-                        vegaspec=SourceSpectrum.from_vega(),
-                        area=area,
-                    ).value
+                    with warnings.catch_warnings():
+                        warnings.filterwarnings("ignore")
+                        f_val = convert_flux(
+                            wave,
+                            xray_point,
+                            u.Unit(flux_unit),
+                            vegaspec=SourceSpectrum.from_vega(),
+                            area=area,
+                        ).value
                     # approximate the uncertainty as dX = dY/Y * X
                     f_err = np.multiply(
@@ -764,7 +858,9 @@ class Transient(MutableMapping):
             else:
                 # this will be faster and cover most cases
-                flux = convert_flux(wave_eff, q, u.Unit(flux_unit)).value
+                with warnings.catch_warnings():
+                    warnings.filterwarnings("ignore")
+                    flux = convert_flux(wave_eff, q, u.Unit(flux_unit)).value
                 # since the error propagation is different between logarithmic units
                 # and linear units, unfortunately
@@ -806,20 +902,21 @@ class Transient(MutableMapping):
             # magnitude upperlimits are independent of the actual measurement!)
             # sigma_m > (1/3) * (ln(10)/2.5)
             def is_upperlimit(row):
-                if pd.isna(row.upperlimit):
+                """
+                Decide whether one photometry row is an upper limit. An explicit,
+                non-missing "upperlimit" value is returned unchanged; otherwise the
+                row is flagged when converted_flux_err exceeds ln(10) / (3 * 2.5).
+                """
+                # a row with no "upperlimit" entry is handled like one whose flag
+                # is NaN; falling through to the else branch would raise
+                # AttributeError on row.upperlimit
+                if "upperlimit" not in row or pd.isna(row.upperlimit):
                     return row.converted_flux_err > np.log(10) / (3 * 2.5)
                 else:
                     return row.upperlimit
         else:
             def is_upperlimit(row):
-                if pd.isna(row.upperlimit):
+                """
+                Decide whether one photometry row is an upper limit. An explicit,
+                non-missing "upperlimit" value is returned unchanged; otherwise the
+                row is flagged when converted_flux is below 3 * converted_flux_err.
+                """
+                # a row with no "upperlimit" entry is handled like one whose flag
+                # is NaN; falling through to the else branch would raise
+                # AttributeError on row.upperlimit
+                if "upperlimit" not in row or pd.isna(row.upperlimit):
                     return row.converted_flux < 3 * row.converted_flux_err
                 else:
                     return row.upperlimit
         outdata["upperlimit"] = outdata.apply(is_upperlimit, axis=1)
+        logger.removeFilter(warn_filt)
         return outdata
     def _merge_names(t1, t2, out):  # noqa: N805
@@ -871,7 +968,7 @@ class Transient(MutableMapping):
             elif score2 > score1:
                 out[key]["default_name"] = t2[key]["default_name"]
             else:
-                warnings.warn(
+                logger.warning(
                     "Names have the same score! Just using the existing default_name"
                 )
                 out[key]["default_name"] = t1[key]["default_name"]
@@ -986,36 +1083,108 @@ class Transient(MutableMapping):
         Combine the classification attribute
         """
         key = "classification"
+        subkey = "value"
         out[key] = deepcopy(t1[key])
-        classes = np.array([item["object_class"] for item in out[key]])
-        for item in t2[key]:
+        classes = np.array([item["object_class"] for item in out[key][subkey]])
+        for item in t2[key][subkey]:
             if item["object_class"] in classes:
                 i = np.where(item["object_class"] == classes)[0][0]
-                if int(item["confidence"]) > int(out[key][i]["confidence"]):
-                    out[key][i]["confidence"] = item[
+                if int(item["confidence"]) > int(out[key][subkey][i]["confidence"]):
+                    out[key][subkey][i]["confidence"] = item[
                         "confidence"
                     ]  # we are now more confident
-                if not isinstance(out[key][i]["reference"], list):
-                    out[key][i]["reference"] = [out[key][i]["reference"]]
+                if not isinstance(out[key][subkey][i]["reference"], list):
+                    out[key][subkey][i]["reference"] = [
+                        out[key][subkey][i]["reference"]
+                    ]
                 if not isinstance(item["reference"], list):
                     item["reference"] = [item["reference"]]
-                newdata = list(np.unique(out[key][i]["reference"] + item["reference"]))
-                out[key][i]["reference"] = newdata
+                newdata = list(
+                    np.unique(out[key][subkey][i]["reference"] + item["reference"])
+                )
+                out[key][subkey][i]["reference"] = newdata
             else:
-                out[key].append(item)
+                out[key][subkey].append(item)
         # now that we have all of them we need to figure out which one is the default
-        maxconf = max(out[key], key=lambda d: d["confidence"])
-        for item in out[key]:
+        maxconf = max(out[key][subkey], key=lambda d: d["confidence"])
+        for item in out[key][subkey]:
             if item == maxconf:
                 item["default"] = True
             else:
                 item["default"] = False
+        # then rederive the classification flags
+        out = Transient._derive_classification_flags(out)
+    @classmethod
+    def _derive_classification_flags(cls, out):
+        """
+        Recompute the summary flags kept next to a transient's classifications and
+        store them in out["classification"]:
+        - spec_classed: 3 if any confidence is 3 or 3.3, else 2 if any is 3.2,
+          else 1 if any is 3.1 (only when some confidence is >= 3); 1 if no
+          confidence is >= 3 but one equals 2; 0 in every other case
+        - unambiguous: whether all classifications share one fuzzy class root,
+          evaluated only when some confidence is >= 3 or (failing that and with
+          no confidence equal to 2) some confidence equals 1; False otherwise
+        If out has no "classification" entry, or that entry has no "value" key,
+        out is returned untouched. A ValueError is raised when a confidence is
+        >= 3 but none of them is one of 3, 3.1, 3.2 or 3.3.
+        """
+        has_values = "classification" in out and "value" in out["classification"]
+        if not has_values:
+            # nothing has been classified yet, so there is nothing to derive
+            return out
+        entries = out["classification"]["value"]
+        confs = np.array([entry["confidence"] for entry in entries]).astype(float)
+        roots = np.array([_fuzzy_class_root(entry["object_class"]) for entry in entries])
+        def _single_root():
+            # True when every classification collapses onto the same root
+            return len(np.unique(roots)) == 1
+        if np.any(confs >= 3):
+            unambiguous = _single_root()
+            # (confidence flags, grade) pairs, checked from gold down to bronze
+            spectral_tiers = (((3, 3.3), 3), ((3.2,), 2), ((3.1,), 1))
+            for flags, grade in spectral_tiers:
+                if any(np.any(confs == flag) for flag in flags):
+                    spec_classed = grade
+                    break
+            else:
+                raise ValueError("Not prepared for this confidence flag!")
+        elif np.any(confs == 2):
+            # confidence 2 is graded 1 and is never considered unambiguous
+            spec_classed, unambiguous = 1, False
+        elif np.any(confs == 1):
+            spec_classed, unambiguous = 0, _single_root()
+        else:
+            spec_classed, unambiguous = 0, False
+        # write the derived flags back into the classification dict
+        out["classification"]["spec_classed"] = spec_classed
+        out["classification"]["unambiguous"] = unambiguous
+        return out
     @staticmethod
     def _merge_arbitrary(key, t1, t2, out, merge_subkeys=None, groupby_key=None):
         """
@@ -1103,3 +1272,21 @@ class Transient(MutableMapping):
             outdict_cleaned = [{**x[i]} for i, x in outdict.stack().groupby(level=0)]
             out[key] = outdict_cleaned
+def _fuzzy_class_root(s):
+    """
+    Reduce the classification string s to its coarse "root" class name.
+    The upper-cased string is prefix-matched against _KNOWN_CLASS_ROOTS and the
+    first matching root is returned, except that an "SN" match whose remainder
+    contains "IA" or "1A" is reported as "SN Ia". When no root matches, the
+    upper-cased string itself is returned.
+    """
+    upper = s.upper()
+    matched = next((r for r in _KNOWN_CLASS_ROOTS if upper.startswith(r)), None)
+    if matched is None:
+        return upper
+    if matched != "SN":
+        return matched
+    # Type Ia supernovae get their own root so they can be told apart from
+    # SN II and the like; every other SN subtype collapses into plain "SN"
+    suffix = upper[len(matched) :]
+    is_ia = "IA" in suffix or "1A" in suffix
+    return "SN Ia" if is_ia else matched

otter/schema.py CHANGED Viewed

@@ -135,6 +135,12 @@ class ClassificationSchema(BaseModel):
     class_type: str = None
+class ClassificationDictSchema(BaseModel):
+    """
+    Dictionary-style container for a transient's classifications: the list of
+    individual classification entries plus two optional summary flags.
+    """
+    # optional integer flag, None when unset; Transient._derive_classification_flags
+    # (otter/io/transient.py) stores a value between 0 and 3 under this key
+    spec_classed: Optional[int] = None
+    # optional boolean flag, None when unset; written by the same routine
+    unambiguous: Optional[bool] = None
+    # required list of the individual classification entries
+    value: list[ClassificationSchema]
 class ReferenceSchema(BaseModel):
     name: str
     human_readable_name: str
@@ -283,7 +289,7 @@ class OtterSchema(BaseModel):
     name: NameSchema
     coordinate: list[CoordinateSchema]
     distance: Optional[list[DistanceSchema]] = None
-    classification: Optional[list[ClassificationSchema]] = None
+    classification: Optional[ClassificationDictSchema] = None
     reference_alias: list[ReferenceSchema]
     date_reference: Optional[list[DateSchema]] = None
     photometry: Optional[list[PhotometrySchema]] = None

otter/util.py CHANGED Viewed

@@ -580,6 +580,22 @@ VIZIER_LARGE_CATALOGS = [
 ViZier catalog names that we query for host information in the Host class
 """
+# Upper-case prefixes that count as classification "roots".
+# NOTE(review): presumably the list iterated by _fuzzy_class_root in
+# otter/io/transient.py, which returns the first entry that an upper-cased
+# class string starts with -- confirm the import there.
+_KNOWN_CLASS_ROOTS = [
+    "SN",
+    "SLSN",
+    "TDE",
+    "GRB",
+    "LGRB",
+    "SGRB",
+    "AGN",
+    "FRB",
+    "QSO",
+    "ANT",
+]
+"""
+Classification root names
+"""
 DATADIR = os.path.join(BASEDIR, "data", "base")
 """
 Deprecated database directory that IS NOT always constant anymore
@@ -597,7 +613,7 @@ schema = {
     "name": {"default_name": None, "alias": []},
     "coordinate": [],
     "distance": [],
-    "classification": [],
+    "classification": {"value": []},
     "reference_alias": [],
     "date_reference": [],
     "photometry": [],
@@ -808,3 +824,13 @@ subschema = {
 """
 A useful variable to describe all of the subschemas that are available and can be used
 """
+class _DuplicateFilter:
+    """
+    Logging filter that lets each distinct record message through only once.
+    Records are compared on their raw ``record.msg`` attribute, and every message
+    seen so far is remembered in ``self.msgs``.
+    """
+    def __init__(self):
+        self.msgs = set()
+    def filter(self, record):
+        """Return True the first time record.msg is seen and False afterwards."""
+        message = record.msg
+        if message in self.msgs:
+            return False
+        self.msgs.add(message)
+        return True

astro_otter-0.2.0.dist-info/RECORD DELETED Viewed

@@ -1,18 +0,0 @@
-astro_otter-0.2.0.dist-info/licenses/LICENSE,sha256=s9IPE8A3CAMEaZpDhj4eaorpmfLYGB0mIGphq301PUY,1067
-otter/__init__.py,sha256=pvX-TN7nLVmvKpkDi89Zxe-jMfHNiVMD3zsd_bPEK9Y,535
-otter/_version.py,sha256=Hwps5WC9_lYJO5SVIdnlMYP1MbeluDR9sJvlFgSiQjg,76
-otter/exceptions.py,sha256=3lQF4AXVTfs9VRsVePQoIrXnramsPZbUL5crvf1s9Ng,1702
-otter/schema.py,sha256=eOxlrtp9TTbiENy38ueE2HcYlI-M56g0Ohg7zQeTxjk,10631
-otter/util.py,sha256=xKsNkkxGajML1rZZnR9d5rJV1_z1KeAlIdAs8t0814M,22440
-otter/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-otter/io/data_finder.py,sha256=v3jZCOhvysHUQG1FOwHdeJ-psTT-MsdO_GZalBpMBGc,38218
-otter/io/host.py,sha256=xv_SznZuvMoMVsZLqlcmlOyaqKCMZqlTQ_gkN4VBSTw,7139
-otter/io/otter.py,sha256=75YHnYN6NznFi8O9YzMAM8AJQNXqByUSDkmzEOaFro4,46766
-otter/io/transient.py,sha256=0Z2G9uvv0xl4sJSj43cwFfOj6qkhlmRfBIh4XCYUHVU,41416
-otter/plotter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-otter/plotter/otter_plotter.py,sha256=OQhuLgnMSzgtAjJF8SYBuQOyYcu7Pr0uia5P0G_7z5Q,2144
-otter/plotter/plotter.py,sha256=z09NwQVJS2tuwH3sv95DZv8xogjvf-7Gvj6iWCEx-gQ,9635
-astro_otter-0.2.0.dist-info/METADATA,sha256=uyREDToWRYbZ0hYeQAz3PZKLqdz4pmLQYQVhZWAN36w,7134
-astro_otter-0.2.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-astro_otter-0.2.0.dist-info/top_level.txt,sha256=Wth72sCwBRUk3KZGknSKvLQDMFuJk6qiaAavMDOdG5k,6
-astro_otter-0.2.0.dist-info/RECORD,,

{astro_otter-0.2.0.dist-info → astro_otter-0.3.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{astro_otter-0.2.0.dist-info → astro_otter-0.3.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

astro-otter 0.2.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

Potentially problematic release.

astro-otter 0.2.0__py3-none-any.whl → 0.3.2__py3-none-any.whl