astro-otter 0.3.3-py3-none-any.whl → 0.3.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of astro-otter might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: astro-otter
-Version: 0.3.3
+Version: 0.3.5
 Author-email: Noah Franz <nfranz@arizona.edu>
 License: MIT License
 
@@ -0,0 +1,18 @@
+astro_otter-0.3.5.dist-info/licenses/LICENSE,sha256=s9IPE8A3CAMEaZpDhj4eaorpmfLYGB0mIGphq301PUY,1067
+otter/__init__.py,sha256=pvX-TN7nLVmvKpkDi89Zxe-jMfHNiVMD3zsd_bPEK9Y,535
+otter/_version.py,sha256=5LVB9FiBWDmYEmn6fLO_jW-OA3WAt_uN2xs5Dc4KpNY,76
+otter/exceptions.py,sha256=3lQF4AXVTfs9VRsVePQoIrXnramsPZbUL5crvf1s9Ng,1702
+otter/schema.py,sha256=J-iI_kEEd0aHN_Hr49HFEa8W07enh1FSDbU99NwIz3Y,11240
+otter/util.py,sha256=G5M2PdtPGKpRXCqkItyXMLj6h5F3EboBBDKWMViw04k,23333
+otter/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+otter/io/data_finder.py,sha256=M6kzqMyy-yhwQfxbOs2E7i_8bCcZFW4Zyf8z-6LCxNQ,38425
+otter/io/host.py,sha256=xv_SznZuvMoMVsZLqlcmlOyaqKCMZqlTQ_gkN4VBSTw,7139
+otter/io/otter.py,sha256=qTKTfGkBdmsWjLF1cxjiX-SvOaKOPK-nd0aK7TtPQO8,61076
+otter/io/transient.py,sha256=1yVy-9NEo9ozM4B-sqEcGlJW_1abD0sWSXYG0Ex-3Jc,58020
+otter/plotter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+otter/plotter/otter_plotter.py,sha256=yUjGHR0FcbndwC1yLQekJWqX2KBMAJXtjFKbbASG_Cc,2144
+otter/plotter/plotter.py,sha256=ni4WV63wIjhMHStDmuccltaMHSaFbwwfztYpuMdYAz8,9340
+astro_otter-0.3.5.dist-info/METADATA,sha256=CTru7txtl0-m5zmGInfuJ_qd0upiUjF1eGTU6Eo-ZEk,7046
+astro_otter-0.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+astro_otter-0.3.5.dist-info/top_level.txt,sha256=Wth72sCwBRUk3KZGknSKvLQDMFuJk6qiaAavMDOdG5k,6
+astro_otter-0.3.5.dist-info/RECORD,,
otter/_version.py CHANGED
@@ -2,4 +2,4 @@
 Just define the package version in one place
 """
 
-__version__ = "0.3.3"
+__version__ = "0.3.5"
otter/io/data_finder.py CHANGED
@@ -602,8 +602,16 @@ class DataFinder(object):
         cone_search_res = qc.query(adql=adql, fmt="pandas")
 
         # then retrieve all of the spectra corresponding to those sparcl_ids
-        sparcl_ids = cone_search_res.sparcl_id.tolist()
-        res = client.retrieve(uuid_list=sparcl_ids, include=include)
+        spec_ids = cone_search_res.targetid.tolist()
+        if len(spec_ids) == 0:
+            logger.warn("Object not found in Sparcl!")
+            return
+
+        res = client.retrieve_by_specid(spec_ids, include=include)
+        if res.count == 0:
+            logger.warn("No Spectra available in sparcl!")
+            return
+
         all_spec = pd.concat([pd.DataFrame([record]) for record in res.records])
         return Table.from_pandas(all_spec)
 
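
Note: both new guards make the enclosing method return None instead of an astropy
Table when SPARCL has nothing for the object, so downstream callers now need a
None check. A minimal sketch of the calling pattern (the wrapper name `fetch` is
hypothetical; only the hunk above is from the release):

    spectra = fetch(ra, dec)  # the patched method above
    if spectra is None:
        print("no SPARCL spectra for this object")  # a warning was already logged
    else:
        print(spectra)  # an astropy Table built from the retrieved records

Also worth flagging: logger.warn is a deprecated alias for logger.warning in the
standard library.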
otter/io/otter.py CHANGED
@@ -154,6 +154,7 @@ class Otter(Database):
         keep_raw=False,
         wave_unit="nm",
         freq_unit="GHz",
+        deduplicate=None,
         **kwargs,
     ) -> Table:
         """
@@ -176,6 +177,11 @@ class Otter(Database):
                 is False.
             wave_unit (str): The astropy wavelength unit to return with
             freq_unit (str): The astropy frequency unit to return with
+            deduplicate (Callable|None|False): if we should deduplicate the dataset
+                                               using the deduplicate Callable. Set to
+                                               False if you don't want this to happen.
+                                               None defaults to
+                                               Transient.deduplicate_photometry
            **kwargs : Arguments to pass to Otter.query(). Can be::

                names (list[str]): A list of names to get the metadata for
@@ -215,6 +221,7 @@ class Otter(Database):
                 wave_unit=wave_unit,
                 freq_unit=freq_unit,
                 obs_type=obs_type,
+                deduplicate=deduplicate,
             )

             phot["name"] = [default_name] * len(phot)
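
A hedged usage sketch of the new deduplicate option (the Otter constructor
arguments are an assumption; names is one of the documented query kwargs):

    from otter import Otter

    db = Otter()  # connection details omitted
    phot = db.get_phot(names=["AT2018hyz"])                         # None -> default dedup
    phot_raw = db.get_phot(names=["AT2018hyz"], deduplicate=False)  # skip deduplication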
@@ -291,8 +298,13 @@ class Otter(Database):
         radius: float = 5,
         minz: float = None,
         maxz: float = None,
+        mindec: float = -90,
+        maxdec: float = 90,
         refs: list[str] = None,
         hasphot: bool = False,
+        has_radio_phot: bool = False,
+        has_uvoir_phot: bool = False,
+        has_xray_phot: bool = False,
         hasspec: bool = False,
         spec_classed: bool = False,
         unambiguous: bool = False,
@@ -316,9 +328,14 @@ class Otter(Database):
             radius (float): The radius in arcseconds for a cone search, default is 0.05"
             minz (float): The minimum redshift to search for
             maxz (float): The maximum redshift to search for
+            mindec (float): The minimum declination in degrees
+            maxdec (float): The maximum declination in degrees
             refs (list[str]): A list of ads bibcodes to match to. Will only return
                 metadata for transients that have this as a reference.
             hasphot (bool): if True, only returns transients which have photometry.
+            has_radio_phot (bool): if True, only returns transients with radio phot.
+            has_uvoir_phot (bool): if True, only returns transients with uvoir phot.
+            has_xray_phot (bool): if True, only returns transients with X-ray phot.
             hasspec (bool): NOT IMPLEMENTED! Will return False for all targets!
             spec_classed (bool): If True, only returns transients that have been
                 spectroscopically classified/confirmed
@@ -337,9 +354,18 @@ class Otter(Database):
         # write some AQL filters based on the inputs
         query_filters = ""

-        if hasphot is True:
+        if hasphot or has_radio_phot or has_xray_phot or has_uvoir_phot:
             query_filters += "FILTER 'photometry' IN ATTRIBUTES(transient)\n"

+        if has_radio_phot:
+            query_filters += "FILTER 'radio' IN transient.photometry[*].obs_type\n"
+
+        if has_uvoir_phot:
+            query_filters += "FILTER 'uvoir' IN transient.photometry[*].obs_type\n"
+
+        if has_xray_phot:
+            query_filters += "FILTER 'xray' IN transient.photometry[*].obs_type\n"
+
         if hasspec is True:
             query_filters += "FILTER 'spectra' IN ATTRIBUTES(transient)\n"

@@ -451,6 +477,11 @@ class Otter(Database):
         else:
             arango_query_results = [Transient(res) for res in result.result]

+        # filter based on the min and max declination query options
+        decs = np.array([t.get_skycoord().dec.deg for t in arango_query_results])
+        where_dec = np.where((decs > mindec) * (decs < maxdec))[0]
+        arango_query_results = [arango_query_results[i] for i in where_dec]
+
         if not query_private:
             return arango_query_results
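
A hedged sketch combining the new query filters (db as above):

    # transients with radio photometry, north of the equator
    results = db.query(has_radio_phot=True, mindec=0)

Note that the declination cut is applied in Python after the AQL query returns,
not inside the database query itself.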
@@ -665,7 +696,10 @@ class Otter(Database):
         if len(res) > 1:
             raise OtterLimitationError("Some objects in Otter are too close!")

-        elif len(res) == 1:
+        elif len(res) == 1 and collection != "vetting":
+            # if the collection is the vetting collection we don't want to do the
+            # merging yet, even if the object already exists in OTTER
+
             # this object exists in otter already, let's grab the transient data and
             # merge the files
             merged = t + res[0]
@@ -674,13 +708,6 @@ class Otter(Database):
             merged["_key"] = res[0]["_key"]
             merged["_id"] = res[0]["_id"]

-            # we also have to delete the document from the OTTER database
-            doc = self.fetchDocument(merged["_id"])
-            if not testing:
-                doc.delete()
-            else:
-                print(f"Would delete\n{doc}")
-
         else:
             # this means the object doesn't exist in otter already
             merged = t
@@ -832,7 +859,7 @@ class Otter(Database):
     def from_csvs(
         metafile: str,
         photfile: str = None,
-        local_outpath: Optional[str] = None,
+        local_outpath: Optional[str] = os.path.join(os.getcwd(), "private-data"),
        db: Otter = None,
    ) -> Otter:
        """
@@ -973,11 +1000,13 @@ class Otter(Database):
                pd.isna(tde["luminosity_distance"])
            ):
                json["distance"].append(
-                    value=tde.luminosity_distance[0],
-                    reference=[tde.luminosity_distance_bibcode[0]],
-                    unit=tde.luminosity_distance_unit[0],
-                    computed=False,
-                    distance_type="luminosity",
+                    dict(
+                        value=tde.luminosity_distance[0],
+                        reference=[tde.luminosity_distance_bibcode[0]],
+                        unit=tde.luminosity_distance_unit[0],
+                        computed=False,
+                        distance_type="luminosity",
+                    )
                )

            # comoving distance
@@ -985,11 +1014,13 @@ class Otter(Database):
                pd.isna(tde["comoving_distance"])
            ):
                json["distance"].append(
-                    value=tde.comoving_distance[0],
-                    reference=[tde.comoving_distance_bibcode[0]],
-                    unit=tde.comoving_distance_unit[0],
-                    computed=False,
-                    distance_type="comoving",
+                    dict(
+                        value=tde.comoving_distance[0],
+                        reference=[tde.comoving_distance_bibcode[0]],
+                        unit=tde.comoving_distance_unit[0],
+                        computed=False,
+                        distance_type="comoving",
+                    )
                )

            # remove the distance list if it is empty still
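
The two hunks above are a straight bug fix: list.append takes exactly one
positional argument, so the old keyword-argument calls raised TypeError at
runtime. Wrapping the fields in dict(...) appends one mapping per distance.
A minimal illustration (values are made up):

    distances = []
    distances.append(
        dict(value=230.0, reference=["2020ApJ..."], unit="Mpc",
             computed=False, distance_type="luminosity")
    )

Separately, note that the new from_csvs default,
os.path.join(os.getcwd(), "private-data"), is evaluated once at import time, so
local_outpath is pinned to the working directory of the process that first
imports the module, not the directory at call time.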
@@ -1081,11 +1112,6 @@ class Otter(Database):
                    if src not in phot_sources:
                        phot_sources.append(src)

-                if len(np.unique(p.flux_unit)) == 1:
-                    raw_units = p.flux_unit.tolist()[0]
-                else:
-                    raw_units = p.flux_unit.tolist()
-
                # add a column to phot with the unique filter key
                if obstype == "radio":
                    filter_uq_key = (
@@ -1105,17 +1131,73 @@ class Otter(Database):
                if "upperlimit" not in p:
                    p["upperlimit"] = False

-                json_phot = dict(
-                    reference=src,
-                    raw=p.flux.astype(float).tolist(),
-                    raw_err=p.flux_err.astype(float).tolist(),
-                    raw_units=raw_units,
-                    date=p.date.tolist(),
-                    date_format=p.date_format.tolist(),
-                    upperlimit=p.upperlimit.tolist(),
-                    filter_key=filter_uq_key,
-                    obs_type=obstype,
-                )
+                if "raw" in p.columns and "flux" in p.columns:
+                    if len(np.unique(p.raw_unit)) == 1:
+                        raw_units = p.raw_unit.tolist()[0]
+                    else:
+                        raw_units = p.raw_unit.tolist()
+
+                    if len(np.unique(p.flux_unit)) == 1:
+                        val_units = p.flux_unit.tolist()[0]
+                    else:
+                        val_units = p.flux_unit.tolist()
+
+                    # treat "raw" as the "raw" keyword and "flux" as the "value"
+                    json_phot = dict(
+                        reference=src,
+                        raw=p.raw.astype(float).tolist(),
+                        raw_err=p.raw_err.astype(float).tolist(),
+                        raw_units=raw_units,
+                        value=p.flux.astype(float).tolist(),
+                        value_err=p.flux_err.astype(float).tolist(),
+                        value_units=val_units,
+                        date=p.date.tolist(),
+                        date_format=p.date_format.tolist(),
+                        upperlimit=p.upperlimit.tolist(),
+                        filter_key=filter_uq_key,
+                        obs_type=obstype,
+                    )
+
+                elif "flux" in p.columns and "raw" not in p.columns:
+                    if len(np.unique(p.flux_unit)) == 1:
+                        raw_units = p.flux_unit.tolist()[0]
+                    else:
+                        raw_units = p.flux_unit.tolist()
+
+                    # treat "flux" as the "raw" keyword
+                    json_phot = dict(
+                        reference=src,
+                        raw=p.flux.astype(float).tolist(),
+                        raw_err=p.flux_err.astype(float).tolist(),
+                        raw_units=raw_units,
+                        date=p.date.tolist(),
+                        date_format=p.date_format.tolist(),
+                        upperlimit=p.upperlimit.tolist(),
+                        filter_key=filter_uq_key,
+                        obs_type=obstype,
+                    )
+
+                elif "raw" in p.columns and "flux" not in p.columns:
+                    if len(np.unique(p.raw_unit)) == 1:
+                        raw_units = p.raw_unit.tolist()[0]
+                    else:
+                        raw_units = p.raw_unit.tolist()
+
+                    # treat "raw" as the "raw" keyword
+                    json_phot = dict(
+                        reference=src,
+                        raw=p.raw.astype(float).tolist(),
+                        raw_err=p.raw_err.astype(float).tolist(),
+                        raw_units=raw_units,
+                        date=p.date.tolist(),
+                        date_format=p.date_format.tolist(),
+                        upperlimit=p.upperlimit.tolist(),
+                        filter_key=filter_uq_key,
+                        obs_type=obstype,
+                    )
+
+                else:
+                    raise ValueError("`raw` and/or `flux` key(s) must be provided!")

                if not pd.isna(tele):
                    json_phot["telescope"] = tele
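
A hedged summary of the new column handling (column names from the diff; values
illustrative):

    import pandas as pd

    p = pd.DataFrame({
        "raw": [120.0], "raw_err": [10.0], "raw_unit": ["ct"],  # instrument-native
        "flux": [3.4e-13], "flux_err": [2.0e-14], "flux_unit": ["erg/(s cm2)"],
    })
    # raw + flux -> "raw" stored as raw, "flux" stored as value
    # flux only  -> "flux" stored under the raw keyword
    # raw only   -> "raw" stored under the raw keyword
    # neither    -> ValueError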
@@ -1131,6 +1213,13 @@ class Otter(Database):
                        "Minimum and maximum filters required for X-ray data!"
                    )

+                if ("date_min" in p and "date_max" not in p) or (
+                    "date_min" not in p and "date_max" in p
+                ):
+                    raise ValueError(
+                        "If date_min/date_max is provided, the other must be too!"
+                    )
+
                # check optional keys
                optional_keys = [
                    "date_err",
@@ -1142,6 +1231,8 @@ class Otter(Database):
                    "observer",
                    "reducer",
                    "pipeline",
+                    "date_min",
+                    "date_max",
                ]
                for k in optional_keys:
                    if k in p and not np.all(pd.isna(p[k])):
@@ -1154,7 +1245,13 @@ class Otter(Database):

                # handle more detailed uncertainty information
                raw_err_detail = {}
-                for key in ["statistical_err", "systematic_err", "iss_err"]:
+                for key in [
+                    "statistical_err",
+                    "systematic_err",
+                    "iss_err",
+                    "upper_err",
+                    "lower_err",
+                ]:
                    if key in p and not np.all(pd.isna(p[key])):
                        k = key.split("_")[0]

@@ -1183,6 +1280,126 @@ class Otter(Database):
                    json_phot[c] = p[c].tolist()
                    json_phot[bool_v_key] = [v != "null" for v in json_phot[c]]

+                # deal with the xray model
+                if "xray_model_name" not in p and obstype == "xray":
+                    raise ValueError(
+                        "You must provide the xray model for xray data!"
+                    )
+                if obstype == "xray" and "xray_model_name" in p:
+                    # get various sets of keywords
+                    model_val_cols = list(
+                        p.columns[p.columns.str.contains("xray_model_param_value")]
+                    )
+                    model_up_err_cols = list(
+                        p.columns[p.columns.str.contains("xray_model_param_up_err")]
+                    )
+                    model_lo_err_cols = list(
+                        p.columns[p.columns.str.contains("xray_model_param_lo_err")]
+                    )
+                    model_val_units_cols = list(
+                        p.columns[p.columns.str.contains("xray_model_param_unit")]
+                    )
+                    model_uplim_cols = list(
+                        p.columns[
+                            p.columns.str.contains("xray_model_param_upperlimit")
+                        ]
+                    )
+
+                    param_names = [v.split("::")[-1] for v in model_val_cols]
+
+                    xray_model_info = p[
+                        model_val_cols
+                        + model_up_err_cols
+                        + model_lo_err_cols
+                        + model_val_units_cols
+                        + model_uplim_cols
+                        + [
+                            "xray_model_name",
+                            "xray_model_reference",
+                            "filter_min",
+                            "filter_max",
+                            "filter_eff_units",
+                        ]
+                    ]
+
+                    if len(model_uplim_cols) == 0:
+                        # assume they are all false
+                        for param_name in param_names:
+                            colname = f"xray_model_param_upperlimit::{param_name}"
+                            xray_model_info[colname] = False
+                            model_uplim_cols.append(colname)
+
+                    if not all(
+                        len(model_val_cols) == len(p)
+                        for p in [
+                            model_up_err_cols,
+                            model_lo_err_cols,
+                            model_val_units_cols,
+                            model_uplim_cols,
+                        ]
+                    ):
+                        raise ValueError(
+                            "Missing a column for one of the X-ray parameters!"
+                        )
+
+                    xray_models = []
+                    for _, row in xray_model_info.iterrows():
+                        energy1 = (
+                            (row["filter_min"] * u.Unit(row["filter_eff_units"]))
+                            .to("keV", equivalencies=u.spectral())
+                            .value
+                        )
+                        energy2 = (
+                            (row["filter_max"] * u.Unit(row["filter_eff_units"]))
+                            .to("keV", equivalencies=u.spectral())
+                            .value
+                        )
+
+                        if energy1 > energy2:
+                            min_energy = energy2
+                            max_energy = energy1
+                        else:
+                            min_energy = energy1
+                            max_energy = energy2
+
+                        param_names_not_na = []
+                        for n in param_names:
+                            if not pd.isna(row[f"xray_model_param_value::{n}"]):
+                                param_names_not_na.append(n)
+
+                        xray_models.append(
+                            {
+                                "model_name": row.xray_model_name,
+                                "param_names": [n for n in param_names_not_na],
+                                "param_values": [
+                                    row[f"xray_model_param_value::{n}"]
+                                    for n in param_names_not_na
+                                ],
+                                "param_value_upper_err": [
+                                    row[f"xray_model_param_up_err::{n}"]
+                                    for n in param_names_not_na
+                                ],
+                                "param_value_lower_err": [
+                                    row[f"xray_model_param_lo_err::{n}"]
+                                    for n in param_names_not_na
+                                ],
+                                "param_upperlimit": [
+                                    row[f"xray_model_param_upperlimit::{n}"]
+                                    for n in param_names_not_na
+                                ],
+                                "param_units": [
+                                    row[f"xray_model_param_unit::{n}"]
+                                    for n in param_names_not_na
+                                ],
+                                "model_reference": row["xray_model_reference"],
+                                "min_energy": min_energy,
+                                "max_energy": max_energy,
+                                "energy_units": "keV",
+                            }
+                        )
+
+                    json_phot["xray_model"] = xray_models
+
                json["photometry"].append(json_phot)

            tde["filter_uq_key"] = pd.Series(
1191
1408
 
1192
1409
  # filter alias
1193
1410
  # radio filters first
1194
- filter_keys1 = ["filter_uq_key", "band_eff_wave", "band_eff_wave_unit"]
1411
+ filter_keys1 = [
1412
+ "filter_uq_key",
1413
+ "band_eff_wave",
1414
+ "band_eff_wave_unit",
1415
+ "filter_eff_units",
1416
+ ]
1195
1417
  if "filter_min" in tde:
1196
1418
  filter_keys1.append("filter_min")
1197
1419
  if "filter_max" in tde:
1198
1420
  filter_keys1.append("filter_max")
1199
1421
 
1200
- filter_map = (
1201
- tde[filter_keys1].drop_duplicates().set_index("filter_uq_key")
1422
+ filt_info = tde[filter_keys1]
1423
+ filter_map = filt_info.drop_duplicates().set_index(
1424
+ "filter_uq_key"
1202
1425
  ) # .to_dict(orient='index')
1203
1426
  try:
1204
1427
  filter_map_radio = filter_map.to_dict(orient="index")
@@ -1229,22 +1452,24 @@ class Otter(Database):
1229
1452
  wave_units=val["band_eff_wave_unit"],
1230
1453
  )
1231
1454
 
1232
- if "filter_min" in val:
1455
+ if "filter_min" in val and not pd.isna(val["filter_min"]):
1233
1456
  filter_alias_dict["wave_min"] = (
1234
- val["filter_min"] * u.Unit(phot.filter_eff_units)
1235
- ).to(
1236
- u.Unit(
1237
- val["band_eff_wave_unit"], equivalencies=u.spectral()
1457
+ (val["filter_min"] * u.Unit(val["filter_eff_units"]))
1458
+ .to(
1459
+ u.Unit(val["band_eff_wave_unit"]),
1460
+ equivalencies=u.spectral(),
1238
1461
  )
1462
+ .value
1239
1463
  )
1240
1464
 
1241
- if "filter_max" in val:
1465
+ if "filter_max" in val and not pd.isna(val["filter_max"]):
1242
1466
  filter_alias_dict["wave_max"] = (
1243
- val["filter_max"] * u.Unit(phot.filter_eff_units)
1244
- ).to(
1245
- u.Unit(
1246
- val["band_eff_wave_unit"], equivalencies=u.spectral()
1467
+ (val["filter_max"] * u.Unit(val["filter_eff_units"]))
1468
+ .to(
1469
+ u.Unit(val["band_eff_wave_unit"]),
1470
+ equivalencies=u.spectral(),
1247
1471
  )
1472
+ .value
1248
1473
  )
1249
1474
 
1250
1475
  json["filter_alias"].append(filter_alias_dict)
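
The wave_min/wave_max fix does three things: it reads the unit from the filter
table (val["filter_eff_units"]) instead of a stray phot attribute, passes the
equivalencies to .to() rather than to the unit constructor, and stores the bare
number via .value. A minimal astropy illustration of the conversion:

    import astropy.units as u

    # energy <-> wavelength conversions require u.spectral()
    wave_min = (0.3 * u.keV).to(u.nm, equivalencies=u.spectral()).value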
otter/io/transient.py CHANGED
@@ -8,6 +8,7 @@ import warnings
 from copy import deepcopy
 import re
 from collections.abc import MutableMapping
+from typing import Callable
 from typing_extensions import Self
 import logging
 
@@ -70,7 +71,7 @@ class Transient(MutableMapping):
        """

        if isinstance(keys, (list, tuple)):
-            return Transient({key: (self[key] if key in self else []) for key in keys})
+            return Transient({key: self[key] for key in keys if key in self})
        elif isinstance(keys, str) and "/" in keys:  # this is for a path
            s = "']['".join(keys.split("/"))
            s = "['" + s
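
Behavior note: selecting a list of keys now silently drops keys the transient
lacks instead of mapping them to empty lists, so the returned Transient can
have fewer keys than were requested.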
@@ -510,6 +511,7 @@ class Transient(MutableMapping):
        freq_unit: u.Unit = "GHz",
        wave_unit: u.Unit = "nm",
        obs_type: str = None,
+        deduplicate: Callable | None = None,
    ) -> pd.DataFrame:
        """
        Ensure the photometry associated with this transient is all in the same
@@ -529,10 +531,20 @@ class Transient(MutableMapping):
            obs_type (str): "radio", "xray", or "uvoir". If provided, it only returns
                data taken within that range of wavelengths/frequencies.
                Default is None which will return all of the data.
-
+            deduplicate (Callable|None): A function to be used to remove duplicate
+                                         reductions of the same data that produce
+                                         different flux values. The default is the
+                                         otter.deduplicate_photometry method,
+                                         but you can pass
+                                         any callable that takes the output pandas
+                                         dataframe as input. Set this to False if you
+                                         don't want deduplication to occur.
        Returns:
            A pandas DataFrame of the cleaned up photometry in the requested units
        """
+        if deduplicate is None:
+            deduplicate = self.deduplicate_photometry
+
        warn_filt = _DuplicateFilter()
        logger.addFilter(warn_filt)
 
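
Any callable that accepts and returns the photometry DataFrame can be
substituted; a hedged example that simply drops exact duplicates on two columns
(given a Transient instance `transient`):

    phot = transient.clean_photometry(
        deduplicate=lambda df: df.drop_duplicates(subset=["date", "filter_key"]),
    )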
@@ -916,9 +928,203 @@ class Transient(MutableMapping):

        outdata["upperlimit"] = outdata.apply(is_upperlimit, axis=1)

+        # perform some more complex deduplication of the dataset
+        if deduplicate:
+            outdata = deduplicate(outdata)
+
+        # throw a warning if the output dataframe has UV/Optical/IR or Radio data
+        # where we don't know if the dataset has been host corrected or not
+        if ("corr_host" not in outdata) or (
+            len(outdata[pd.isna(outdata.corr_host) * (outdata.obs_type != "xray")]) >= 0
+        ):
+            logger.warning(
+                f"{self.default_name} has at least one photometry point where it is "
+                + "unclear if a host subtraction was performed. This can be especially "
+                + "detrimental for UV data. Please consider filtering out UV/Optical/IR"
+                + " or radio rows where the corr_host column is null/None/NaN."
+            )
+
        logger.removeFilter(warn_filt)
        return outdata

+    @classmethod
+    def deduplicate_photometry(cls, phot: pd.DataFrame, date_tol: int | float = 1):
+        """
+        This deduplicates a pandas dataframe of photometry that could potentially
+        have rows/datasets that are the result of different reductions of the same
+        data. This is especially relevant for X-ray and UV observations where
+        different reductions can produce different flux values from the same
+        observation.
+
+        The algorithm used here first finds duplicates by normalizing the telescope
+        names, then grouping the dataframe by transient name, norm telescope name,
+        filter_key, and the obs_type. It then assumes that data from the same
+        reference will not produce duplicated data. Finally, it finds the overlapping
+        regions within date +/- date_tol (or between date_min and date_max for binned
+        data), and uses any data within that region as duplicated. From there, it
+        first tries to choose the reduction that is host subtracted (if only one is
+        host subtracted), then if neither or more than one of the datasets are host
+        subtracted then it just takes the most recent reduction.
+
+        Args:
+            phot (pd.DataFrame): A pandas dataframe of the photometry with keys
+                defined by the OTTER schema
+            date_tol (int|float): The default tolerance (or "uncertainty") to use
+                on the dates in the "date" column of phot. In days. Defaults to
+                1 day.
+        """
+        # we need to reset the index to keep track of things appropriately
+        phot = phot.reset_index(drop=True)
+
+        if "telescope" not in phot:
+            phot["telescope"] = np.nan
+
+        # we first have to standardize some columns given some basic assumptions
+        phot["_ref_str"] = phot.reference.astype(str)
+
+        # normalize the telescope name so we can group by it
+        phot["_norm_tele_name"] = phot.telescope.apply(cls._normalize_tele_name)
+
+        # now find the duplicated data
+        dups = []
+        phot_grpby = phot.groupby(
+            ["_norm_tele_name", "filter_key", "obs_type"], dropna=False
+        )
+        for (tele, filter_key, obs_type), grp in phot_grpby:
+            # by definition, there can only be dups if the name, telescope, and
+            # filter are the same
+
+            # if there is only one reference in this group of data, there's no way
+            # there are duplicate reductions of the same dataset
+            if len(grp._ref_str.unique()) <= 1:
+                continue
+
+            # the next trick is that the dates don't need to be the same, but need
+            # to fall inside the same range
+            grp["_mean_dates"] = grp.apply(cls._convert_dates, axis=1)
+
+            if "date_min" in grp and not np.all(pd.isna(grp.date_min)):
+                grp["min_dates"] = grp.apply(
+                    lambda row: cls._convert_dates(row, date_key="date_min"), axis=1
+                ).astype(float)
+                grp["max_dates"] = grp.apply(
+                    lambda row: cls._convert_dates(row, date_key="date_max"), axis=1
+                ).astype(float)
+
+                # in case any of the min_date and max_date in the grp are nan
+                grp.fillna(
+                    {
+                        "min_dates": grp._mean_dates - date_tol,
+                        "max_dates": grp._mean_dates + date_tol,
+                    },
+                    inplace=True,
+                )
+
+            elif "date_err" in grp and not np.any(pd.isna(grp.date_err)):
+                grp["min_dates"] = (grp._mean_dates - grp.date_err).astype(float)
+                grp["max_dates"] = (grp._mean_dates + grp.date_err).astype(float)
+            else:
+                # then assume some uncertainty on the date
+                grp["min_dates"] = (grp._mean_dates - date_tol).astype(float)
+                grp["max_dates"] = (grp._mean_dates + date_tol).astype(float)
+
+            ref_ranges = [
+                (subgrp.min_dates.min(), subgrp.max_dates.max())
+                for _, subgrp in grp.groupby("_ref_str")
+            ]
+
+            overlaps = cls._find_overlapping_regions(ref_ranges)
+
+            if len(overlaps) == 0:
+                continue  # then there are no dups
+
+            for min_overlap, max_overlap in overlaps:
+                dup_data = grp[
+                    (grp.min_dates >= min_overlap) * (grp.max_dates <= max_overlap)
+                ]
+
+                if len(dup_data) == 0:
+                    continue  # no data falls in this range!
+
+                dups.append(dup_data)
+
+        # now that we've found the duplicated datasets, we can iterate through them
+        # and choose the "default"
+        phot_res = deepcopy(phot)
+        undupd = []
+        for dup in dups:
+            try:
+                phot_res = phot_res.drop(dup.index)  # we'll append back in the non dup
+            except KeyError:
+                continue  # we already deleted these ones
+
+            # first, check if only one of the dup reductions host subtracted
+            if "corr_host" in dup:
+                dup_host_corr = dup[dup.corr_host.astype(bool)]
+                host_corr_refs = dup_host_corr.human_readable_refs.unique()
+                if len(host_corr_refs) == 1:
+                    # then one of the reductions is host corrected and the other isn't!
+                    undupd.append(dup[dup.human_readable_refs == host_corr_refs[0]])
+                    continue
+
+            bibcodes_sorted_by_year = sorted(dup._ref_str.unique(), key=cls._find_year)
+            dataset_to_use = dup[dup._ref_str == bibcodes_sorted_by_year[0]]
+            undupd.append(dataset_to_use)
+
+        # then return the full photometry dataset but with the dups removed!
+        return pd.concat([phot_res] + undupd).reset_index()
+
+    @staticmethod
+    def _normalize_tele_name(tele_name):
+        if pd.isna(tele_name):
+            return tele_name
+
+        common_delims = ["-", "/", " ", "."]
+        for delim in common_delims:
+            tele_name = tele_name.replace(delim, ":*:")
+
+        # this assumes that the telescope name will almost always be first,
+        # before other delimiters
+        return tele_name.split(":*:")[0].lower()
+
+    @staticmethod
+    def _convert_dates(row, date_key="date"):
+        """Make sure the dates are in MJD"""
+        if pd.isna(row[date_key]):
+            return row[date_key]
+
+        return Time(row[date_key], format=row.date_format.lower()).mjd
+
+    @staticmethod
+    def _find_overlapping_regions(intervals):
+        """Find the overlaps in a list of tuples of mins and maxs. This is relatively
+        inefficient but the len(intervals) should be < 10 so it should be fine"""
+        overlap_ranges = []
+        for ii, (start_ii, end_ii) in enumerate(intervals):
+            for jj, (start_jj, end_jj) in enumerate(intervals):
+                if ii <= jj:
+                    continue
+
+                if start_ii > start_jj:
+                    start = start_ii
+                else:
+                    start = start_jj
+
+                if end_ii > end_jj:
+                    end = end_jj
+                else:
+                    end = end_ii
+
+                if start < end:
+                    # then there is an overlap!
+                    overlap_ranges.append((start, end))
+
+        return overlap_ranges
+
+    @staticmethod
+    def _find_year(s):
+        match = re.search(r"\d{4}", s)
+        return int(match.group()) if match else float("inf")
+
    def _merge_names(t1, t2, out):  # noqa: N805
        """
        Private method to merge the name data in t1 and t2 and put it in out
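
A hedged toy run of the interval logic above, on per-reference MJD ranges from
the same telescope/filter group:

    Transient._find_overlapping_regions([(59000.0, 59010.0), (59005.0, 59020.0)])
    # -> [(59005.0, 59010.0)]; rows whose [min_dates, max_dates] fall entirely
    #    inside that window are treated as duplicate reductions

One observation for reviewers: sorted(..., key=cls._find_year) sorts ascending,
so bibcodes_sorted_by_year[0] is the reference with the earliest year, while the
docstring says the most recent reduction is kept.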
otter/plotter/otter_plotter.py CHANGED
@@ -23,7 +23,7 @@ class OtterPlotter:
    def __init__(self, backend):
        if backend == "matplotlib.pyplot":
            self.backend = backend
-        elif backend == "pyplot.graph_objects":
+        elif backend == "plotly.graph_objects":
            self.backend = backend
        elif "plotly" in backend and "graph_objects" not in backend:
            self.backend = "plotly.graph_objects"
otter/plotter/plotter.py CHANGED
@@ -15,8 +15,7 @@ from ..io.otter import Transient, Otter
 
 
 def query_quick_view(
-    db: Otter = None,
-    otter_path: str = None,
+    db: Otter,
     ptype: str = "both",
     sed_dim: str = "freq",
     dt_over_t: float = 0,
@@ -31,7 +30,6 @@
 
    Args:
        db (otter.Otter) : The otter object to query
-       otter_path (str) : The path to construct an otter path from
       ptype (str) : The plot type to generate. Valid options are
           - both -> Plot both light curve and sed (default)
           - sed -> Plot just the sed
@@ -55,12 +53,6 @@
       A list of matplotlib pyplot Figure objects that we plotted
 
    """
-    if db is None:
-        if otter_path is not None:
-            db = Otter(otter_path)
-        else:
-            raise ValueError("Either the db or otter_path arguments must be provided!")
-
    res = db.query(**kwargs)
 
    if len(res) > result_length_tol:
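
This is a breaking signature change: db is now required and the otter_path
shortcut is gone. A hedged migration sketch (the path-based Otter constructor
mirrors the deleted fallback):

    from otter import Otter
    from otter.plotter.plotter import query_quick_view

    db = Otter("/path/to/otter")  # was: query_quick_view(otter_path="/path/to/otter", ...)
    figs = query_quick_view(db, ptype="sed", names=["AT2018hyz"])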
otter/schema.py CHANGED
@@ -2,7 +2,7 @@
 Pydantic Schema Model of our JSON schema
 """

-from pydantic import BaseModel, model_validator, field_validator, ValidationError
+from pydantic import BaseModel, model_validator, field_validator
 from typing import Optional, Union, List


@@ -86,24 +86,24 @@ class CoordinateSchema(BaseModel):

        if uses_ra_dec:
            if self.ra_units is None:
-                raise ValidationError("ra_units must be provided for RA!")
+                raise ValueError("ra_units must be provided for RA!")
            if self.dec_units is None:
-                raise ValidationError("dec_units must be provided for Dec!")
+                raise ValueError("dec_units must be provided for Dec!")

        elif uses_galactic:
            if self.l_units is None:
-                raise ValidationError("l_units must be provided for RA!")
+                raise ValueError("l_units must be provided for RA!")
            if self.b_units is None:
-                raise ValidationError("b_units must be provided for Dec!")
+                raise ValueError("b_units must be provided for Dec!")

        elif uses_lon_lat:
            if self.lon_units is None:
-                raise ValidationError("lon_units must be provided for RA!")
+                raise ValueError("lon_units must be provided for RA!")
            if self.lat_units is None:
-                raise ValidationError("lat_units must be provided for Dec!")
+                raise ValueError("lat_units must be provided for Dec!")

        else:
-            ValidationError("Must have RA/Dec, l/b, and/or lon/lat!")
+            raise ValueError("Must have RA/Dec, l/b, and/or lon/lat!")

        return self

@@ -122,7 +122,7 @@ class DistanceSchema(BaseModel):
    @model_validator(mode="after")
    def _has_units(self):
        if self.distance_type != "redshift" and self.unit is None:
-            raise ValidationError("Need units if the distance_type is not redshift!")
+            raise ValueError("Need units if the distance_type is not redshift!")

        return self

@@ -171,6 +171,8 @@ class PhotometrySchema(BaseModel):
    date: Union[str, float, List[Union[str, float]]]
    date_format: Union[str, List[str]]
    date_err: Optional[Union[str, float, List[Union[str, float]]]] = None
+    date_min: Optional[Union[str, float, List[Union[str, float]]]] = None
+    date_max: Optional[Union[str, float, List[Union[str, float]]]] = None
    ignore: Optional[Union[bool, List[bool]]] = None
    upperlimit: Optional[Union[bool, List[bool]]] = None
    sigma: Optional[Union[str, float, List[Union[str, float]]]] = None
@@ -213,6 +215,18 @@ class PhotometrySchema(BaseModel):
            return [v]
        return v

+    @model_validator(mode="after")
+    def _ensure_min_and_max_date(self):
+        """
+        This will make sure that if date_min is provided so is date_max
+        """
+        if (self.date_min is not None and self.date_max is None) or (
+            self.date_min is None and self.date_max is not None
+        ):
+            raise ValueError(
+                "If you provide date_min or date_max you must provide the other!"
+            )
+
    @model_validator(mode="after")
    def _ensure_xray_model(self):
        """
@@ -221,7 +235,7 @@ class PhotometrySchema(BaseModel):
        It will be commented out until we get the data setup correctly
        """
        # if self.obs_type == "xray" and self.xray_model is None:
-        #     raise ValidationError(
+        #     raise ValueError(
        #         "Need an xray_model for this xray data!"
        #     )

@@ -262,24 +276,20 @@ class HostSchema(BaseModel):
        # if it has the RA/Dec keys, make sure it also has ra_unit, dec_unit keys
        if has_coordinate:
            if self.host_ra_units is None:
-                raise ValidationError("Need RA unit if coordinates are provided!")
+                raise ValueError("Need RA unit if coordinates are provided!")
            if self.host_dec_units is None:
-                raise ValidationError("Need Dec unit if coordinates are provided!")
+                raise ValueError("Need Dec unit if coordinates are provided!")

        # we need either the coordinate or name to identify this object
        # Both are okay too (more info is always better)
        if not has_coordinate and not has_name:
-            raise ValidationError(
-                "Need to provide a Host name and/or host coordinates!"
-            )
+            raise ValueError("Need to provide a Host name and/or host coordinates!")

        # Make sure that if one of RA/Dec is given then both are given
        if (self.host_ra is None and self.host_dec is not None) or (
            self.host_ra is not None and self.host_dec is None
        ):
-            raise ValidationError(
-                "Please provide RA AND Dec, not just one or the other!"
-            )
+            raise ValueError("Please provide RA AND Dec, not just one or the other!")

        return self

@@ -299,4 +309,4 @@ class OtterSchema(BaseModel):
    @model_validator(mode="after")
    def _verify_filter_alias(self):
        if self.photometry is not None and self.filter_alias is None:
-            raise ValidationError("filter_alias is needed if photometry is given!")
+            raise ValueError("filter_alias is needed if photometry is given!")
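
The ValidationError -> ValueError sweep matches pydantic's contract: a validator
signals failure by raising ValueError (or AssertionError), which pydantic wraps
into a ValidationError for the caller; ValidationError is not meant to be raised
directly from inside a validator. A minimal sketch of the pattern:

    from typing import Optional
    from pydantic import BaseModel, ValidationError, model_validator

    class Pair(BaseModel):
        lo: Optional[float] = None
        hi: Optional[float] = None

        @model_validator(mode="after")
        def _both_or_neither(self):
            if (self.lo is None) != (self.hi is None):
                raise ValueError("provide both lo and hi, or neither")
            return self

    try:
        Pair(lo=1.0)
    except ValidationError as e:  # pydantic wraps the ValueError
        print(e)

Note also that the new _ensure_min_and_max_date validator does not end with
`return self` on the success path, unlike every other model validator in this
file.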
otter/util.py CHANGED
@@ -121,6 +121,10 @@ def _bibcode_to_hrn_with_query(bibcode):
        if val in bibcodes:
            bibcodes.pop(bibcodes.index(val))

+    if len(bibcodes) == 0:
+        # then all of the bibcodes were "private"
+        return bibcodes, bibcodes
+
    query = f"bibcode:{bibcodes[0]}"
    if len(bibcodes) > 1:
        for b in bibcodes[1:]:
@@ -469,6 +473,7 @@ Mapping for the effective frequencies in THz for all the filters used in OTTER
 XRAY_AREAS = {
     # https://swift.gsfc.nasa.gov/about_swift/Sci_Fact_Sheet.pdf
     "swift": 135 * u.cm**2,
+    "swift-xrt": 135 * u.cm**2,
     # https://heasarc.gsfc.nasa.gov/docs/rosat/ruh/handbook/node39.html#SECTION00634000000000000000
     "rosat": 400 * u.cm**2,
     # https://www.cosmos.esa.int/web/xmm-newton/technical-details-mirrors
@@ -481,6 +486,15 @@ XRAY_AREAS = {
     "chandra": 600 * u.cm**2,
     # https://www.cosmos.esa.int/documents/332006/954765/Brunner_TopicK.pdf
     "erosita": 1500 * u.cm**2,
+    # https://en.wikipedia.org/wiki/NuSTAR
+    "nustar": 847 * u.cm**2,
+    # https://iss.jaxa.jp/en/kiboexp/ef/maxi/
+    "maxi": 200 * u.cm**2,
+    # https://iopscience.iop.org/article/10.3847/1538-4357/abd569
+    "konus-wind": 120 * u.cm**2,
+    # https://www.cosmos.esa.int/web/einstein-probe/mission
+    "ep": 600 * u.cm**2,
+    "ep-fxt": 600 * u.cm**2,
 }
 """
 X-Ray telescope areas that are used for converting from counts to other units.
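
A hedged sketch of the kind of conversion an effective-area table like this
supports (not necessarily the package's exact formula; the characteristic
photon energy is an assumption):

    import astropy.units as u
    from otter.util import XRAY_AREAS

    count_rate = 0.05 / u.s
    mean_energy = 10 * u.keV  # assumed characteristic photon energy
    flux = (count_rate * mean_energy / XRAY_AREAS["nustar"]).to(u.erg / u.s / u.cm**2)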
@@ -1,18 +0,0 @@
-astro_otter-0.3.3.dist-info/licenses/LICENSE,sha256=s9IPE8A3CAMEaZpDhj4eaorpmfLYGB0mIGphq301PUY,1067
-otter/__init__.py,sha256=pvX-TN7nLVmvKpkDi89Zxe-jMfHNiVMD3zsd_bPEK9Y,535
-otter/_version.py,sha256=IP9YVdiq2R5ldxON8iRmWBlg638Aq0aZCl5syP2mi78,76
-otter/exceptions.py,sha256=3lQF4AXVTfs9VRsVePQoIrXnramsPZbUL5crvf1s9Ng,1702
-otter/schema.py,sha256=lOn-9FX4EuVYkRY97j64yGQM9fsTdKUCeYHqkJ8-kN0,10790
-otter/util.py,sha256=Ve_3iHmTcdcS_X2zzBg07WQTlWPbx4CBAZf4Gye65Og,22831
-otter/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-otter/io/data_finder.py,sha256=v3jZCOhvysHUQG1FOwHdeJ-psTT-MsdO_GZalBpMBGc,38218
-otter/io/host.py,sha256=xv_SznZuvMoMVsZLqlcmlOyaqKCMZqlTQ_gkN4VBSTw,7139
-otter/io/otter.py,sha256=lJStat9oefnQCT4rHrI39Lq0Of-uWl2Dsq7A8Gsy4SY,49892
-otter/io/transient.py,sha256=ANYnqWN1bJuefkTQqyPdt95V33DaQxUOOG_PPU86l3A,48832
-otter/plotter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-otter/plotter/otter_plotter.py,sha256=OQhuLgnMSzgtAjJF8SYBuQOyYcu7Pr0uia5P0G_7z5Q,2144
-otter/plotter/plotter.py,sha256=z09NwQVJS2tuwH3sv95DZv8xogjvf-7Gvj6iWCEx-gQ,9635
-astro_otter-0.3.3.dist-info/METADATA,sha256=EJGa_8TqBnyNJsxHDLh9h48UcwxgJyVcra4aAW7iATQ,7046
-astro_otter-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-astro_otter-0.3.3.dist-info/top_level.txt,sha256=Wth72sCwBRUk3KZGknSKvLQDMFuJk6qiaAavMDOdG5k,6
-astro_otter-0.3.3.dist-info/RECORD,,