astro-otter 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


otter/io/transient.py CHANGED
@@ -209,6 +209,36 @@ class Transient(MutableMapping):
             "host",
         }
 
+        merge_subkeys_map = {
+            "name": None,
+            "date_reference": ["value", "date_format", "date_type"],
+            "coordinate": None,  # may need to update this if we run into problems
+            "distance": ["value", "distance_type", "unit"],
+            "filter_alias": None,
+            "schema_version": None,
+            "photometry": None,
+            "classification": None,
+            "host": [
+                "host_ra",
+                "host_dec",
+                "host_ra_units",
+                "host_dec_units",
+                "host_name",
+            ],
+        }
+
+        groupby_key_for_default_map = {
+            "name": None,
+            "date_reference": "date_type",
+            "coordinate": "coordinate_type",
+            "distance": "distance_type",
+            "filter_alias": None,
+            "schema_version": None,
+            "photometry": None,
+            "classification": None,
+            "host": None,
+        }
+
         # create a blank dictionary since we don't want to overwrite this object
         out = {}
 
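The two new maps drive the generalized merge below: merge_subkeys_map lists, per top-level key, the columns that must agree for two rows to count as duplicates (None falls back to every column except reference), and groupby_key_for_default_map names the column whose groups each get their own default row. A minimal sketch of the duplicate rule, with hypothetical data (pandas concatenates list cells when summing, which is what merges the reference lists):

    import pandas as pd

    # two date_reference rows that agree on the merge subkeys
    # ["value", "date_format", "date_type"] but come from different sources
    rows = pd.DataFrame([
        {"value": "2020-01-01", "date_format": "iso", "date_type": "discovery",
         "reference": ["sourceA"]},
        {"value": "2020-01-01", "date_format": "iso", "date_type": "discovery",
         "reference": ["sourceB"]},
    ])
    subkeys = ["value", "date_format", "date_type"]
    merged = rows.groupby(subkeys)["reference"].sum().reset_index()
    print(merged.reference.iloc[0])  # ['sourceA', 'sourceB'] -- one row, both refs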
@@ -244,14 +274,19 @@ class Transient(MutableMapping):
 
             # There are some special keys that we are expecting
             if key in allowed_keywords:
-                Transient._merge_arbitrary(key, self, other, out)
+                Transient._merge_arbitrary(
+                    key,
+                    self,
+                    other,
+                    out,
+                    merge_subkeys=merge_subkeys_map[key],
+                    groupby_key=groupby_key_for_default_map[key],
+                )
             else:
                 # this is an unexpected key!
                 if strict_merge:
                     # since this is a strict merge we don't want unexpected data!
-                    raise TransientMergeError(
-                        f"{key} was not expected! Only keeping the old information!"
-                    )
+                    raise TransientMergeError(f"{key} was not expected! Can not merge!")
                 else:
                     # Throw a warning and only keep the old stuff
                     warnings.warn(
@@ -332,7 +367,14 @@ class Transient(MutableMapping):
             astropy.time.Time of the default discovery date
         """
         key = "date_reference"
-        date = self._get_default(key, filt='df["date_type"] == "discovery"')
+        try:
+            date = self._get_default(key, filt='df["date_type"] == "discovery"')
+        except KeyError:
+            return None
+
+        if date is None:
+            return date
+
         if "date_format" in date:
             f = date["date_format"]
         else:
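With this change get_discovery_date degrades gracefully: missing or empty date_reference data now yields None instead of raising a KeyError. A short usage sketch (the transient instance here is hypothetical):

    date = transient.get_discovery_date()
    if date is None:
        print("no discovery date recorded")
    else:
        print(date.mjd)  # an astropy.time.Time, so .mjd/.jd/.iso all work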
@@ -370,11 +412,13 @@ class Transient(MutableMapping):
             return default
         return default.object_class, default.confidence, default.reference
 
-    def get_host(self, max_hosts=3, **kwargs) -> list[Host]:
+    def get_host(self, max_hosts=3, search=False, **kwargs) -> list[Host]:
         """
         Gets the default host information of this Transient. This returns an otter.Host
-        object. If no host is known in OTTER, it uses astro-ghost to find the best
-        match.
+        object. If search=True, it will also check the BLAST host association database
+        for the best match and return it as well. Note that if search is True then
+        this has the potential to return max_hosts + 1, if BLAST also returns a result.
+        The BLAST result will always be the last value in the returned list.
 
         Args:
             max_hosts [int] : The maximum number of hosts to return
@@ -385,38 +429,25 @@ class Transient(MutableMapping):
             useful methods for querying public catalogs for data of the host.
         """
         # first try to get the host information from our local database
+        host = []
         if "host" in self:
-            host = [
-                Host(transient_name=self.default_name, **dict(h)) for h in self["host"]
-            ]
+            max_hosts = min([max_hosts, len(self["host"])])
+            for h in self["host"][:max_hosts]:
+                host.append(Host(transient_name=self.default_name, **dict(h)))
 
-        # then try astro-ghost
-        else:
+        # then try BLAST
+        if search:
             logger.warn(
-                "No host known, trying to find it with astro-ghost. \
-                See https://uiucsnastro-ghost.readthedocs.io/en/latest/index.html"
-            )
-
-            # this import has to be here otherwise the code breaks
-            from astro_ghost.ghostHelperFunctions import getTransientHosts, getGHOST
-
-            getGHOST(real=False, verbose=1)
-            res = getTransientHosts(
-                [self.default_name], [self.get_skycoord()], verbose=False
+                "Trying to find a host with BLAST/astro-ghost. Note\
+                that this won't work for older targets! See https://blast.scimma.org"
             )
 
-            host = [
-                Host(
-                    host_ra=row["raStack"],
-                    host_dec=row["decStack"],
-                    host_ra_units="deg",
-                    host_dec_units="deg",
-                    host_name=row["objName"],
-                    transient_name=self.default_name,
-                    reference=["astro-ghost"],
-                )
-                for i, row in res.iterrows()
-            ]
+            # default_name should always be the TNS name if we have one
+            print(self.default_name)
+            blast_host = Host.query_blast(self.default_name)
+            print(blast_host)
+            if blast_host is not None:
+                host.append(blast_host)
 
         return host
 
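Usage sketch for the new search flag: the list holds up to max_hosts entries from OTTER itself, plus, when search=True and BLAST has a match, one more Host appended at the end (attribute names here are assumed from the constructor kwargs above):

    hosts = transient.get_host(max_hosts=2, search=True)
    print(len(hosts))  # at most max_hosts + 1 = 3
    for h in hosts:
        print(h.host_name, h.host_ra, h.host_dec)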
@@ -433,6 +464,9 @@ class Transient(MutableMapping):
             raise KeyError(f"This transient does not have {key} associated with it!")
 
         df = pd.DataFrame(self[key])
+        if len(df) == 0:
+            raise KeyError(f"This transient does not have {key} associated with it!")
+
         if filt is not None:
             df = df[eval(filt)]  # apply the filters
 
@@ -446,6 +480,7 @@ class Transient(MutableMapping):
 
         if len(df_filtered) == 0:
             return None
+
         return df_filtered.iloc[0]
 
     def _reformat_coordinate(self, item):
@@ -515,6 +550,9 @@ class Transient(MutableMapping):
             raise IOError("Please choose either value or raw!")
 
         # turn the photometry key into a pandas dataframe
+        if "photometry" not in self:
+            raise FailedQueryError("No photometry for this object!")
+
         dfs = []
         for item in self["photometry"]:
             max_len = 0
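Callers can now distinguish "no photometry" from a real failure by catching FailedQueryError up front instead of hitting an opaque pd.concat error later. A sketch, assuming this method is the package's public photometry cleaner and that FailedQueryError is importable from the package's exception module (both are assumptions):

    from otter.exceptions import FailedQueryError  # import path assumed

    try:
        phot = transient.clean_photometry()  # method name assumed
    except FailedQueryError:
        phot = None  # this object simply has no photometry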
@@ -531,9 +569,29 @@ class Transient(MutableMapping):
             df = pd.DataFrame(item)
             dfs.append(df)
 
+        if len(dfs) == 0:
+            raise FailedQueryError("No photometry for this object!")
         c = pd.concat(dfs)
 
+        # extract the filter information and substitute in any missing columns
+        # because of how we handle this later, we just need to make sure the effective
+        # wavelengths are never nan
+        def fill_wave(row):
+            if "wave_eff" not in row or (
+                pd.isna(row.wave_eff) and not pd.isna(row.freq_eff)
+            ):
+                freq_eff = row.freq_eff * u.Unit(row.freq_units)
+                wave_eff = freq_eff.to(u.Unit(wave_unit), equivalencies=u.spectral())
+                return wave_eff.value, wave_unit
+            elif not pd.isna(row.wave_eff):
+                return row.wave_eff, row.wave_units
+            else:
+                raise ValueError("Missing frequency or wavelength information!")
+
         filters = pd.DataFrame(self["filter_alias"])
+        res = filters.apply(fill_wave, axis=1)
+        filters["wave_eff"], filters["wave_units"] = zip(*res)
+        # merge the photometry with the filter information
         df = c.merge(filters, on="filter_key")
 
         # make sure 'by' is in df
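fill_wave leans on astropy's spectral equivalencies, which convert between frequency and wavelength through lambda = c / nu. A standalone check of the conversion it performs, with a made-up input:

    import astropy.units as u

    freq_eff = 6.3e14 * u.Hz  # roughly a g-band effective frequency
    wave_eff = freq_eff.to(u.AA, equivalencies=u.spectral())
    print(wave_eff)  # ~4758 Angstrom, since lambda = c / nu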
@@ -546,6 +604,14 @@ class Transient(MutableMapping):
         # skip rows where 'by' is nan
         df = df[df[by].notna()]
 
+        # remove rows where the flux is less than zero since this is nonphysical
+        # See Mummery et al. (2023) Section 5.2 for why we need to do this when using
+        # ZTF data:
+        # "Because the origin of the negative late-time flux is currently un-
+        # known (and under investigation), we have not attempted to correct
+        # the TDE lightcurves for this systematic effect."
+        df = df[df[by].astype(float) > 0]
+
         # drop irrelevant obs_types before continuing
         if obs_type is not None:
             valid_obs_types = {"radio", "uvoir", "xray"}
@@ -568,6 +634,7 @@ class Transient(MutableMapping):
 
         # Figure out what columns are good to groupby in the photometry
         outdata = []
+
         if "telescope" in df:
             tele = True
             to_grp_by = ["obs_type", by + "_units", "telescope"]
@@ -619,24 +686,22 @@ class Transient(MutableMapping):
                 indata_err = np.array(data[by + "_err"].astype(float))
             else:
                 indata_err = np.zeros(len(data))
+
+            # convert to an astropy quantity
             q = indata * u.Unit(astropy_units)
             q_err = indata_err * u.Unit(
                 astropy_units
             )  # assume error and values have the same unit
 
             # get and save the effective wavelength
-            if "freq_eff" in data and not np.isnan(data["freq_eff"].iloc[0]):
-                zz = zip(data["freq_eff"], data["freq_units"])
-                freq_eff = u.Quantity([vv * u.Unit(uu) for vv, uu in zz], freq_unit)
-                wave_eff = freq_eff.to(wave_unit, equivalencies=u.spectral())
+            # because of cleaning we did to the filter dataframe above wave_eff
+            # should NEVER be nan!
+            if np.any(pd.isna(data["wave_eff"])):
+                raise ValueError("Flushing out the effective wavelength array failed!")
 
-            elif "wave_eff" in data and not np.isnan(data["wave_eff"].iloc[0]):
-                zz = zip(data["wave_eff"], data["wave_units"])
-                wave_eff = u.Quantity([vv * u.Unit(uu) for vv, uu in zz], wave_unit)
-                freq_eff = wave_eff.to(freq_unit, equivalencies=u.spectral())
-
-            else:
-                raise ValueError("No known frequency or wavelength, please fix!")
+            zz = zip(data["wave_eff"], data["wave_units"])
+            wave_eff = u.Quantity([vv * u.Unit(uu) for vv, uu in zz], wave_unit)
+            freq_eff = wave_eff.to(freq_unit, equivalencies=u.spectral())
 
             data["converted_wave"] = wave_eff.value
             data["converted_wave_unit"] = wave_unit
@@ -656,7 +721,7 @@ class Transient(MutableMapping):
                     )
                 else:
                     raise OtterLimitationError(
-                        "Can not convert x-ray data without a " + "telescope"
+                        "Can not convert x-ray data without a telescope"
                     )
 
             # we also need to make this wave_min and wave_max
@@ -685,24 +750,32 @@ class Transient(MutableMapping):
                         u.Unit(flux_unit),
                         vegaspec=SourceSpectrum.from_vega(),
                         area=area,
-                    )
-                    f_err = convert_flux(
-                        wave,
-                        xray_point_err,
-                        u.Unit(flux_unit),
-                        vegaspec=SourceSpectrum.from_vega(),
-                        area=area,
-                    )
+                    ).value
+
+                    # approximate the uncertainty as dX = dY/Y * X
+                    f_err = np.multiply(
+                        f_val, np.divide(xray_point_err.value, xray_point.value)
+                    )
 
                     # then we take the average of the minimum and maximum values
                     # computed by syncphot
-                    flux.append(np.mean(f_val).value)
-                    flux_err.append(np.mean(f_err).value)
+                    flux.append(np.mean(f_val))
+                    flux_err.append(np.mean(f_err))
 
             else:
                 # this will be faster and cover most cases
-                flux = convert_flux(wave_eff, q, u.Unit(flux_unit))
-                flux_err = convert_flux(wave_eff, q_err, u.Unit(flux_unit))
+                flux = convert_flux(wave_eff, q, u.Unit(flux_unit)).value
+
+                # since the error propagation is different between logarithmic units
+                # and linear units, unfortunately
+                if isinstance(u.Unit(flux_unit), u.LogUnit):
+                    # approximate the uncertainty as dX = dY/Y * |ln(10)/2.5|
+                    prefactor = np.abs(np.log(10) / 2.5)  # this is basically 1
+                else:
+                    # approximate the uncertainty as dX = dY/Y * X
+                    prefactor = flux
+
+                flux_err = np.multiply(prefactor, np.divide(q_err.value, q.value))
 
         flux = np.array(flux) * u.Unit(flux_unit)
         flux_err = np.array(flux_err) * u.Unit(flux_unit)
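The dropped second convert_flux call is replaced by first-order error propagation: for a linear target unit the conversion is X = cY, so dX = X * (dY/Y); for a logarithmic (magnitude-like) unit the relative flux error maps to a roughly constant additive error, here ln(10)/2.5 ≈ 0.921 per unit relative error (note the textbook propagation of m = -2.5 log10 F gives 2.5/ln(10) ≈ 1.086, so both constants are "basically 1" as the comment says). A numeric sanity check:

    import numpy as np

    f, f_err = 2.0e-3, 1.0e-4      # a linear flux and its error, arbitrary units
    x = f * 1.0e3                  # convert to another linear unit
    x_err = x * (f_err / f)        # dX = X * dY/Y: relative error is preserved
    assert np.isclose(x_err / x, f_err / f)

    # logarithmic units: a 5% relative flux error is ~0.05 mag either way
    print(np.log(10) / 2.5 * 0.05)  # 0.0461, the prefactor used in the diff
    print(2.5 / np.log(10) * 0.05)  # 0.0543, standard propagation constant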
@@ -726,6 +799,27 @@ class Transient(MutableMapping):
         outdata["converted_date"] = times
         outdata["converted_date_unit"] = date_unit
 
+        # compute the upperlimit value based on a 3 sigma detection
+        # this is just for rows where we don't already know if it is an upperlimit
+        if isinstance(u.Unit(flux_unit), u.LogUnit):
+            # this uses the following formula (which is surprising because it means
+            # magnitude upperlimits are independent of the actual measurement!)
+            # sigma_m > (1/3) * (ln(10)/2.5)
+            def is_upperlimit(row):
+                if pd.isna(row.upperlimit):
+                    return row.converted_flux_err > np.log(10) / (3 * 2.5)
+                else:
+                    return row.upperlimit
+        else:
+
+            def is_upperlimit(row):
+                if pd.isna(row.upperlimit):
+                    return row.converted_flux < 3 * row.converted_flux_err
+                else:
+                    return row.upperlimit
+
+        outdata["upperlimit"] = outdata.apply(is_upperlimit, axis=1)
+
         return outdata
 
     def _merge_names(t1, t2, out):  # noqa: N805
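The magnitude branch follows from the linear 3-sigma rule plus the propagation used above: a detection requires F > 3*sigma_F, i.e. sigma_F/F < 1/3, and with sigma_m ≈ (ln(10)/2.5) * sigma_F/F that becomes sigma_m > (1/3)(ln(10)/2.5) ≈ 0.307 mag for an upper limit, independent of the measured magnitude itself. A quick check of both branches (column names mirror the diff; the data is made up):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        "converted_flux": [10.0, 2.0],
        "converted_flux_err": [1.0, 1.0],
        "upperlimit": [np.nan, np.nan],
    })
    # linear branch: flux below 3*err cannot be claimed as a 3 sigma detection
    print((df.converted_flux < 3 * df.converted_flux_err).tolist())  # [False, True]
    # magnitude branch: the threshold is a constant
    print(np.log(10) / (3 * 2.5))  # ~0.307 mag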
@@ -831,11 +925,21 @@ class Transient(MutableMapping):
         Just keep whichever schema version is greater
         """
         key = "schema_version/value"
-        if int(t1[key]) > int(t2[key]):
+        if "comment" not in t1["schema_version"]:
+            t1["schema_version/comment"] = ""
+
+        if "comment" not in t2["schema_version"]:
+            t2["schema_version/comment"] = ""
+
+        if key in t1 and key in t2 and int(t1[key]) > int(t2[key]):
             out["schema_version"] = deepcopy(t1["schema_version"])
         else:
             out["schema_version"] = deepcopy(t2["schema_version"])
 
+        out["schema_version"]["comment"] = (
+            t1["schema_version/comment"] + ";" + t2["schema_version/comment"]
+        )
+
     def _merge_photometry(t1, t2, out):  # noqa: N805
         """
         Combine photometry sources
@@ -913,7 +1017,7 @@ class Transient(MutableMapping):
                 item["default"] = False
 
     @staticmethod
-    def _merge_arbitrary(key, t1, t2, out):
+    def _merge_arbitrary(key, t1, t2, out, merge_subkeys=None, groupby_key=None):
         """
         Merge two arbitrary datasets inside the json file using pandas
 
@@ -940,37 +1044,62 @@ class Transient(MutableMapping):
 
         # have to get the indexes to drop using a string rep of the df
         # this is cause we have lists in some cells
-        to_drop = merged_with_dups.astype(str).drop_duplicates().index
-
-        merged = merged_with_dups.iloc[to_drop].reset_index(drop=True)
-
-        outdict = merged.to_dict(orient="records")
+        # We also need to deal with merging the lists of references across rows
+        # that we deem to be duplicates. This solution to do this quickly is from
+        # https://stackoverflow.com/questions/36271413/ \
+        # pandas-merge-nearly-duplicate-rows-based-on-column-value
+        if merge_subkeys is None:
+            merge_subkeys = merged_with_dups.columns.tolist()
+            merge_subkeys.remove("reference")
+        else:
+            for k in merge_subkeys:
+                if k not in merged_with_dups:
+                    merge_subkeys.remove(k)
+
+        merged = (
+            merged_with_dups.astype(str)
+            .groupby(merge_subkeys)["reference"]
+            .apply(lambda x: x.sum())
+            .reset_index()
+        )
 
-        outdict_cleaned = Transient._remove_nans(
-            outdict
-        )  # clear out the nans from pandas conversion
+        # then we have to turn the merged reference strings into a string list
+        merged["reference"] = merged.reference.str.replace("][", ",")
 
-        out[key] = outdict_cleaned
+        # then eval the string of a list to get back an actual list of sources
+        merged["reference"] = merged.reference.apply(
+            lambda v: np.unique(eval(v)).tolist()
+        )
 
-    @staticmethod
-    def _remove_nans(d):
-        """
-        Remove nans from a record dictionary
+        # decide on default values
+        if groupby_key is None:
+            iterate_through = [(0, merged)]
+        else:
+            iterate_through = merged.groupby(groupby_key)
+
+        # we will make whichever value has more references the default
+        outdict = []
+        for data_type, df in iterate_through:
+            lengths = df.reference.map(len)
+            max_idx_arr = np.argmax(lengths)
+
+            if isinstance(max_idx_arr, np.int64):
+                max_idx = max_idx_arr
+            elif len(max_idx_arr) == 0:
+                raise ValueError("Something went wrong with deciding the default")
+            else:
+                max_idx = max_idx_arr[0]  # arbitrarily choose the first
 
-        THIS IS SLOW: O(n^2)!!! WILL NEED TO BE SPED UP LATER
-        """
+            defaults = np.full(len(df), False, dtype=bool)
+            defaults[max_idx] = True
 
-        outd = []
-        for item in d:
-            outsubd = {}
-            for key, val in item.items():
-                if not isinstance(val, float):
-                    # this definitely is not NaN
-                    outsubd[key] = val
+            df["default"] = defaults
+            outdict.append(df)
+        outdict = pd.concat(outdict)
 
-                else:
-                    if not np.isnan(val):
-                        outsubd[key] = val
-            outd.append(outsubd)
+        # from https://stackoverflow.com/questions/52504972/ \
+        # converting-a-pandas-df-to-json-without-nan
+        outdict = outdict.replace("nan", np.nan)
+        outdict_cleaned = [{**x[i]} for i, x in outdict.stack().groupby(level=0)]
 
-        return outd
+        out[key] = outdict_cleaned
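The heart of the rewrite is the string-cast groupby: astype(str) makes list-valued cells hashable, rows that agree on every subkey collapse into one group, and the stringified reference lists concatenate into "[...][...]", which the replace/eval pair turns back into a single deduplicated list. A self-contained toy run of that pipeline (note that str.replace needs regex=False on recent pandas, since "][" is not a valid regex):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame([
        {"value": 0.051, "distance_type": "redshift", "unit": None,
         "reference": ["srcA"]},
        {"value": 0.051, "distance_type": "redshift", "unit": None,
         "reference": ["srcB", "srcA"]},
    ])
    subkeys = ["value", "distance_type", "unit"]

    merged = (
        df.astype(str)                  # list cells -> strings, rows now hashable
        .groupby(subkeys)["reference"]
        .apply(lambda x: x.sum())       # "['srcA']" + "['srcB', 'srcA']"
        .reset_index()
    )
    merged["reference"] = merged.reference.str.replace("][", ",", regex=False)
    merged["reference"] = merged.reference.apply(lambda v: np.unique(eval(v)).tolist())
    print(merged.reference.iloc[0])     # ['srcA', 'srcB']

The later stack()/groupby(level=0) round trip rebuilds the records while silently dropping NaN cells (stack drops them by default), which is what replaces the old O(n^2) _remove_nans helper.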
@@ -39,7 +39,7 @@ class OtterPlotter:
         elif self.backend == "plotly.graph_objects":
             self.plot = self._plot_plotly
         else:
-            raise ValueError("Unknown backend!")
+            raise ValueError("Unknown plotting backend!")
 
     def _plot_matplotlib(self, x, y, xerr=None, yerr=None, ax=None, **kwargs):
         """
@@ -53,17 +53,19 @@ class OtterPlotter:
         ax.errorbar(x, y, xerr=xerr, yerr=yerr, **kwargs)
         return ax
 
-    def _plot_plotly(self, x, y, xerr=None, yerr=None, go=None, *args, **kwargs):
+    def _plot_plotly(self, x, y, xerr=None, yerr=None, ax=None, *args, **kwargs):
         """
         General plotting method using plotly, is called by _plotly_light_curve and
         _plotly_sed
         """
 
-        if go is None:
+        if ax is None:
             go = self.plotter.Figure()
+        else:
+            go = ax
 
         fig = go.add_scatter(
-            x=x, y=y, error_x=dict(array=xerr), error_y=dict(array=yerr)
+            x=x, y=y, error_x=dict(array=xerr), error_y=dict(array=yerr), **kwargs
         )
 
         return fig
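With the go → ax rename both backends take an existing canvas through the same keyword, and **kwargs now actually reaches plotly. A minimal standalone sketch of the call pattern the wrapper forwards to:

    import plotly.graph_objects as go

    fig = go.Figure()
    fig.add_scatter(
        x=[1, 2, 3],
        y=[10.0, 9.5, 9.8],
        error_y=dict(array=[0.1, 0.2, 0.1]),  # symmetric y error bars
        mode="markers",                       # the kind of option **kwargs forwards
    )
    fig.show()  # renders in a notebook or browser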