astro-otter 0.3.0__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of astro-otter might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: astro-otter
3
- Version: 0.3.0
3
+ Version: 0.3.3
4
4
  Author-email: Noah Franz <nfranz@arizona.edu>
5
5
  License: MIT License
6
6
 
@@ -45,6 +45,7 @@ Requires-Dist: pandas
45
45
  Requires-Dist: synphot
46
46
  Requires-Dist: typing-extensions
47
47
  Requires-Dist: pyarango
48
+ Requires-Dist: tabulate
48
49
  Requires-Dist: matplotlib
49
50
  Requires-Dist: plotly
50
51
  Requires-Dist: astroquery
@@ -53,7 +54,6 @@ Requires-Dist: skypatrol
53
54
  Requires-Dist: fundamentals
54
55
  Requires-Dist: astro-datalab
55
56
  Requires-Dist: sparclclient
56
- Requires-Dist: astro-ghost
57
57
  Requires-Dist: pydantic
58
58
  Requires-Dist: pyreadline3; platform_system == "Windows"
59
59
  Provides-Extra: docs
@@ -117,6 +117,7 @@ python3 -m pip install astro-otter
117
117
  ```
118
118
  git clone https://github.com/astro-otter/otter.git $OTTER_ROOT/otter
119
119
  git clone https://github.com/astro-otter/otterdb.git $OTTER_ROOT/otterdb
120
+ git clone https://github.com/astro-otter/otter-web.git $OTTER_ROOT/otter-web
120
121
  ```
121
122
  3. Install the NASA ADS Python API by following the instructions at https://ads.readthedocs.io/en/latest/#getting-started
122
123
  4. Install otter, the API for this database. From
@@ -125,13 +126,7 @@ python3 -m pip install astro-otter
125
126
  cd $OTTER_ROOT/otter
126
127
  python -m pip install -e .
127
128
  ```
128
- 5. Process the data to build the local "database" (although it is really just a directory).
129
- Then, you can build the "database" by running the
130
- following commands:
131
- ```
132
- cd $OTTER_ROOT/otter/scripts/
133
- python3 gen_summary_table.py --otterroot $OTTER_ROOT
134
- ```
129
+ 5. Process the data to build the local copy of the database. Follow the instructions in the otterdb repo README.
135
130
  6. Easily access the data using the Otter code! In python:
136
131
  ```
137
132
  import os
@@ -0,0 +1,18 @@
1
+ astro_otter-0.3.3.dist-info/licenses/LICENSE,sha256=s9IPE8A3CAMEaZpDhj4eaorpmfLYGB0mIGphq301PUY,1067
2
+ otter/__init__.py,sha256=pvX-TN7nLVmvKpkDi89Zxe-jMfHNiVMD3zsd_bPEK9Y,535
3
+ otter/_version.py,sha256=IP9YVdiq2R5ldxON8iRmWBlg638Aq0aZCl5syP2mi78,76
4
+ otter/exceptions.py,sha256=3lQF4AXVTfs9VRsVePQoIrXnramsPZbUL5crvf1s9Ng,1702
5
+ otter/schema.py,sha256=lOn-9FX4EuVYkRY97j64yGQM9fsTdKUCeYHqkJ8-kN0,10790
6
+ otter/util.py,sha256=Ve_3iHmTcdcS_X2zzBg07WQTlWPbx4CBAZf4Gye65Og,22831
7
+ otter/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ otter/io/data_finder.py,sha256=v3jZCOhvysHUQG1FOwHdeJ-psTT-MsdO_GZalBpMBGc,38218
9
+ otter/io/host.py,sha256=xv_SznZuvMoMVsZLqlcmlOyaqKCMZqlTQ_gkN4VBSTw,7139
10
+ otter/io/otter.py,sha256=lJStat9oefnQCT4rHrI39Lq0Of-uWl2Dsq7A8Gsy4SY,49892
11
+ otter/io/transient.py,sha256=ANYnqWN1bJuefkTQqyPdt95V33DaQxUOOG_PPU86l3A,48832
12
+ otter/plotter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ otter/plotter/otter_plotter.py,sha256=OQhuLgnMSzgtAjJF8SYBuQOyYcu7Pr0uia5P0G_7z5Q,2144
14
+ otter/plotter/plotter.py,sha256=z09NwQVJS2tuwH3sv95DZv8xogjvf-7Gvj6iWCEx-gQ,9635
15
+ astro_otter-0.3.3.dist-info/METADATA,sha256=EJGa_8TqBnyNJsxHDLh9h48UcwxgJyVcra4aAW7iATQ,7046
16
+ astro_otter-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
17
+ astro_otter-0.3.3.dist-info/top_level.txt,sha256=Wth72sCwBRUk3KZGknSKvLQDMFuJk6qiaAavMDOdG5k,6
18
+ astro_otter-0.3.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.0.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
otter/_version.py CHANGED
@@ -2,4 +2,4 @@
2
2
  Just define the package version in one place
3
3
  """
4
4
 
5
- __version__ = "0.3.0"
5
+ __version__ = "0.3.3"
otter/io/otter.py CHANGED
@@ -3,11 +3,12 @@ This is the primary class for user interaction with the catalog
3
3
  """
4
4
 
5
5
  from __future__ import annotations
6
+ from typing import Optional
6
7
  import os
7
8
  import json
8
9
  import glob
9
- from warnings import warn
10
10
  from copy import deepcopy
11
+ import logging
11
12
 
12
13
  from pyArango.connection import Connection
13
14
  from pyArango.database import Database
@@ -22,7 +23,7 @@ from astropy import units as u
22
23
 
23
24
  from .transient import Transient
24
25
  from ..exceptions import FailedQueryError, OtterLimitationError, TransientMergeError
25
- from ..util import bibcode_to_hrn, freq_to_obstype, freq_to_band
26
+ from ..util import bibcode_to_hrn, freq_to_obstype, freq_to_band, _DuplicateFilter
26
27
 
27
28
  import warnings
28
29
 
@@ -30,6 +31,8 @@ warnings.simplefilter("once", RuntimeWarning)
30
31
  warnings.simplefilter("once", UserWarning)
31
32
  warnings.simplefilter("once", u.UnitsWarning)
32
33
 
34
+ logger = logging.getLogger(__name__)
35
+
33
36
 
34
37
  def _np_encoder(object):
35
38
  """
@@ -44,23 +47,34 @@ class Otter(Database):
44
47
  This is the primary class for users to access the otter backend database
45
48
 
46
49
  Args:
50
+ url (str): The url where the database api endpoints are located
51
+ username (str): The username to log into the database with
52
+ password (str): The password to log into the database with
53
+ gen_summary (bool): Generate a local summary table, this should generally be
54
+ left as False!
47
55
  datadir (str): Path to the data directory with the otter data. If not provided
48
56
  will default to a ".otter" directory in the CWD where you call
49
57
  this class from.
50
58
  debug (bool): If we should just debug and not do anything serious.
51
59
 
60
+ Returns:
61
+ An Otter object that is connected to the otter database
52
62
  """
53
63
 
54
64
  def __init__(
55
65
  self,
56
66
  url: str = "http://127.0.0.1:8529",
57
- username: str = "user-guest",
58
- password: str = "",
67
+ username: str = os.environ.get("ARANGO_USER_USERNAME", "user-guest"),
68
+ password: str = os.environ.get("ARANGO_USER_PASSWORD", ""),
59
69
  gen_summary: bool = False,
60
70
  datadir: str = None,
61
71
  debug: bool = False,
62
72
  **kwargs,
63
73
  ) -> None:
74
+ print("Attempting to login with the following credentials:")
75
+ print(f"username: {username}")
76
+ print(f"password: {password}")
77
+
64
78
  # save inputs
65
79
  if datadir is None:
66
80
  self.CWD = os.path.dirname(os.path.abspath("__FILE__"))
@@ -79,7 +93,7 @@ class Otter(Database):
79
93
  try:
80
94
  os.makedirs(self.DATADIR)
81
95
  except FileExistsError:
82
- warn(
96
+ logger.warning(
83
97
  "Directory was created between the if statement and trying "
84
98
  + "to create the directory!"
85
99
  )
@@ -93,7 +107,8 @@ class Otter(Database):
93
107
  Get the metadata of the objects matching the arguments
94
108
 
95
109
  Args:
96
- **kwargs : Arguments to pass to Otter.query()
110
+ **kwargs : Arguments to pass to Otter.query(). See that documentation with
111
+ `help(otter.Otter.query)`.
97
112
  Return:
98
113
  The metadata for the transients that match the arguments. Will be an astropy
99
114
  Table by default, if raw=True will be a dictionary.
@@ -146,9 +161,9 @@ class Otter(Database):
146
161
  unit conversion for you!
147
162
 
148
163
  Args:
149
- flux_units (astropy.unit.Unit): Either a valid string to convert
164
+ flux_unit (astropy.unit.Unit): Either a valid string to convert
150
165
  or an astropy.unit.Unit
151
- date_units (astropy.unit.Unit): Either a valid string to convert to a date
166
+ date_unit (astropy.unit.Unit): Either a valid string to convert to a date
152
167
  or an astropy.unit.Unit
153
168
  return_type (str): Either 'astropy' or 'pandas'. If astropy, returns an
154
169
  astropy Table. If pandas, returns a pandas DataFrame.
@@ -159,6 +174,8 @@ class Otter(Database):
159
174
  keep_raw (bool): If True, keep the raw flux/date/freq/wave associated with
160
175
  the dataset. Else, just keep the converted data. Default
161
176
  is False.
177
+ wave_unit (str): The astropy wavelength unit to return with
178
+ freq_unit (str): The astropy frequency unit to return with
162
179
  **kwargs : Arguments to pass to Otter.query(). Can be::
163
180
 
164
181
  names (list[str]): A list of names to get the metadata for
@@ -181,6 +198,9 @@ class Otter(Database):
181
198
  FailedQueryError: When the query returns no results
182
199
  IOError: if one of your inputs is incorrect
183
200
  """
201
+ warn_filt = _DuplicateFilter()
202
+ logger.addFilter(warn_filt)
203
+
184
204
  queryres = self.query(hasphot=True, **kwargs)
185
205
 
186
206
  dicts = []
@@ -239,6 +259,7 @@ class Otter(Database):
239
259
  else:
240
260
  fullphot = fullphot[keys_to_keep]
241
261
 
262
+ logger.removeFilter(warn_filt)
242
263
  if return_type == "astropy":
243
264
  return Table.from_pandas(fullphot)
244
265
  elif return_type == "pandas":
@@ -252,6 +273,9 @@ class Otter(Database):
252
273
 
253
274
  Args:
254
275
  filename (str): The path to the OTTER JSON file to load
276
+
277
+ Returns:
278
+ dictionary with the otter JSON file contents
255
279
  """
256
280
 
257
281
  # read in files from summary
@@ -270,6 +294,8 @@ class Otter(Database):
270
294
  refs: list[str] = None,
271
295
  hasphot: bool = False,
272
296
  hasspec: bool = False,
297
+ spec_classed: bool = False,
298
+ unambiguous: bool = False,
273
299
  classification: str = None,
274
300
  class_confidence_threshold: float = 0,
275
301
  query_private=False,
@@ -293,10 +319,17 @@ class Otter(Database):
293
319
  refs (list[str]): A list of ads bibcodes to match to. Will only return
294
320
  metadata for transients that have this as a reference.
295
321
  hasphot (bool): if True, only returns transients which have photometry.
296
- hasspec (bool): if True, only return transients that have spectra.
322
+ hasspec (bool): NOT IMPLEMENTED! Will return False for all targets!
323
+ spec_classed (bool): If True, only returns transients that have been
324
+ spectroscopically classified/confirmed
325
+ unambiguous (bool): If True, only returns transients that only have a single
326
+ published classification in OTTER. If classifications
327
+ disagree for a transient, it will be filtered out.
297
328
  classification (str): A classification string to search for
298
329
  class_confidence_threshold (float): classification confidence cutoff for
299
330
  query, between 0 and 1. Default is 0.
331
+ query_private (bool): Set to True if you would like to also query the
332
+ dataset located at whatever you set datadir to
300
333
 
301
334
  Return:
302
335
  Get all of the raw (unconverted!) data for objects that match the criteria.
@@ -310,9 +343,15 @@ class Otter(Database):
310
343
  if hasspec is True:
311
344
  query_filters += "FILTER 'spectra' IN ATTRIBUTES(transient)\n"
312
345
 
346
+ if spec_classed:
347
+ query_filters += "FILTER transient.classification.spec_classed >= 1"
348
+
349
+ if unambiguous:
350
+ query_filters += "FILTER transient.classification.unambiguous"
351
+
313
352
  if classification is not None:
314
353
  query_filters += f"""
315
- FOR subdoc IN transient.classification
354
+ FOR subdoc IN transient.classification.value
316
355
  FILTER subdoc.confidence > TO_NUMBER({class_confidence_threshold})
317
356
  FILTER subdoc.object_class LIKE '%{classification}%'
318
357
  """
@@ -573,7 +612,7 @@ class Otter(Database):
573
612
 
574
613
  def upload(self, json_data, collection="vetting", testing=False) -> Document:
575
614
  """
576
- Upload json_data to collection
615
+ Upload json_data to collection WITHOUT deduplication!
577
616
 
578
617
  Args:
579
618
  json_data [dict] : A dictionary of the json data to upload to Otter
@@ -793,7 +832,7 @@ class Otter(Database):
793
832
  def from_csvs(
794
833
  metafile: str,
795
834
  photfile: str = None,
796
- local_outpath: str = "private_otter_data",
835
+ local_outpath: Optional[str] = None,
797
836
  db: Otter = None,
798
837
  ) -> Otter:
799
838
  """
@@ -815,6 +854,7 @@ class Otter(Database):
815
854
  """
816
855
  # read in the metadata and photometry file
817
856
  meta = pd.read_csv(metafile)
857
+ meta.columns = meta.columns.str.strip() # clean up the col names
818
858
  phot = None
819
859
 
820
860
  required_phot_cols = [
@@ -831,17 +871,20 @@ class Otter(Database):
831
871
 
832
872
  if photfile is not None:
833
873
  phot_unclean = pd.read_csv(photfile)
874
+ phot_unclean.columns = phot_unclean.columns.str.strip() # cleanup colnames
834
875
 
835
876
  phot = phot_unclean.dropna(subset=required_phot_cols)
836
877
  if len(phot) != len(phot_unclean):
837
- warn("""
878
+ logger.warning("""
838
879
  Filtered out rows with nan in the photometry file! Make sure you
839
880
  expect this behaviour!
840
881
  """)
841
882
 
842
883
  if "bibcode" not in phot:
843
884
  phot["bibcode"] = "private"
844
- warn("Setting the bibcode column to the special keyword 'private'!")
885
+ logger.warning("""
886
+ Setting the bibcode column to the special keyword 'private'!
887
+ """)
845
888
 
846
889
  # we need to generate columns of wave_eff and freq_eff
847
890
  wave_eff = []
@@ -866,7 +909,7 @@ class Otter(Database):
866
909
  phot["band_eff_freq_unit"] = str(freq_eff_unit)
867
910
 
868
911
  if not os.path.exists(local_outpath):
869
- os.mkdir(local_outpath)
912
+ os.makedirs(local_outpath)
870
913
 
871
914
  # drop duplicated names in meta and keep the first
872
915
  meta = meta.drop_duplicates(subset="name", keep="first")
@@ -907,7 +950,7 @@ class Otter(Database):
907
950
  ra_units=tde.ra_unit[0],
908
951
  dec_units=tde.dec_unit[0],
909
952
  reference=[tde.coord_bibcode[0]],
910
- coordinate_type="equitorial",
953
+ coordinate_type="equatorial",
911
954
  )
912
955
  ]
913
956
 
@@ -956,13 +999,18 @@ class Otter(Database):
956
999
  ### Classification information that is in the csvs
957
1000
  # classification
958
1001
  if "classification" in tde:
959
- json["classification"] = [
960
- dict(
961
- object_class=tde.classification[0],
962
- confidence=1, # we know this is at least an tde
963
- reference=[tde.classification_bibcode[0]],
964
- )
965
- ]
1002
+ class_flag = 0
1003
+ if "classification_flag" in tde:
1004
+ class_flag = tde.classification_flag[0]
1005
+ json["classification"] = dict(
1006
+ value=[
1007
+ dict(
1008
+ object_class=tde.classification[0],
1009
+ confidence=class_flag,
1010
+ reference=[tde.classification_bibcode[0]],
1011
+ )
1012
+ ]
1013
+ )
966
1014
 
967
1015
  # discovery date
968
1016
  # print(tde)
@@ -1260,7 +1308,7 @@ class Otter(Database):
1260
1308
  if db is None:
1261
1309
  db = Otter(datadir=local_outpath)
1262
1310
  else:
1263
- db.datadir = local_outpath
1311
+ db.DATADIR = local_outpath
1264
1312
 
1265
1313
  # always save this document as a new one
1266
1314
  db.save(all_jsons)
otter/io/transient.py CHANGED
@@ -24,11 +24,9 @@ from ..exceptions import (
24
24
  OtterLimitationError,
25
25
  TransientMergeError,
26
26
  )
27
- from ..util import XRAY_AREAS
27
+ from ..util import XRAY_AREAS, _KNOWN_CLASS_ROOTS, _DuplicateFilter
28
28
  from .host import Host
29
29
 
30
- warnings.simplefilter("once", RuntimeWarning)
31
- warnings.simplefilter("once", UserWarning)
32
30
  np.seterr(divide="ignore")
33
31
  logger = logging.getLogger(__name__)
34
32
 
@@ -289,7 +287,7 @@ class Transient(MutableMapping):
289
287
  raise TransientMergeError(f"{key} was not expected! Can not merge!")
290
288
  else:
291
289
  # Throw a warning and only keep the old stuff
292
- warnings.warn(
290
+ logger.warning(
293
291
  f"{key} was not expected! Only keeping the old information!"
294
292
  )
295
293
  out[key] = deepcopy(self[key])
@@ -323,17 +321,17 @@ class Transient(MutableMapping):
323
321
  else:
324
322
  # run some checks
325
323
  if "photometry" in keys:
326
- warnings.warn("Not returing the photometry!")
324
+ logger.warning("Not returing the photometry!")
327
325
  _ = keys.pop("photometry")
328
326
  if "spectra" in keys:
329
- warnings.warn("Not returning the spectra!")
327
+ logger.warning("Not returning the spectra!")
330
328
  _ = keys.pop("spectra")
331
329
 
332
330
  curr_keys = self.keys()
333
331
  for key in keys:
334
332
  if key not in curr_keys:
335
333
  keys.remove(key)
336
- warnings.warn(
334
+ logger.warning(
337
335
  f"Not returning {key} because it is not in this transient!"
338
336
  )
339
337
 
@@ -352,7 +350,7 @@ class Transient(MutableMapping):
352
350
  """
353
351
 
354
352
  # now we can generate the SkyCoord
355
- f = "df['coordinate_type'] == 'equitorial'"
353
+ f = "df['coordinate_type'] == 'equatorial'"
356
354
  coord_dict = self._get_default("coordinate", filt=f)
357
355
  coordin = self._reformat_coordinate(coord_dict)
358
356
  coord = SkyCoord(**coordin).transform_to(coord_format)
@@ -407,7 +405,7 @@ class Transient(MutableMapping):
407
405
  and a list of the bibcodes corresponding to that classification. Or, None
408
406
  if there is no classification.
409
407
  """
410
- default = self._get_default("classification")
408
+ default = self._get_default("classification/value")
411
409
  if default is None:
412
410
  return default
413
411
  return default.object_class, default.confidence, default.reference
@@ -421,7 +419,7 @@ class Transient(MutableMapping):
421
419
  The BLAST result will always be the last value in the returned list.
422
420
 
423
421
  Args:
424
- max_hosts [int] : The maximum number of hosts to return
422
+ max_hosts [int] : The maximum number of hosts to return, default is 3
425
423
  **kwargs : keyword arguments to be passed to getGHOST
426
424
 
427
425
  Returns:
@@ -437,7 +435,7 @@ class Transient(MutableMapping):
437
435
 
438
436
  # then try BLAST
439
437
  if search:
440
- logger.warn(
438
+ logger.warning(
441
439
  "Trying to find a host with BLAST/astro-ghost. Note\
442
440
  that this won't work for older targets! See https://blast.scimma.org"
443
441
  )
@@ -489,7 +487,7 @@ class Transient(MutableMapping):
489
487
  """
490
488
  coordin = None
491
489
  if "ra" in item and "dec" in item:
492
- # this is an equitorial coordinate
490
+ # this is an equatorial coordinate
493
491
  coordin = {
494
492
  "ra": item["ra"],
495
493
  "dec": item["dec"],
@@ -511,7 +509,6 @@ class Transient(MutableMapping):
511
509
  date_unit: u.Unit = "MJD",
512
510
  freq_unit: u.Unit = "GHz",
513
511
  wave_unit: u.Unit = "nm",
514
- by: str = "raw",
515
512
  obs_type: str = None,
516
513
  ) -> pd.DataFrame:
517
514
  """
@@ -529,10 +526,6 @@ class Transient(MutableMapping):
529
526
  wave_unit (astropy.unit.Unit): The astropy unit or string representation of
530
527
  an astropy unit to convert and return the
531
528
  wavelength as.
532
- by (str): Either 'raw' or 'value'. 'raw' is the default and is highly
533
- recommended! If 'value' is used it may skip some photometry.
534
- See the schema definition to understand this keyword completely
535
- before using it.
536
529
  obs_type (str): "radio", "xray", or "uvoir". If provided, it only returns
537
530
  data taken within that range of wavelengths/frequencies.
538
531
  Default is None which will return all of the data.
@@ -540,14 +533,17 @@ class Transient(MutableMapping):
540
533
  Returns:
541
534
  A pandas DataFrame of the cleaned up photometry in the requested units
542
535
  """
536
+ warn_filt = _DuplicateFilter()
537
+ logger.addFilter(warn_filt)
538
+
543
539
  # these imports need to be here for some reason
544
540
  # otherwise the code breaks
545
541
  from synphot.units import VEGAMAG, convert_flux
546
542
  from synphot.spectrum import SourceSpectrum
547
543
 
548
- # check inputs
549
- if by not in {"value", "raw"}:
550
- raise IOError("Please choose either value or raw!")
544
+ # variable so this warning only displays a single time each time this
545
+ # function is called
546
+ source_map_warning = True
551
547
 
552
548
  # turn the photometry key into a pandas dataframe
553
549
  if "photometry" not in self:
@@ -594,12 +590,82 @@ class Transient(MutableMapping):
594
590
  # merge the photometry with the filter information
595
591
  df = c.merge(filters, on="filter_key")
596
592
 
597
- # make sure 'by' is in df
598
- if by not in df:
599
- if by == "value":
600
- by = "raw"
601
- else:
602
- by = "value"
593
+ # drop irrelevant obs_types before continuing
594
+ if obs_type is not None:
595
+ valid_obs_types = {"radio", "uvoir", "xray"}
596
+ if obs_type not in valid_obs_types:
597
+ raise IOError("Please provide a valid obs_type")
598
+ df = df[df.obs_type == obs_type]
599
+
600
+ # add some mockup columns if they don't exist
601
+ if "value" not in df:
602
+ df["value"] = np.nan
603
+ df["value_err"] = np.nan
604
+ df["value_units"] = "NaN"
605
+
606
+ # fix some bad units that are old and no longer recognized by astropy
607
+ with warnings.catch_warnings():
608
+ warnings.filterwarnings("ignore")
609
+ df.raw_units = df.raw_units.str.replace("ergs", "erg")
610
+ df.raw_units = ["mag(AB)" if uu == "AB" else uu for uu in df.raw_units]
611
+ df.value_units = df.value_units.str.replace("ergs", "erg")
612
+ df.value_units = ["mag(AB)" if uu == "AB" else uu for uu in df.value_units]
613
+
614
+ # merge the raw and value keywords based on the requested flux_units
615
+ # first take everything that just has `raw` and not `value`
616
+ df_raw_only = df[df.value.isna()]
617
+ remaining = df[df.value.notna()]
618
+ if len(remaining) == 0:
619
+ df_raw = df_raw_only
620
+ df_value = [] # this tricks the code later
621
+ else:
622
+ # then take the remaining rows and figure out if we want the raw or value
623
+ with warnings.catch_warnings():
624
+ warnings.filterwarnings("ignore")
625
+ flux_unit_astropy = u.Unit(flux_unit)
626
+
627
+ val_unit_filt = np.array(
628
+ [
629
+ u.Unit(uu).is_equivalent(flux_unit_astropy)
630
+ for uu in remaining.value_units
631
+ ]
632
+ )
633
+
634
+ df_value = remaining[val_unit_filt]
635
+ df_raw_and_value = remaining[~val_unit_filt]
636
+
637
+ # then merge the raw dataframes
638
+ df_raw = pd.concat([df_raw_only, df_raw_and_value], axis=0)
639
+
640
+ # then add columns to these dataframes to convert stuff later
641
+ df_raw = df_raw.assign(
642
+ _flux=df_raw["raw"].values,
643
+ _flux_units=df_raw["raw_units"].values,
644
+ _flux_err=(
645
+ df_raw["raw_err"].values
646
+ if "raw_err" in df_raw
647
+ else [np.nan] * len(df_raw)
648
+ ),
649
+ )
650
+
651
+ if len(df_value) == 0:
652
+ df = df_raw
653
+ else:
654
+ df_value = df_value.assign(
655
+ _flux=df_value["value"].values,
656
+ _flux_units=df_value["value_units"].values,
657
+ _flux_err=(
658
+ df_value["value_err"].values
659
+ if "value_err" in df_value
660
+ else [np.nan] * len(df_value)
661
+ ),
662
+ )
663
+
664
+ # then merge df_value and df_raw back into one df
665
+ df = pd.concat([df_raw, df_value], axis=0)
666
+
667
+ # then, for the rest of the code to work, set the "by" variables to _flux
668
+ by = "_flux"
603
669
 
604
670
  # skip rows where 'by' is nan
605
671
  df = df[df[by].notna()]
@@ -612,12 +678,21 @@ class Transient(MutableMapping):
612
678
  # the TDE lightcurves for this systematic effect. "
613
679
  df = df[df[by].astype(float) > 0]
614
680
 
615
- # drop irrelevant obs_types before continuing
616
- if obs_type is not None:
617
- valid_obs_types = {"radio", "uvoir", "xray"}
618
- if obs_type not in valid_obs_types:
619
- raise IOError("Please provide a valid obs_type")
620
- df = df[df.obs_type == obs_type]
681
+ # filter out anything that has _flux_units == "ct" because we can't convert that
682
+ try:
683
+ # this is a test case to see if we can convert ct -> flux_unit
684
+ convert_flux(
685
+ [1 * u.nm, 2 * u.nm], 1 * u.ct, u.Unit(flux_unit), area=1 * u.m**2
686
+ )
687
+ except u.UnitsError:
688
+ bad_units = df[df._flux_units == "ct"]
689
+ if len(bad_units) > 0:
690
+ logger.warning(
691
+ f"""Removing {len(bad_units)} photometry points from
692
+ {self.default_name} because we can't convert them from ct ->
693
+ {flux_unit}"""
694
+ )
695
+ df = df[df._flux_units != "ct"]
621
696
 
622
697
  # convert the ads bibcodes to a string of human readable sources here
623
698
  def mappedrefs(row):
@@ -629,7 +704,10 @@ class Transient(MutableMapping):
629
704
  try:
630
705
  df["human_readable_refs"] = df.apply(mappedrefs, axis=1)
631
706
  except Exception as exc:
632
- warnings.warn(f"Unable to apply the source mapping because {exc}")
707
+ if source_map_warning:
708
+ source_map_warning = False
709
+ logger.warning(f"Unable to apply the source mapping because {exc}")
710
+
633
711
  df["human_readable_refs"] = df.reference
634
712
 
635
713
  # Figure out what columns are good to groupby in the photometry
@@ -669,7 +747,9 @@ class Transient(MutableMapping):
669
747
  # We can assume here that this unit really means astropy's "mag(AB)"
670
748
  astropy_units = u.Unit("mag(AB)")
671
749
  else:
672
- astropy_units = u.Unit(unit)
750
+ with warnings.catch_warnings():
751
+ warnings.simplefilter("ignore")
752
+ astropy_units = u.Unit(unit)
673
753
 
674
754
  except ValueError:
675
755
  # this means there is something likely slightly off in the input unit
@@ -694,10 +774,12 @@ class Transient(MutableMapping):
694
774
  indata_err = np.zeros(len(data))
695
775
 
696
776
  # convert to an astropy quantity
697
- q = indata * u.Unit(astropy_units)
698
- q_err = indata_err * u.Unit(
699
- astropy_units
700
- ) # assume error and values have the same unit
777
+ with warnings.catch_warnings():
778
+ warnings.filterwarnings("ignore")
779
+ q = indata * u.Unit(astropy_units)
780
+ q_err = indata_err * u.Unit(
781
+ astropy_units
782
+ ) # assume error and values have the same unit
701
783
 
702
784
  # get and save the effective wavelength
703
785
  # because of cleaning we did to the filter dataframe above wave_eff
@@ -706,8 +788,10 @@ class Transient(MutableMapping):
706
788
  raise ValueError("Flushing out the effective wavelength array failed!")
707
789
 
708
790
  zz = zip(data["wave_eff"], data["wave_units"])
709
- wave_eff = u.Quantity([vv * u.Unit(uu) for vv, uu in zz], wave_unit)
710
- freq_eff = wave_eff.to(freq_unit, equivalencies=u.spectral())
791
+ with warnings.catch_warnings():
792
+ warnings.filterwarnings("ignore")
793
+ wave_eff = u.Quantity([vv * u.Unit(uu) for vv, uu in zz], wave_unit)
794
+ freq_eff = wave_eff.to(freq_unit, equivalencies=u.spectral())
711
795
 
712
796
  data["converted_wave"] = wave_eff.value
713
797
  data["converted_wave_unit"] = wave_unit
@@ -733,10 +817,12 @@ class Transient(MutableMapping):
733
817
  # we also need to make this wave_min and wave_max
734
818
  # instead of just the effective wavelength like for radio and uvoir
735
819
  zz = zip(data["wave_min"], data["wave_max"], data["wave_units"])
736
- wave_eff = u.Quantity(
737
- [np.array([m, M]) * u.Unit(uu) for m, M, uu in zz],
738
- u.Unit(wave_unit),
739
- )
820
+ with warnings.catch_warnings():
821
+ warnings.filterwarnings("ignore")
822
+ wave_eff = u.Quantity(
823
+ [np.array([m, M]) * u.Unit(uu) for m, M, uu in zz],
824
+ u.Unit(wave_unit),
825
+ )
740
826
 
741
827
  else:
742
828
  area = None
@@ -750,13 +836,15 @@ class Transient(MutableMapping):
750
836
 
751
837
  flux, flux_err = [], []
752
838
  for wave, xray_point, xray_point_err in zip(wave_eff, q, q_err):
753
- f_val = convert_flux(
754
- wave,
755
- xray_point,
756
- u.Unit(flux_unit),
757
- vegaspec=SourceSpectrum.from_vega(),
758
- area=area,
759
- ).value
839
+ with warnings.catch_warnings():
840
+ warnings.filterwarnings("ignore")
841
+ f_val = convert_flux(
842
+ wave,
843
+ xray_point,
844
+ u.Unit(flux_unit),
845
+ vegaspec=SourceSpectrum.from_vega(),
846
+ area=area,
847
+ ).value
760
848
 
761
849
  # approximate the uncertainty as dX = dY/Y * X
762
850
  f_err = np.multiply(
@@ -770,7 +858,9 @@ class Transient(MutableMapping):
770
858
 
771
859
  else:
772
860
  # this will be faster and cover most cases
773
- flux = convert_flux(wave_eff, q, u.Unit(flux_unit)).value
861
+ with warnings.catch_warnings():
862
+ warnings.filterwarnings("ignore")
863
+ flux = convert_flux(wave_eff, q, u.Unit(flux_unit)).value
774
864
 
775
865
  # since the error propagation is different between logarithmic units
776
866
  # and linear units, unfortunately
@@ -825,6 +915,8 @@ class Transient(MutableMapping):
825
915
  return row.upperlimit
826
916
 
827
917
  outdata["upperlimit"] = outdata.apply(is_upperlimit, axis=1)
918
+
919
+ logger.removeFilter(warn_filt)
828
920
  return outdata
829
921
 
830
922
  def _merge_names(t1, t2, out): # noqa: N805
@@ -876,7 +968,7 @@ class Transient(MutableMapping):
876
968
  elif score2 > score1:
877
969
  out[key]["default_name"] = t2[key]["default_name"]
878
970
  else:
879
- warnings.warn(
971
+ logger.warning(
880
972
  "Names have the same score! Just using the existing default_name"
881
973
  )
882
974
  out[key]["default_name"] = t1[key]["default_name"]
@@ -991,36 +1083,108 @@ class Transient(MutableMapping):
991
1083
  Combine the classification attribute
992
1084
  """
993
1085
  key = "classification"
1086
+ subkey = "value"
994
1087
  out[key] = deepcopy(t1[key])
995
- classes = np.array([item["object_class"] for item in out[key]])
996
- for item in t2[key]:
1088
+ classes = np.array([item["object_class"] for item in out[key][subkey]])
1089
+
1090
+ for item in t2[key][subkey]:
997
1091
  if item["object_class"] in classes:
998
1092
  i = np.where(item["object_class"] == classes)[0][0]
999
- if int(item["confidence"]) > int(out[key][i]["confidence"]):
1000
- out[key][i]["confidence"] = item[
1093
+ if int(item["confidence"]) > int(out[key][subkey][i]["confidence"]):
1094
+ out[key][subkey][i]["confidence"] = item[
1001
1095
  "confidence"
1002
1096
  ] # we are now more confident
1003
1097
 
1004
- if not isinstance(out[key][i]["reference"], list):
1005
- out[key][i]["reference"] = [out[key][i]["reference"]]
1098
+ if not isinstance(out[key][subkey][i]["reference"], list):
1099
+ out[key][subkey][i]["reference"] = [
1100
+ out[key][subkey][i]["reference"]
1101
+ ]
1006
1102
 
1007
1103
  if not isinstance(item["reference"], list):
1008
1104
  item["reference"] = [item["reference"]]
1009
1105
 
1010
- newdata = list(np.unique(out[key][i]["reference"] + item["reference"]))
1011
- out[key][i]["reference"] = newdata
1106
+ newdata = list(
1107
+ np.unique(out[key][subkey][i]["reference"] + item["reference"])
1108
+ )
1109
+ out[key][subkey][i]["reference"] = newdata
1012
1110
 
1013
1111
  else:
1014
- out[key].append(item)
1112
+ out[key][subkey].append(item)
1015
1113
 
1016
1114
  # now that we have all of them we need to figure out which one is the default
1017
- maxconf = max(out[key], key=lambda d: d["confidence"])
1018
- for item in out[key]:
1115
+ maxconf = max(out[key][subkey], key=lambda d: d["confidence"])
1116
+ for item in out[key][subkey]:
1019
1117
  if item == maxconf:
1020
1118
  item["default"] = True
1021
1119
  else:
1022
1120
  item["default"] = False
1023
1121
 
1122
+ # then rederive the classification flags
1123
+ out = Transient._derive_classification_flags(out)
1124
+
1125
+ @classmethod
1126
+ def _derive_classification_flags(cls, out):
1127
+ """
1128
+ Derive the classification flags based on the confidence flags. This will find
1129
+ - spec_classed
1130
+ - unambiguous
1131
+
1132
+ See the paper for a detailed description of how this algorithm makes its
1133
+ choices
1134
+ """
1135
+
1136
+ if "classification" not in out or "value" not in out["classification"]:
1137
+ # this means that the transient doesn't have any classifications
1138
+ # just return itself without any changes
1139
+ return out
1140
+
1141
+ # get the confidences of all of the classifications of this transient
1142
+ confs = np.array(
1143
+ [item["confidence"] for item in out["classification"]["value"]]
1144
+ ).astype(float)
1145
+
1146
+ all_class_roots = np.array(
1147
+ [
1148
+ _fuzzy_class_root(item["object_class"])
1149
+ for item in out["classification"]["value"]
1150
+ ]
1151
+ )
1152
+
1153
+ if np.any(confs >= 3):
1154
+ unambiguous = len(np.unique(all_class_roots)) == 1
1155
+ if np.any(confs == 3) or np.any(confs == 3.3):
1156
+ # this is a "gold spectrum"
1157
+ spec_classed = 3
1158
+ elif np.any(confs == 3.2):
1159
+ # this is a silver spectrum
1160
+ spec_classed = 2
1161
+ elif np.any(confs == 3.1):
1162
+ # this is a bronze spectrum
1163
+ spec_classed = 1
1164
+ else:
1165
+ raise ValueError("Not prepared for this confidence flag!")
1166
+
1167
+ elif np.any(confs == 2):
1168
+ # these always have spec_classed = 1, by definition
1169
+ # They also have unambiguous = False by definition because they don't
1170
+ # have a peer reviewed citation for their classification
1171
+ spec_classed = 1
1172
+ unambiguous = False
1173
+
1174
+ elif np.any(confs == 1):
1175
+ spec_classed = 0 # by definition
1176
+ unambiguous = len(np.unique(all_class_roots)) == 1
1177
+
1178
+ else:
1179
+ spec_classed = 0
1180
+ unambiguous = False
1181
+
1182
+ # finally, set these keys in the classification dict
1183
+ out["classification"]["spec_classed"] = spec_classed
1184
+ out["classification"]["unambiguous"] = unambiguous
1185
+
1186
+ return out
1187
+
1024
1188
  @staticmethod
1025
1189
  def _merge_arbitrary(key, t1, t2, out, merge_subkeys=None, groupby_key=None):
1026
1190
  """
@@ -1108,3 +1272,21 @@ class Transient(MutableMapping):
1108
1272
  outdict_cleaned = [{**x[i]} for i, x in outdict.stack().groupby(level=0)]
1109
1273
 
1110
1274
  out[key] = outdict_cleaned
1275
+
1276
+
1277
def _fuzzy_class_root(s):
    """
    Extract the fuzzy classification root name from the string s

    The string is upper-cased and compared against the entries of
    ``_KNOWN_CLASS_ROOTS`` in order. The first entry that the string starts
    with is returned as the root. If no entry matches, the upper-cased string
    itself is returned.

    Supernovae get one extra level of detail: when the text after the "SN"
    prefix contains "IA" or "1A" the root returned is "SN Ia", so that SN Ia
    can be told apart from every other supernova, which is reported as "SN".
    """
    upper = s.upper()

    # first known root that prefixes the class name, or None when there is none
    root = next(
        (known for known in _KNOWN_CLASS_ROOTS if upper.startswith(known)),
        None,
    )
    if root is None:
        return upper

    if root == "SN":
        # distinguish SN Ia from the other supernova classes using whatever
        # follows the "SN" prefix (an empty suffix can never match)
        suffix = upper[len(root) :]
        if "IA" in suffix or "1A" in suffix:
            return "SN Ia"

    return root
otter/schema.py CHANGED
@@ -135,6 +135,12 @@ class ClassificationSchema(BaseModel):
135
135
  class_type: str = None
136
136
 
137
137
 
138
class ClassificationDictSchema(BaseModel):
    # Container for all classification information of one transient: the
    # individual classification entries plus two optional summary flags.

    # integer flag describing the spectroscopic classification; the
    # classification-flagging code in transient.py assigns values 0 to 3.
    # Optional, defaults to None when the flag has not been computed.
    spec_classed: Optional[int] = None
    # summary boolean; the flagging code in transient.py sets it from whether
    # all classification entries share a single class root (or forces False).
    # Optional, defaults to None when the flag has not been computed.
    unambiguous: Optional[bool] = None
    # the individual classification entries (required field)
    value: list[ClassificationSchema]
142
+
143
+
138
144
  class ReferenceSchema(BaseModel):
139
145
  name: str
140
146
  human_readable_name: str
@@ -283,7 +289,7 @@ class OtterSchema(BaseModel):
283
289
  name: NameSchema
284
290
  coordinate: list[CoordinateSchema]
285
291
  distance: Optional[list[DistanceSchema]] = None
286
- classification: Optional[list[ClassificationSchema]] = None
292
+ classification: Optional[ClassificationDictSchema] = None
287
293
  reference_alias: list[ReferenceSchema]
288
294
  date_reference: Optional[list[DateSchema]] = None
289
295
  photometry: Optional[list[PhotometrySchema]] = None
otter/util.py CHANGED
@@ -580,6 +580,22 @@ VIZIER_LARGE_CATALOGS = [
580
580
  ViZier catalog names that we query for host information in the Host class
581
581
  """
582
582
 
583
# Upper-case prefixes of the broad classification families. A classification
# string is upper-cased and tested against these entries, in list order, with
# str.startswith; the first match is taken as its "root".
_KNOWN_CLASS_ROOTS = [
    "SN",
    "SLSN",
    "TDE",
    "GRB",
    "LGRB",
    "SGRB",
    "AGN",
    "FRB",
    "QSO",
    "ANT",
]
"""
Classification root names
"""
598
+
583
599
  DATADIR = os.path.join(BASEDIR, "data", "base")
584
600
  """
585
601
  Deprecated database directory that IS NOT always constant anymore
@@ -597,7 +613,7 @@ schema = {
597
613
  "name": {"default_name": None, "alias": []},
598
614
  "coordinate": [],
599
615
  "distance": [],
600
- "classification": [],
616
+ "classification": {"value": []},
601
617
  "reference_alias": [],
602
618
  "date_reference": [],
603
619
  "photometry": [],
@@ -808,3 +824,13 @@ subschema = {
808
824
  """
809
825
  A useful variable to describe all of the subschemas that are available and can be used
810
826
  """
827
+
828
+
829
+ class _DuplicateFilter(object):
830
+ def __init__(self):
831
+ self.msgs = set()
832
+
833
+ def filter(self, record):
834
+ rv = record.msg not in self.msgs
835
+ self.msgs.add(record.msg)
836
+ return rv
@@ -1,18 +0,0 @@
1
- astro_otter-0.3.0.dist-info/licenses/LICENSE,sha256=s9IPE8A3CAMEaZpDhj4eaorpmfLYGB0mIGphq301PUY,1067
2
- otter/__init__.py,sha256=pvX-TN7nLVmvKpkDi89Zxe-jMfHNiVMD3zsd_bPEK9Y,535
3
- otter/_version.py,sha256=XDCAY_avI84JEWPKHKBF9kXkCGBGi2CjNGEItozNsk4,76
4
- otter/exceptions.py,sha256=3lQF4AXVTfs9VRsVePQoIrXnramsPZbUL5crvf1s9Ng,1702
5
- otter/schema.py,sha256=eOxlrtp9TTbiENy38ueE2HcYlI-M56g0Ohg7zQeTxjk,10631
6
- otter/util.py,sha256=xKsNkkxGajML1rZZnR9d5rJV1_z1KeAlIdAs8t0814M,22440
7
- otter/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- otter/io/data_finder.py,sha256=v3jZCOhvysHUQG1FOwHdeJ-psTT-MsdO_GZalBpMBGc,38218
9
- otter/io/host.py,sha256=xv_SznZuvMoMVsZLqlcmlOyaqKCMZqlTQ_gkN4VBSTw,7139
10
- otter/io/otter.py,sha256=HiYQ5hgAvPYSHmU6taF6NumpElYzFgjjA0oIgLEo9_4,47471
11
- otter/io/transient.py,sha256=b_gOQxjk8z3VylZ9ZrAv-Dhqp_d2iXPICL65Hq3kYGg,41846
12
- otter/plotter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- otter/plotter/otter_plotter.py,sha256=OQhuLgnMSzgtAjJF8SYBuQOyYcu7Pr0uia5P0G_7z5Q,2144
14
- otter/plotter/plotter.py,sha256=z09NwQVJS2tuwH3sv95DZv8xogjvf-7Gvj6iWCEx-gQ,9635
15
- astro_otter-0.3.0.dist-info/METADATA,sha256=7qokfMceKHN1Ai0FNKqY95LYiil9jf3BzAX_7_A5KVI,7127
16
- astro_otter-0.3.0.dist-info/WHEEL,sha256=ooBFpIzZCPdw3uqIQsOo4qqbA4ZRPxHnOH7peeONza0,91
17
- astro_otter-0.3.0.dist-info/top_level.txt,sha256=Wth72sCwBRUk3KZGknSKvLQDMFuJk6qiaAavMDOdG5k,6
18
- astro_otter-0.3.0.dist-info/RECORD,,