astro-otter 0.0.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

otter/io/otter.py CHANGED
@@ -7,15 +7,22 @@ import os
  import json
  import glob
  from warnings import warn
+ from copy import deepcopy
+
+ from pyArango.connection import Connection
+ from pyArango.database import Database
+ from pyArango.document import Document

  import pandas as pd
+ import numpy as np

  from astropy.coordinates import SkyCoord, search_around_sky
  from astropy.table import Table
  from astropy import units as u

  from .transient import Transient
- from ..exceptions import FailedQueryError, OtterLimitationError
+ from ..exceptions import FailedQueryError, OtterLimitationError, TransientMergeError
+ from ..util import bibcode_to_hrn, freq_to_obstype, freq_to_band

  import warnings

@@ -24,7 +31,15 @@ warnings.simplefilter("once", UserWarning)
  warnings.simplefilter("once", u.UnitsWarning)


- class Otter(object):
+ def _np_encoder(object):
+     """
+     Numpy data type encoder for json.dump
+     """
+     if isinstance(object, (np.generic, np.ndarray)):
+         return object.item()
+
+
+ class Otter(Database):
      """
      This is the primary class for users to access the otter backend database

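Note: the new `_np_encoder` helper exists because `json.dump`/`json.dumps` cannot serialize numpy scalars or arrays on their own. A minimal, hedged sketch of how it is wired in through the `default` hook (the sample values below are made up for illustration):

    import json
    import numpy as np

    def _np_encoder(object):
        # convert numpy scalars/0-d arrays to plain Python types so json can serialize them
        if isinstance(object, (np.generic, np.ndarray)):
            return object.item()

    row = {"n_points": np.int64(12), "upperlimit": np.bool_(False)}
    print(json.dumps(row, default=_np_encoder))  # {"n_points": 12, "upperlimit": false}
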
@@ -36,7 +51,16 @@ class Otter(object):

      """

-     def __init__(self, datadir: str = None, debug: bool = False) -> None:
+     def __init__(
+         self,
+         url: str = "http://127.0.0.1:8529",
+         username: str = "user-guest",
+         password: str = "",
+         gen_summary: bool = False,
+         datadir: str = None,
+         debug: bool = False,
+         **kwargs,
+     ) -> None:
          # save inputs
          if datadir is None:
              self.CWD = os.path.dirname(os.path.abspath("__FILE__"))
@@ -47,6 +71,9 @@ class Otter(object):

          self.debug = debug

+         if gen_summary:
+             self.generate_summary_table(save=True)
+
          # make sure the data directory exists
          if not os.path.exists(self.DATADIR):
              try:
@@ -58,6 +85,9 @@ class Otter(object):
                  )
                  pass

+         connection = Connection(username=username, password=password, arangoURL=url)
+         super().__init__(connection, "otter", **kwargs)
+
      def get_meta(self, **kwargs) -> Table:
          """
          Get the metadata of the objects matching the arguments
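Note: because `Otter` now subclasses `pyArango.database.Database` and opens its connection inside `__init__`, instantiating it requires a reachable ArangoDB server. A hedged sketch of typical usage, assuming the default guest credentials shown above are valid for your deployment and that the class is exposed at the package top level:

    from otter import Otter  # assumed import path

    db = Otter(
        url="http://127.0.0.1:8529",  # local ArangoDB server, per the new defaults
        username="user-guest",
        password="",
    )
    meta = db.get_meta(names="AT2018hyz")  # hypothetical transient name
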
@@ -74,6 +104,7 @@ class Otter(object):
              "date_reference",
              "distance",
              "classification",
+             "reference_alias",
          ]

          return [t[metakeys] for t in self.query(**kwargs)]
@@ -192,11 +223,16 @@ class Otter(object):
              "converted_date_unit",
              "converted_wave_unit",
              "converted_freq_unit",
+             "filter_name",
              "obs_type",
              "upperlimit",
              "reference",
+             "human_readable_refs",
          ]

+         if "upperlimit" not in fullphot:
+             fullphot["upperlimit"] = False
+
          if not keep_raw:
              if "telescope" in fullphot:
                  fullphot = fullphot[keys_to_keep + ["telescope"]]
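Note: the new `upperlimit` guard simply fills in a default column when the input photometry does not carry one; in pandas terms the check is column membership on the DataFrame. A standalone illustration with a made-up table:

    import pandas as pd

    fullphot = pd.DataFrame({"raw": [1.2, 3.4], "reference": ["some_bibcode", "some_bibcode"]})
    if "upperlimit" not in fullphot:
        fullphot["upperlimit"] = False  # treat every point as a detection unless told otherwise
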
@@ -225,6 +261,186 @@ class Otter(object):
          return to_ret

      def query(
+         self,
+         names: list[str] = None,
+         coords: SkyCoord = None,
+         radius: float = 5,
+         minz: float = None,
+         maxz: float = None,
+         refs: list[str] = None,
+         hasphot: bool = False,
+         hasspec: bool = False,
+         classification: str = None,
+         class_confidence_threshold: float = 0,
+         query_private=False,
+         **kwargs,
+     ) -> dict:
+         """
+         Searches the arango database table and reads relevant JSON files
+
+         WARNING! This does not do any conversions for you!
+         This is how it differs from the `get_meta` method. Users should prefer to use
+         `get_meta`, `getPhot`, and `getSpec` independently because it is a better
+         workflow and can return the data in an astropy table with everything in the
+         same units.
+
+         Args:
+             names (list[str]): A list of names to get the metadata for
+             coords (SkyCoord): An astropy SkyCoord object with coordinates to match to
+             radius (float): The radius in arcseconds for a cone search, default is 5"
+             minz (float): The minimum redshift to search for
+             maxz (float): The maximum redshift to search for
+             refs (list[str]): A list of ads bibcodes to match to. Will only return
+                 metadata for transients that have this as a reference.
+             hasphot (bool): if True, only returns transients which have photometry.
+             hasspec (bool): if True, only return transients that have spectra.
+             classification (str): A classification string to search for
+             class_confidence_threshold (float): classification confidence cutoff for
+                 the query, between 0 and 1. Default is 0.
+
+         Return:
+             Get all of the raw (unconverted!) data for objects that match the criteria.
+         """
+         # write some AQL filters based on the inputs
+         query_filters = ""
+
+         if hasphot is True:
+             query_filters += "FILTER 'photometry' IN ATTRIBUTES(transient)\n"
+
+         if hasspec is True:
+             query_filters += "FILTER 'spectra' IN ATTRIBUTES(transient)\n"
+
+         if classification is not None:
+             query_filters += f"""
+             FOR subdoc IN transient.classification
+                 FILTER subdoc.confidence > TO_NUMBER({class_confidence_threshold})
+                 FILTER subdoc.object_class LIKE '%{classification}%'
+             """
+
+         if minz is not None:
+             sfilt = f"""
+             FILTER 'redshift' IN transient.distance[*].distance_type
+             LET redshifts1 = (
+                 FOR val IN transient.distance
+                     FILTER val.distance_type == 'redshift'
+                     FILTER TO_NUMBER(val.value) >= {minz}
+                     RETURN val
+             )
+             FILTER COUNT(redshifts1) > 0
+             """
+             query_filters += sfilt
+         if maxz is not None:
+             sfilt = f"""
+             FILTER 'redshift' IN transient.distance[*].distance_type
+             LET redshifts2 = (
+                 FOR val IN transient.distance
+                     FILTER val.distance_type == 'redshift'
+                     FILTER TO_NUMBER(val.value) <= {maxz}
+                     RETURN val
+             )
+             FILTER COUNT(redshifts2) > 0
+             """
+             query_filters += sfilt
+
+         if names is not None:
+             if isinstance(names, str):
+                 query_filters += f"""
+                 FILTER UPPER(transient.name) LIKE UPPER('%{names}%')\n
+                 """
+             elif isinstance(names, list):
+                 namefilt = f"""
+                 FOR name IN {names}
+                     FILTER name IN transient.name.alias[*].value\n
+                 """
+                 query_filters += namefilt
+             else:
+                 raise Exception("Names must be either a string or list")
+
+         if refs is not None:
+             if isinstance(refs, str):  # this is just a single bibcode
+                 query_filters += f"FILTER {refs} IN transient.reference_alias[*].name"
+             elif isinstance(refs, list):
+                 query_filters += f"""
+                 FOR ref IN {refs}
+                     FILTER ref IN transient.reference_alias[*].name
+                 """
+             else:
+                 raise Exception("reference list must be either a string or a list")
+
+         # define the query
+         query = f"""
+         FOR transient IN transients
+             {query_filters}
+             RETURN transient
+         """
+
+         # set batch size to 100 million (for now at least)
+         result = self.AQLQuery(query, rawResults=True, batchSize=100_000_000)
+
+         # now that we have the query results do the RA and Dec queries if they exist
+         if coords is not None:
+             # get the catalog RAs and Decs to compare against
+             query_coords = coords
+             good_tdes = []
+
+             for tde in result:
+                 for coordinfo in tde["coordinate"]:
+                     if "ra" in coordinfo and "dec" in coordinfo:
+                         coord = SkyCoord(
+                             coordinfo["ra"],
+                             coordinfo["dec"],
+                             unit=(coordinfo["ra_units"], coordinfo["dec_units"]),
+                         )
+                     elif "l" in coordinfo and "b" in coordinfo:
+                         # this is galactic
+                         coord = SkyCoord(
+                             coordinfo["l"],
+                             coordinfo["b"],
+                             unit=(coordinfo["l_units"], coordinfo["b_units"]),
+                             frame="galactic",
+                         )
+                     else:
+                         raise ValueError(
+                             "Either needs to have ra and dec or l and b as keys!"
+                         )
+                     if query_coords.separation(coord) < radius * u.arcsec:
+                         good_tdes.append(tde)
+                         break  # we've confirmed this tde is in the cone!
+
+             arango_query_results = [Transient(t) for t in good_tdes]
+
+         else:
+             arango_query_results = [Transient(res) for res in result.result]
+
+         if not query_private:
+             return arango_query_results
+
+         private_results = self._query_datadir(
+             names=names,
+             coords=coords,
+             radius=radius,
+             minz=minz,
+             maxz=maxz,
+             refs=refs,
+             hasphot=hasphot,
+             hasspec=hasspec,
+         )
+
+         partially_merged = deepcopy(arango_query_results)
+         new_transients = []
+         for jj, t_private in enumerate(private_results):
+             for ii, t_public in enumerate(arango_query_results):
+                 try:
+                     partially_merged[ii] += t_private
+                     break
+                 except TransientMergeError:
+                     continue
+             else:
+                 new_transients.append(t_private)
+
+         return partially_merged + new_transients
+
+     def _query_datadir(
          self,
          names: list[str] = None,
          coords: SkyCoord = None,
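Note: for reference, a cone search through the new `query` method would presumably look like the sketch below; the position and classification string are made up for illustration, and `db` is an `Otter` instance as constructed earlier:

    from astropy.coordinates import SkyCoord
    import astropy.units as u

    coord = SkyCoord(241.0, 44.0, unit=(u.deg, u.deg))  # hypothetical position
    matches = db.query(
        coords=coord,
        radius=5,              # arcseconds, matching the new default
        classification="TDE",  # substring match against object_class in the AQL filter
        hasphot=True,          # keep only transients that have a photometry block
    )
    print(len(matches), "matching Transient objects")
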
@@ -237,6 +453,9 @@ class Otter(object):
          raw: bool = False,
      ) -> dict:
          """
+         This is a private method and is here just for the pipeline!!!
+         This should not be used by end users!
+
          Searches the summary.csv table and reads relevant JSON files

          WARNING! This does not do any conversions for you!
@@ -268,13 +487,7 @@ class Otter(object):
          # read in the metadata from all json files
          # this could be dangerous later on!!
          allfiles = glob.glob(os.path.join(self.DATADIR, "*.json"))
-         jsondata = []
-
-         # read the data from all the json files and convert to Transients
-         for jsonfile in allfiles:
-             with open(jsonfile, "r") as j:
-                 t = Transient(json.load(j))
-                 jsondata.append(t.get_meta())
+         jsondata = [self.load_file(jsonfile) for jsonfile in allfiles]

          return jsondata

@@ -285,6 +498,8 @@ class Otter(object):

          # then read and query the summary table
          summary = pd.read_csv(summary_table)
+         if len(summary) == 0:
+             return []

          # coordinate search first
          if coords is not None:
@@ -356,6 +571,85 @@ class Otter(object):

          return outdata

+     def upload(self, json_data, collection="vetting", testing=False) -> Document:
+         """
+         Upload json_data to collection
+
+         Args:
+             json_data (dict) : A dictionary of the json data to upload to Otter
+             collection (str) : The collection to upload to
+             testing (bool) : If True, do not save the document. Default is False.
+
+         Returns:
+             The pyArango document that was uploaded
+         """
+
+         # now add the document
+         doc = self[collection].createDocument(json_data)
+         if not testing:
+             doc.save()
+         return doc
+
+     def upload_private(self, collection="vetting", testing=False) -> list:
+         """
+         Upload the local/private data stored in self.DATADIR to the vetting collection
+         (like a SQL table) in the central arangodb document database.
+
+         WARNING! This will make any data in self.DATADIR public! Please double check
+         before using this method!
+
+         Args:
+             collection (str) : The collection to add the documents to. Default is
+                                "vetting" where the documents will then be vetted by
+                                our team.
+             testing (bool) : If True, do not actually upload. Default is False.
+
+         Returns:
+             If testing is False (the default), returns the arangodb upload result. If
+             testing is True, returns the list of merged dictionaries that would get
+             uploaded.
+
+         Raises:
+             OtterLimitationError: If some objects in OTTER are within 5" of each other,
+                 we can't figure out which ones to merge with which ones.
+
+         """
+
+         if not self.hasCollection(collection):
+             raise ValueError(f"{collection} not in {self}!")
+
+         local_data = self._query_datadir()
+         docs = []
+         for t in local_data:
+             res = self.query(coords=t.get_skycoord())
+
+             if len(res) > 1:
+                 raise OtterLimitationError("Some objects in Otter are too close!")
+
+             elif len(res) == 1:
+                 # this object exists in otter already, let's grab the transient data and
+                 # merge the files
+                 merged = t + res[0]
+
+                 # copy over the special arangodb keys
+                 merged["_key"] = res[0]["_key"]
+                 merged["_id"] = res[0]["_id"]
+
+                 # we also have to delete the document from the OTTER database
+                 doc = self.fetchDocument(merged["_id"])
+                 if not testing:
+                     doc.delete()
+                 else:
+                     print(f"Would delete\n{doc}")
+
+             else:
+                 # this means the object doesn't exist in otter already
+                 merged = t
+
+             docs.append(self.upload(merged, collection=collection, testing=testing))
+
+         return docs
+
      def save(self, schema: list[dict], testing=False) -> None:
          """
          Upload all the data in the given list of schemas.
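Note: a dry run of the new upload path might look like the following sketch; it assumes the connected account has write access to the "vetting" collection, and with testing=True nothing is saved to or deleted from the database:

    db = Otter(username="some-writer-account", password="...")  # hypothetical credentials
    docs = db.upload_private(collection="vetting", testing=True)
    for doc in docs:
        print(doc)  # the pyArango documents that would have been uploaded
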
@@ -380,7 +674,7 @@ class Otter(object):
              print(transient["name/default_name"])

              coord = transient.get_skycoord()
-             res = self.cone_search(coords=coord)
+             res = self._query_datadir(coords=coord)

              if len(res) == 0:
                  # This is a new object to upload
@@ -433,7 +727,7 @@ class Otter(object):
          if isinstance(schema, Transient):
              schema = dict(schema)

-         out = json.dumps(schema, indent=4)
+         out = json.dumps(schema, indent=4, default=_np_encoder)
          # out = '[' + out
          # out += ']'

@@ -473,10 +767,14 @@ class Otter(object):
              }

              if "date_reference" in t:
-                 row["discovery_date"] = t.get_discovery_date()
+                 date_types = {d["date_type"] for d in t["date_reference"]}
+                 if "discovery" in date_types:
+                     row["discovery_date"] = t.get_discovery_date()

              if "distance" in t:
-                 row["z"] = t.get_redshift()
+                 dist_types = {d["distance_type"] for d in t["distance"]}
+                 if "redshift" in dist_types:
+                     row["z"] = t.get_redshift()

              row["hasPhot"] = "photometry" in t
              row["hasSpec"] = "spectra" in t
@@ -490,3 +788,457 @@ class Otter(object):
              alljsons.to_csv(os.path.join(self.DATADIR, "summary.csv"))

          return alljsons
+
+     @staticmethod
+     def from_csvs(
+         metafile: str,
+         photfile: str = None,
+         local_outpath: str = "private_otter_data",
+         db: Otter = None,
+     ) -> Otter:
+         """
+         Converts private metadata and photometry csvs to an Otter object stored
+         *locally* so you don't need to worry about accidentally uploading them to the
+         real Otter database.
+
+         Args:
+             metafile (str) : String filepath or string io csv object of the csv metadata
+             photfile (str) : String filepath or string io csv object of the csv
+                 photometry
+             local_outpath (str) : The outpath to write the OTTER json files to
+             db (Otter) : An Otter instance to add the local_outpath to for querying.
+                 This keyword can be useful if you have special permission for
+                 the otter database and want to upload your private data
+
+         Returns:
+             An Otter object where the json files are stored locally
+         """
+         # read in the metadata and photometry file
+         meta = pd.read_csv(metafile)
+         phot = None
+         if photfile is not None:
+             phot = pd.read_csv(photfile)
+
+         # we need to generate columns of wave_eff and freq_eff
+         wave_eff = []
+         freq_eff = []
+         wave_eff_unit = u.nm
+         freq_eff_unit = u.GHz
+         for val, unit in zip(phot.filter_eff, phot.filter_eff_units):
+             wave_eff.append(
+                 (val * u.Unit(unit))
+                 .to(wave_eff_unit, equivalencies=u.spectral())
+                 .value
+             )
+             freq_eff.append(
+                 (val * u.Unit(unit))
+                 .to(freq_eff_unit, equivalencies=u.spectral())
+                 .value
+             )
+
+         phot["band_eff_wave"] = wave_eff
+         phot["band_eff_wave_unit"] = str(wave_eff_unit)
+         phot["band_eff_freq"] = freq_eff
+         phot["band_eff_freq_unit"] = str(freq_eff_unit)
+
+         if not os.path.exists(local_outpath):
+             os.mkdir(local_outpath)
+
+         # drop duplicated names in meta and keep the first
+         meta = meta.drop_duplicates(subset="name", keep="first")
+
+         # merge the meta and phot data
+         if phot is not None:
+             data = pd.merge(phot, meta, on="name", how="inner")
+         else:
+             data = meta
+
+         # perform some data checks
+         assert (
+             len(data[pd.isna(data.ra)].name.unique()) == 0
+         ), "Missing some RA and Decs, please check the input files!"
+         if phot is not None:
+             for name in meta.name:
+                 assert len(data[data.name == name]) == len(
+                     phot[phot.name == name]
+                 ), f"failed on {name}"
+
+         # actually do the data conversion to OTTER
+         all_jsons = []
+         for name, tde in data.groupby("name"):
+             json = {}
+             tde = tde.reset_index()
+
+             # name first
+             json["name"] = dict(
+                 default_name=name,
+                 alias=[dict(value=name, reference=[tde.coord_bibcode[0]])],
+             )
+
+             # coordinates
+             json["coordinate"] = [
+                 dict(
+                     ra=tde.ra[0],
+                     dec=tde.dec[0],
+                     ra_units=tde.ra_unit[0],
+                     dec_units=tde.dec_unit[0],
+                     reference=[tde.coord_bibcode[0]],
+                     coordinate_type="equitorial",
+                 )
+             ]
+
+             ### distance info
+             json["distance"] = []
+
+             # redshift
+             if "redshift" in tde and not np.any(pd.isna(tde["redshift"])):
+                 json["distance"].append(
+                     dict(
+                         value=tde.redshift[0],
+                         reference=[tde.redshift_bibcode[0]],
+                         computed=False,
+                         distance_type="redshift",
+                     )
+                 )
+
+             # luminosity distance
+             if "luminosity_distance" in tde and not np.any(
+                 pd.isna(tde["luminosity_distance"])
+             ):
+                 json["distance"].append(dict(
+                     value=tde.luminosity_distance[0],
+                     reference=[tde.luminosity_distance_bibcode[0]],
+                     unit=tde.luminosity_distance_unit[0],
+                     computed=False,
+                     distance_type="luminosity",
+                 ))
+
+             # comoving distance
+             if "comoving_distance" in tde and not np.any(
+                 pd.isna(tde["comoving_distance"])
+             ):
+                 json["distance"].append(dict(
+                     value=tde.comoving_distance[0],
+                     reference=[tde.comoving_distance_bibcode[0]],
+                     unit=tde.comoving_distance_unit[0],
+                     computed=False,
+                     distance_type="comoving",
+                 ))
+
+             # remove the distance list if it is empty still
+             if len(json["distance"]) == 0:
+                 del json["distance"]
+
+             ### Classification information that is in the csvs
+             # classification
+             if "classification" in tde:
+                 json["classification"] = [
+                     dict(
+                         object_class=tde.classification[0],
+                         confidence=1,  # we know this is at least a tde
+                         reference=[tde.classification_bibcode[0]],
+                     )
+                 ]
+
+             # discovery date
+             # print(tde)
+             if "discovery_date" in tde and not np.any(pd.isna(tde.discovery_date)):
+                 json["date_reference"] = [
+                     dict(
+                         value=str(tde.discovery_date.tolist()[0]).strip(),
+                         date_format=tde.discovery_date_format.tolist()[0].lower(),
+                         reference=tde.discovery_date_bibcode.tolist(),
+                         computed=False,
+                         date_type="discovery",
+                     )
+                 ]
+
+             # host information
+             if "host_ref" in tde and not np.any(pd.isna(tde.host_ref)):
+                 host_info = dict(
+                     host_name=tde.host_name.tolist()[0].strip(),
+                     host_ra=tde.host_ra.tolist()[0],
+                     host_dec=tde.host_dec.tolist()[0],
+                     host_ra_units=tde.host_ra_unit.tolist()[0],
+                     host_dec_units=tde.host_dec_unit.tolist()[0],
+                     reference=[tde.host_ref.tolist()[0]],
+                 )
+
+                 if not pd.isna(tde.host_redshift.tolist()[0]):
+                     host_info["host_z"] = tde.host_redshift.tolist()[0]
+
+                 if "host" in json:
+                     json["host"].append(host_info)
+                 else:
+                     json["host"] = [host_info]
+
+             # comments
+             if "comment" in tde and not np.any(pd.isna(tde.comment)):
+                 if "schema_version" not in json:
+                     json["schema_version"] = {}
+                 json["schema_version"]["comment"] = tde.comment.tolist()[0]
+
+             # skip the photometry code if there is no photometry file
+             # if there is a photometry file then we want to convert it below
+             phot_sources = []
+             if phot is not None:
+                 tde["obs_type"] = [
+                     freq_to_obstype(vv * u.Unit(uu))
+                     for vv, uu in zip(
+                         tde.band_eff_freq.values,
+                         tde.band_eff_freq_unit.values,
+                     )
+                 ]
+
+                 unique_filter_keys = []
+                 index_for_match = []
+                 json["photometry"] = []
+
+                 if "telescope" in tde:
+                     to_grpby = ["bibcode", "telescope", "obs_type"]
+                 else:
+                     to_grpby = ["bibcode", "obs_type"]
+
+                 for grp_keys, p in tde.groupby(to_grpby, dropna=False):
+                     if len(grp_keys) == 3:
+                         src, tele, obstype = grp_keys
+                     else:
+                         src, obstype = grp_keys
+                         tele = None
+
+                     if src not in phot_sources:
+                         phot_sources.append(src)
+
+                     if len(np.unique(p.flux_unit)) == 1:
+                         raw_units = p.flux_unit.tolist()[0]
+                     else:
+                         raw_units = p.flux_unit.tolist()
+
+                     # add a column to phot with the unique filter key
+                     if obstype == "radio":
+                         filter_uq_key = (
+                             p.band_eff_freq.astype(str)
+                             + p.band_eff_freq_unit.astype(str)
+                         ).tolist()
+
+                     elif obstype in ("uvoir", "xray"):
+                         filter_uq_key = p["filter"].astype(str).tolist()
+
+                     else:
+                         raise ValueError("not prepared for this obstype!")
+
+                     unique_filter_keys += filter_uq_key
+                     index_for_match += p.index.tolist()
+
+                     if "upperlimit" not in p:
+                         p["upperlimit"] = False
+
+                     json_phot = dict(
+                         reference=src,
+                         raw=p.flux.astype(float).tolist(),
+                         raw_err=p.flux_err.astype(float).tolist(),
+                         raw_units=raw_units,
+                         date=p.date.tolist(),
+                         date_format=p.date_format.tolist(),
+                         upperlimit=p.upperlimit.tolist(),
+                         filter_key=filter_uq_key,
+                         obs_type=obstype,
+                     )
+
+                     if not pd.isna(tele):
+                         json_phot["telescope"] = tele
+
+                     if pd.isna(tele) and obstype == "xray":
+                         raise ValueError("The telescope is required for X-ray data!")
+
+                     # check the minimum and maximum filter values
+                     if obstype == "xray" and (
+                         "filter_min" not in p or "filter_max" not in p
+                     ):
+                         raise ValueError(
+                             "Minimum and maximum filters required for X-ray data!"
+                         )
+
+                     # check optional keys
+                     optional_keys = [
+                         "date_err",
+                         "sigma",
+                         "instrument",
+                         "phot_type",
+                         "exptime",
+                         "aperature",
+                         "observer",
+                         "reducer",
+                         "pipeline",
+                     ]
+                     for k in optional_keys:
+                         if k in p and not np.all(pd.isna(p[k])):
+                             # fill the nan values
+                             # this is to match with the official json format
+                             # and works with arangodb document structure
+                             p[k].fillna("null", inplace=True)
+
+                             json_phot[k] = p[k].tolist()
+
+                     # handle more detailed uncertainty information
+                     raw_err_detail = {}
+                     for key in ["statistical_err", "systematic_err", "iss_err"]:
+                         if key in p and not np.all(pd.isna(p[key])):
+                             k = key.split("_")[0]
+
+                             # fill the nan values
+                             # this is to match with the official json format
+                             # and works with arangodb document structure
+                             p[key].fillna(0, inplace=True)
+
+                             raw_err_detail[k] = p[key].tolist()
+
+                     if len(raw_err_detail) > 0:
+                         json_phot["raw_err_detail"] = raw_err_detail
+
+                     # check the possible corrections
+                     corrs = ["val_k", "val_s", "val_host", "val_av", "val_hostav"]
+                     for c in corrs:
+                         bool_v_key = c.replace("val", "corr")
+                         json_phot[c] = False
+
+                         if c in p:
+                             # fill the nan values
+                             # this is to match with the official json format
+                             # and works with arangodb document structure
+                             p[c].fillna("null", inplace=True)
+
+                             json_phot[c] = p[c].tolist()
+                             json_phot[bool_v_key] = [v != "null" for v in json_phot[c]]
+
+                     json["photometry"].append(json_phot)
+
+                 tde["filter_uq_key"] = pd.Series(
+                     unique_filter_keys, index=index_for_match
+                 )
+
+                 # filter alias
+                 # radio filters first
+                 filter_keys1 = ["filter_uq_key", "band_eff_wave", "band_eff_wave_unit"]
+                 if "filter_min" in tde:
+                     filter_keys1.append("filter_min")
+                 if "filter_max" in tde:
+                     filter_keys1.append("filter_max")
+
+                 filter_map = (
+                     tde[filter_keys1].drop_duplicates().set_index("filter_uq_key")
+                 )  # .to_dict(orient='index')
+                 try:
+                     filter_map_radio = filter_map.to_dict(orient="index")
+                 except Exception:
+                     print(filter_map)
+                     print(name)
+                     raise Exception
+
+                 json["filter_alias"] = []
+                 for filt, val in filter_map_radio.items():
+                     obs_type = freq_to_obstype(
+                         float(val["band_eff_wave"]) * u.Unit(val["band_eff_wave_unit"])
+                     )
+                     if obs_type == "radio":
+                         filter_name = freq_to_band(
+                             (
+                                 float(val["band_eff_wave"])
+                                 * u.Unit(val["band_eff_wave_unit"])
+                             ).to(u.GHz, equivalencies=u.spectral())
+                         )
+                     else:
+                         filter_name = filt
+
+                     filter_alias_dict = dict(
+                         filter_key=filt,
+                         filter_name=filter_name,
+                         wave_eff=float(val["band_eff_wave"]),
+                         wave_units=val["band_eff_wave_unit"],
+                     )
+
+                     if "filter_min" in val:
+                         filter_alias_dict["wave_min"] = (
+                             val["filter_min"] * u.Unit(phot.filter_eff_units)
+                         ).to(
+                             u.Unit(
+                                 val["band_eff_wave_unit"], equivalencies=u.spectral()
+                             )
+                         )
+
+                     if "filter_max" in val:
+                         filter_alias_dict["wave_max"] = (
+                             val["filter_max"] * u.Unit(phot.filter_eff_units)
+                         ).to(
+                             u.Unit(
+                                 val["band_eff_wave_unit"], equivalencies=u.spectral()
+                             )
+                         )
+
+                     json["filter_alias"].append(filter_alias_dict)
+
+             # reference alias
+             # gather all the bibcodes
+             all_bibcodes = [tde.coord_bibcode[0]] + phot_sources
+             if (
+                 "redshift_bibcode" in tde
+                 and tde.redshift_bibcode[0] not in all_bibcodes
+                 and not np.any(pd.isna(tde.redshift))
+             ):
+                 all_bibcodes.append(tde.redshift_bibcode[0])
+
+             if (
+                 "luminosity_distance_bibcode" in tde
+                 and tde.luminosity_distance_bibcode[0] not in all_bibcodes
+                 and not np.any(pd.isna(tde.luminosity_distance))
+             ):
+                 all_bibcodes.append(tde.luminosity_distance_bibcode[0])
+
+             if (
+                 "comoving_distance_bibcode" in tde
+                 and tde.comoving_distance_bibcode[0] not in all_bibcodes
+                 and not np.any(pd.isna(tde.comoving_distance))
+             ):
+                 all_bibcodes.append(tde.comoving_distance_bibcode[0])
+
+             if (
+                 "discovery_date_bibcode" in tde
+                 and tde.discovery_date_bibcode[0] not in all_bibcodes
+                 and not np.any(pd.isna(tde.discovery_date))
+             ):
+                 all_bibcodes.append(tde.discovery_date_bibcode[0])
+
+             if (
+                 "classification_bibcode" in tde
+                 and tde.classification_bibcode[0] not in all_bibcodes
+                 and not np.any(pd.isna(tde.classification))
+             ):
+                 all_bibcodes.append(tde.classification_bibcode[0])
+
+             if (
+                 "host_bibcode" in tde
+                 and tde.host_bibcode not in all_bibcodes
+                 and not np.any(pd.isna(tde.host_bibcode))
+             ):
+                 all_bibcodes.append(tde.host_bibcode[0])
+
+             # find the hrn's for all of these bibcodes
+             uq_bibcodes, all_hrns = bibcode_to_hrn(all_bibcodes)
+
+             # package these into the reference alias
+             json["reference_alias"] = [
+                 dict(name=name, human_readable_name=hrn)
+                 for name, hrn in zip(uq_bibcodes, all_hrns)
+             ]
+
+             all_jsons.append(Transient(json))
+
+         if db is None:
+             db = Otter(datadir=local_outpath)
+         else:
+             db.datadir = local_outpath
+
+         # always save this document as a new one
+         db.save(all_jsons)
+         db.generate_summary_table(save=True)
+         return db
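Note: to close the loop, converting a pair of private CSVs into a locally stored OTTER dataset would presumably be invoked as below; the file names are placeholders, and (per the new `__init__`) constructing the returned `Otter` object still needs a reachable ArangoDB instance for the public side of queries:

    from otter import Otter  # assumed import path

    local_db = Otter.from_csvs(
        metafile="my_tde_meta.csv",        # hypothetical metadata csv
        photfile="my_tde_photometry.csv",  # hypothetical photometry csv
        local_outpath="private_otter_data",
    )
    # the converted json files and summary.csv now live in ./private_otter_data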