astro-otter 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
otter/io/otter.py ADDED
@@ -0,0 +1,1594 @@
1
+ """
2
+ This is the primary class for user interaction with the catalog
3
+ """
4
+
5
+ from __future__ import annotations
6
+ from typing import Optional
7
+ import os
8
+ import json
9
+ import glob
10
+ from copy import deepcopy
11
+ import logging
12
+
13
+ from pyArango.connection import Connection
14
+ from pyArango.database import Database
15
+ from pyArango.document import Document
16
+
17
+ import pandas as pd
18
+ import numpy as np
19
+
20
+ from astropy.coordinates import SkyCoord, search_around_sky
21
+ from astropy.table import Table
22
+ from astropy import units as u
23
+
24
+ from .transient import Transient
25
+ from ..exceptions import FailedQueryError, OtterLimitationError, TransientMergeError
26
+ from ..util import bibcode_to_hrn, freq_to_obstype, freq_to_band, _DuplicateFilter
27
+
28
+ import warnings
29
+
30
+ warnings.simplefilter("once", RuntimeWarning)
31
+ warnings.simplefilter("once", UserWarning)
32
+ warnings.simplefilter("once", u.UnitsWarning)
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
+ def _np_encoder(object):
38
+ """
39
+ Numpy data type encoder for json.dump
40
+ """
41
+ if isinstance(object, (np.generic, np.ndarray)):
42
+ return object.item()
43
+
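+ # Example (illustrative sketch, not part of the released file): _np_encoder is
+ # meant to be passed as the `default` hook of json.dump/json.dumps so that
+ # numpy types that json cannot serialize are converted to plain Python values:
+ #
+ #     import json
+ #     import numpy as np
+ #     json.dumps({"n_points": np.int64(3), "z": np.array([0.05])},
+ #                default=_np_encoder)
+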
44
+
45
+ class Otter(Database):
46
+ """
47
+ This is the primary class for users to access the otter backend database
48
+
49
+ Args:
50
+ url (str): The url where the database api endpoints are located
51
+ username (str): The username to log into the database with
52
+ password (str): The password to log into the database with
53
+ gen_summary (bool): Generate a local summary table, this should generally be
54
+ left as False!
55
+ datadir (str): Path to the data directory with the otter data. If not provided,
57
+ this defaults to a ".otter" directory in the CWD where you call
58
+ this class from.
59
+ debug (bool): If True, run in debug mode and avoid performing any real operations.
59
+
60
+ Returns:
61
+ An Otter object that is connected to the otter database
62
+ """
63
+
64
+ def __init__(
65
+ self,
66
+ url: str = "https://otter.idies.jhu.edu/api",
67
+ username: str = os.environ.get("ARANGO_USER_USERNAME", "user-guest"),
68
+ password: str = os.environ.get("ARANGO_USER_PASSWORD", "test"),
69
+ gen_summary: bool = False,
70
+ datadir: str = None,
71
+ debug: bool = False,
72
+ **kwargs,
73
+ ) -> None:
74
+ print(f"Attempting to login to {url} with the following credentials:")
75
+ print(f"username: {username}")
76
+ print(f"password: {password}")
77
+
78
+ # save inputs
79
+ if datadir is None:
80
+ self.CWD = os.getcwd()
81
+ self.DATADIR = os.path.join(self.CWD, ".otter")
82
+ else:
83
+ self.CWD = os.path.dirname(datadir)
84
+ self.DATADIR = datadir
85
+
86
+ self.debug = debug
87
+
88
+ # make sure the data directory exists
89
+ if not os.path.exists(self.DATADIR):
90
+ try:
91
+ os.makedirs(self.DATADIR)
92
+ except FileExistsError:
93
+ logger.warning(
94
+ "Directory was created between the if statement and trying "
95
+ + "to create the directory!"
96
+ )
97
+ pass
98
+
99
+ if gen_summary:
100
+ self.generate_summary_table(save=True)
101
+
102
+ connection = Connection(username=username, password=password, arangoURL=url)
103
+ super().__init__(connection, "otter", **kwargs)
104
+
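+ # Example (hypothetical usage sketch): connecting to the public OTTER API with
+ # the default guest credentials, optionally pointing at a local data directory.
+ # The URL and credentials are simply the defaults from __init__; this assumes
+ # the top-level otter package re-exports this class.
+ #
+ #     from otter import Otter
+ #
+ #     db = Otter()                          # public endpoint, guest login
+ #     db_local = Otter(datadir="./.otter")  # also read/write local JSON files
+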
105
+ def get_meta(self, **kwargs) -> Table:
106
+ """
107
+ Get the metadata of the objects matching the arguments
108
+
109
+ Args:
110
+ **kwargs : Arguments to pass to Otter.query(). See that documentation with
111
+ `help(otter.Otter.query)`.
112
+ Return:
113
+ A list with the metadata (name, coordinate, date_reference, distance,
115
+ classification, and reference_alias) for each transient that matches the arguments.
115
+ """
116
+ metakeys = [
117
+ "name",
118
+ "coordinate",
119
+ "date_reference",
120
+ "distance",
121
+ "classification",
122
+ "reference_alias",
123
+ ]
124
+
125
+ return [t[metakeys] for t in self.query(**kwargs)]
126
+
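+ # Example (hypothetical usage sketch): fetching just the metadata blocks for a
+ # couple of named events; any Otter.query() keyword (names, coords, refs, ...)
+ # can be forwarded here. The event names are only for illustration.
+ #
+ #     db = Otter()
+ #     meta = db.get_meta(names=["ASASSN-14li", "AT2019dsg"])
+ #     for m in meta:
+ #         print(m)
+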
127
+ def cone_search(
128
+ self, coords: SkyCoord, radius: float = 5, raw: bool = False
129
+ ) -> Table:
130
+ """
131
+ Performs a cone search of the catalog over the given coords and radius.
132
+
133
+ Args:
134
+ coords (SkyCoord): An astropy SkyCoord object with coordinates to match to
135
+ radius (float): The radius of the cone in arcseconds, default is 5"
136
+ raw (bool): If False (the default) return an astropy table of the metadata
137
+ for matching objects. Otherwise, return the raw json dicts
138
+
139
+ Return:
140
+ The metadata for the transients within radius of coords, as returned by
141
+ Otter.query().
142
+ """
143
+
144
+ transients = self.query(coords=coords, radius=radius, raw=raw)
145
+
146
+ return transients
147
+
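+ # Example (hypothetical usage sketch): a 10 arcsecond cone search around a
+ # single position; the coordinates below are arbitrary.
+ #
+ #     from astropy.coordinates import SkyCoord
+ #
+ #     db = Otter()
+ #     pos = SkyCoord("12h48m15.2s", "+17d46m26s")
+ #     matches = db.cone_search(coords=pos, radius=10)
+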
148
+ def get_phot(
149
+ self,
150
+ flux_unit="mag(AB)",
151
+ date_unit="MJD",
152
+ return_type="astropy",
153
+ obs_type=None,
154
+ keep_raw=False,
155
+ wave_unit="nm",
156
+ freq_unit="GHz",
157
+ deduplicate=None,
158
+ **kwargs,
159
+ ) -> Table:
160
+ """
161
+ Get the photometry of the objects matching the arguments. This will do the
162
+ unit conversion for you!
163
+
164
+ Args:
165
+ flux_unit (astropy.unit.Unit): Either a valid string to convert
166
+ or an astropy.unit.Unit, this can be either
167
+ flux, flux density, or magnitude unit. This
168
+ supports any base units supported by
169
+ synphot (https://synphot.readthedocs.io/en/latest/synphot/units.html#flux-units).
170
+ date_unit (astropy.unit.Unit): Either a valid string to convert to a date
171
+ or an astropy.unit.Unit. For supported
172
+ formats see
173
+ https://docs.astropy.org/en/stable/time/index.html#time-format
174
+ return_type (str): Either 'astropy' or 'pandas'. If astropy, returns an
175
+ astropy Table. If pandas, returns a pandas DataFrame.
176
+ Default is 'astropy'.
177
+ obs_type (str): Either 'radio', 'uvoir', or 'xray'. Will only return that
178
+ type of photometry if not None. Default is None and will
179
+ return any type of photometry.
180
+ keep_raw (bool): If True, keep the raw flux/date/freq/wave associated with
181
+ the dataset. Else, just keep the converted data. Default
182
+ is False.
183
+ wave_unit (str): The astropy wavelength unit to return with. Must have
184
+ base units of length.
185
+ freq_unit (str): The astropy frequency unit to return with. Must have base
186
+ units of 1/time.
187
+ deduplicate (Callable|None|False): whether to deduplicate the dataset
188
+ using the given Callable. Set to
189
+ False to skip deduplication.
190
+ None (the default) uses
191
+ Transient.deduplicate_photometry
192
+ **kwargs : Arguments to pass to Otter.query(). Can be::
193
+
194
+ names (list[str]): A list of names to get the metadata for
195
+ coords (SkyCoord): An astropy SkyCoord object with coordinates
196
+ to match to
197
+ radius (float): The radius in arcseconds for a cone search,
198
+ default is 0.05"
199
+ minZ (float): The minimum redshift to search for
200
+ maxZ (float): The maximum redshift to search for
201
+ refs (list[str]): A list of ads bibcodes to match to. Will only
202
+ return metadata for transients that have this
203
+ as a reference.
204
+ hasspec (bool): if True, only return events that have spectra.
205
+
206
+ Return:
207
+ The photometry for the requested transients that match the arguments.
208
+ Will be an astropy Table (or a pandas DataFrame if return_type="pandas"),
+ grouped by transient default name.
209
+
210
+ Raises:
211
+ FailedQueryError: When the query returns no results
212
+ IOError: if one of your inputs is incorrect
213
+ """
214
+ warn_filt = _DuplicateFilter()
215
+ logger.addFilter(warn_filt)
216
+
217
+ queryres = self.query(hasphot=True, **kwargs)
218
+
219
+ dicts = []
220
+ for transient in queryres:
221
+ # clean the photometry
222
+ default_name = transient["name/default_name"]
223
+
224
+ try:
225
+ phot = transient.clean_photometry(
226
+ flux_unit=flux_unit,
227
+ date_unit=date_unit,
228
+ wave_unit=wave_unit,
229
+ freq_unit=freq_unit,
230
+ obs_type=obs_type,
231
+ deduplicate=deduplicate,
232
+ )
233
+
234
+ phot["name"] = [default_name] * len(phot)
235
+
236
+ dicts.append(phot)
237
+
238
+ except FailedQueryError:
239
+ # This is fine, it just means that there is no data associated
240
+ # with this one transient. We'll check and make sure there is data
241
+ # associated with at least one of the transients later!
242
+ pass
243
+
244
+ if len(dicts) == 0:
245
+ raise FailedQueryError()
246
+ fullphot = pd.concat(dicts)
247
+
248
+ # remove some possibly confusing keys
249
+ keys_to_keep = [
250
+ "name",
251
+ "converted_flux",
252
+ "converted_flux_err",
253
+ "converted_date",
254
+ "converted_wave",
255
+ "converted_freq",
256
+ "converted_flux_unit",
257
+ "converted_date_unit",
258
+ "converted_wave_unit",
259
+ "converted_freq_unit",
260
+ "filter_name",
261
+ "obs_type",
262
+ "upperlimit",
263
+ "reference",
264
+ "human_readable_refs",
265
+ ]
266
+
267
+ if "upperlimit" not in fullphot:
268
+ fullphot["upperlimit"] = False
269
+
270
+ if not keep_raw:
271
+ if "telescope" in fullphot:
272
+ fullphot = fullphot[keys_to_keep + ["telescope"]]
273
+ else:
274
+ fullphot = fullphot[keys_to_keep]
275
+
276
+ logger.removeFilter(warn_filt)
277
+ if return_type == "astropy":
278
+ return Table.from_pandas(fullphot)
279
+ elif return_type == "pandas":
280
+ return fullphot
281
+ else:
282
+ raise IOError("return_type can only be pandas or astropy")
283
+
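+ # Example (hypothetical usage sketch): pulling unit-converted photometry for
+ # one event, requesting AB magnitudes against MJD and keeping only uvoir
+ # points. The event name is illustrative.
+ #
+ #     db = Otter()
+ #     phot = db.get_phot(
+ #         names=["ASASSN-14li"],
+ #         flux_unit="mag(AB)",
+ #         date_unit="MJD",
+ #         obs_type="uvoir",
+ #     )
+ #     print(phot["converted_flux", "converted_date", "filter_name"])
+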
284
+ def load_file(self, filename: str) -> dict:
285
+ """
286
+ Loads an otter JSON file
287
+
288
+ Args:
289
+ filename (str): The path to the OTTER JSON file to load
290
+
291
+ Returns:
292
+ A Transient object built from the otter JSON file contents
293
+ """
294
+
295
+ # read in files from summary
296
+ with open(filename, "r") as f:
297
+ to_ret = Transient(json.load(f))
298
+
299
+ return to_ret
300
+
301
+ def query(
302
+ self,
303
+ names: list[str] = None,
304
+ coords: SkyCoord = None,
305
+ radius: float = 5,
306
+ minz: float = None,
307
+ maxz: float = None,
308
+ mindec: float = -90,
309
+ maxdec: float = 90,
310
+ refs: list[str] = None,
311
+ hasphot: bool = False,
312
+ has_radio_phot: bool = False,
313
+ has_uvoir_phot: bool = False,
314
+ has_xray_phot: bool = False,
315
+ hasspec: bool = False,
316
+ spec_classed: bool = False,
317
+ unambiguous: bool = False,
318
+ classification: str = None,
319
+ class_confidence_threshold: float = 0,
320
+ has_det: bool = False,
321
+ wave_det: str = None,
322
+ query_private=False,
323
+ **kwargs,
324
+ ) -> dict:
325
+ """
326
+ Searches the arango database table and reads relevant JSON files
327
+
328
+ WARNING! This does not do any conversions for you!
329
+ This is how it differs from the `get_meta` method. Users should prefer to use
330
+ `get_meta` and `get_phot` instead because they provide a better
331
+ workflow and can return the data in an astropy table with everything in the
332
+ same units.
333
+
334
+ Args:
335
+ names (list[str]): A list of names to get the metadata for
336
+ coords (SkyCoord): An astropy SkyCoord object with coordinates to match to
337
+ radius (float): The radius in arcseconds for a cone search, default is 5"
338
+ minz (float): The minimum redshift to search for
339
+ maxz (float): The maximum redshift to search for
340
+ mindec (float): The minimum declination in degrees
341
+ maxdec (float): The maximum declination in degrees
342
+ refs (list[str]): A list of ads bibcodes to match to. Will only return
343
+ metadata for transients that have this as a reference.
344
+ hasphot (bool): if True, only returns transients which have photometry.
345
+ has_radio_phot (bool): if True, only returns transients with radio phot.
346
+ has_uvoir_phot (bool): if True, only returns transients with uvoir phot.
347
+ has_xray_phot (bool): if True, only returns transients with X-ray phot.
348
+ hasspec (bool): NOT IMPLEMENTED! Will return False for all targets!
349
+ spec_classed (bool): If True, only returns transients that have been
350
+ spectroscopically classified/confirmed
351
+ unambiguous (bool): If True, only returns transients that have a single
352
+ published classification in OTTER. If classifications
353
+ disagree for a transient, it will be filtered out.
354
+ classification (str): A classification string to search for
355
+ class_confidence_threshold (float): classification confidence cutoff.
356
+ Default is 0. Any classifications with a
357
+ confidence less than
358
+ this value will be ignored. In general,
359
+ (1) C = 0 means this classification is
360
+ unverified, (2) C = 1 means this
361
+ classification is based only on
362
+ photometry, (3) C = 2 means this
363
+ classification is based on a TNS (not
364
+ peer-reviewed) spectrum, (4) C >= 3
365
+ means this classification is based on
366
+ at least one peer-reviewed optical
367
+ spectrum. Setting this flag to 3 should
368
+ produce the most pure sample, but might
369
+ miss some events that are very
370
+ confidently classified based on
371
+ photometry (e.g., X-ray observations).
372
+ has_det (bool): This can be set to true to only search for transients that
373
+ have a detection in their photometry. It can be used in
374
+ conjunction with e.g., `has_radio_phot=True` to search for
375
+ transients that have a radio detection. Default is False.
376
+ wave_det (str): Set this to the wavelength regime that you want to check for
377
+ detections in. Either "uvoir", "radio", "xray". Default is
378
+ None, which doesn't filter on the wavelength regime before
379
+ checking for detections.
380
+ query_private (bool): Set to True if you would like to also query the
381
+ local dataset stored in the directory given by datadir
382
+
383
+ Return:
384
+ Get all of the raw (unconverted!) data for objects that match the criteria.
385
+ """
386
+ # write some AQL filters based on the inputs
387
+ query_filters = ""
388
+
389
+ if hasphot or has_radio_phot or has_xray_phot or has_uvoir_phot:
390
+ query_filters += "FILTER 'photometry' IN ATTRIBUTES(transient)\n"
391
+
392
+ if has_radio_phot:
393
+ query_filters += "FILTER 'radio' IN transient.photometry[*].obs_type\n"
394
+
395
+ if has_uvoir_phot:
396
+ query_filters += "FILTER 'uvoir' IN transient.photometry[*].obs_type\n"
397
+
398
+ if has_xray_phot:
399
+ query_filters += "FILTER 'xray' IN transient.photometry[*].obs_type\n"
400
+
401
+ if has_det:
402
+ if wave_det is None:
403
+ query_filters += """
404
+ FILTER FLATTEN(transient.photometry[*].upperlimit) ANY == false\n
405
+ """
406
+ else:
407
+ query_filters += f"""
408
+ FILTER "photometry" IN ATTRIBUTES(transient)
409
+ LET phot = (
410
+ FOR p IN transient.photometry
411
+ FILTER p.obs_type == "{wave_det}"
412
+ RETURN p
413
+ )
414
+ FILTER FLATTEN(phot[*].upperlimit) ANY == false
415
+ """
416
+
417
+ if hasspec is True:
418
+ query_filters += "FILTER 'spectra' IN ATTRIBUTES(transient)\n"
419
+
420
+ if spec_classed:
421
+ query_filters += "FILTER transient.classification.spec_classed >= 1"
422
+
423
+ if unambiguous:
424
+ query_filters += "FILTER transient.classification.unambiguous"
425
+
426
+ if classification is not None:
427
+ query_filters += f"""
428
+ FOR subdoc IN transient.classification.value
429
+ FILTER subdoc.confidence > TO_NUMBER({class_confidence_threshold})
430
+ FILTER subdoc.object_class LIKE '%{classification}%'
431
+ """
432
+
433
+ if minz is not None:
434
+ sfilt = f"""
435
+ FILTER 'redshift' IN transient.distance[*].distance_type
436
+ LET redshifts1 = (
437
+ FOR val IN transient.distance
438
+ FILTER val.distance_type == 'redshift'
439
+ FILTER TO_NUMBER(val.value) >= {minz}
440
+ RETURN val
441
+ )
442
+ FILTER COUNT(redshifts1) > 0
443
+ """
444
+ query_filters += sfilt
445
+ if maxz is not None:
446
+ sfilt = f"""
447
+ FILTER 'redshift' IN transient.distance[*].distance_type
448
+ LET redshifts2 = (
449
+ FOR val IN transient.distance
450
+ FILTER val.distance_type == 'redshift'
451
+ FILTER TO_NUMBER(val.value) <= {maxz}
452
+ RETURN val
453
+ )
454
+ FILTER COUNT(redshifts2) > 0
455
+ """
456
+ query_filters += sfilt
457
+
458
+ if names is not None:
459
+ if isinstance(names, str):
460
+ query_filters += f"""
461
+ FILTER UPPER(transient.name.default_name) LIKE UPPER('%{names}%')\n
462
+ """
463
+ elif isinstance(names, list):
464
+ namefilt = f"""
465
+ FOR name IN {names}
466
+ FILTER name IN transient.name.alias[*].value\n
467
+ """
468
+ query_filters += namefilt
469
+ else:
470
+ raise Exception("Names must be either a string or list")
471
+
472
+ if refs is not None:
473
+ if isinstance(refs, str): # this is just a single bibcode
474
+ query_filters += f"FILTER {refs} IN transient.reference_alias[*].name"
475
+ elif isinstance(refs, list):
476
+ query_filters += f"""
477
+ FOR ref IN {refs}
478
+ FILTER ref IN transient.reference_alias[*].name
479
+ """
480
+ else:
481
+ raise Exception("reference list must be either a string or a list")
482
+
483
+ # define the query
484
+ query = f"""
485
+ FOR transient IN transients
486
+ {query_filters}
487
+ RETURN transient
488
+ """
489
+
490
+ # set batch size to 100 million (for now at least)
491
+ result = self.AQLQuery(query, rawResults=True, batchSize=100_000_000)
492
+
493
+ # now that we have the query results do the RA and Dec queries if they exist
494
+ if coords is not None:
495
+ # get the catalog RAs and Decs to compare against
496
+ query_coords = coords
497
+ good_tdes = []
498
+
499
+ for tde in result:
500
+ for coordinfo in tde["coordinate"]:
501
+ if "ra" in coordinfo and "dec" in coordinfo:
502
+ coord = SkyCoord(
503
+ coordinfo["ra"],
504
+ coordinfo["dec"],
505
+ unit=(coordinfo["ra_units"], coordinfo["dec_units"]),
506
+ )
507
+ elif "l" in coordinfo and "b" in coordinfo:
508
+ # this is galactic
509
+ coord = SkyCoord(
510
+ coordinfo["l"],
511
+ coordinfo["b"],
512
+ unit=(coordinfo["l_units"], coordinfo["b_units"]),
513
+ frame="galactic",
514
+ )
515
+ else:
516
+ raise ValueError(
517
+ "Either needs to have ra and dec or l and b as keys!"
518
+ )
519
+ if query_coords.separation(coord) < radius * u.arcsec:
520
+ good_tdes.append(tde)
521
+ break # we've confirmed this tde is in the cone!
522
+
523
+ arango_query_results = [Transient(t) for t in good_tdes]
524
+
525
+ else:
526
+ arango_query_results = [Transient(res) for res in result.result]
527
+
528
+ # filter based on the min and max declination query options
529
+ decs = np.array([t.get_skycoord().dec.deg for t in arango_query_results])
530
+ where_dec = np.where((decs > mindec) * (decs < maxdec))[0]
531
+ arango_query_results = [arango_query_results[i] for i in where_dec]
532
+
533
+ if not query_private:
534
+ return arango_query_results
535
+
536
+ private_results = self._query_datadir(
537
+ names=names,
538
+ coords=coords,
539
+ radius=radius,
540
+ minz=minz,
541
+ maxz=maxz,
542
+ refs=refs,
543
+ hasphot=hasphot,
544
+ hasspec=hasspec,
545
+ )
546
+
547
+ partially_merged = deepcopy(arango_query_results)
548
+ new_transients = []
549
+ for jj, t_private in enumerate(private_results):
550
+ for ii, t_public in enumerate(arango_query_results):
551
+ try:
552
+ partially_merged[ii] += t_private
553
+ break
554
+ except TransientMergeError:
555
+ continue
556
+ else:
557
+ new_transients.append(t_private)
558
+
559
+ return partially_merged + new_transients
560
+
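+ # Example (hypothetical usage sketch): a raw query for confidently classified
+ # TDEs below z = 0.1 that have at least one photometric detection. The returned
+ # objects are Transient instances holding unconverted data.
+ #
+ #     db = Otter()
+ #     tdes = db.query(
+ #         classification="TDE",
+ #         class_confidence_threshold=3,
+ #         maxz=0.1,
+ #         hasphot=True,
+ #         has_det=True,
+ #     )
+ #     print(len(tdes))
+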
561
+ def _query_datadir(
562
+ self,
563
+ names: list[str] = None,
564
+ coords: SkyCoord = None,
565
+ radius: float = 5,
566
+ minz: float = None,
567
+ maxz: float = None,
568
+ refs: list[str] = None,
569
+ hasphot: bool = False,
570
+ hasspec: bool = False,
571
+ raw: bool = False,
572
+ ) -> dict:
573
+ """
574
+ This is a private method and is here just for the pipeline!!!
575
+ This should not be used by end users!
576
+
577
+ Searches the summary.csv table and reads relevant JSON files
578
+
579
+ WARNING! This does not do any conversions for you!
580
+ This is how it differs from the `get_meta` method. Users should prefer to use
581
+ `get_meta` and `get_phot` independently because it is a better
582
+ workflow and can return the data in an astropy table with everything in the
583
+ same units.
584
+
585
+ Args:
586
+ names (list[str]): A list of names to get the metadata for
587
+ coords (SkyCoord): An astropy SkyCoord object with coordinates to match to
588
+ radius (float): The radius in arcseconds for a cone search, default is 5"
589
+ minz (float): The minimum redshift to search for
590
+ maxz (float): The maximum redshift to search for
591
+ refs (list[str]): A list of ads bibcodes to match to. Will only return
592
+ metadata for transients that have this as a reference.
593
+ hasphot (bool): if True, only returns transients which have photometry.
594
+ hasspec (bool): if True, only return transients that have spectra.
595
+
596
+ Return:
597
+ A list of Transient objects containing the raw (unconverted!) data for all
+ objects that match the criteria.
598
+ """
599
+ if (
600
+ all(arg is None for arg in [names, coords, maxz, minz, refs])
601
+ and not hasphot
602
+ and not hasspec
603
+ ):
604
+ # there's nothing to query!
605
+ # read in the metadata from all json files
606
+ # this could be dangerous later on!!
607
+ allfiles = glob.glob(os.path.join(self.DATADIR, "*.json"))
608
+ jsondata = [self.load_file(jsonfile) for jsonfile in allfiles]
609
+
610
+ return jsondata
611
+
612
+ # check if the summary table exists; if it doesn't, create it
613
+ summary_table = os.path.join(self.DATADIR, "summary.csv")
614
+ if not os.path.exists(summary_table):
615
+ self.generate_summary_table(save=True)
616
+
617
+ # then read and query the summary table
618
+ summary = pd.read_csv(summary_table)
619
+ if len(summary) == 0:
620
+ return []
621
+
622
+ # coordinate search first
623
+ if coords is not None:
624
+ if not isinstance(coords, SkyCoord):
625
+ raise ValueError("Input coordinate must be an astropy SkyCoord!")
626
+ summary_coords = SkyCoord(
627
+ summary.ra.tolist(), summary.dec.tolist(), unit=(u.deg, u.deg)
628
+ )
629
+
630
+ try:
631
+ summary_idx, _, _, _ = search_around_sky(
632
+ summary_coords, coords, seplimit=radius * u.arcsec
633
+ )
634
+ except ValueError:
635
+ summary_idx, _, _, _ = search_around_sky(
636
+ summary_coords,
637
+ SkyCoord([coords]),
638
+ seplimit=radius * u.arcsec,
639
+ )
640
+
641
+ summary = summary.iloc[summary_idx]
642
+
643
+ # redshift
644
+ if minz is not None:
645
+ summary = summary[summary.z.astype(float) >= minz]
646
+
647
+ if maxz is not None:
648
+ summary = summary[summary.z.astype(float) <= maxz]
649
+
650
+ # check photometry and spectra
651
+ if hasphot:
652
+ summary = summary[summary.hasPhot == True]
653
+
654
+ if hasspec:
655
+ summary = summary[summary.hasSpec == True]
656
+
657
+ # check names
658
+ if names is not None:
659
+ if isinstance(names, str):
660
+ n = {names}
661
+ else:
662
+ n = set(names)
663
+
664
+ checknames = []
665
+ for alias_row in summary.alias:
666
+ rs = set(eval(alias_row))
667
+ intersection = list(n & rs)
668
+ checknames.append(len(intersection) > 0)
669
+
670
+ summary = summary[checknames]
671
+
672
+ # check references
673
+ if refs is not None:
674
+ checkrefs = []
675
+
676
+ if isinstance(refs, str):
677
+ n = {refs}
678
+ else:
679
+ n = set(refs)
680
+
681
+ for ref_row in summary.refs:
682
+ rs = set(eval(ref_row))
683
+ intersection = list(n & rs)
684
+ checkrefs.append(len(intersection) > 0)
685
+
686
+ summary = summary[checkrefs]
687
+
688
+ outdata = [self.load_file(path) for path in summary.json_path]
689
+
690
+ return outdata
691
+
692
+ def upload(self, json_data, collection="vetting", testing=False) -> Document:
693
+ """
694
+ Upload json_data to collection WITHOUT deduplication!
695
+
696
+ Args:
697
+ json_data [dict] : A dictionary of the json data to upload to Otter
698
+ collection [str] : The collection to upload to
699
+ testing [bool] : Default is False
700
+
701
+ Returns:
702
+ The pyArango document that was uploaded
703
+ """
704
+
705
+ # now add the document
706
+ doc = self[collection].createDocument(json_data)
707
+ if not testing:
708
+ doc.save()
709
+ return doc
710
+
711
+ def upload_private(self, collection="vetting", testing=False) -> None:
712
+ """
713
+ Upload the local/private data stored in self.DATADIR to the vetting collection
714
+ (like a SQL table) in the central arangodb document database.
715
+
716
+ WARNING! This will make any data in self.DATADIR public! Please double check
717
+ before using this method!
718
+
719
+ Args:
720
+ collection (str) : The collection to add the documents to. Default is
721
+ "vetting" where the documents will then be vetted by
722
+ our team.
723
+ testing (bool) : If True, do not actually upload. Default is False.
724
+
725
+ Returns:
726
+ If testing is false (the default), returns the arangodb upload result. If
727
+ testing is true, returns the list of merged dictionaries that would get
728
+ uploaded.
729
+
730
+ Raises:
731
+ OtterLimitationError: If some objects in OTTER are within 5" we can't figure
732
+ out which ones to merge with which ones.
733
+
734
+ """
735
+
736
+ if not self.hasCollection(collection):
737
+ raise ValueError(f"{collection} not in {self}!")
738
+
739
+ local_data = self._query_datadir()
740
+ docs = []
741
+ for t in local_data:
742
+ res = self.query(coords=t.get_skycoord())
743
+
744
+ if len(res) > 1:
745
+ raise OtterLimitationError("Some objects in Otter are too close!")
746
+
747
+ elif len(res) == 1 and collection != "vetting":
748
+ # if the collection is the vetting collection we don't want to do the
749
+ # merging yet, even if the object already exists in OTTER
750
+
751
+ # this object exists in otter already, let's grab the transient data and
752
+ # merge the files
753
+ merged = t + res[0]
754
+
755
+ # copy over the special arangodb keys
756
+ merged["_key"] = res[0]["_key"]
757
+ merged["_id"] = res[0]["_id"]
758
+
759
+ else:
760
+ # this means the object doesn't exist in otter already
761
+ merged = t
762
+
763
+ docs.append(self.upload(merged, collection=collection, testing=testing))
764
+
765
+ return docs
766
+
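+ # Example (hypothetical usage sketch): a dry run that builds the vetting
+ # documents from a local data directory without saving anything to the
+ # database. Drop testing=True only once you are sure the data should be public.
+ #
+ #     db = Otter(datadir="./private-data")
+ #     docs = db.upload_private(collection="vetting", testing=True)
+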
767
+ def save(self, schema: list[dict], testing=False) -> None:
768
+ """
769
+ Upload all the data in the given list of schemas.
770
+
771
+ Args:
772
+ schema (list[dict]): A list of json dictionaries
773
+ testing (bool): Should we just enter test mode? Default is False
774
+
775
+ Raises:
776
+ OtterLimitationError: If some objects in OTTER are within 5" we can't figure
777
+ out which ones to merge with which ones.
778
+ """
779
+
780
+ if not isinstance(schema, list):
781
+ schema = [schema]
782
+
783
+ for transient in schema:
784
+ # convert the json to a Transient
785
+ if not isinstance(transient, Transient):
786
+ transient = Transient(transient)
787
+
788
+ print(transient["name/default_name"])
789
+
790
+ coord = transient.get_skycoord()
791
+ res = self._query_datadir(coords=coord)
792
+
793
+ if len(res) == 0:
794
+ # This is a new object to upload
795
+ print("Adding this as a new object...")
796
+ self._save_document(dict(transient), test_mode=testing)
797
+
798
+ else:
799
+ # We must merge this with existing data
800
+ print("Found this object in the database already, merging the data...")
801
+ if len(res) == 1:
802
+ # we can just add these to merge them!
803
+ combined = res[0] + transient
804
+ self._save_document(combined, test_mode=testing)
805
+ else:
806
+ # for now throw an error
807
+ # this is a limitation we can come back to fix if it is causing
808
+ # problems though!
809
+ raise OtterLimitationError("Some objects in Otter are too close!")
810
+
811
+ # update the summary table appropriately
812
+ self.generate_summary_table(save=True)
813
+
814
+ def _save_document(self, schema, test_mode=False):
815
+ """
816
+ Save a json file in the correct format to the OTTER data directory
817
+ """
818
+ # check whether any of this document's aliases already match a JSON file
819
+ # in the data directory; if so, reuse that file path
820
+ jsonpath = os.path.join(self.DATADIR, "*.json")
821
+ aliases = {item["value"].replace(" ", "-") for item in schema["name"]["alias"]}
822
+ filenames = {
823
+ os.path.basename(fname).split(".")[0] for fname in glob.glob(jsonpath)
824
+ }
825
+ todel = list(aliases & filenames)
826
+
827
+ # now save this data
828
+ # create a new file in self.DATADIR with this
829
+ if len(todel) > 0:
830
+ outfilepath = os.path.join(self.DATADIR, todel[0] + ".json")
831
+ if test_mode:
832
+ print("Renaming the following file for backups: ", outfilepath)
833
+ else:
834
+ if test_mode:
835
+ print("Don't need to mess with the files at all!")
836
+ fname = schema["name"]["default_name"] + ".json"
837
+ fname = fname.replace(" ", "-") # replace spaces in the filename
838
+ outfilepath = os.path.join(self.DATADIR, fname)
839
+
840
+ # format as a json
841
+ if isinstance(schema, Transient):
842
+ schema = dict(schema)
843
+
844
+ out = json.dumps(schema, indent=4, default=_np_encoder)
845
+ # out = '[' + out
846
+ # out += ']'
847
+
848
+ if not test_mode:
849
+ with open(outfilepath, "w") as f:
850
+ f.write(out)
851
+ else:
852
+ print(f"Would write to {outfilepath}")
853
+ # print(out)
854
+
855
+ def generate_summary_table(self, save=False) -> pd.DataFrame:
856
+ """
857
+ Generate a summary table for the JSON files in self.DATADIR
858
+
859
+ Args:
860
+ save (bool): if True, save the summary file to "summary.csv"
861
+ in self.DATADIR. Default is False, in which case the table is only returned.
862
+
863
+ Returns:
864
+ pandas.DataFrame of the summary meta information of the transients
865
+ """
866
+ allfiles = glob.glob(os.path.join(self.DATADIR, "*.json"))
867
+
868
+ # read the data from all the json files and convert to Transients
869
+ rows = []
870
+ for jsonfile in allfiles:
871
+ with open(jsonfile, "r") as j:
872
+ t = Transient(json.load(j))
873
+ skycoord = t.get_skycoord()
874
+
875
+ row = {
876
+ "name": t.default_name,
877
+ "alias": [alias["value"] for alias in t["name"]["alias"]],
878
+ "ra": skycoord.ra,
879
+ "dec": skycoord.dec,
880
+ "refs": [ref["name"] for ref in t["reference_alias"]],
881
+ }
882
+
883
+ if "date_reference" in t:
884
+ date_types = {d["date_type"] for d in t["date_reference"]}
885
+ if "discovery" in date_types:
886
+ row["discovery_date"] = t.get_discovery_date()
887
+
888
+ if "distance" in t:
889
+ dist_types = {d["distance_type"] for d in t["distance"]}
890
+ if "redshift" in dist_types:
891
+ row["z"] = t.get_redshift()
892
+
893
+ row["hasPhot"] = "photometry" in t
894
+ row["hasSpec"] = "spectra" in t
895
+
896
+ row["json_path"] = os.path.abspath(jsonfile)
897
+
898
+ rows.append(row)
899
+
900
+ alljsons = pd.DataFrame(rows)
901
+ if save:
902
+ alljsons.to_csv(os.path.join(self.DATADIR, "summary.csv"))
903
+
904
+ return alljsons
905
+
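+ # Example (hypothetical usage sketch): rebuilding the local summary table and
+ # inspecting it without writing summary.csv back to disk.
+ #
+ #     db = Otter(datadir="./.otter")
+ #     summary = db.generate_summary_table(save=False)
+ #     print(summary[["name", "ra", "dec", "hasPhot", "hasSpec"]].head())
+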
906
+ @staticmethod
907
+ def from_csvs(
908
+ metafile: str,
909
+ photfile: str = None,
910
+ local_outpath: Optional[str] = os.path.join(os.getcwd(), "private-data"),
911
+ db: Otter = None,
912
+ ) -> Otter:
913
+ """
914
+ Converts private metadata and photometry csvs to an Otter object stored
915
+ *locally* so you don't need to worry about accidentally uploading them to the
916
+ real Otter database.
917
+
918
+ Args:
919
+ metafile (str) : File path or StringIO object containing the CSV metadata
920
+ photfile (str) : File path or StringIO object containing the CSV
921
+ photometry
922
+ local_outpath (str) : The outpath to write the OTTER json files to
923
+ db (Otter) : An Otter instance to add the local_outpath to for querying.
924
+ This keyword can be useful if you have special permission for
925
+ the otter database and want to upload your private data
926
+
927
+ Returns:
928
+ An Otter object where the json files are stored locally
929
+ """
930
+ # read in the metadata and photometry file
931
+ meta = pd.read_csv(metafile)
932
+ meta.columns = meta.columns.str.strip() # clean up the col names
933
+ phot = None
934
+
935
+ required_phot_cols = [
936
+ "name",
937
+ "date",
938
+ "date_format",
939
+ "filter",
940
+ "filter_eff",
941
+ "filter_eff_units",
942
+ "flux",
943
+ "flux_err",
944
+ "flux_unit",
945
+ ]
946
+
947
+ if photfile is not None:
948
+ phot_unclean = pd.read_csv(photfile)
949
+ phot_unclean.columns = phot_unclean.columns.str.strip() # cleanup colnames
950
+
951
+ phot = phot_unclean.dropna(subset=required_phot_cols)
952
+ if len(phot) != len(phot_unclean):
953
+ logger.warning("""
954
+ Filtered out rows with nan in the photometry file! Make sure you
955
+ expect this behaviour!
956
+ """)
957
+
958
+ if "bibcode" not in phot:
959
+ phot["bibcode"] = "private"
960
+ logger.warning("""
961
+ Setting the bibcode column to the special keyword 'private'!
962
+ """)
963
+
964
+ # we need to generate columns of wave_eff and freq_eff
965
+ wave_eff = []
966
+ freq_eff = []
967
+ wave_eff_unit = u.nm
968
+ freq_eff_unit = u.GHz
969
+ for val, unit in zip(phot.filter_eff, phot.filter_eff_units):
970
+ wave_eff.append(
971
+ (val * u.Unit(unit))
972
+ .to(wave_eff_unit, equivalencies=u.spectral())
973
+ .value
974
+ )
975
+ freq_eff.append(
976
+ (val * u.Unit(unit))
977
+ .to(freq_eff_unit, equivalencies=u.spectral())
978
+ .value
979
+ )
980
+
981
+ phot["band_eff_wave"] = wave_eff
982
+ phot["band_eff_wave_unit"] = str(wave_eff_unit)
983
+ phot["band_eff_freq"] = freq_eff
984
+ phot["band_eff_freq_unit"] = str(freq_eff_unit)
985
+
986
+ if not os.path.exists(local_outpath):
987
+ os.makedirs(local_outpath)
988
+
989
+ # drop duplicated names in meta and keep the first
990
+ meta = meta.drop_duplicates(subset="name", keep="first")
991
+
992
+ # merge the meta and phot data
993
+ if phot is not None:
994
+ data = pd.merge(phot, meta, on="name", how="outer")
995
+ else:
996
+ data = meta
997
+
998
+ # perform some data checks
999
+ assert (
1000
+ len(data[pd.isna(data.ra)].name.unique()) == 0
1001
+ ), "Missing some RA and Decs, please check the input files!"
1002
+ # if phot is not None:
1003
+ # for name in meta.name:
1004
+ # assert len(data[data.name == name]) == len(
1005
+ # phot[phot.name == name]
1006
+ # ), f"failed on {name}"
1007
+
1008
+ # actually do the data conversion to OTTER
1009
+ all_jsons = []
1010
+ for name, tde in data.groupby("name"):
1011
+ json = {}
1012
+ tde = tde.reset_index()
1013
+
1014
+ # name first
1015
+ json["name"] = dict(
1016
+ default_name=name,
1017
+ alias=[dict(value=name, reference=[tde.coord_bibcode[0]])],
1018
+ )
1019
+
1020
+ # coordinates
1021
+ json["coordinate"] = [
1022
+ dict(
1023
+ ra=tde.ra[0],
1024
+ dec=tde.dec[0],
1025
+ ra_units=tde.ra_unit[0],
1026
+ dec_units=tde.dec_unit[0],
1027
+ reference=[tde.coord_bibcode[0]],
1028
+ coordinate_type="equatorial",
1029
+ )
1030
+ ]
1031
+
1032
+ ### distance info
1033
+ json["distance"] = []
1034
+
1035
+ # redshift
1036
+ if "redshift" in tde and not np.any(pd.isna(tde["redshift"])):
1037
+ json["distance"].append(
1038
+ dict(
1039
+ value=tde.redshift[0],
1040
+ reference=[tde.redshift_bibcode[0]],
1041
+ computed=False,
1042
+ distance_type="redshift",
1043
+ )
1044
+ )
1045
+
1046
+ # luminosity distance
1047
+ if "luminosity_distance" in tde and not np.any(
1048
+ pd.isna(tde["luminosity_distance"])
1049
+ ):
1050
+ json["distance"].append(
1051
+ dict(
1052
+ value=tde.luminosity_distance[0],
1053
+ reference=[tde.luminosity_distance_bibcode[0]],
1054
+ unit=tde.luminosity_distance_unit[0],
1055
+ computed=False,
1056
+ distance_type="luminosity",
1057
+ )
1058
+ )
1059
+
1060
+ # comoving distance
1061
+ if "comoving_distance" in tde and not np.any(
1062
+ pd.isna(tde["comoving_distance"])
1063
+ ):
1064
+ json["distance"].append(
1065
+ dict(
1066
+ value=tde.comoving_distance[0],
1067
+ reference=[tde.comoving_distance_bibcode[0]],
1068
+ unit=tde.comoving_distance_unit[0],
1069
+ computed=False,
1070
+ distance_type="comoving",
1071
+ )
1072
+ )
1073
+
1074
+ # remove the distance list if it is empty still
1075
+ if len(json["distance"]) == 0:
1076
+ del json["distance"]
1077
+
1078
+ ### Classification information that is in the csvs
1079
+ # classification
1080
+ if "classification" in tde:
1081
+ class_flag = 0
1082
+ if "classification_flag" in tde:
1083
+ class_flag = tde.classification_flag[0]
1084
+ json["classification"] = dict(
1085
+ value=[
1086
+ dict(
1087
+ object_class=tde.classification[0],
1088
+ confidence=class_flag,
1089
+ reference=[tde.classification_bibcode[0]],
1090
+ )
1091
+ ]
1092
+ )
1093
+
1094
+ # discovery date
1095
+ # print(tde)
1096
+ if "discovery_date" in tde and not np.any(pd.isna(tde.discovery_date)):
1097
+ json["date_reference"] = [
1098
+ dict(
1099
+ value=str(tde.discovery_date.tolist()[0]).strip(),
1100
+ date_format=tde.discovery_date_format.tolist()[0].lower(),
1101
+ reference=tde.discovery_date_bibcode.tolist(),
1102
+ computed=False,
1103
+ date_type="discovery",
1104
+ )
1105
+ ]
1106
+
1107
+ # host information
1108
+ if "host_ref" in tde and not np.any(pd.isna(tde.host_ref)):
1109
+ host_info = dict(
1110
+ host_name=tde.host_name.tolist()[0].strip(),
1111
+ host_ra=tde.host_ra.tolist()[0],
1112
+ host_dec=tde.host_dec.tolist()[0],
1113
+ host_ra_units=tde.host_ra_unit.tolist()[0],
1114
+ host_dec_units=tde.host_dec_unit.tolist()[0],
1115
+ reference=[tde.host_ref.tolist()[0]],
1116
+ )
1117
+
1118
+ if not pd.isna(tde.host_redshift.tolist()[0]):
1119
+ host_info["host_z"] = tde.host_redshift.tolist()[0]
1120
+
1121
+ if "host" in json:
1122
+ json["host"].append(host_info)
1123
+ else:
1124
+ json["host"] = [host_info]
1125
+
1126
+ # comments
1127
+ if "comment" in tde and not np.any(pd.isna(tde.comment)):
1128
+ if "schema_version" not in json:
1129
+ json["schema_version"] = {}
1130
+ json["schema_version"]["comment"] = tde.comment.tolist()[0]
1131
+
1132
+ # skip the photometry code if there is no photometry file
1133
+ # if there is a photometry file then we want to convert it below
1134
+ phot_sources = []
1135
+ if phot is not None and not np.all(pd.isna(tde["flux"])):
1136
+ tde["obs_type"] = [
1137
+ freq_to_obstype(vv * u.Unit(uu))
1138
+ for vv, uu in zip(
1139
+ tde.band_eff_freq.values,
1140
+ tde.band_eff_freq_unit.values,
1141
+ )
1142
+ ]
1143
+
1144
+ unique_filter_keys = []
1145
+ index_for_match = []
1146
+ json["photometry"] = []
1147
+
1148
+ if "telescope" in tde:
1149
+ to_grpby = ["bibcode", "telescope", "obs_type"]
1150
+ else:
1151
+ to_grpby = ["bibcode", "obs_type"]
1152
+
1153
+ for grp_keys, p in tde.groupby(to_grpby, dropna=False):
1154
+ if len(grp_keys) == 3:
1155
+ src, tele, obstype = grp_keys
1156
+ else:
1157
+ src, obstype = grp_keys
1158
+ tele = None
1159
+
1160
+ if src not in phot_sources:
1161
+ phot_sources.append(src)
1162
+
1163
+ # add a column to phot with the unique filter key
1164
+ if obstype == "radio":
1165
+ filter_uq_key = (
1166
+ p.band_eff_freq.astype(str)
1167
+ + p.band_eff_freq_unit.astype(str)
1168
+ ).tolist()
1169
+
1170
+ elif obstype in ("uvoir", "xray"):
1171
+ filter_uq_key = p["filter"].astype(str).tolist()
1172
+
1173
+ else:
1174
+ raise ValueError("not prepared for this obstype!")
1175
+
1176
+ unique_filter_keys += filter_uq_key
1177
+ index_for_match += p.index.tolist()
1178
+
1179
+ if "upperlimit" not in p:
1180
+ p["upperlimit"] = False
1181
+
1182
+ if "raw" in p.columns and "flux" in p.columns:
1183
+ if len(np.unique(p.raw_unit)) == 1:
1184
+ raw_units = p.raw_unit.tolist()[0]
1185
+ else:
1186
+ raw_units = p.raw_unit.tolist()
1187
+
1188
+ if len(np.unique(p.flux_unit)) == 1:
1189
+ val_units = p.flux_unit.tolist()[0]
1190
+ else:
1191
+ val_units = p.flux_unit.tolist()
1192
+
1193
+ # treat "raw" as the "raw" keyword and "flux" as the "value"
1194
+ json_phot = dict(
1195
+ reference=src,
1196
+ raw=p.raw.astype(float).tolist(),
1197
+ raw_err=p.raw_err.astype(float).tolist(),
1198
+ raw_units=raw_units,
1199
+ value=p.flux.astype(float).tolist(),
1200
+ value_err=p.flux_err.astype(float).tolist(),
1201
+ value_units=val_units,
1202
+ date=p.date.tolist(),
1203
+ date_format=p.date_format.tolist(),
1204
+ upperlimit=p.upperlimit.tolist(),
1205
+ filter_key=filter_uq_key,
1206
+ obs_type=obstype,
1207
+ )
1208
+
1209
+ elif "flux" in p.columns and "raw" not in p.columns:
1210
+ if len(np.unique(p.flux_unit)) == 1:
1211
+ raw_units = p.flux_unit.tolist()[0]
1212
+ else:
1213
+ raw_units = p.flux_unit.tolist()
1214
+
1215
+ # treat "flux" as the "raw" keyword
1216
+ json_phot = dict(
1217
+ reference=src,
1218
+ raw=p.flux.astype(float).tolist(),
1219
+ raw_err=p.flux_err.astype(float).tolist(),
1220
+ raw_units=raw_units,
1221
+ date=p.date.tolist(),
1222
+ date_format=p.date_format.tolist(),
1223
+ upperlimit=p.upperlimit.tolist(),
1224
+ filter_key=filter_uq_key,
1225
+ obs_type=obstype,
1226
+ )
1227
+
1228
+ elif "raw" in p.columns and "flux" not in p.columns:
1229
+ if len(np.unique(p.raw_unit)) == 1:
1230
+ raw_units = p.raw_unit.tolist()[0]
1231
+ else:
1232
+ raw_units = p.raw_unit.tolist()
1233
+
1234
+ # treat "raw" as the "raw" keyword
1235
+ json_phot = dict(
1236
+ reference=src,
1237
+ raw=p.raw.astype(float).tolist(),
1238
+ raw_err=p.raw_err.astype(float).tolist(),
1239
+ raw_units=raw_units,
1240
+ date=p.date.tolist(),
1241
+ date_format=p.date_format.tolist(),
1242
+ upperlimit=p.upperlimit.tolist(),
1243
+ filter_key=filter_uq_key,
1244
+ obs_type=obstype,
1245
+ )
1246
+
1247
+ else:
1248
+ raise ValueError("`raw` and/or `flux` key(s) must be provided!")
1249
+
1250
+ if not pd.isna(tele):
1251
+ json_phot["telescope"] = tele
1252
+
1253
+ if pd.isna(tele) and obstype == "xray":
1254
+ raise ValueError("The telescope is required for X-ray data!")
1255
+
1256
+ # check the minimum and maximum filter values
1257
+ if obstype == "xray" and (
1258
+ "filter_min" not in p or "filter_max" not in p
1259
+ ):
1260
+ raise ValueError(
1261
+ "Minimum and maximum filters required for X-ray data!"
1262
+ )
1263
+
1264
+ if ("date_min" in p and "date_max" not in p) or (
1265
+ "date_min" not in p and "date_max" in p
1266
+ ):
1267
+ raise ValueError(
1268
+ "If date_min/date_max is provided, the other must be too!"
1269
+ )
1270
+
1271
+ # check optional keys
1272
+ optional_keys = [
1273
+ "date_err",
1274
+ "sigma",
1275
+ "instrument",
1276
+ "phot_type",
1277
+ "exptime",
1278
+ "aperature",
1279
+ "observer",
1280
+ "reducer",
1281
+ "pipeline",
1282
+ "date_min",
1283
+ "date_max",
1284
+ ]
1285
+ for k in optional_keys:
1286
+ if k in p and not np.all(pd.isna(p[k])):
1287
+ # fill the nan values
1288
+ # this is to match with the official json format
1289
+ # and works with arangodb document structure
1290
+ p[k].fillna("null", inplace=True)
1291
+
1292
+ json_phot[k] = p[k].tolist()
1293
+
1294
+ # handle more detailed uncertainty information
1295
+ raw_err_detail = {}
1296
+ for key in [
1297
+ "statistical_err",
1298
+ "systematic_err",
1299
+ "iss_err",
1300
+ "upper_err",
1301
+ "lower_err",
1302
+ ]:
1303
+ if key in p and not np.all(pd.isna(p[key])):
1304
+ k = key.split("_")[0]
1305
+
1306
+ # fill the nan values
1307
+ # this is to match with the official json format
1308
+ # and works with arangodb document structure
1309
+ p[key].fillna(0, inplace=True)
1310
+
1311
+ raw_err_detail[k] = p[key].tolist()
1312
+
1313
+ if len(raw_err_detail) > 0:
1314
+ json_phot["raw_err_detail"] = raw_err_detail
1315
+
1316
+ # check the possible corrections
1317
+ corrs = ["val_k", "val_s", "val_host", "val_av", "val_hostav"]
1318
+ for c in corrs:
1319
+ bool_v_key = c.replace("val", "corr")
1320
+ json_phot[c] = False
1321
+
1322
+ if c in p:
1323
+ # fill the nan values
1324
+ # this is to match with the official json format
1325
+ # and works with arangodb document structure
1326
+ p[c].fillna("null", inplace=True)
1327
+
1328
+ json_phot[c] = p[c].tolist()
1329
+ json_phot[bool_v_key] = [v != "null" for v in json_phot[c]]
1330
+
1331
+ # deal with the xray model
1332
+ if "xray_model_name" not in p and obstype == "xray":
1333
+ raise ValueError(
1334
+ "You must provide the xray model for xray data!"
1335
+ )
1336
+ if obstype == "xray" and "xray_model_name" in p:
1337
+ # get various sets of keywords
1338
+ model_val_cols = list(
1339
+ p.columns[p.columns.str.contains("xray_model_param_value")]
1340
+ )
1341
+ model_up_err_cols = list(
1342
+ p.columns[p.columns.str.contains("xray_model_param_up_err")]
1343
+ )
1344
+ model_lo_err_cols = list(
1345
+ p.columns[p.columns.str.contains("xray_model_param_lo_err")]
1346
+ )
1347
+ model_val_units_cols = list(
1348
+ p.columns[p.columns.str.contains("xray_model_param_unit")]
1349
+ )
1350
+ model_uplim_cols = list(
1351
+ p.columns[
1352
+ p.columns.str.contains("xray_model_param_upperlimit")
1353
+ ]
1354
+ )
1355
+
1356
+ param_names = [v.split("::")[-1] for v in model_val_cols]
1357
+
1358
+ xray_model_info = p[
1359
+ model_val_cols
1360
+ + model_up_err_cols
1361
+ + model_lo_err_cols
1362
+ + model_val_units_cols
1363
+ + model_uplim_cols
1364
+ + [
1365
+ "xray_model_name",
1366
+ "xray_model_reference",
1367
+ "filter_min",
1368
+ "filter_max",
1369
+ "filter_eff_units",
1370
+ ]
1371
+ ]
1372
+
1373
+ if len(model_uplim_cols) == 0:
1374
+ # assume they are all false
1375
+ for param_name in param_names:
1376
+ colname = f"xray_model_param_upperlimit::{param_name}"
1377
+ xray_model_info[colname] = False
1378
+ model_uplim_cols.append(colname)
1379
+
1380
+ if not all(
1381
+ len(model_val_cols) == len(p)
1382
+ for p in [
1383
+ model_up_err_cols,
1384
+ model_lo_err_cols,
1385
+ model_val_units_cols,
1386
+ model_uplim_cols,
1387
+ ]
1388
+ ):
1389
+ raise ValueError(
1390
+ "Missing a column for one of the X-ray parameters!"
1391
+ )
1392
+
1393
+ xray_models = []
1394
+ for _, row in xray_model_info.iterrows():
1395
+ energy1 = (
1396
+ (row["filter_min"] * u.Unit(row["filter_eff_units"]))
1397
+ .to("keV", equivalencies=u.spectral())
1398
+ .value
1399
+ )
1400
+ energy2 = (
1401
+ (row["filter_max"] * u.Unit(row["filter_eff_units"]))
1402
+ .to("keV", equivalencies=u.spectral())
1403
+ .value
1404
+ )
1405
+
1406
+ if energy1 > energy2:
1407
+ min_energy = energy2
1408
+ max_energy = energy1
1409
+ else:
1410
+ min_energy = energy1
1411
+ max_energy = energy2
1412
+
1413
+ param_names_not_na = []
1414
+ for n in param_names:
1415
+ if not pd.isna(row[f"xray_model_param_value::{n}"]):
1416
+ param_names_not_na.append(n)
1417
+
1418
+ _fillna = lambda val: "null" if pd.isna(val) else val # noqa
1419
+ xray_models.append(
1420
+ {
1421
+ "model_name": row.xray_model_name,
1422
+ "param_names": [n for n in param_names_not_na],
1423
+ "param_values": [
1424
+ _fillna(row[f"xray_model_param_value::{n}"])
1425
+ for n in param_names_not_na
1426
+ ],
1427
+ "param_value_upper_err": [
1428
+ _fillna(row[f"xray_model_param_up_err::{n}"])
1429
+ for n in param_names_not_na
1430
+ ],
1431
+ "param_value_lower_err": [
1432
+ _fillna(row[f"xray_model_param_lo_err::{n}"])
1433
+ for n in param_names_not_na
1434
+ ],
1435
+ "param_upperlimit": [
1436
+ _fillna(
1437
+ row[f"xray_model_param_upperlimit::{n}"]
1438
+ )
1439
+ for n in param_names_not_na
1440
+ ],
1441
+ "param_units": [
1442
+ _fillna(row[f"xray_model_param_unit::{n}"])
1443
+ for n in param_names_not_na
1444
+ ],
1445
+ "model_reference": _fillna(
1446
+ row["xray_model_reference"]
1447
+ ),
1448
+ "min_energy": min_energy,
1449
+ "max_energy": max_energy,
1450
+ "energy_units": "keV",
1451
+ }
1452
+ )
1453
+
1454
+ json_phot["xray_model"] = xray_models
1455
+
1456
+ json["photometry"].append(json_phot)
1457
+
1458
+ tde["filter_uq_key"] = pd.Series(
1459
+ unique_filter_keys, index=index_for_match
1460
+ )
1461
+
1462
+ # filter alias
1463
+ # radio filters first
1464
+ filter_keys1 = [
1465
+ "filter_uq_key",
1466
+ "band_eff_wave",
1467
+ "band_eff_wave_unit",
1468
+ "filter_eff_units",
1469
+ ]
1470
+ if "filter_min" in tde:
1471
+ filter_keys1.append("filter_min")
1472
+ if "filter_max" in tde:
1473
+ filter_keys1.append("filter_max")
1474
+
1475
+ filt_info = tde[filter_keys1]
1476
+ filter_map = filt_info.drop_duplicates().set_index(
1477
+ "filter_uq_key"
1478
+ ) # .to_dict(orient='index')
1479
+ try:
1480
+ filter_map_radio = filter_map.to_dict(orient="index")
1481
+ except Exception:
1482
+ print(filter_map)
1483
+ print(name)
1484
+ raise Exception
1485
+
1486
+ json["filter_alias"] = []
1487
+ for filt, val in filter_map_radio.items():
1488
+ obs_type = freq_to_obstype(
1489
+ float(val["band_eff_wave"]) * u.Unit(val["band_eff_wave_unit"])
1490
+ )
1491
+ if obs_type == "radio":
1492
+ filter_name = freq_to_band(
1493
+ (
1494
+ float(val["band_eff_wave"])
1495
+ * u.Unit(val["band_eff_wave_unit"])
1496
+ ).to(u.GHz, equivalencies=u.spectral())
1497
+ )
1498
+ else:
1499
+ filter_name = filt
1500
+
1501
+ filter_alias_dict = dict(
1502
+ filter_key=filt,
1503
+ filter_name=filter_name,
1504
+ wave_eff=float(val["band_eff_wave"]),
1505
+ wave_units=val["band_eff_wave_unit"],
1506
+ )
1507
+
1508
+ if "filter_min" in val and not pd.isna(val["filter_min"]):
1509
+ filter_alias_dict["wave_min"] = (
1510
+ (val["filter_min"] * u.Unit(val["filter_eff_units"]))
1511
+ .to(
1512
+ u.Unit(val["band_eff_wave_unit"]),
1513
+ equivalencies=u.spectral(),
1514
+ )
1515
+ .value
1516
+ )
1517
+
1518
+ if "filter_max" in val and not pd.isna(val["filter_max"]):
1519
+ filter_alias_dict["wave_max"] = (
1520
+ (val["filter_max"] * u.Unit(val["filter_eff_units"]))
1521
+ .to(
1522
+ u.Unit(val["band_eff_wave_unit"]),
1523
+ equivalencies=u.spectral(),
1524
+ )
1525
+ .value
1526
+ )
1527
+
1528
+ json["filter_alias"].append(filter_alias_dict)
1529
+
1530
+ # reference alias
1531
+ # gather all the bibcodes
1532
+ all_bibcodes = [tde.coord_bibcode[0]] + phot_sources
1533
+ if (
1534
+ "redshift_bibcode" in tde
1535
+ and tde.redshift_bibcode[0] not in all_bibcodes
1536
+ and not np.any(pd.isna(tde.redshift))
1537
+ ):
1538
+ all_bibcodes.append(tde.redshift_bibcode[0])
1539
+
1540
+ if (
1541
+ "luminosity_distance_bibcode" in tde
1542
+ and tde.luminosity_distance_bibcode[0] not in all_bibcodes
1543
+ and not np.any(pd.isna(tde.luminosity_distance))
1544
+ ):
1545
+ all_bibcodes.append(tde.luminosity_distance_bibcode[0])
1546
+
1547
+ if (
1548
+ "comoving_distance_bibcode" in tde
1549
+ and tde.comoving_distance_bibcode[0] not in all_bibcodes
1550
+ and not np.any(pd.isna(tde.comoving_distance))
1551
+ ):
1552
+ all_bibcodes.append(tde.comoving_distance_bibcode[0])
1553
+
1554
+ if (
1555
+ "discovery_date_bibcode" in tde
1556
+ and tde.discovery_date_bibcode[0] not in all_bibcodes
1557
+ and not np.any(pd.isna(tde.discovery_date))
1558
+ ):
1559
+ all_bibcodes.append(tde.discovery_date_bibcode[0])
1560
+
1561
+ if (
1562
+ "classification_bibcode" in tde
1563
+ and tde.classification_bibcode[0] not in all_bibcodes
1564
+ and not np.any(pd.isna(tde.classification))
1565
+ ):
1566
+ all_bibcodes.append(tde.classification_bibcode[0])
1567
+
1568
+ if (
1569
+ "host_bibcode" in tde
1570
+ and tde.host_bibcode[0] not in all_bibcodes
1571
+ and not np.any(pd.isna(tde.host_bibcode))
1572
+ ):
1573
+ all_bibcodes.append(tde.host_bibcode[0])
1574
+
1575
+ # find the hrn's for all of these bibcodes
1576
+ uq_bibcodes, all_hrns = bibcode_to_hrn(all_bibcodes)
1577
+
1578
+ # package these into the reference alias
1579
+ json["reference_alias"] = [
1580
+ dict(name=name, human_readable_name=hrn)
1581
+ for name, hrn in zip(uq_bibcodes, all_hrns)
1582
+ ]
1583
+
1584
+ all_jsons.append(Transient(json))
1585
+
1586
+ if db is None:
1587
+ db = Otter(datadir=local_outpath, gen_summary=True)
1588
+ else:
1589
+ db.DATADIR = local_outpath
1590
+
1591
+ # always save this document as a new one
1592
+ db.save(all_jsons)
1593
+ db.generate_summary_table(save=True)
1594
+ return db
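+
+ # Example (hypothetical usage sketch): building a local, private Otter instance
+ # from CSV files. The columns shown (name, ra, dec, ra_unit, dec_unit,
+ # coord_bibcode in the metadata CSV; name, date, date_format, filter,
+ # filter_eff, filter_eff_units, flux, flux_err, flux_unit in the photometry
+ # CSV) follow the columns this method reads; the file paths are made up.
+ #
+ #     db = Otter.from_csvs(
+ #         metafile="my_sample_meta.csv",
+ #         photfile="my_sample_phot.csv",
+ #         local_outpath="./private-data",
+ #     )
+ #     print(db.generate_summary_table())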