astro-otter 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of astro-otter might be problematic.
- {astro_otter-0.1.0.dist-info → astro_otter-0.3.0.dist-info}/METADATA +53 -16
- astro_otter-0.3.0.dist-info/RECORD +18 -0
- {astro_otter-0.1.0.dist-info → astro_otter-0.3.0.dist-info}/WHEEL +1 -1
- otter/__init__.py +4 -1
- otter/_version.py +1 -1
- otter/io/data_finder.py +306 -13
- otter/io/host.py +80 -0
- otter/io/otter.py +781 -13
- otter/io/transient.py +221 -87
- otter/plotter/otter_plotter.py +6 -4
- otter/plotter/plotter.py +180 -2
- otter/schema.py +296 -0
- otter/util.py +258 -59
- astro_otter-0.1.0.dist-info/RECORD +0 -17
- {astro_otter-0.1.0.dist-info → astro_otter-0.3.0.dist-info/licenses}/LICENSE +0 -0
- {astro_otter-0.1.0.dist-info → astro_otter-0.3.0.dist-info}/top_level.txt +0 -0
otter/io/otter.py
CHANGED
@@ -7,15 +7,22 @@ import os
 import json
 import glob
 from warnings import warn
+from copy import deepcopy
+
+from pyArango.connection import Connection
+from pyArango.database import Database
+from pyArango.document import Document

 import pandas as pd
+import numpy as np

 from astropy.coordinates import SkyCoord, search_around_sky
 from astropy.table import Table
 from astropy import units as u

 from .transient import Transient
-from ..exceptions import FailedQueryError, OtterLimitationError
+from ..exceptions import FailedQueryError, OtterLimitationError, TransientMergeError
+from ..util import bibcode_to_hrn, freq_to_obstype, freq_to_band

 import warnings

@@ -24,7 +31,15 @@ warnings.simplefilter("once", UserWarning)
 warnings.simplefilter("once", u.UnitsWarning)


-class Otter(object):
+def _np_encoder(object):
+    """
+    Numpy data type encoder for json.dump
+    """
+    if isinstance(object, (np.generic, np.ndarray)):
+        return object.item()
+
+
+class Otter(Database):
     """
     This is the primary class for users to access the otter backend database

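For context on the `_np_encoder` helper added above: `json.dumps` only calls its `default` hook for objects it cannot serialize natively, so numpy scalars are unwrapped to plain Python values. A minimal standalone sketch (illustrative only, not part of the diff):

import json
import numpy as np

def _np_encoder(obj):
    # json.dumps calls this only for objects it cannot serialize itself;
    # numpy scalars and 0-d arrays are unwrapped to native Python types
    if isinstance(obj, (np.generic, np.ndarray)):
        return obj.item()

payload = {"flux": np.float64(1.23), "counts": np.int64(42)}
print(json.dumps(payload, default=_np_encoder))  # {"flux": 1.23, "counts": 42}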
@@ -36,7 +51,16 @@ class Otter(object):

     """

-    def __init__(
+    def __init__(
+        self,
+        url: str = "http://127.0.0.1:8529",
+        username: str = "user-guest",
+        password: str = "",
+        gen_summary: bool = False,
+        datadir: str = None,
+        debug: bool = False,
+        **kwargs,
+    ) -> None:
         # save inputs
         if datadir is None:
             self.CWD = os.path.dirname(os.path.abspath("__FILE__"))
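Illustrative use of the new constructor signature (a sketch only; it assumes a reachable ArangoDB instance serving the "otter" database and that `Otter` is importable from the package root, as the updated `otter/__init__.py` suggests):

from otter import Otter  # assumes the package re-exports Otter

db = Otter(
    url="http://127.0.0.1:8529",  # defaults shown in the new signature
    username="user-guest",
    password="",
    gen_summary=False,
    datadir=None,                 # falls back to a default local data directory
)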
@@ -47,6 +71,9 @@ class Otter(object):

         self.debug = debug

+        if gen_summary:
+            self.generate_summary_table(save=True)
+
         # make sure the data directory exists
         if not os.path.exists(self.DATADIR):
             try:
@@ -58,6 +85,9 @@ class Otter(object):
             )
             pass

+        connection = Connection(username=username, password=password, arangoURL=url)
+        super().__init__(connection, "otter", **kwargs)
+
     def get_meta(self, **kwargs) -> Table:
         """
         Get the metadata of the objects matching the arguments
@@ -74,6 +104,7 @@ class Otter(object):
             "date_reference",
             "distance",
             "classification",
+            "reference_alias",
         ]

         return [t[metakeys] for t in self.query(**kwargs)]
@@ -192,7 +223,7 @@ class Otter(object):
             "converted_date_unit",
             "converted_wave_unit",
             "converted_freq_unit",
-            "
+            "filter_name",
             "obs_type",
             "upperlimit",
             "reference",
@@ -230,6 +261,186 @@ class Otter(object):
         return to_ret

     def query(
+        self,
+        names: list[str] = None,
+        coords: SkyCoord = None,
+        radius: float = 5,
+        minz: float = None,
+        maxz: float = None,
+        refs: list[str] = None,
+        hasphot: bool = False,
+        hasspec: bool = False,
+        classification: str = None,
+        class_confidence_threshold: float = 0,
+        query_private=False,
+        **kwargs,
+    ) -> dict:
+        """
+        Searches the arango database table and reads relevant JSON files
+
+        WARNING! This does not do any conversions for you!
+        This is how it differs from the `get_meta` method. Users should prefer to use
+        `get_meta`, and `get_phot` independently because it is a better
+        workflow and can return the data in an astropy table with everything in the
+        same units.
+
+        Args:
+            names (list[str]): A list of names to get the metadata for
+            coords (SkyCoord): An astropy SkyCoord object with coordinates to match to
+            radius (float): The radius in arcseconds for a cone search, default is 0.05"
+            minz (float): The minimum redshift to search for
+            maxz (float): The maximum redshift to search for
+            refs (list[str]): A list of ads bibcodes to match to. Will only return
+                metadata for transients that have this as a reference.
+            hasphot (bool): if True, only returns transients which have photometry.
+            hasspec (bool): if True, only return transients that have spectra.
+            classification (str): A classification string to search for
+            class_confidence_threshold (float): classification confidence cutoff for
+                query, between 0 and 1. Default is 0.
+
+        Return:
+            Get all of the raw (unconverted!) data for objects that match the criteria.
+        """
+        # write some AQL filters based on the inputs
+        query_filters = ""
+
+        if hasphot is True:
+            query_filters += "FILTER 'photometry' IN ATTRIBUTES(transient)\n"
+
+        if hasspec is True:
+            query_filters += "FILTER 'spectra' IN ATTRIBUTES(transient)\n"
+
+        if classification is not None:
+            query_filters += f"""
+            FOR subdoc IN transient.classification
+                FILTER subdoc.confidence > TO_NUMBER({class_confidence_threshold})
+                FILTER subdoc.object_class LIKE '%{classification}%'
+            """
+
+        if minz is not None:
+            sfilt = f"""
+            FILTER 'redshift' IN transient.distance[*].distance_type
+            LET redshifts1 = (
+                FOR val IN transient.distance
+                    FILTER val.distance_type == 'redshift'
+                    FILTER TO_NUMBER(val.value) >= {minz}
+                    RETURN val
+            )
+            FILTER COUNT(redshifts1) > 0
+            """
+            query_filters += sfilt
+        if maxz is not None:
+            sfilt = f"""
+            FILTER 'redshift' IN transient.distance[*].distance_type
+            LET redshifts2 = (
+                FOR val IN transient.distance
+                    FILTER val.distance_type == 'redshift'
+                    FILTER TO_NUMBER(val.value) <= {maxz}
+                    RETURN val
+            )
+            FILTER COUNT(redshifts2) > 0
+            """
+            query_filters += sfilt
+
+        if names is not None:
+            if isinstance(names, str):
+                query_filters += f"""
+                FILTER UPPER(transient.name) LIKE UPPER('%{names}%')\n
+                """
+            elif isinstance(names, list):
+                namefilt = f"""
+                FOR name IN {names}
+                    FILTER name IN transient.name.alias[*].value\n
+                """
+                query_filters += namefilt
+            else:
+                raise Exception("Names must be either a string or list")
+
+        if refs is not None:
+            if isinstance(refs, str):  # this is just a single bibcode
+                query_filters += f"FILTER {refs} IN transient.reference_alias[*].name"
+            elif isinstance(refs, list):
+                query_filters += f"""
+                FOR ref IN {refs}
+                    FILTER ref IN transient.reference_alias[*].name
+                """
+            else:
+                raise Exception("reference list must be either a string or a list")
+
+        # define the query
+        query = f"""
+        FOR transient IN transients
+            {query_filters}
+            RETURN transient
+        """
+
+        # set batch size to 100 million (for now at least)
+        result = self.AQLQuery(query, rawResults=True, batchSize=100_000_000)
+
+        # now that we have the query results do the RA and Dec queries if they exist
+        if coords is not None:
+            # get the catalog RAs and Decs to compare against
+            query_coords = coords
+            good_tdes = []
+
+            for tde in result:
+                for coordinfo in tde["coordinate"]:
+                    if "ra" in coordinfo and "dec" in coordinfo:
+                        coord = SkyCoord(
+                            coordinfo["ra"],
+                            coordinfo["dec"],
+                            unit=(coordinfo["ra_units"], coordinfo["dec_units"]),
+                        )
+                    elif "l" in coordinfo and "b" in coordinfo:
+                        # this is galactic
+                        coord = SkyCoord(
+                            coordinfo["l"],
+                            coordinfo["b"],
+                            unit=(coordinfo["l_units"], coordinfo["b_units"]),
+                            frame="galactic",
+                        )
+                    else:
+                        raise ValueError(
+                            "Either needs to have ra and dec or l and b as keys!"
+                        )
+                    if query_coords.separation(coord) < radius * u.arcsec:
+                        good_tdes.append(tde)
+                        break  # we've confirmed this tde is in the cone!
+
+            arango_query_results = [Transient(t) for t in good_tdes]
+
+        else:
+            arango_query_results = [Transient(res) for res in result.result]
+
+        if not query_private:
+            return arango_query_results
+
+        private_results = self._query_datadir(
+            names=names,
+            coords=coords,
+            radius=radius,
+            minz=minz,
+            maxz=maxz,
+            refs=refs,
+            hasphot=hasphot,
+            hasspec=hasspec,
+        )
+
+        partially_merged = deepcopy(arango_query_results)
+        new_transients = []
+        for jj, t_private in enumerate(private_results):
+            for ii, t_public in enumerate(arango_query_results):
+                try:
+                    partially_merged[ii] += t_private
+                    break
+                except TransientMergeError:
+                    continue
+            else:
+                new_transients.append(t_private)
+
+        return partially_merged + new_transients
+
+    def _query_datadir(
         self,
         names: list[str] = None,
         coords: SkyCoord = None,
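A sketch of how the new `query` method might be called against the `Otter` instance `db` constructed earlier, based on the docstring above (the position and radius values are placeholders):

from astropy.coordinates import SkyCoord
from astropy import units as u

center = SkyCoord(189.0 * u.deg, 11.0 * u.deg)             # placeholder position
matches = db.query(coords=center, radius=5, hasphot=True)  # radius in arcsec
for transient in matches:
    print(transient["name/default_name"])  # Transient supports path-style keys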
@@ -242,6 +453,9 @@ class Otter(object):
         raw: bool = False,
     ) -> dict:
         """
+        This is a private method and is here just for the pipeline!!!
+        This should not be used by end users!
+
         Searches the summary.csv table and reads relevant JSON files

         WARNING! This does not do any conversions for you!
@@ -273,13 +487,7 @@ class Otter(object):
         # read in the metdata from all json files
         # this could be dangerous later on!!
         allfiles = glob.glob(os.path.join(self.DATADIR, "*.json"))
-        jsondata = []
-
-        # read the data from all the json files and convert to Transients
-        for jsonfile in allfiles:
-            with open(jsonfile, "r") as j:
-                t = Transient(json.load(j))
-                jsondata.append(t.get_meta())
+        jsondata = [self.load_file(jsonfile) for jsonfile in allfiles]

         return jsondata

@@ -290,6 +498,9 @@ class Otter(object):

         # then read and query the summary table
         summary = pd.read_csv(summary_table)
+        if len(summary) == 0:
+            return []
+
         # coordinate search first
         if coords is not None:
             if not isinstance(coords, SkyCoord):
@@ -360,6 +571,85 @@ class Otter(object):

         return outdata

+    def upload(self, json_data, collection="vetting", testing=False) -> Document:
+        """
+        Upload json_data to collection
+
+        Args:
+            json_data [dict] : A dictionary of the json data to upload to Otter
+            collection [str] : The collection to upload to
+            testing [bool] : Default is False
+
+        Returns:
+            The pyArango document that was uplaoded
+        """
+
+        # now add the document
+        doc = self[collection].createDocument(json_data)
+        if not testing:
+            doc.save()
+        return doc
+
+    def upload_private(self, collection="vetting", testing=False) -> None:
+        """
+        Upload the local/private data stored in self.DATADIR to the vetting collection
+        (like a SQL table) in the central arangodb document database.
+
+        WARNING! This will make any data in self.DATADIR public! Please double check
+        before using this method!
+
+        Args:
+            collection (str) : The collection to add the documents to. Default is
+                               "vetting" where the documents will then be vetted by
+                               our team.
+            testing (bool) : True if don't actually upload, False is default
+
+        Returns:
+            If testing is false (the default), returns the arangodb upload result. If
+            testing is true, returns the list of merged dictionaries that would get
+            uploaded.
+
+        Raises:
+            OtterLimitationError: If some objects in OTTER are within 5" we can't figure
+                out which ones to merge with which ones.
+
+        """
+
+        if not self.hasCollection(collection):
+            raise ValueError(f"{collection} not in {self}!")
+
+        local_data = self._query_datadir()
+        docs = []
+        for t in local_data:
+            res = self.query(coords=t.get_skycoord())
+
+            if len(res) > 1:
+                raise OtterLimitationError("Some objects in Otter are too close!")
+
+            elif len(res) == 1:
+                # this object exists in otter already, let's grab the transient data and
+                # merge the files
+                merged = t + res[0]
+
+                # copy over the special arangodb keys
+                merged["_key"] = res[0]["_key"]
+                merged["_id"] = res[0]["_id"]
+
+                # we also have to delete the document from the OTTER database
+                doc = self.fetchDocument(merged["_id"])
+                if not testing:
+                    doc.delete()
+                else:
+                    print(f"Would delete\n{doc}")
+
+            else:
+                # this means the object doesn't exist in otter already
+                merged = t
+
+            docs.append(self.upload(merged, collection=collection, testing=testing))
+
+        return docs
+
     def save(self, schema: list[dict], testing=False) -> None:
         """
         Upload all the data in the given list of schemas.
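A dry-run sketch of `upload_private`, following the docstring above: with `testing=True` nothing is saved to or deleted from the database, and the merged documents that would be uploaded are returned for inspection.

would_upload = db.upload_private(collection="vetting", testing=True)
print(f"{len(would_upload)} documents would be sent to the 'vetting' collection")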
@@ -384,7 +674,7 @@ class Otter(object):
             print(transient["name/default_name"])

             coord = transient.get_skycoord()
-            res = self.
+            res = self._query_datadir(coords=coord)

             if len(res) == 0:
                 # This is a new object to upload
@@ -437,7 +727,7 @@ class Otter(object):
         if isinstance(schema, Transient):
             schema = dict(schema)

-        out = json.dumps(schema, indent=4)
+        out = json.dumps(schema, indent=4, default=_np_encoder)
         # out = '[' + out
         # out += ']'

@@ -498,3 +788,481 @@ class Otter(object):
         alljsons.to_csv(os.path.join(self.DATADIR, "summary.csv"))

         return alljsons
+
+    @staticmethod
+    def from_csvs(
+        metafile: str,
+        photfile: str = None,
+        local_outpath: str = "private_otter_data",
+        db: Otter = None,
+    ) -> Otter:
+        """
+        Converts private metadata and photometry csvs to an Otter object stored
+        *locally* so you don't need to worry about accidentally uploading them to the
+        real Otter database.
+
+        Args:
+            metafile (str) : String filepath or string io csv object of the csv metadata
+            photfile (str) : String filepath or string io csv object of the csv
+                photometry
+            local_outpath (str) : The outpath to write the OTTER json files to
+            db (Otter) : An Otter instance to add the local_outpath to for querying.
+                This keyword can be useful if you have special permission for
+                the otter database and want to upload your private data
+
+        Returns:
+            An Otter object where the json files are stored locally
+        """
+        # read in the metadata and photometry file
+        meta = pd.read_csv(metafile)
+        phot = None
+
+        required_phot_cols = [
+            "name",
+            "date",
+            "date_format",
+            "filter",
+            "filter_eff",
+            "filter_eff_units",
+            "flux",
+            "flux_err",
+            "flux_unit",
+        ]
+
+        if photfile is not None:
+            phot_unclean = pd.read_csv(photfile)
+
+            phot = phot_unclean.dropna(subset=required_phot_cols)
+            if len(phot) != len(phot_unclean):
+                warn("""
+                Filtered out rows with nan in the photometry file! Make sure you
+                expect this behaviour!
+                """)
+
+            if "bibcode" not in phot:
+                phot["bibcode"] = "private"
+                warn("Setting the bibcode column to the special keyword 'private'!")
+
+            # we need to generate columns of wave_eff and freq_eff
+            wave_eff = []
+            freq_eff = []
+            wave_eff_unit = u.nm
+            freq_eff_unit = u.GHz
+            for val, unit in zip(phot.filter_eff, phot.filter_eff_units):
+                wave_eff.append(
+                    (val * u.Unit(unit))
+                    .to(wave_eff_unit, equivalencies=u.spectral())
+                    .value
+                )
+                freq_eff.append(
+                    (val * u.Unit(unit))
+                    .to(freq_eff_unit, equivalencies=u.spectral())
+                    .value
+                )
+
+            phot["band_eff_wave"] = wave_eff
+            phot["band_eff_wave_unit"] = str(wave_eff_unit)
+            phot["band_eff_freq"] = freq_eff
+            phot["band_eff_freq_unit"] = str(freq_eff_unit)
+
+        if not os.path.exists(local_outpath):
+            os.mkdir(local_outpath)
+
+        # drop duplicated names in meta and keep the first
+        meta = meta.drop_duplicates(subset="name", keep="first")
+
+        # merge the meta and phot data
+        if phot is not None:
+            data = pd.merge(phot, meta, on="name", how="inner")
+        else:
+            data = meta
+
+        # perform some data checks
+        assert (
+            len(data[pd.isna(data.ra)].name.unique()) == 0
+        ), "Missing some RA and Decs, please check the input files!"
+        if phot is not None:
+            for name in meta.name:
+                assert len(data[data.name == name]) == len(
+                    phot[phot.name == name]
+                ), f"failed on {name}"
+
+        # actually do the data conversion to OTTER
+        all_jsons = []
+        for name, tde in data.groupby("name"):
+            json = {}
+            tde = tde.reset_index()
+
+            # name first
+            json["name"] = dict(
+                default_name=name,
+                alias=[dict(value=name, reference=[tde.coord_bibcode[0]])],
+            )
+
+            # coordinates
+            json["coordinate"] = [
+                dict(
+                    ra=tde.ra[0],
+                    dec=tde.dec[0],
+                    ra_units=tde.ra_unit[0],
+                    dec_units=tde.dec_unit[0],
+                    reference=[tde.coord_bibcode[0]],
+                    coordinate_type="equitorial",
+                )
+            ]
+
+            ### distance info
+            json["distance"] = []
+
+            # redshift
+            if "redshift" in tde and not np.any(pd.isna(tde["redshift"])):
+                json["distance"].append(
+                    dict(
+                        value=tde.redshift[0],
+                        reference=[tde.redshift_bibcode[0]],
+                        computed=False,
+                        distance_type="redshift",
+                    )
+                )
+
+            # luminosity distance
+            if "luminosity_distance" in tde and not np.any(
+                pd.isna(tde["luminosity_distance"])
+            ):
+                json["distance"].append(
+                    value=tde.luminosity_distance[0],
+                    reference=[tde.luminosity_distance_bibcode[0]],
+                    unit=tde.luminosity_distance_unit[0],
+                    computed=False,
+                    distance_type="luminosity",
+                )
+
+            # comoving distance
+            if "comoving_distance" in tde and not np.any(
+                pd.isna(tde["comoving_distance"])
+            ):
+                json["distance"].append(
+                    value=tde.comoving_distance[0],
+                    reference=[tde.comoving_distance_bibcode[0]],
+                    unit=tde.comoving_distance_unit[0],
+                    computed=False,
+                    distance_type="comoving",
+                )
+
+            # remove the distance list if it is empty still
+            if len(json["distance"]) == 0:
+                del json["distance"]
+
+            ### Classification information that is in the csvs
+            # classification
+            if "classification" in tde:
+                json["classification"] = [
+                    dict(
+                        object_class=tde.classification[0],
+                        confidence=1,  # we know this is at least an tde
+                        reference=[tde.classification_bibcode[0]],
+                    )
+                ]
+
+            # discovery date
+            # print(tde)
+            if "discovery_date" in tde and not np.any(pd.isna(tde.discovery_date)):
+                json["date_reference"] = [
+                    dict(
+                        value=str(tde.discovery_date.tolist()[0]).strip(),
+                        date_format=tde.discovery_date_format.tolist()[0].lower(),
+                        reference=tde.discovery_date_bibcode.tolist(),
+                        computed=False,
+                        date_type="discovery",
+                    )
+                ]
+
+            # host information
+            if "host_ref" in tde and not np.any(pd.isna(tde.host_ref)):
+                host_info = dict(
+                    host_name=tde.host_name.tolist()[0].strip(),
+                    host_ra=tde.host_ra.tolist()[0],
+                    host_dec=tde.host_dec.tolist()[0],
+                    host_ra_units=tde.host_ra_unit.tolist()[0],
+                    host_dec_units=tde.host_dec_unit.tolist()[0],
+                    reference=[tde.host_ref.tolist()[0]],
+                )
+
+                if not pd.isna(tde.host_redshift.tolist()[0]):
+                    host_info["host_z"] = tde.host_redshift.tolist()[0]
+
+                if "host" in json:
+                    json["host"].append(host_info)
+                else:
+                    json["host"] = [host_info]
+
+            # comments
+            if "comment" in tde and not np.any(pd.isna(tde.comment)):
+                if "schema_version" not in json:
+                    json["schema_version"] = {}
+                json["schema_version"]["comment"] = tde.comment.tolist()[0]
+
+            # skip the photometry code if there is no photometry file
+            # if there is a photometry file then we want to convert it below
+            phot_sources = []
+            if phot is not None:
+                tde["obs_type"] = [
+                    freq_to_obstype(vv * u.Unit(uu))
+                    for vv, uu in zip(
+                        tde.band_eff_freq.values,
+                        tde.band_eff_freq_unit.values,
+                    )
+                ]
+
+                unique_filter_keys = []
+                index_for_match = []
+                json["photometry"] = []
+
+                if "telescope" in tde:
+                    to_grpby = ["bibcode", "telescope", "obs_type"]
+                else:
+                    to_grpby = ["bibcode", "obs_type"]
+
+                for grp_keys, p in tde.groupby(to_grpby, dropna=False):
+                    if len(grp_keys) == 3:
+                        src, tele, obstype = grp_keys
+                    else:
+                        src, obstype = grp_keys
+                        tele = None
+
+                    if src not in phot_sources:
+                        phot_sources.append(src)
+
+                    if len(np.unique(p.flux_unit)) == 1:
+                        raw_units = p.flux_unit.tolist()[0]
+                    else:
+                        raw_units = p.flux_unit.tolist()
+
+                    # add a column to phot with the unique filter key
+                    if obstype == "radio":
+                        filter_uq_key = (
+                            p.band_eff_freq.astype(str)
+                            + p.band_eff_freq_unit.astype(str)
+                        ).tolist()
+
+                    elif obstype in ("uvoir", "xray"):
+                        filter_uq_key = p["filter"].astype(str).tolist()
+
+                    else:
+                        raise ValueError("not prepared for this obstype!")
+
+                    unique_filter_keys += filter_uq_key
+                    index_for_match += p.index.tolist()
+
+                    if "upperlimit" not in p:
+                        p["upperlimit"] = False
+
+                    json_phot = dict(
+                        reference=src,
+                        raw=p.flux.astype(float).tolist(),
+                        raw_err=p.flux_err.astype(float).tolist(),
+                        raw_units=raw_units,
+                        date=p.date.tolist(),
+                        date_format=p.date_format.tolist(),
+                        upperlimit=p.upperlimit.tolist(),
+                        filter_key=filter_uq_key,
+                        obs_type=obstype,
+                    )
+
+                    if not pd.isna(tele):
+                        json_phot["telescope"] = tele
+
+                    if pd.isna(tele) and obstype == "xray":
+                        raise ValueError("The telescope is required for X-ray data!")
+
+                    # check the minimum and maximum filter values
+                    if obstype == "xray" and (
+                        "filter_min" not in p or "filter_max" not in p
+                    ):
+                        raise ValueError(
+                            "Minimum and maximum filters required for X-ray data!"
+                        )
+
+                    # check optional keys
+                    optional_keys = [
+                        "date_err",
+                        "sigma",
+                        "instrument",
+                        "phot_type",
+                        "exptime",
+                        "aperature",
+                        "observer",
+                        "reducer",
+                        "pipeline",
+                    ]
+                    for k in optional_keys:
+                        if k in p and not np.all(pd.isna(p[k])):
+                            # fill the nan values
+                            # this is to match with the official json format
+                            # and works with arangodb document structure
+                            p[k].fillna("null", inplace=True)
+
+                            json_phot[k] = p[k].tolist()
+
+                    # handle more detailed uncertainty information
+                    raw_err_detail = {}
+                    for key in ["statistical_err", "systematic_err", "iss_err"]:
+                        if key in p and not np.all(pd.isna(p[key])):
+                            k = key.split("_")[0]
+
+                            # fill the nan values
+                            # this is to match with the official json format
+                            # and works with arangodb document structure
+                            p[key].fillna(0, inplace=True)
+
+                            raw_err_detail[k] = p[key].tolist()
+
+                    if len(raw_err_detail) > 0:
+                        json_phot["raw_err_detail"] = raw_err_detail
+
+                    # check the possible corrections
+                    corrs = ["val_k", "val_s", "val_host", "val_av", "val_hostav"]
+                    for c in corrs:
+                        bool_v_key = c.replace("val", "corr")
+                        json_phot[c] = False
+
+                        if c in p:
+                            # fill the nan values
+                            # this is to match with the official json format
+                            # and works with arangodb document structure
+                            p[c].fillna("null", inplace=True)
+
+                            json_phot[c] = p[c].tolist()
+                            json_phot[bool_v_key] = [v != "null" for v in json_phot[c]]
+
+                    json["photometry"].append(json_phot)
+
+                tde["filter_uq_key"] = pd.Series(
+                    unique_filter_keys, index=index_for_match
+                )
+
+                # filter alias
+                # radio filters first
+                filter_keys1 = ["filter_uq_key", "band_eff_wave", "band_eff_wave_unit"]
+                if "filter_min" in tde:
+                    filter_keys1.append("filter_min")
+                if "filter_max" in tde:
+                    filter_keys1.append("filter_max")
+
+                filter_map = (
+                    tde[filter_keys1].drop_duplicates().set_index("filter_uq_key")
+                )  # .to_dict(orient='index')
+                try:
+                    filter_map_radio = filter_map.to_dict(orient="index")
+                except Exception:
+                    print(filter_map)
+                    print(name)
+                    raise Exception
+
+                json["filter_alias"] = []
+                for filt, val in filter_map_radio.items():
+                    obs_type = freq_to_obstype(
+                        float(val["band_eff_wave"]) * u.Unit(val["band_eff_wave_unit"])
+                    )
+                    if obs_type == "radio":
+                        filter_name = freq_to_band(
+                            (
+                                float(val["band_eff_wave"])
+                                * u.Unit(val["band_eff_wave_unit"])
+                            ).to(u.GHz, equivalencies=u.spectral())
+                        )
+                    else:
+                        filter_name = filt
+
+                    filter_alias_dict = dict(
+                        filter_key=filt,
+                        filter_name=filter_name,
+                        wave_eff=float(val["band_eff_wave"]),
+                        wave_units=val["band_eff_wave_unit"],
+                    )
+
+                    if "filter_min" in val:
+                        filter_alias_dict["wave_min"] = (
+                            val["filter_min"] * u.Unit(phot.filter_eff_units)
+                        ).to(
+                            u.Unit(
+                                val["band_eff_wave_unit"], equivalencies=u.spectral()
+                            )
+                        )
+
+                    if "filter_max" in val:
+                        filter_alias_dict["wave_max"] = (
+                            val["filter_max"] * u.Unit(phot.filter_eff_units)
+                        ).to(
+                            u.Unit(
+                                val["band_eff_wave_unit"], equivalencies=u.spectral()
+                            )
+                        )
+
+                    json["filter_alias"].append(filter_alias_dict)
+
+            # reference alias
+            # gather all the bibcodes
+            all_bibcodes = [tde.coord_bibcode[0]] + phot_sources
+            if (
+                "redshift_bibcode" in tde
+                and tde.redshift_bibcode[0] not in all_bibcodes
+                and not np.any(pd.isna(tde.redshift))
+            ):
+                all_bibcodes.append(tde.redshift_bibcode[0])
+
+            if (
+                "luminosity_distance_bibcode" in tde
+                and tde.luminosity_distance_bibcode[0] not in all_bibcodes
+                and not np.any(pd.isna(tde.luminosity_distance))
+            ):
+                all_bibcodes.append(tde.luminosity_distance_bibcode[0])
+
+            if (
+                "comoving_distance_bibcode" in tde
+                and tde.comoving_distance_bibcode[0] not in all_bibcodes
+                and not np.any(pd.isna(tde.comoving_distance))
+            ):
+                all_bibcodes.append(tde.comoving_distance_bibcode[0])
+
+            if (
+                "discovery_date_bibcode" in tde
+                and tde.discovery_date_bibcode[0] not in all_bibcodes
+                and not np.any(pd.isna(tde.discovery_date))
+            ):
+                all_bibcodes.append(tde.discovery_date_bibcode[0])
+
+            if (
+                "classification_bibcode" in tde
+                and tde.classification_bibcode[0] not in all_bibcodes
+                and not np.any(pd.isna(tde.classification))
+            ):
+                all_bibcodes.append(tde.classification_bibcode[0])
+
+            if (
+                "host_bibcode" in tde
+                and tde.host_bibcode not in all_bibcodes
+                and not np.any(pd.isna(tde.host_bibcode))
+            ):
+                all_bibcodes.append(tde.host_bibcode[0])
+
+            # find the hrn's for all of these bibcodes
+            uq_bibcodes, all_hrns = bibcode_to_hrn(all_bibcodes)
+
+            # package these into the reference alias
+            json["reference_alias"] = [
+                dict(name=name, human_readable_name=hrn)
+                for name, hrn in zip(uq_bibcodes, all_hrns)
+            ]
+
+            all_jsons.append(Transient(json))
+
+        if db is None:
+            db = Otter(datadir=local_outpath)
+        else:
+            db.datadir = local_outpath
+
+        # always save this document as a new one
+        db.save(all_jsons)
+        db.generate_summary_table(save=True)
+        return db
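Finally, an illustrative call to the new `Otter.from_csvs` helper (the file names are placeholders; the CSV columns must match the required and optional columns handled in the method):

private_db = Otter.from_csvs(
    metafile="my_meta.csv",              # placeholder: name, ra, dec, coord_bibcode, ...
    photfile="my_phot.csv",              # placeholder: the required photometry columns
    local_outpath="private_otter_data",  # converted OTTER JSON files land here
)

The returned object is a local Otter instance whose JSON files live under local_outpath, so the private data never reaches the shared database unless upload_private is called explicitly.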