astro-otter 0.0.2__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of astro-otter might be problematic.
- astro_otter-0.2.0.dist-info/METADATA +164 -0
- astro_otter-0.2.0.dist-info/RECORD +18 -0
- {astro_otter-0.0.2.dist-info → astro_otter-0.2.0.dist-info}/WHEEL +1 -1
- otter/__init__.py +6 -1
- otter/_version.py +1 -1
- otter/exceptions.py +29 -0
- otter/io/data_finder.py +1037 -0
- otter/io/host.py +186 -0
- otter/io/otter.py +766 -14
- otter/io/transient.py +337 -164
- otter/plotter/otter_plotter.py +6 -4
- otter/plotter/plotter.py +180 -2
- otter/schema.py +296 -0
- otter/util.py +258 -17
- astro_otter-0.0.2.dist-info/METADATA +0 -875
- astro_otter-0.0.2.dist-info/RECORD +0 -15
- {astro_otter-0.0.2.dist-info → astro_otter-0.2.0.dist-info/licenses}/LICENSE +0 -0
- {astro_otter-0.0.2.dist-info → astro_otter-0.2.0.dist-info}/top_level.txt +0 -0
otter/io/otter.py
CHANGED
@@ -7,15 +7,22 @@ import os
 import json
 import glob
 from warnings import warn
+from copy import deepcopy
+
+from pyArango.connection import Connection
+from pyArango.database import Database
+from pyArango.document import Document

 import pandas as pd
+import numpy as np

 from astropy.coordinates import SkyCoord, search_around_sky
 from astropy.table import Table
 from astropy import units as u

 from .transient import Transient
-from ..exceptions import FailedQueryError, OtterLimitationError
+from ..exceptions import FailedQueryError, OtterLimitationError, TransientMergeError
+from ..util import bibcode_to_hrn, freq_to_obstype, freq_to_band

 import warnings

@@ -24,7 +31,15 @@ warnings.simplefilter("once", UserWarning)
 warnings.simplefilter("once", u.UnitsWarning)


-class Otter(object):
+def _np_encoder(object):
+    """
+    Numpy data type encoder for json.dump
+    """
+    if isinstance(object, (np.generic, np.ndarray)):
+        return object.item()
+
+
+class Otter(Database):
     """
     This is the primary class for users to access the otter backend database

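The new `_np_encoder` hook lets `json.dump`/`json.dumps` serialize NumPy scalars and arrays that the standard encoder rejects. A minimal sketch of how such a `default=` hook behaves, with illustrative payload values (not taken from the package):

import json
import numpy as np

def _np_encoder(obj):
    # convert NumPy scalars/arrays to native Python types for the json module
    if isinstance(obj, (np.generic, np.ndarray)):
        return obj.item()

# np.float64 and np.bool_ values would raise TypeError without the default= hook
payload = {"flux": np.float64(1.2e-13), "upperlimit": np.bool_(False)}
print(json.dumps(payload, default=_np_encoder))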
@@ -36,7 +51,16 @@ class Otter(object):

     """

-    def __init__(
+    def __init__(
+        self,
+        url: str = "http://127.0.0.1:8529",
+        username: str = "user-guest",
+        password: str = "",
+        gen_summary: bool = False,
+        datadir: str = None,
+        debug: bool = False,
+        **kwargs,
+    ) -> None:
         # save inputs
         if datadir is None:
             self.CWD = os.path.dirname(os.path.abspath("__FILE__"))
@@ -47,6 +71,9 @@ class Otter(object):

         self.debug = debug

+        if gen_summary:
+            self.generate_summary_table(save=True)
+
         # make sure the data directory exists
         if not os.path.exists(self.DATADIR):
             try:
@@ -58,6 +85,9 @@ class Otter(object):
                 )
                 pass

+        connection = Connection(username=username, password=password, arangoURL=url)
+        super().__init__(connection, "otter", **kwargs)
+
     def get_meta(self, **kwargs) -> Table:
         """
         Get the metadata of the objects matching the arguments
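With this change `Otter` subclasses `pyArango.database.Database`: `__init__` opens a `Connection` and hands it to the parent class, so an instance is itself a handle on the "otter" ArangoDB database. A minimal connection sketch using the defaults from the new signature (it assumes an ArangoDB server is reachable at that URL and that the package exposes `Otter` at the top level):

from otter import Otter

# defaults shown in the diff: local server, guest user, empty password
db = Otter(url="http://127.0.0.1:8529", username="user-guest", password="")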
@@ -74,6 +104,7 @@ class Otter(object):
             "date_reference",
             "distance",
             "classification",
+            "reference_alias",
         ]

         return [t[metakeys] for t in self.query(**kwargs)]
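`get_meta` forwards its keyword arguments to `query`, so the same filters apply, and the returned metadata now also carries `reference_alias`. A hedged usage sketch with an illustrative filter:

# metadata (including the new reference_alias field) for transients with photometry
meta = db.get_meta(hasphot=True)
print(len(meta), "transients with photometry")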
@@ -192,11 +223,16 @@ class Otter(object):
             "converted_date_unit",
             "converted_wave_unit",
             "converted_freq_unit",
+            "filter_name",
             "obs_type",
             "upperlimit",
             "reference",
+            "human_readable_refs",
         ]

+        if "upperlimit" not in fullphot:
+            fullphot["upperlimit"] = False
+
         if not keep_raw:
             if "telescope" in fullphot:
                 fullphot = fullphot[keys_to_keep + ["telescope"]]
@@ -225,6 +261,186 @@ class Otter(object):
         return to_ret

     def query(
+        self,
+        names: list[str] = None,
+        coords: SkyCoord = None,
+        radius: float = 5,
+        minz: float = None,
+        maxz: float = None,
+        refs: list[str] = None,
+        hasphot: bool = False,
+        hasspec: bool = False,
+        classification: str = None,
+        class_confidence_threshold: float = 0,
+        query_private=False,
+        **kwargs,
+    ) -> dict:
+        """
+        Searches the arango database table and reads relevant JSON files
+
+        WARNING! This does not do any conversions for you!
+        This is how it differs from the `get_meta` method. Users should prefer to use
+        `get_meta`, `getPhot`, and `getSpec` independently because it is a better
+        workflow and can return the data in an astropy table with everything in the
+        same units.
+
+        Args:
+            names (list[str]): A list of names to get the metadata for
+            coords (SkyCoord): An astropy SkyCoord object with coordinates to match to
+            radius (float): The radius in arcseconds for a cone search, default is 0.05"
+            minz (float): The minimum redshift to search for
+            maxz (float): The maximum redshift to search for
+            refs (list[str]): A list of ads bibcodes to match to. Will only return
+                metadata for transients that have this as a reference.
+            hasphot (bool): if True, only returns transients which have photometry.
+            hasspec (bool): if True, only return transients that have spectra.
+            classification (str): A classification string to search for
+            class_confidence_threshold (float): classification confidence cutoff for
+                query, between 0 and 1. Default is 0.
+
+        Return:
+            Get all of the raw (unconverted!) data for objects that match the criteria.
+        """
+        # write some AQL filters based on the inputs
+        query_filters = ""
+
+        if hasphot is True:
+            query_filters += "FILTER 'photometry' IN ATTRIBUTES(transient)\n"
+
+        if hasspec is True:
+            query_filters += "FILTER 'spectra' IN ATTRIBUTES(transient)\n"
+
+        if classification is not None:
+            query_filters += f"""
+            FOR subdoc IN transient.classification
+                FILTER subdoc.confidence > TO_NUMBER({class_confidence_threshold})
+                FILTER subdoc.object_class LIKE '%{classification}%'
+            """
+
+        if minz is not None:
+            sfilt = f"""
+            FILTER 'redshift' IN transient.distance[*].distance_type
+            LET redshifts1 = (
+                FOR val IN transient.distance
+                    FILTER val.distance_type == 'redshift'
+                    FILTER TO_NUMBER(val.value) >= {minz}
+                    RETURN val
+            )
+            FILTER COUNT(redshifts1) > 0
+            """
+            query_filters += sfilt
+        if maxz is not None:
+            sfilt = f"""
+            FILTER 'redshift' IN transient.distance[*].distance_type
+            LET redshifts2 = (
+                FOR val IN transient.distance
+                    FILTER val.distance_type == 'redshift'
+                    FILTER TO_NUMBER(val.value) <= {maxz}
+                    RETURN val
+            )
+            FILTER COUNT(redshifts2) > 0
+            """
+            query_filters += sfilt
+
+        if names is not None:
+            if isinstance(names, str):
+                query_filters += f"""
+                FILTER UPPER(transient.name) LIKE UPPER('%{names}%')\n
+                """
+            elif isinstance(names, list):
+                namefilt = f"""
+                FOR name IN {names}
+                    FILTER name IN transient.name.alias[*].value\n
+                """
+                query_filters += namefilt
+            else:
+                raise Exception("Names must be either a string or list")
+
+        if refs is not None:
+            if isinstance(refs, str):  # this is just a single bibcode
+                query_filters += f"FILTER {refs} IN transient.reference_alias[*].name"
+            elif isinstance(refs, list):
+                query_filters += f"""
+                FOR ref IN {refs}
+                    FILTER ref IN transient.reference_alias[*].name
+                """
+            else:
+                raise Exception("reference list must be either a string or a list")
+
+        # define the query
+        query = f"""
+        FOR transient IN transients
+            {query_filters}
+            RETURN transient
+        """
+
+        # set batch size to 100 million (for now at least)
+        result = self.AQLQuery(query, rawResults=True, batchSize=100_000_000)
+
+        # now that we have the query results do the RA and Dec queries if they exist
+        if coords is not None:
+            # get the catalog RAs and Decs to compare against
+            query_coords = coords
+            good_tdes = []
+
+            for tde in result:
+                for coordinfo in tde["coordinate"]:
+                    if "ra" in coordinfo and "dec" in coordinfo:
+                        coord = SkyCoord(
+                            coordinfo["ra"],
+                            coordinfo["dec"],
+                            unit=(coordinfo["ra_units"], coordinfo["dec_units"]),
+                        )
+                    elif "l" in coordinfo and "b" in coordinfo:
+                        # this is galactic
+                        coord = SkyCoord(
+                            coordinfo["l"],
+                            coordinfo["b"],
+                            unit=(coordinfo["l_units"], coordinfo["b_units"]),
+                            frame="galactic",
+                        )
+                    else:
+                        raise ValueError(
+                            "Either needs to have ra and dec or l and b as keys!"
+                        )
+                    if query_coords.separation(coord) < radius * u.arcsec:
+                        good_tdes.append(tde)
+                        break  # we've confirmed this tde is in the cone!
+
+            arango_query_results = [Transient(t) for t in good_tdes]
+
+        else:
+            arango_query_results = [Transient(res) for res in result.result]
+
+        if not query_private:
+            return arango_query_results
+
+        private_results = self._query_datadir(
+            names=names,
+            coords=coords,
+            radius=radius,
+            minz=minz,
+            maxz=maxz,
+            refs=refs,
+            hasphot=hasphot,
+            hasspec=hasspec,
+        )
+
+        partially_merged = deepcopy(arango_query_results)
+        new_transients = []
+        for jj, t_private in enumerate(private_results):
+            for ii, t_public in enumerate(arango_query_results):
+                try:
+                    partially_merged[ii] += t_private
+                    break
+                except TransientMergeError:
+                    continue
+            else:
+                new_transients.append(t_private)
+
+        return partially_merged + new_transients
+
+    def _query_datadir(
         self,
         names: list[str] = None,
         coords: SkyCoord = None,
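The rewritten `query` assembles AQL filter clauses from its keyword arguments, runs them against the `transients` collection, and then applies any cone search in Python via `SkyCoord.separation`; with `query_private=True` it also merges in results from the local data directory. A hedged usage sketch based on the signature above (the name and coordinates are illustrative):

from astropy.coordinates import SkyCoord
import astropy.units as u

# case-insensitive name match, restricted to transients with photometry
tdes = db.query(names="2018hyz", hasphot=True)

# 5 arcsecond cone search around an illustrative position
nearby = db.query(coords=SkyCoord(150.0 * u.deg, 2.0 * u.deg), radius=5)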
@@ -237,6 +453,9 @@ class Otter(object):
         raw: bool = False,
     ) -> dict:
         """
+        This is a private method and is here just for the pipeline!!!
+        This should not be used by end users!
+
         Searches the summary.csv table and reads relevant JSON files

         WARNING! This does not do any conversions for you!
@@ -268,13 +487,7 @@ class Otter(object):
         # read in the metdata from all json files
         # this could be dangerous later on!!
         allfiles = glob.glob(os.path.join(self.DATADIR, "*.json"))
-        jsondata = []
-
-        # read the data from all the json files and convert to Transients
-        for jsonfile in allfiles:
-            with open(jsonfile, "r") as j:
-                t = Transient(json.load(j))
-                jsondata.append(t.get_meta())
+        jsondata = [self.load_file(jsonfile) for jsonfile in allfiles]

         return jsondata

@@ -285,6 +498,8 @@ class Otter(object):

         # then read and query the summary table
         summary = pd.read_csv(summary_table)
+        if len(summary) == 0:
+            return []

         # coordinate search first
         if coords is not None:
@@ -356,6 +571,85 @@ class Otter(object):

         return outdata

+    def upload(self, json_data, collection="vetting", testing=False) -> Document:
+        """
+        Upload json_data to collection
+
+        Args:
+            json_data [dict] : A dictionary of the json data to upload to Otter
+            collection [str] : The collection to upload to
+            testing [bool] : Default is False
+
+        Returns:
+            The pyArango document that was uplaoded
+        """
+
+        # now add the document
+        doc = self[collection].createDocument(json_data)
+        if not testing:
+            doc.save()
+        return doc
+
+    def upload_private(self, collection="vetting", testing=False) -> None:
+        """
+        Upload the local/private data stored in self.DATADIR to the vetting collection
+        (like a SQL table) in the central arangodb document database.
+
+        WARNING! This will make any data in self.DATADIR public! Please double check
+        before using this method!
+
+        Args:
+            collection (str) : The collection to add the documents to. Default is
+                               "vetting" where the documents will then be vetted by
+                               our team.
+            testing (bool) : True if don't actually upload, False is default
+
+        Returns:
+            If testing is false (the default), returns the arangodb upload result. If
+            testing is true, returns the list of merged dictionaries that would get
+            uploaded.
+
+        Raises:
+            OtterLimitationError: If some objects in OTTER are within 5" we can't figure
+                out which ones to merge with which ones.
+
+        """
+
+        if not self.hasCollection(collection):
+            raise ValueError(f"{collection} not in {self}!")
+
+        local_data = self._query_datadir()
+        docs = []
+        for t in local_data:
+            res = self.query(coords=t.get_skycoord())
+
+            if len(res) > 1:
+                raise OtterLimitationError("Some objects in Otter are too close!")
+
+            elif len(res) == 1:
+                # this object exists in otter already, let's grab the transient data and
+                # merge the files
+                merged = t + res[0]
+
+                # copy over the special arangodb keys
+                merged["_key"] = res[0]["_key"]
+                merged["_id"] = res[0]["_id"]
+
+                # we also have to delete the document from the OTTER database
+                doc = self.fetchDocument(merged["_id"])
+                if not testing:
+                    doc.delete()
+                else:
+                    print(f"Would delete\n{doc}")
+
+            else:
+                # this means the object doesn't exist in otter already
+                merged = t
+
+            docs.append(self.upload(merged, collection=collection, testing=testing))
+
+        return docs
+
     def save(self, schema: list[dict], testing=False) -> None:
         """
         Upload all the data in the given list of schemas.
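`upload_private` cross-matches every locally stored JSON file against the public database by coordinates, merges where a counterpart already exists, and pushes the result to the chosen collection; `testing=True` skips the actual delete/save calls. A hedged dry-run sketch (it assumes the instance has write access and a `datadir` pointing at the private JSON files):

# dry run: reports what would be merged and uploaded without touching the database
would_upload = db.upload_private(collection="vetting", testing=True)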
@@ -380,7 +674,7 @@ class Otter(object):
             print(transient["name/default_name"])

             coord = transient.get_skycoord()
-            res = self.
+            res = self._query_datadir(coords=coord)

             if len(res) == 0:
                 # This is a new object to upload
@@ -433,7 +727,7 @@ class Otter(object):
         if isinstance(schema, Transient):
             schema = dict(schema)

-        out = json.dumps(schema, indent=4)
+        out = json.dumps(schema, indent=4, default=_np_encoder)
         # out = '[' + out
         # out += ']'

@@ -473,10 +767,14 @@ class Otter(object):
             }

             if "date_reference" in t:
-
+                date_types = {d["date_type"] for d in t["date_reference"]}
+                if "discovery" in date_types:
+                    row["discovery_date"] = t.get_discovery_date()

             if "distance" in t:
-
+                dist_types = {d["distance_type"] for d in t["distance"]}
+                if "redshift" in dist_types:
+                    row["z"] = t.get_redshift()

             row["hasPhot"] = "photometry" in t
             row["hasSpec"] = "spectra" in t
@@ -490,3 +788,457 @@ class Otter(object):
         alljsons.to_csv(os.path.join(self.DATADIR, "summary.csv"))

         return alljsons
+
+    @staticmethod
+    def from_csvs(
+        metafile: str,
+        photfile: str = None,
+        local_outpath: str = "private_otter_data",
+        db: Otter = None,
+    ) -> Otter:
+        """
+        Converts private metadata and photometry csvs to an Otter object stored
+        *locally* so you don't need to worry about accidentally uploading them to the
+        real Otter database.
+
+        Args:
+            metafile (str) : String filepath or string io csv object of the csv metadata
+            photfile (str) : String filepath or string io csv object of the csv
+                photometry
+            local_outpath (str) : The outpath to write the OTTER json files to
+            db (Otter) : An Otter instance to add the local_outpath to for querying.
+                This keyword can be useful if you have special permission for
+                the otter database and want to upload your private data
+
+        Returns:
+            An Otter object where the json files are stored locally
+        """
+        # read in the metadata and photometry file
+        meta = pd.read_csv(metafile)
+        phot = None
+        if photfile is not None:
+            phot = pd.read_csv(photfile)
+
+            # we need to generate columns of wave_eff and freq_eff
+            wave_eff = []
+            freq_eff = []
+            wave_eff_unit = u.nm
+            freq_eff_unit = u.GHz
+            for val, unit in zip(phot.filter_eff, phot.filter_eff_units):
+                wave_eff.append(
+                    (val * u.Unit(unit))
+                    .to(wave_eff_unit, equivalencies=u.spectral())
+                    .value
+                )
+                freq_eff.append(
+                    (val * u.Unit(unit))
+                    .to(freq_eff_unit, equivalencies=u.spectral())
+                    .value
+                )
+
+            phot["band_eff_wave"] = wave_eff
+            phot["band_eff_wave_unit"] = str(wave_eff_unit)
+            phot["band_eff_freq"] = freq_eff
+            phot["band_eff_freq_unit"] = str(freq_eff_unit)
+
+        if not os.path.exists(local_outpath):
+            os.mkdir(local_outpath)
+
+        # drop duplicated names in meta and keep the first
+        meta = meta.drop_duplicates(subset="name", keep="first")
+
+        # merge the meta and phot data
+        if phot is not None:
+            data = pd.merge(phot, meta, on="name", how="inner")
+        else:
+            data = meta
+
+        # perform some data checks
+        assert (
+            len(data[pd.isna(data.ra)].name.unique()) == 0
+        ), "Missing some RA and Decs, please check the input files!"
+        if phot is not None:
+            for name in meta.name:
+                assert len(data[data.name == name]) == len(
+                    phot[phot.name == name]
+                ), f"failed on {name}"
+
+        # actually do the data conversion to OTTER
+        all_jsons = []
+        for name, tde in data.groupby("name"):
+            json = {}
+            tde = tde.reset_index()
+
+            # name first
+            json["name"] = dict(
+                default_name=name,
+                alias=[dict(value=name, reference=[tde.coord_bibcode[0]])],
+            )
+
+            # coordinates
+            json["coordinate"] = [
+                dict(
+                    ra=tde.ra[0],
+                    dec=tde.dec[0],
+                    ra_units=tde.ra_unit[0],
+                    dec_units=tde.dec_unit[0],
+                    reference=[tde.coord_bibcode[0]],
+                    coordinate_type="equitorial",
+                )
+            ]
+
+            ### distance info
+            json["distance"] = []
+
+            # redshift
+            if "redshift" in tde and not np.any(pd.isna(tde["redshift"])):
+                json["distance"].append(
+                    dict(
+                        value=tde.redshift[0],
+                        reference=[tde.redshift_bibcode[0]],
+                        computed=False,
+                        distance_type="redshift",
+                    )
+                )
+
+            # luminosity distance
+            if "luminosity_distance" in tde and not np.any(
+                pd.isna(tde["luminosity_distance"])
+            ):
+                json["distance"].append(
+                    value=tde.luminosity_distance[0],
+                    reference=[tde.luminosity_distance_bibcode[0]],
+                    unit=tde.luminosity_distance_unit[0],
+                    computed=False,
+                    distance_type="luminosity",
+                )
+
+            # comoving distance
+            if "comoving_distance" in tde and not np.any(
+                pd.isna(tde["comoving_distance"])
+            ):
+                json["distance"].append(
+                    value=tde.comoving_distance[0],
+                    reference=[tde.comoving_distance_bibcode[0]],
+                    unit=tde.comoving_distance_unit[0],
+                    computed=False,
+                    distance_type="comoving",
+                )
+
+            # remove the distance list if it is empty still
+            if len(json["distance"]) == 0:
+                del json["distance"]
+
+            ### Classification information that is in the csvs
+            # classification
+            if "classification" in tde:
+                json["classification"] = [
+                    dict(
+                        object_class=tde.classification[0],
+                        confidence=1,  # we know this is at least an tde
+                        reference=[tde.classification_bibcode[0]],
+                    )
+                ]
+
+            # discovery date
+            # print(tde)
+            if "discovery_date" in tde and not np.any(pd.isna(tde.discovery_date)):
+                json["date_reference"] = [
+                    dict(
+                        value=str(tde.discovery_date.tolist()[0]).strip(),
+                        date_format=tde.discovery_date_format.tolist()[0].lower(),
+                        reference=tde.discovery_date_bibcode.tolist(),
+                        computed=False,
+                        date_type="discovery",
+                    )
+                ]
+
+            # host information
+            if "host_ref" in tde and not np.any(pd.isna(tde.host_ref)):
+                host_info = dict(
+                    host_name=tde.host_name.tolist()[0].strip(),
+                    host_ra=tde.host_ra.tolist()[0],
+                    host_dec=tde.host_dec.tolist()[0],
+                    host_ra_units=tde.host_ra_unit.tolist()[0],
+                    host_dec_units=tde.host_dec_unit.tolist()[0],
+                    reference=[tde.host_ref.tolist()[0]],
+                )
+
+                if not pd.isna(tde.host_redshift.tolist()[0]):
+                    host_info["host_z"] = tde.host_redshift.tolist()[0]
+
+                if "host" in json:
+                    json["host"].append(host_info)
+                else:
+                    json["host"] = [host_info]
+
+            # comments
+            if "comment" in tde and not np.any(pd.isna(tde.comment)):
+                if "schema_version" not in json:
+                    json["schema_version"] = {}
+                json["schema_version"]["comment"] = tde.comment.tolist()[0]
+
+            # skip the photometry code if there is no photometry file
+            # if there is a photometry file then we want to convert it below
+            phot_sources = []
+            if phot is not None:
+                tde["obs_type"] = [
+                    freq_to_obstype(vv * u.Unit(uu))
+                    for vv, uu in zip(
+                        tde.band_eff_freq.values,
+                        tde.band_eff_freq_unit.values,
+                    )
+                ]
+
+                unique_filter_keys = []
+                index_for_match = []
+                json["photometry"] = []
+
+                if "telescope" in tde:
+                    to_grpby = ["bibcode", "telescope", "obs_type"]
+                else:
+                    to_grpby = ["bibcode", "obs_type"]
+
+                for grp_keys, p in tde.groupby(to_grpby, dropna=False):
+                    if len(grp_keys) == 3:
+                        src, tele, obstype = grp_keys
+                    else:
+                        src, obstype = grp_keys
+                        tele = None
+
+                    if src not in phot_sources:
+                        phot_sources.append(src)
+
+                    if len(np.unique(p.flux_unit)) == 1:
+                        raw_units = p.flux_unit.tolist()[0]
+                    else:
+                        raw_units = p.flux_unit.tolist()
+
+                    # add a column to phot with the unique filter key
+                    if obstype == "radio":
+                        filter_uq_key = (
+                            p.band_eff_freq.astype(str)
+                            + p.band_eff_freq_unit.astype(str)
+                        ).tolist()
+
+                    elif obstype in ("uvoir", "xray"):
+                        filter_uq_key = p["filter"].astype(str).tolist()
+
+                    else:
+                        raise ValueError("not prepared for this obstype!")
+
+                    unique_filter_keys += filter_uq_key
+                    index_for_match += p.index.tolist()
+
+                    if "upperlimit" not in p:
+                        p["upperlimit"] = False
+
+                    json_phot = dict(
+                        reference=src,
+                        raw=p.flux.astype(float).tolist(),
+                        raw_err=p.flux_err.astype(float).tolist(),
+                        raw_units=raw_units,
+                        date=p.date.tolist(),
+                        date_format=p.date_format.tolist(),
+                        upperlimit=p.upperlimit.tolist(),
+                        filter_key=filter_uq_key,
+                        obs_type=obstype,
+                    )
+
+                    if not pd.isna(tele):
+                        json_phot["telescope"] = tele
+
+                    if pd.isna(tele) and obstype == "xray":
+                        raise ValueError("The telescope is required for X-ray data!")
+
+                    # check the minimum and maximum filter values
+                    if obstype == "xray" and (
+                        "filter_min" not in p or "filter_max" not in p
+                    ):
+                        raise ValueError(
+                            "Minimum and maximum filters required for X-ray data!"
+                        )
+
+                    # check optional keys
+                    optional_keys = [
+                        "date_err",
+                        "sigma",
+                        "instrument",
+                        "phot_type",
+                        "exptime",
+                        "aperature",
+                        "observer",
+                        "reducer",
+                        "pipeline",
+                    ]
+                    for k in optional_keys:
+                        if k in p and not np.all(pd.isna(p[k])):
+                            # fill the nan values
+                            # this is to match with the official json format
+                            # and works with arangodb document structure
+                            p[k].fillna("null", inplace=True)
+
+                            json_phot[k] = p[k].tolist()
+
+                    # handle more detailed uncertainty information
+                    raw_err_detail = {}
+                    for key in ["statistical_err", "systematic_err", "iss_err"]:
+                        if key in p and not np.all(pd.isna(p[key])):
+                            k = key.split("_")[0]
+
+                            # fill the nan values
+                            # this is to match with the official json format
+                            # and works with arangodb document structure
+                            p[key].fillna(0, inplace=True)
+
+                            raw_err_detail[k] = p[key].tolist()
+
+                    if len(raw_err_detail) > 0:
+                        json_phot["raw_err_detail"] = raw_err_detail
+
+                    # check the possible corrections
+                    corrs = ["val_k", "val_s", "val_host", "val_av", "val_hostav"]
+                    for c in corrs:
+                        bool_v_key = c.replace("val", "corr")
+                        json_phot[c] = False
+
+                        if c in p:
+                            # fill the nan values
+                            # this is to match with the official json format
+                            # and works with arangodb document structure
+                            p[c].fillna("null", inplace=True)
+
+                            json_phot[c] = p[c].tolist()
+                            json_phot[bool_v_key] = [v != "null" for v in json_phot[c]]
+
+                    json["photometry"].append(json_phot)
+
+                tde["filter_uq_key"] = pd.Series(
+                    unique_filter_keys, index=index_for_match
+                )
+
+                # filter alias
+                # radio filters first
+                filter_keys1 = ["filter_uq_key", "band_eff_wave", "band_eff_wave_unit"]
+                if "filter_min" in tde:
+                    filter_keys1.append("filter_min")
+                if "filter_max" in tde:
+                    filter_keys1.append("filter_max")
+
+                filter_map = (
+                    tde[filter_keys1].drop_duplicates().set_index("filter_uq_key")
+                )  # .to_dict(orient='index')
+                try:
+                    filter_map_radio = filter_map.to_dict(orient="index")
+                except Exception:
+                    print(filter_map)
+                    print(name)
+                    raise Exception
+
+                json["filter_alias"] = []
+                for filt, val in filter_map_radio.items():
+                    obs_type = freq_to_obstype(
+                        float(val["band_eff_wave"]) * u.Unit(val["band_eff_wave_unit"])
+                    )
+                    if obs_type == "radio":
+                        filter_name = freq_to_band(
+                            (
+                                float(val["band_eff_wave"])
+                                * u.Unit(val["band_eff_wave_unit"])
+                            ).to(u.GHz, equivalencies=u.spectral())
+                        )
+                    else:
+                        filter_name = filt
+
+                    filter_alias_dict = dict(
+                        filter_key=filt,
+                        filter_name=filter_name,
+                        wave_eff=float(val["band_eff_wave"]),
+                        wave_units=val["band_eff_wave_unit"],
+                    )
+
+                    if "filter_min" in val:
+                        filter_alias_dict["wave_min"] = (
+                            val["filter_min"] * u.Unit(phot.filter_eff_units)
+                        ).to(
+                            u.Unit(
+                                val["band_eff_wave_unit"], equivalencies=u.spectral()
+                            )
+                        )
+
+                    if "filter_max" in val:
+                        filter_alias_dict["wave_max"] = (
+                            val["filter_max"] * u.Unit(phot.filter_eff_units)
+                        ).to(
+                            u.Unit(
+                                val["band_eff_wave_unit"], equivalencies=u.spectral()
+                            )
+                        )
+
+                    json["filter_alias"].append(filter_alias_dict)
+
+            # reference alias
+            # gather all the bibcodes
+            all_bibcodes = [tde.coord_bibcode[0]] + phot_sources
+            if (
+                "redshift_bibcode" in tde
+                and tde.redshift_bibcode[0] not in all_bibcodes
+                and not np.any(pd.isna(tde.redshift))
+            ):
+                all_bibcodes.append(tde.redshift_bibcode[0])
+
+            if (
+                "luminosity_distance_bibcode" in tde
+                and tde.luminosity_distance_bibcode[0] not in all_bibcodes
+                and not np.any(pd.isna(tde.luminosity_distance))
+            ):
+                all_bibcodes.append(tde.luminosity_distance_bibcode[0])
+
+            if (
+                "comoving_distance_bibcode" in tde
+                and tde.comoving_distance_bibcode[0] not in all_bibcodes
+                and not np.any(pd.isna(tde.comoving_distance))
+            ):
+                all_bibcodes.append(tde.comoving_distance_bibcode[0])
+
+            if (
+                "discovery_date_bibcode" in tde
+                and tde.discovery_date_bibcode[0] not in all_bibcodes
+                and not np.any(pd.isna(tde.discovery_date))
+            ):
+                all_bibcodes.append(tde.discovery_date_bibcode[0])
+
+            if (
+                "classification_bibcode" in tde
+                and tde.classification_bibcode[0] not in all_bibcodes
+                and not np.any(pd.isna(tde.classification))
+            ):
+                all_bibcodes.append(tde.classification_bibcode[0])
+
+            if (
+                "host_bibcode" in tde
+                and tde.host_bibcode not in all_bibcodes
+                and not np.any(pd.isna(tde.host_bibcode))
+            ):
+                all_bibcodes.append(tde.host_bibcode[0])
+
+            # find the hrn's for all of these bibcodes
+            uq_bibcodes, all_hrns = bibcode_to_hrn(all_bibcodes)
+
+            # package these into the reference alias
+            json["reference_alias"] = [
+                dict(name=name, human_readable_name=hrn)
+                for name, hrn in zip(uq_bibcodes, all_hrns)
+            ]
+
+            all_jsons.append(Transient(json))
+
+        if db is None:
+            db = Otter(datadir=local_outpath)
+        else:
+            db.datadir = local_outpath
+
+        # always save this document as a new one
+        db.save(all_jsons)
+        db.generate_summary_table(save=True)
+        return db
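`from_csvs` converts a private metadata CSV (plus an optional photometry CSV) into OTTER-style JSON files under `local_outpath` and returns an `Otter` instance pointed at them. A hedged usage sketch (the file names are placeholders, and the CSVs must carry the columns the method reads, e.g. `name`, `ra`, `dec`, `ra_unit`, `dec_unit`, and `coord_bibcode` in the metadata and `name`, `bibcode`, `flux`, `flux_err`, `flux_unit`, `date`, `date_format`, `filter_eff`, and `filter_eff_units` in the photometry):

from otter import Otter

private_db = Otter.from_csvs(
    metafile="my_meta.csv",
    photfile="my_phot.csv",
    local_outpath="private_otter_data",
)

Note that, with the new `__init__`, building the returned `Otter` still opens a connection to an ArangoDB server at the default URL, so a reachable server is assumed even for locally stored data.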