sdss-almanac 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
almanac/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ __version__ = "0.2.1"
2
+
3
+ from .config import config, get_config_path, ConfigManager
4
+ from .logger import logger
almanac/apogee.py ADDED
@@ -0,0 +1,300 @@
1
+ import os
2
+ import numpy as np
3
+ from glob import glob
4
+ from subprocess import check_output
5
+ from astropy.table import Table, hstack, unique
6
+ from itertools import groupby
7
+ from typing import Optional, Tuple, Dict, List, Set, Generator, Any, Union
8
+
9
+ from scipy.spatial.distance import cdist
10
+
11
+ from almanac import config, logger, utils
12
+ from almanac.data_models import Exposure
13
+ from almanac.data_models.types import ImageType
14
+ from almanac.data_models.utils import mjd_to_exposure_prefix, get_exposure_path
15
+
16
def get_unique_exposure_paths(paths: List[str]) -> List[str]:
    """
    Collapse per-chip exposure file paths into one path per exposure.

    Each path is split into `.../<observatory>/<mjd>/<prefix>-<chip>-<exposure>`.
    Paths that share the same observatory, MJD and exposure component are treated
    as the same exposure, and the chips ("a", "b", "c") seen for it are recorded.

    :param paths:
        List of file paths to APOGEE exposure files.

    :returns:
        One path per unique exposure, rebuilt under `config.apogee_dir` and
        pointing at the first chip (in "a", "b", "c" order) found for it.
    """
    chip_letters = "abc"

    # (observatory, mjd, exposure component) -> [prefix, per-chip presence flags].
    # The prefix kept is the one from the first path seen for that exposure.
    seen = {}
    for path in paths:
        _, observatory, mjd, basename = path.rsplit("/", 3)
        prefix, chip, exposure_apz = basename.split("-")

        entry = seen.setdefault(
            (observatory, mjd, exposure_apz),
            [prefix, [False, False, False]],
        )
        entry[1][chip_letters.index(chip)] = True

    def rebuild(key, entry):
        observatory, mjd, exposure_apz = key
        prefix, flags = entry
        chip = chip_letters[flags.index(True)]
        return f"{config.apogee_dir}/{observatory}/{mjd}/{prefix}-{chip}-{exposure_apz}"

    return [rebuild(key, entry) for key, entry in seen.items()]
44
+
45
+
46
def get_exposures(observatory: str, mjd: int) -> List[Exposure]:
    """
    Return the exposures taken from a given observatory on a given MJD.

    Exposure files are discovered on disk with a glob, reduced to one path per
    exposure, loaded with `Exposure.from_path`, and then passed through
    `organize_exposures`.

    :param observatory:
        The observatory name (e.g. "apo").

    :param mjd:
        The Modified Julian Date.

    :returns:
        The list produced by `organize_exposures` (a list, not a generator;
        callers index into it).
    """
    pattern = get_exposure_path(observatory, mjd, "a?R", "*", "*")
    unique_paths = get_unique_exposure_paths(glob(pattern))
    return organize_exposures([Exposure.from_path(path) for path in unique_paths])
61
+
62
+
63
def get_expected_number_of_exposures(observatory: str, mjd: int) -> int:
    """
    Query the SDSS operations database for the expected number of exposures
    for a given observatory and MJD. This is useful for identifying missing
    exposures.

    Note that this sets the schema of `opsdb.Exposure` and `opsdb.ExposureFlavor`
    to `opsdb_<observatory>` as a side effect.

    :param observatory:
        The observatory name (e.g. "apo").

    :param mjd:
        The Modified Julian Date.

    :returns:
        The largest exposure number recorded strictly between the exposure
        prefixes of `mjd` and `mjd + 1`, minus the prefix of `mjd`. Returns -1
        when `mjd` is earlier than the configured minimum for the observatory,
        when no exposure is found, or when the query fails.
    """

    if mjd < int(getattr(config.sdssdb_exposure_min_mjd, observatory)):
        return -1

    from almanac.database import opsdb
    from peewee import fn

    for model in (opsdb.Exposure, opsdb.ExposureFlavor):
        model._meta.schema = f"opsdb_{observatory}"

    start, end = map(mjd_to_exposure_prefix, (mjd, mjd + 1))

    q = (
        opsdb.Exposure.select(
            fn.max(opsdb.Exposure.exposure_no)
        )
        .where(
            (opsdb.Exposure.exposure_no > start)
            & (opsdb.Exposure.exposure_no < end)
        )
        .join(
            opsdb.ExposureFlavor,
            on=(opsdb.ExposureFlavor.pk == opsdb.Exposure.exposure_flavor),
        )
    )
    # Best effort: any database problem means "unknown" (-1). Catch `Exception`
    # rather than using a bare `except` so that KeyboardInterrupt and SystemExit
    # still propagate.
    try:
        last_exposure_no = q.scalar()
        if last_exposure_no is None:
            # No exposure recorded in the range.
            return -1
        return last_exposure_no - start
    except Exception:
        return -1
97
+
98
+
99
def organize_exposures(exposures: List[Exposure]) -> List[Exposure]:
    """
    Identify any missing exposures (based on non-contiguous exposure numbers)
    and fill them with placeholder exposures of image type "missing".

    The result has one entry for every exposure number from 1 up to the larger
    of the highest exposure number given and the expected number of exposures
    reported by `get_expected_number_of_exposures`.

    :param exposures:
        An iterable of `Exposure` instances. The observatory and MJD are taken
        from the exposure with the lowest exposure number.

    :returns:
        A list of `Exposure` instances ordered by exposure number, where entry
        `i` (0-based) has exposure number `i + 1`. Empty if no exposures are given.
    """

    exposures = sorted(exposures, key=lambda x: x.exposure)

    if not exposures:
        return []

    observatory, mjd = (exposures[0].observatory, exposures[0].mjd)

    n_expected = get_expected_number_of_exposures(observatory, mjd)
    max_exposure = max(exposures[-1].exposure, n_expected)

    # Index by exposure number instead of repeatedly popping the head of the
    # list. This is linear rather than quadratic, and a duplicate or
    # non-positive exposure number can no longer stall the matching and cause
    # every later real exposure to be replaced by a "missing" placeholder.
    # When an exposure number is duplicated, the first one (in sorted order) wins.
    by_number = {}
    for exposure in exposures:
        by_number.setdefault(exposure.exposure, exposure)

    organized = []
    for i in range(1, max_exposure + 1):
        found = by_number.get(i)
        if found is None:
            found = Exposure(
                observatory=observatory,
                exposure=i,
                mjd=mjd,
                image_type="missing"
            )
        organized.append(found)
    return organized
135
+
136
+
137
def get_sequences(exposures: List[Exposure], image_type: ImageType, fields: Tuple[str, ...]) -> List[Tuple[int, int]]:
    """
    Get exposure number ranges for sequences of a specific image type.

    Exposures of the requested image type are grouped into runs of consecutive
    items that share the same values for `fields`, and each run is then split
    with `utils.group_contiguous` on its exposure numbers.

    :param exposures:
        A list of `Exposure` instances. They are not re-sorted here, and only
        adjacent exposures are grouped together, so the input order matters.
    :param image_type:
        The image type to search for (e.g., "object", "arclamp").
    :param fields:
        Tuple of `Exposure` attribute names to group exposures by.

    :returns:
        List of tuples containing (start_exposure, end_exposure) for each sequence.
    """
    matching = [e for e in exposures if e.image_type == image_type]

    def group_key(exposure):
        return tuple(getattr(exposure, f) for f in fields)

    sequence_exposure_numbers = []
    for _, group in groupby(matching, key=group_key):
        numbers = [e.exposure for e in group]
        # NOTE(review): presumably yields (start, end) pairs for runs of
        # consecutive exposure numbers -- confirm against `utils.group_contiguous`.
        for si, ei in utils.group_contiguous(numbers):
            sequence_exposure_numbers.append((si, ei))
    return sequence_exposure_numbers
159
+
160
+
161
def get_arclamp_sequences(exposures: List[Exposure]) -> List[Tuple[int, int]]:
    """
    Find the arc lamp sequences among the given exposures.

    Exposures with image type "arclamp" are grouped by their `dithered_pixels`
    value via `get_sequences`.

    :param exposures:
        A list of `Exposure` instances.

    :returns:
        List of (start_exposure, end_exposure) tuples, one per arc lamp sequence.
    """
    group_by = ("dithered_pixels", )
    return get_sequences(exposures, image_type="arclamp", fields=group_by)
173
+
174
+
175
def get_science_sequences(exposures: List[Exposure]) -> List[Tuple[int, int]]:
    """
    Find the science sequences among the given exposures.

    Exposures with image type "object" are grouped by their `field_id`,
    `plate_id`, `config_id` and `image_type` values via `get_sequences`.

    :param exposures:
        A list of `Exposure` instances.

    :returns:
        List of (start_exposure, end_exposure) tuples, one per science sequence.
    """
    group_by = ("field_id", "plate_id", "config_id", "image_type")
    return get_sequences(exposures, image_type="object", fields=group_by)
187
+
188
+
189
def get_almanac_data(observatory: str, mjd: int, fibers: bool = False, meta: bool = False):
    """
    Return almanac data for all exposures taken from a given observatory on a given MJD.

    :param observatory:
        The observatory name (e.g. "apo").
    :param mjd:
        The Modified Julian Date.
    :param fibers:
        Whether to read the targets of the first exposure of each science
        sequence and collect their catalog identifiers and 2MASS designations.
    :param meta:
        Whether to look those identifiers up in the catalog database and store
        the matching `sdss_id` on every target of every science exposure.
        This only has an effect when `fibers` is also true, because the
        identifiers it resolves are gathered in the `fibers` step.

    :returns:
        A four-element tuple containing:
            - observatory name
            - MJD
            - the list of exposures
            - a dictionary with keys "objects" and "arclamps", each holding a
              list of (start_exposure, end_exposure) tuples
    """

    exposures = get_exposures(observatory, mjd)
    sequences = {
        "objects": get_science_sequences(exposures),
        "arclamps": get_arclamp_sequences(exposures),
    }
    if fibers:
        catalogids, twomass_designations = (set(), set())
        # We only need to get targets for one exposure in each science sequence.
        # `exposures` holds exposure number `n` at index `n - 1`.
        for si, _ in sequences["objects"]:
            for target in exposures[si - 1].targets:
                if not target.expected_to_be_assigned_sdss_id:
                    continue
                if target.catalogid > 0:
                    catalogids.add(target.catalogid)
                else:
                    twomass_designations.add(target.twomass_designation)

        if meta:
            # We will often run `get_almanac_data` in parallel (through multiple processes),
            # so here we are avoiding opening a database connection until the child process starts.
            from almanac.database import is_database_available, catalogdb

            lookup_catalog = {}
            lookup_twomass = {}
            # NOTE(review): `is_database_available` is used as a truthy value and
            # is never called -- confirm it is a flag rather than a function.
            if catalogids and is_database_available:
                q = (
                    catalogdb.SDSS_ID_flat
                    .select(
                        catalogdb.SDSS_ID_flat.sdss_id,
                        catalogdb.SDSS_ID_flat.catalogid
                    )
                    .where(
                        catalogdb.SDSS_ID_flat.catalogid.in_(tuple(catalogids))
                        & (catalogdb.SDSS_ID_flat.rank == 1)
                    )
                    .tuples()
                )
                for sdss_id, catalogid in q:
                    lookup_catalog[catalogid] = sdss_id

            if twomass_designations and is_database_available:
                q = (
                    catalogdb.SDSS_ID_flat
                    .select(
                        catalogdb.SDSS_ID_flat.sdss_id,
                        catalogdb.TwoMassPSC.designation
                    )
                    .join(
                        catalogdb.CatalogToTwoMassPSC,
                        on=(
                            catalogdb.SDSS_ID_flat.catalogid
                            == catalogdb.CatalogToTwoMassPSC.catalogid
                        ),
                    )
                    .join(
                        catalogdb.TwoMassPSC,
                        on=(
                            catalogdb.CatalogToTwoMassPSC.target_id
                            == catalogdb.TwoMassPSC.pts_key
                        ),
                    )
                    .where(
                        catalogdb.TwoMassPSC.designation.in_(
                            tuple(twomass_designations)
                        )
                    )
                    .tuples()
                )
                for sdss_id, designation in q:
                    lookup_twomass[designation] = sdss_id

            # Add sdss_id to targets. A catalogid match takes precedence over a
            # 2MASS designation match; targets with no match are left untouched.
            for si, ei in sequences["objects"]:
                for i in range(si - 1, ei):
                    for target in exposures[i].targets:
                        candidates = (
                            lookup_catalog.get(target.catalogid, -1),
                            lookup_twomass.get(target.twomass_designation, -1),
                        )
                        sdss_id = next((c for c in candidates if c > 0), None)
                        if sdss_id is not None:
                            target.sdss_id = sdss_id

    return (observatory, mjd, exposures, sequences)