remove-starfield 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ from . import utils
2
+ from .processor import ImageHolder, ImageProcessor
3
+ from . import reducers
4
+ from .subtracted_image import SubtractedImage
5
+ from .starfield import Starfield
6
+ from . import star_tools
7
+
8
+ from .core import build_starfield_estimate
9
+
10
+ __version__ = "0.0.1"
@@ -0,0 +1,407 @@
1
+ from collections.abc import Iterable
2
+ from itertools import repeat
3
+ from math import ceil, floor
4
+ import multiprocessing
5
+ import random
6
+
7
+ from astropy.wcs import WCS
8
+ import numpy as np
9
+ import reproject
10
+ from tqdm.auto import tqdm
11
+ import warnings
12
+
13
+ from . import ImageProcessor, Starfield, utils
14
+ from .reducers import StackReducer, GaussianReducer
15
+
16
+
17
def build_starfield_estimate(
        files: Iterable[str],
        frame_count: bool=False,
        attribution: bool=False,
        processor: ImageProcessor=ImageProcessor(),
        reducer: StackReducer=GaussianReducer(),
        ra_bounds: Iterable[float]=None,
        dec_bounds: Iterable[float]=None,
        target_mem_usage: float=10,
        map_scale: float=0.04,
        stack_all: bool=False,
        shuffle: bool=True) -> Starfield:
    """Generate a starfield estimate from a set of images

    This is generally a slow, high-memory-use function, as each image must be
    reprojected into the frame of the output all-sky map, and all reprojected
    images must be held in memory so that ``reducer`` can reduce the stack of
    values at each pixel. To contain the memory usage, the output map is
    divided into chunks which are each computed separately. This should
    generally divide up the reprojection work as well, but some work (such as
    the PSF correction, if applied during this process) is repeated with each
    chunk, so there is a speed--memory tradeoff.

    Parameters
    ----------
    files : ``Iterable`` of ``str``
        The file paths of the input images. Any iterable is accepted; it is
        copied into a list before use, and the caller's object is never
        modified.
    frame_count : ``bool``, optional
        Whether to track and return the number of input images contributing to
        each pixel in the output image.
    attribution : ``bool``, optional
        If True, this function also returns an attribution array. For each
        pixel in the output skymap, this array contains an index into the list
        of filenames (in the order given by the caller), indicating which file
        contributed the value selected for the output map. The input whose
        value is closest to the output value is the one called the source.
        Pixels without an output value are marked with ``-1``.
    processor : `ImageProcessor`, optional
        An instance of a class providing functions allowing the handling of the
        input images to be customized. This class is responsible for loading
        images from files, pre-processing them before being reprojected, and
        post-processing them after reprojection but before stacking. If not
        provided, a default implementation loads data from FITS files and does
        nothing else. Must be pickleable to support parallel processing.
    reducer : `StackReducer`, optional
        An instance of a class with a ``reduce_strip`` method that reduces the
        stack of images to an output map. See `StackReducer` for more details.
    ra_bounds, dec_bounds : ``Iterable`` of ``float``, optional
        If provided, the bounds to use for the output star map (instead of
        producing a full all-sky map). If not provided, the output map spans
        all of right ascension, but the input images are used to determine
        appropriate declination bounds (ensuring that all input images are
        contained in the bounds).
    target_mem_usage : ``float``, optional
        The (approximate) maximum amount of memory to use for the accumulation
        array, as a number of gigabytes. This will determine how many chunks
        the output map is broken into. Higher values will tend to speed up the
        computation. Actual memory usage will likely be less, unless there is a
        portion of the all-sky map spanned by all the input images. If the
        target is too small to hold even a single output column for every
        input image, a warning is issued and one column is processed at a
        time.
    map_scale : ``float``
        The scale of the output map, in degrees per pixel. (It's the CDELT
        parameter of the output WCS.)
    stack_all : ``bool``, optional
        For debugging---after the first chunk of the starfield has been
        computed, return the full accumulation array as well as the starfield
        (which is empty in all but the first chunk). This can be very useful to
        inspect the distribution of values at each location. Use in combination
        with ``ra_bounds`` and ``dec_bounds`` to target a particular portion of
        the sky.
    shuffle : ``bool``, optional
        As the input images are reprojected into a given chunk of the output
        skymap, it is likely that many of them won't cover that chunk at all.
        These "no-op" images will result in a very uneven parallel processing
        workload, if the images that do fall within the chunk are clustered
        within the list of input images. To ensure a more even distribution of
        work, the list of input images is shuffled (reproducibly, with a fixed
        seed). This can be disabled for debugging purposes.

    Returns
    -------
    starfield : `Starfield` or ``List[Starfield]``
        The starfield estimate, including a WCS and, if specified, frame counts
        and attribution information. If multiple maps are produced by
        ``reducer``, this will be a list of `Starfield` s.
    stack : ``np.ndarray``
        Returned if ``stack_all==True``. An array of shape ``(n_images x ny x
        chunk_width)`` containing all the samples that contribute to the pixels
        in the first chunk of computation. (Note that ``n_images`` is the
        number of images which contributed to this chunk, not the number of
        input images.)
    stack_sources : ``np.ndarray``
        Returned if ``stack_all==True``. The index in the input file list of
        the source file for each position along the first axis of ``stack``.

    Raises
    ------
    ValueError
        If ``files`` is empty, or if the requested bounds leave an output map
        with no pixels.
    """
    # `files` may be any iterable, but below we need to slice it, take its
    # length and traverse it once per chunk, so make a private list up front.
    files = list(files)
    if not files:
        raise ValueError("At least one input file is required")
    # Attribution indices refer to the caller's ordering, so record it before
    # any shuffling happens.
    fname_to_i = {fname: i for i, fname in enumerate(files)}

    # Create the WCS describing the whole-sky starmap
    shape = [int(floor(180/map_scale)), int(floor(360/map_scale))]
    starfield_wcs = WCS(naxis=2)
    # n.b. it seems the RA wrap point is chosen so there's 180 degrees included
    # on either side of crpix
    crpix = [shape[1]/2 + .5, shape[0]/2 + .5]
    starfield_wcs.wcs.crpix = crpix
    starfield_wcs.wcs.crval = 180, 0
    starfield_wcs.wcs.cdelt = map_scale, map_scale
    starfield_wcs.wcs.ctype = 'RA---CAR', 'DEC--CAR'
    starfield_wcs.wcs.cunit = 'deg', 'deg'

    if ra_bounds is not None:
        # Apply user-specified RA bounds to the output starfield
        (x_min, x_max), _ = starfield_wcs.all_world2pix(ra_bounds, [0, 0], 0)
        x_min = int(x_min)
        x_max = int(x_max)
        starfield_wcs = starfield_wcs[:, x_min:x_max+1]
        shape[1] = x_max - x_min
    # n.b. Since RA is a periodic coordinate, the notion of bounds gets weird
    # without special handling, so don't attempt to automatically clamp the
    # output map in RA.

    if dec_bounds is not None:
        # Apply user-specified dec bounds to the output starfield
        _, (y_min, y_max) = starfield_wcs.all_world2pix(
            [10, 10], dec_bounds, 0)
        y_min = int(y_min)
        y_max = int(y_max)
        starfield_wcs = starfield_wcs[y_min:y_max+1, :]
        shape[0] = y_max - y_min
    else:
        # Figure out how much of the full sky is covered by our set of images.
        # If we don't go all the way to the celestial poles, we can limit our
        # declination range and save time & memory.
        # Only process every 15th file to speed this up a bit, on the
        # assumption that the on-sky position varies slowly through the image
        # sequence.
        bounds = utils.find_collective_bounds(
            files[::15], starfield_wcs, processor=processor)
        # Apply default dec bounds to the output starfield, based on the
        # declination values covered by the input images.
        shape[0] = int(bounds[3] - bounds[2])
        starfield_wcs = starfield_wcs[bounds[2]:bounds[3]]

    if shape[0] < 1 or shape[1] < 1:
        raise ValueError(
            f"The output map would be empty (shape {tuple(shape)}); "
            "check ra_bounds and dec_bounds")

    # Allocate this later, once we know how many maps the reducer produces
    starfields = None
    if frame_count:
        count = np.zeros(shape, dtype=int)

    # Divide the output starfields into vertical strips, each of which will be
    # processed separately. This avoids extreme memory demands for large sets
    # of input files.
    size_of_pixel = np.empty(1).dtype.itemsize
    size_of_column = size_of_pixel * shape[0] * len(files)
    stride = int(target_mem_usage * 1024**3 // size_of_column)
    if stride < 1:
        # Not even one column fits in the requested budget. A zero-width chunk
        # can't be processed, so fall back to the narrowest possible chunk.
        warnings.warn(
            "target_mem_usage is too small to hold a single output column "
            "for every input image; processing one column at a time")
        stride = 1
    stride = min(stride, shape[1])

    n_chunks = ceil(shape[1] / stride)
    if stack_all:
        n_chunks = 1
    pbar_stack = tqdm(total=n_chunks * len(files), desc="Reprojecting")
    pbar_reduce = tqdm(total=n_chunks * shape[0], desc="Reducing")

    # The order we process these files doesn't matter, and for every section,
    # there will be some input files covering that section and some that don't.
    # Shuffle the file list to get a more even distribution of lots-of-work and
    # no-work files, to benefit the multiprocessing.
    if shuffle:
        # A private, fixed-seed generator gives the same ordering on every run
        # without reseeding the process-wide `random` state.
        random.Random(1).shuffle(files)

    # This is the size of the "working space" array, where we accumulate the
    # values from every image at every pixel in this chunk of the starfield.
    cutout_shape = (len(files), shape[0], stride)

    with multiprocessing.Pool() as p:
        # Make some memory allocations after the fork

        # This is the big honking array that holds a bunch of reprojected
        # images in memory at once. We allocate it only once and keep re-using
        # it, since allocating so much is quite slow.
        starfield_accum = np.empty(cutout_shape)

        # Begin looping over output chunks
        for i in range(n_chunks):
            # Work out where we are in the all-sky map
            xstart = stride * i
            xstop = min(shape[1], stride * (i + 1))
            if xstop - xstart < stride:
                # This must be the last iteration
                assert i == n_chunks - 1
                starfield_accum = starfield_accum[:, :, 0:xstop-xstart]
                cutout_shape = starfield_accum.shape
            # imap_unordered only accepts one list of arguments, so bundle up
            # what we need.
            reproject_args = zip(
                files,
                repeat(starfield_wcs[:, xstart:xstop]),
                repeat(cutout_shape[1:]),
                repeat(processor))
            n_good = 0
            stack_sources = []
            for (ymin, ymax, xmin, xmax, output, fname) in p.imap_unordered(
                    _process_file, reproject_args, chunksize=5):
                pbar_stack.update()
                if output is not None:
                    # In practice, not every input image covers a portion of
                    # each chunk of the output map. As an optimization, instead
                    # of assigning a layer of the accumulation array to each
                    # input image from the start, we assign as we go---each
                    # time a process returns a contribution from an image, we
                    # move to the next layer of the accumulation array, clear
                    # it, and paste in what we got from the worker process.
                    # This avoids having to clear out the entire array each
                    # time through the loop, and makes it easy to reduce the
                    # work done during the reduction, since we're not feeding
                    # in as many NaNs that have to be filtered.
                    starfield_accum[n_good].fill(np.nan)
                    starfield_accum[n_good, ymin:ymax, xmin:xmax] = output
                    n_good += 1
                    if frame_count:
                        count[:, xstart:xstop][ymin:ymax, xmin:xmax] += (
                            np.isfinite(output))
                    stack_sources.append(fname_to_i[fname])
            pbar_stack.refresh()
            # Ignore all the slices we didn't use
            starfield_accum_used = starfield_accum[:n_good]

            stack_sources = np.array(stack_sources)

            # Now that the stacking is complete, we need to reduce the stack
            # to an output value at each pixel

            def reduce_args():
                # Generator for arguments as we run the reduction in parallel
                for row in range(starfield_accum_used.shape[1]):
                    # We break up the accumulation array into horizontal
                    # strips, with each strip being one job for the parallel
                    # processing (trying to strike a balance between making
                    # enough work units without making them too small, as it
                    # would be if we did each output pixel as one parallel
                    # job). We copy each chunk to ensure we're not implicitly
                    # sending the whole accumulation array between processes.
                    yield (
                        starfield_accum_used[:, row].copy(),
                        stack_sources if attribution else None,
                        reducer)

            for y, res in enumerate(p.imap(
                    _reduce_strip, reduce_args(), chunksize=20)):
                pbar_reduce.update()
                if attribution:
                    res, srcs = res
                if starfields is None:
                    # Allocate what will be the final output arrays, since we
                    # now know how many output maps to produce
                    starfields = [
                        np.full(shape, np.nan) for _ in range(res.shape[0])]
                    if attribution:
                        attribution_array = np.full(
                            (len(starfields), *shape), -1, dtype=int)
                if attribution:
                    attribution_array[:, y, xstart:xstop] = srcs
                for starfield, r in zip(starfields, res):
                    starfield[y, xstart:xstop] = r
            pbar_reduce.refresh()

    if attribution:
        # Pixels with no output value have no meaningful source
        mask = np.isnan(starfields[0])
        attribution_array[:, mask] = -1
    pbar_stack.close()
    pbar_reduce.close()

    # Every output map shares the same frame-count array and WCS
    frame_counts = count if frame_count else None
    objects = [
        Starfield(
            starfield=sf, wcs=starfield_wcs, frame_count=frame_counts,
            attribution=attribution_array[i] if attribution else None)
        for i, sf in enumerate(starfields)]
    if len(objects) == 1:
        objects = objects[0]
    if stack_all:
        return objects, starfield_accum_used, stack_sources
    return objects
321
+
322
+
323
def _process_file(args):
    """
    Worker routine, run in parallel: reproject one file into the map chunk

    ``args`` is a single tuple ``(fname, starfield_wcs, shape, processor)``,
    bundled that way because the pool's ``imap`` functions pass only one
    argument. ``starfield_wcs`` and ``shape`` describe the chunk of the output
    map currently being assembled.

    Returns ``(ymin, ymax, xmin, xmax, output, fname)``, where ``output`` is
    the reprojected, post-processed image and the four bounds locate it within
    the chunk. If the image does not overlap the chunk, a list of six ``None``
    values is returned instead.
    """
    fname, starfield_wcs, shape, processor = args
    n_rows, n_cols = shape
    # Returned whenever this image has nothing to add to the current chunk
    no_contribution = [None] * 6

    holder = processor.load_image(fname)

    # Work out the chunk's extent in world coordinates, padded by one pixel on
    # every side, and ask where this image lands within it
    pixel_scale = starfield_wcs.wcs.cdelt
    ra_first, dec_first = starfield_wcs.pixel_to_world_values(0, 0)
    ra_last, dec_last = starfield_wcs.pixel_to_world_values(
        n_cols - 1, n_rows - 1)
    padded_world_bounds = [
        ra_first - pixel_scale[0], ra_last + pixel_scale[0],
        dec_first - pixel_scale[1], dec_last + pixel_scale[1]]
    bounds = utils.find_bounds(
        holder.wcs, starfield_wcs, world_coord_bounds=padded_world_bounds)

    if bounds is None:
        # This image doesn't span the portion of the all-sky map now being
        # computed, so we can stop now.
        return no_contribution
    xmin, xmax, ymin, ymax = bounds

    # Clamp the footprint to the edges of the chunk
    xmin = 0 if xmin < 0 else xmin
    ymin = 0 if ymin < 0 else ymin
    xmax = n_cols if xmax >= n_cols else xmax
    ymax = n_rows if ymax >= n_rows else ymax

    overlaps_chunk = (
        xmin < n_cols and xmax > 0 and ymin < n_rows and ymax > 0)
    if not overlaps_chunk:
        return no_contribution

    # Only now that we know the image is needed do we pay for pre-processing
    holder = processor.preprocess_image(holder)

    cutout_wcs = starfield_wcs[ymin:ymax, xmin:xmax]
    cutout_shape = (ymax - ymin, xmax - xmin)

    reprojected = reproject.reproject_adaptive(
        (holder.data, holder.wcs), cutout_wcs, cutout_shape,
        return_footprint=False,
        roundtrip_coords=False,
        boundary_mode='strict',
        conserve_flux=True,
        # This happens to handle the output coordinate wrap-around much better
        center_jacobian=True,
    )

    reprojected = processor.postprocess_image(reprojected, cutout_wcs, holder)

    return ymin, ymax, xmin, xmax, reprojected, fname
375
+
376
+
377
def _reduce_strip(args):
    """
    Internal function reducing one strip of the image stack. Run in parallel

    Parameters
    ----------
    args : ``tuple``
        A single tuple ``(data, stack_sources, reducer)``. ``data`` is the
        strip of the accumulation array, with the stacked images along the
        first axis. ``stack_sources`` is either ``None`` or an array giving,
        for each layer of ``data``, the index of the input file it came from.
        ``reducer`` is an object whose ``reduce_strip`` method turns ``data``
        into one or more output rows.

    Returns
    -------
    result : ``np.ndarray``
        The reducer's output, always two-dimensional: one row per output map.
        A one-dimensional reducer output is promoted to a single row.
    sources : ``list`` of ``np.ndarray``
        Only returned when ``stack_sources`` is not ``None``. One integer array
        per output map, holding for each pixel the ``stack_sources`` entry of
        the layer whose value is closest to the output value, or ``-1`` where
        no layer has a finite distance to the output (e.g. the output or all
        of the inputs are NaN there).
    """
    data, stack_sources, reducer = args
    with warnings.catch_warnings():
        # Pixels covered by no input image are all-NaN along the stack axis,
        # and numpy's nan-aware reductions warn about each of them.
        warnings.filterwarnings(action='ignore',
                                message=".*All-NaN slice.*")
        warnings.filterwarnings(action='ignore',
                                message=".*Mean of empty slice.*")
        result = reducer.reduce_strip(data)
    if len(result.shape) == 1:
        result = result.reshape((1, -1))
    if stack_sources is None:
        return result

    # We need to figure out which input image contributed the output value for
    # each pixel. The output value may not exactly equal any single input
    # (e.g. if the reducer interpolates between data points), so we search for
    # the closest input value and call that the contributor.
    sources = []
    for res in result:
        distances = np.abs(data - res)
        distances = np.nan_to_num(distances, nan=np.inf, posinf=np.inf)
        # Decide pixel by pixel whether any layer is a usable match, so that
        # every entry of `sources` is an array of the same shape and pixels
        # without a match are never pinned on an arbitrary layer.
        has_match = np.isfinite(distances).any(axis=0)
        if has_match.any():
            nearest = np.argmin(distances, axis=0)
            sources.append(np.where(has_match, stack_sources[nearest], -1))
        else:
            # Also covers an empty stack, where argmin would be undefined
            sources.append(np.full(has_match.shape, -1))
    return result, sources
407
+
@@ -0,0 +1,124 @@
1
+ from dataclasses import dataclass
2
+
3
+ from astropy.io import fits
4
+ from astropy.wcs import WCS
5
+ import numpy as np
6
+
7
+ from . import utils
8
+
9
+
10
@dataclass
class ImageHolder:
    """
    Simple container bundling an image array with its WCS and metadata

    `ImageProcessor` implementations are free to set extra attributes on an
    `ImageHolder` instance when they need to pass additional information along
    the load -> preprocess -> postprocess chain.
    """
    # The image's pixel values
    data: np.ndarray
    # The WCS associated with ``data``
    wcs: WCS
    # Metadata accompanying the image: a plain dict or a FITS header
    meta: dict | fits.Header
22
+
23
+
24
class ImageProcessor:
    """Base class defining the hooks for instrument-specific processing

    Subclass this class and hand an instance to any function accepting a
    processor in order to inject whatever custom handling a data set needs.
    The default implementation reads FITS files and leaves the data untouched.

    When given to `build_starfield_estimate`, every input image is first
    loaded with `load_image`. Images that fall within the portion of the sky
    map being assembled are then passed to `preprocess_image`, the place for
    calibration, masking or trimming. Once reprojected, the image goes through
    `postprocess_image` and is then added to the stack of reprojected images.

    When given to `Starfield.subtract_from_image`, the input image is loaded
    and preprocessed, but `postprocess_image` is never called. Instead, the
    starfield estimate projected into the input image's frame goes through
    `postprocess_starfield_estimate`, and the result is subtracted from the
    input image.
    """

    def load_image(self, filename: str) -> ImageHolder:
        """Reads an image from disk

        Parameters
        ----------
        filename : ``str``
            Path of the file to open

        Returns
        -------
        image_holder : `ImageHolder`
            Holds the image array, its WCS, and its header, for use by the
            later processing steps
        """
        with fits.open(filename) as hdu_list:
            data, celestial_wcs, hdr = utils.find_data_and_celestial_wcs(
                hdu_list, data=True, wcs=True, header=True)
        return ImageHolder(data=data, wcs=celestial_wcs, meta=hdr)

    def preprocess_image(self, image_holder: ImageHolder) -> ImageHolder:
        """Hook run on an image before it is reprojected and stacked

        The default implementation makes no changes.

        Parameters
        ----------
        image_holder : `ImageHolder`
            What `load_image` returned for this image

        Returns
        -------
        image_holder : `ImageHolder`
            The holder with any adjustments applied, whether to the image
            array or to the WCS
        """
        return image_holder

    def postprocess_image(self,
                          processed_image: np.ndarray,
                          processed_wcs: WCS,
                          image_holder: ImageHolder) -> np.ndarray:
        """
        Hook run on a reprojected image just before it joins the stack.

        The default implementation makes no changes.

        Parameters
        ----------
        processed_image : ``np.ndarray``
            The image after reprojection
        processed_wcs : ``WCS``
            The WCS that goes with ``processed_image``
        image_holder : `ImageHolder`
            The `ImageHolder` of the input image that was reprojected

        Returns
        -------
        image : ``np.ndarray``
            The image array to be stacked
        """
        return processed_image

    def postprocess_starfield_estimate(
            self,
            starfield_estimate: np.ndarray,
            input_image_holder: ImageHolder) -> np.ndarray:
        """
        Hook run on a starfield estimate before it is subtracted from an image

        The default implementation makes no changes.

        Parameters
        ----------
        starfield_estimate : ``np.ndarray``
            The starfield estimate computed for this image
        input_image_holder : `ImageHolder`
            The `ImageHolder` of the input image that this estimate
            corresponds to

        Returns
        -------
        starfield_estimate : ``np.ndarray``
            The estimate in its final form, ready to be subtracted from the
            input image
        """
        return starfield_estimate