ACID-code 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,852 @@
1
+ import numpy as np
2
+ import emcee
3
+ import matplotlib.pyplot as plt
4
+ from scipy.optimize import minimize
5
+ from astropy.io import fits
6
+ import ACID_code.LSD_func_faster as LSD
7
+ import glob
8
+ from scipy.interpolate import interp1d
9
+ from scipy.signal import find_peaks
10
+ import multiprocessing as mp
11
+ from functools import partial
12
+ from multiprocessing import Pool
13
+ from statistics import stdev
14
+ import time
15
+ import warnings
16
+
17
+ from math import log10, floor
18
+
19
def round_sig(x1, sig):
    """Round a number to a given number of significant figures.

    Args:
        x1 (float): Value to round.
        sig (int): Number of significant figures to keep.

    Returns:
        float: ``x1`` rounded to ``sig`` significant figures. Zero is returned unchanged.
    """
    # log10(0) is undefined and zero has no leading digit to anchor the rounding on
    if x1 == 0:
        return x1
    # position of the leading digit decides how many decimals survive
    leading_digit = int(floor(log10(abs(x1))))
    return round(x1, sig - leading_digit - 1)
21
+
22
+ from scipy.optimize import curve_fit
23
+
24
def gauss(x1, rv, sd, height, cont):
    """Evaluate a Gaussian of amplitude ``height``, centre ``rv`` and width ``sd`` on top of a constant ``cont``."""
    exponent = -(x1-rv)**2/(2*sd**2)
    return height*np.exp(exponent) + cont
27
+
28
# Observation-run label; not referenced by any function visible in this part of the file.
month = 'August2007'
# Data directory forwarded to LSD.blaze_correct by read_in_frames.
# NOTE(review): hard-coded to a path on the author's machine - confirm how callers are meant to override it.
directory = '/Users/lucydolan/Starbase/HD189733 old/HD189733/'

# run_name = input('Input nickname for this version of code (for saving figures): ')
# Label passed to the LSD routines and used in the names of saved figures.
run_name = 'test'
33
+
34
def findfiles(directory, file_type):
    """List the A-band FITS files of one type, leaving out the '_corrected' versions.

    Args:
        directory (str): Folder whose immediate sub-folders are searched.
        file_type (str): Substring identifying the file type within the file names.

    Returns:
        list: Paths matching ``*<file_type>**A*.fits`` that do not also match the
        ``*A_corrected*`` pattern, in the order glob returned them.
    """
    corrected = set(glob.glob('%s/*/*%s**A_corrected*.fits'%(directory, file_type)))
    candidates = glob.glob('%s/*/*%s**A*.fits'%(directory, file_type))
    return [path for path in candidates if path not in corrected]
48
+
49
def continuumfit(fluxes, wavelengths, errors, poly_ord):
    """Fit a polynomial continuum through the brightest point of each 100-pixel bin.

    The spectrum is scaled by its first flux value (or by the mean flux when that value is zero),
    sorted by wavelength and cut into bins of 100 pixels. The highest flux in each bin is taken as
    a continuum point and a polynomial of order ``poly_ord`` is fitted through those points.

    Args:
        fluxes (array): Observed fluxes.
        wavelengths (array): Wavelength coordinate of each flux value.
        errors (array): Flux errors.
        poly_ord (int): Order of the fitted polynomial.

    Returns:
        tuple: ``(coefficients, flux_obs, new_errors, fit)`` where ``coefficients`` holds the
        polynomial coefficients from lowest to highest power followed by the scale factor,
        ``flux_obs`` and ``new_errors`` are the inputs divided by the fit, and ``fit`` is the
        continuum evaluated at ``wavelengths`` in flux units.
    """
    scale = fluxes[0]
    if scale == 0:
        scale = np.mean(fluxes)

    sort_order = wavelengths.argsort()
    sorted_waves = wavelengths[sort_order]
    scaled_flux = fluxes[sort_order]/scale

    bin_width = 100
    peak_flux = []
    peak_waves = []
    for start in range(0, len(sorted_waves), bin_width):
        chunk_flux = scaled_flux[start:start+bin_width]
        chunk_waves = sorted_waves[start:start+bin_width]
        # the last entry of the argsort is the position of the largest flux in this bin
        brightest = chunk_flux.argsort()[-1]
        peak_flux.append(chunk_flux[brightest])
        peak_waves.append(chunk_waves[brightest])

    coeffs = np.polyfit(peak_waves, peak_flux, poly_ord)
    # evaluated on the wavelengths as given (not the sorted copy) and put back into flux units
    fit = np.poly1d(coeffs)(wavelengths)*scale

    flux_obs = fluxes/fit
    new_errors = errors/fit
    coefficients = np.concatenate((np.flip(coeffs), [scale]))

    return coefficients, flux_obs, new_errors, fit
75
+
76
def read_in_frames(order, filelist, file_type):
    """Read one order from every file and bring all frames onto a common continuum level.

    Every file is passed through ``LSD.blaze_correct``. The frame with the highest S/N is used as
    the reference: each frame is interpolated onto the reference wavelengths, divided by the
    reference frame, and a polynomial fitted to the binned ratio is divided out of the frame and
    its errors.

    Side effects: sets the module globals ``reference_wave``, ``frames_unadjusted`` and
    ``frame_errors_unadjusted`` (the latter two hold copies taken before the continuum adjustment).

    Args:
        order: Order forwarded to ``LSD.blaze_correct``.
        filelist (list): Files to read, one frame per file.
        file_type (str): File type forwarded to ``LSD.blaze_correct``.

    Returns:
        tuple: ``(frame_wavelengths, frames, errors, sns, telluric_spec)``. ``telluric_spec`` is
        the one returned by ``LSD.blaze_correct`` for the first file.
    """
    global reference_wave
    global frames_unadjusted
    global frame_errors_unadjusted

    def blaze_corrected(file):
        return LSD.blaze_correct(file_type, 'order', order, file, directory, 'unmasked', run_name, 'y')

    # the first frame fixes the array sizes
    fluxes, wavelengths, flux_error_order, sn, mid_wave_order, telluric_spec, overlap = blaze_corrected(filelist[0])

    frames = np.zeros((len(filelist), len(wavelengths)))
    errors = np.zeros((len(filelist), len(wavelengths)))
    frame_wavelengths = np.zeros((len(filelist), len(wavelengths)))
    sns = np.zeros((len(filelist), ))

    frames[0] = fluxes
    errors[0] = flux_error_order
    frame_wavelengths[0] = wavelengths
    sns[0] = sn

    # remaining frames; the first file is already loaded so it is not read a second time
    # (assumes LSD.blaze_correct returns the same result for the same file - TODO confirm)
    for i in range(1, len(filelist)):
        frames[i], frame_wavelengths[i], errors[i], sns[i], _, _, _ = blaze_corrected(filelist[i])

    ### finding highest S/N frame, saves this as reference frame
    is_reference = (sns==np.max(sns))
    reference_wave = frame_wavelengths[is_reference][0]
    # boolean indexing returns a copy, so patching zeros here leaves ``frames`` untouched
    reference_frame = frames[is_reference][0]
    reference_frame[reference_frame == 0] = 0.001

    # copies, because ``frames`` and ``errors`` are rescaled in place below
    frames_unadjusted = frames.copy()
    frame_errors_unadjusted = errors.copy()

    ### each frame is divided by reference frame and then adjusted so that all spectra lie at the same continuum
    for n in range(len(frames)):
        f2 = interp1d(frame_wavelengths[n], frames[n], kind = 'linear', bounds_error=False, fill_value = 'extrapolate')
        div_frame = f2(reference_wave)/reference_frame
        div_frame[reference_frame<=0] = 1

        # median of each fifth of the ratio, ignoring points more than 0.1 from the bin median
        binned = []
        binned_waves = []
        binsize = int(round(len(div_frame)/5, 1))
        for i in range(0, len(div_frame), binsize):
            if i+binsize<len(reference_wave):
                waves = reference_wave[i:i+binsize]
                flux = div_frame[i:i+binsize]
                near_median = abs(flux-np.median(flux))<0.1
                binned.append(np.median(flux[near_median]))
                binned_waves.append(np.median(waves[near_median]))

        binned = np.array(binned)
        binned_waves = np.array(binned_waves)

        ### fitting polynomial to div_frame, falling back to a lower order if the fit fails
        try:
            coeffs = np.polyfit(binned_waves, binned, 4)
        except (np.linalg.LinAlgError, TypeError, ValueError):
            coeffs = np.polyfit(binned_waves, binned, 2)
        fit = np.poly1d(coeffs)(frame_wavelengths[n])

        frames[n] = frames[n]/fit
        errors[n] = errors[n]/fit
        # zero-flux pixels get a tiny flux and a huge error so they carry no weight
        empty = (frames[n] == 0)
        frames[n][empty] = 0.00001
        errors[n][empty] = 1000000000

    return frame_wavelengths, frames, errors, sns, telluric_spec
152
+
153
def calc_deltav(wavelengths):
    """Calculates velocity pixel size

    Calculates the velocity pixel size for the LSD velocity grid based off the spectral wavelengths.
    The wavelength span divided by the number of samples is converted to a velocity at the
    mid-point of the wavelength range.

    Args:
        wavelengths (array): Wavelengths for ACID input spectrum (in Angstroms).

    Returns:
        float: Velocity pixel size in km/s
    """
    span = wavelengths[-1]-wavelengths[0]
    pixel_width = span/len(wavelengths)
    mid_wavelength = wavelengths[0]+(span/2)
    return pixel_width/mid_wavelength*2.99792458e5
166
+
167
def combine_spec(wavelengths_f, spectra_f, errors_f, sns_f):
    """Combine several spectra into a single inverse-variance weighted spectrum.

    Every frame is linearly interpolated onto the wavelength grid of the highest S/N frame.
    Pixels that were extrapolated, are NaN, or have zero flux or zero error are given an error of
    1e12 and therefore zero weight. ``spectra_f`` and ``errors_f`` are overwritten in place with
    the interpolated values, and the module global ``reference_wave`` is set.

    Args:
        wavelengths_f (array): Wavelengths of each frame, shape (frames, pixels). Zero entries are ignored.
        spectra_f (array): Fluxes of each frame (modified in place).
        errors_f (array): Flux errors of each frame (modified in place).
        sns_f (array): Signal-to-noise ratio of each frame.

    Returns:
        tuple: ``(reference_wave, spectrum_f, spec_errors_f, sn_f)``. ``sn_f`` is the weighted S/N
        of the last pixel that had any usable frame, or the mean of ``sns_f`` if no pixel had one.
    """
    global reference_wave
    masked_error = 1000000000000

    # the highest S/N frame supplies the common grid; it does not change inside the loop
    reference_wave = wavelengths_f[sns_f==np.max(sns_f)][0]

    for n in range(len(wavelengths_f)):
        valid = np.where(wavelengths_f[n] != 0)[0]
        frame_waves = wavelengths_f[n][valid]

        f2 = interp1d(frame_waves, spectra_f[n][valid], kind = 'linear', bounds_error=False, fill_value = 'extrapolate')
        f2_err = interp1d(frame_waves, errors_f[n][valid], kind = 'linear', bounds_error=False, fill_value = 'extrapolate')
        spectra_f[n] = f2(reference_wave)
        errors_f[n] = f2_err(reference_wave)

        ## mask out extrapolated areas
        inside = np.logical_and(reference_wave<=np.max(frame_waves), reference_wave>=np.min(frame_waves))
        spectra_f[n][~inside] = 1.
        errors_f[n][~inside] = masked_error

        ## mask out nans and zeros (these do not contribute to the main spectrum)
        unusable = np.isnan(spectra_f[n]) | (spectra_f[n] == 0) | np.isnan(errors_f[n]) | (errors_f[n] == 0)
        errors_f[n][unusable] = masked_error

    width = len(reference_wave)
    spectrum_f = np.zeros((width,))
    spec_errors_f = np.zeros((width,))
    # fallback so that an entirely masked input still returns a S/N instead of raising
    sn_f = np.mean(sns_f)

    for n in range(0, width):
        temp_spec_f = spectra_f[:, n]
        temp_err_f = errors_f[:, n]

        weights_f = 1/temp_err_f**2
        weights_f[temp_err_f>=masked_error] = 0.

        total_weight = np.sum(weights_f)
        if total_weight>0:
            weights_f = weights_f/total_weight
            # only sum frames that carry weight: 0*nan would otherwise turn the pixel into nan
            used = weights_f>0
            spectrum_f[n] = np.sum(weights_f[used]*temp_spec_f[used])
            sn_f = np.sum(weights_f*sns_f)/np.sum(weights_f)
            # NOTE(review): 1/sum(w**2) of the normalised weights is kept from the original -
            # confirm this is the intended error estimate
            spec_errors_f[n] = 1/np.sum(weights_f**2)
        else:
            spectrum_f[n] = np.mean(temp_spec_f)
            spec_errors_f[n] = masked_error

    return reference_wave, spectrum_f, spec_errors_f, sn_f
233
+
234
def od2flux(x):
    """Convert optical depth to flux relative to the continuum, ``exp(x) - 1``."""
    # expm1 keeps full precision for small |x|, where exp(x) - 1 cancels to zero
    return np.expm1(x)
236
+
237
def flux2od(x):
    """Convert flux relative to the continuum to optical depth, ``log(x + 1)``."""
    # log1p keeps full precision for small |x|, where x + 1 rounds to exactly 1
    return np.log1p(x)
239
+
240
# Module-level default. The functions in this part of the file that receive a ``no_line`` value
# from LSD.LSD bind it to a local name, so they do not update this one.
no_line = 100
241
+ ## model for the mcmc - takes the profile(z) and the continuum coefficents(inputs[k_max:]) to create a model spectrum.
242
def model_func(inputs, x):
    """Forward model for the MCMC: line spectrum from the profile multiplied by a polynomial continuum.

    Reads the module globals ``k_max`` (number of profile points) and ``alpha`` (set after LSD is run).

    Args:
        inputs (array): The first ``k_max`` entries are the profile; the entries after that are the
            continuum coefficients from lowest to highest power, and the last entry is the overall
            continuum scale factor.
        x (array): Wavelengths at which the model is evaluated.

    Returns:
        array: Model spectrum in flux.
    """
    profile = inputs[:k_max]

    # alpha maps the profile onto the spectrum; the exponential converts optical depth to flux
    line_model = np.exp(np.dot(alpha, profile))

    ## wavelengths are rescaled to lie between -1 and 1 - keeps the continuum coefficients small for emcee
    scale = 2/(np.max(x)-np.min(x))
    offset = 1 - scale*np.max(x)
    x_norm = (x*scale)+offset

    continuum = 0
    for power, coeff in enumerate(inputs[k_max:len(inputs)-1]):
        continuum = continuum + (coeff*x_norm**power)
    continuum = continuum * inputs[-1]

    return line_model * continuum
263
+
264
def convolve(profile, alpha):
    """Return the dot product of ``alpha`` with ``profile``."""
    return np.dot(alpha, profile)
267
+
268
+ ## maximum likelihood estimation for the mcmc model.
269
def log_likelihood(theta, x, y, yerr):
    """Gaussian log-likelihood of the data ``y`` (errors ``yerr``) given the model for ``theta`` at ``x``."""
    deviation = (y) - (model_func(theta, x))
    variance = yerr**2
    return -0.5 * np.sum(deviation ** 2 / variance + np.log(variance)+ np.log(2*np.pi))
275
+
276
+ ## imposes the prior restrictions on the inputs - rejects if profile point is less than -10 or greater than 0.5.
277
def log_prior(theta):
    """Log-prior for the MCMC parameters.

    Reads the module global ``k_max`` (number of profile points).

    Args:
        theta (array): Model parameters; the first ``k_max`` entries are the profile in optical depth.

    Returns:
        float: ``-inf`` if any profile point lies outside [-10, 0.5]; otherwise the sum of
        Gaussian log-densities (mean 0, sigma 0.01) of the flux at the five profile points at
        each end of the velocity grid, which pulls the profile wings towards the continuum.
    """
    z = np.asarray(theta[:k_max])

    # NaNs fail both comparisons and are therefore rejected as well
    if not np.all((z >= -10) & (z <= 0.5)):
        return -np.inf

    # five points from each end of the profile, converted from optical depth to flux
    edge = [j for i in range(0, 5) for j in (len(z)-i-1, i)]
    z_cont = np.exp(z[edge])-1

    p_pent = np.sum((np.log((1/np.sqrt(2*np.pi*0.01**2)))-0.5*(z_cont/0.01)**2))

    return p_pent
307
+
308
+ ## calculates log probability - used for mcmc
309
def log_probability(theta, x, y, yerr):
    """Log-posterior used by the sampler: prior plus likelihood, or ``-inf`` when the prior rejects ``theta``."""
    lp = log_prior(theta)
    if np.isfinite(lp):
        return lp + log_likelihood(theta, x, y, yerr)
    return -np.inf
315
+
316
+ ## iterative residual masking - mask continuous areas first - then possibly progress to masking the narrow lines
317
def residual_mask(wavelengths, data_spec_in, data_err, initial_inputs, poly_ord, linelist, velocities=np.arange(-25, 25, 0.82), pix_chunk=20, dev_perc=25, tell_lines = [3820.33, 3933.66, 3968.47, 4327.74, 4307.90, 4383.55, 4861.34, 5183.62, 5270.39, 5889.95, 5895.92, 6562.81, 7593.70, 8226.96], n_sig=1):
    """Mask badly modelled regions and listed lines by inflating their errors, then redo the fit.

    The data and the forward model built from ``initial_inputs`` are each scaled to the range 0-1
    and subtracted to give residuals. Three masks are applied to ``data_err`` (in place):

    1. runs of at least ``pix_chunk`` consecutive pixels whose residual exceeds ``dev_perc`` percent;
    2. a window around every wavelength in ``tell_lines``;
    3. pixels whose residual lies ``n_sig`` standard deviations or more from the median residual.

    The returned mask is taken after steps 1 and 2 only, so the sigma-clipping affects the
    continuum fit and LSD run done here but not the mask handed back to the caller.

    Args:
        wavelengths (array): Wavelengths of the spectrum.
        data_spec_in (array): Observed fluxes.
        data_err (array): Flux errors (modified in place).
        initial_inputs (array): Profile followed by continuum coefficients, as taken by ``model_func``.
        poly_ord (int): Order of the continuum polynomial.
        linelist (str): Path to the line list passed to ``LSD.LSD``.
        velocities (array, optional): Velocity grid passed to ``LSD.LSD``.
        pix_chunk (int, optional): Minimum length (in pixels) of a deviating run to be masked.
        dev_perc (int, optional): Allowed deviation of the scaled residuals, in percent.
        tell_lines (list, optional): Wavelengths of lines to mask.
        n_sig (int, optional): Number of standard deviations used for the sigma clip.

    Returns:
        tuple: ``(data_err, model_inputs, residual_masks)`` - the masked errors, the new profile
        concatenated with the new continuum coefficients, and a one-element tuple holding the
        boolean mask from steps 1 and 2.
    """
    chunk_error = 10000000000000000000
    telluric_error = 1000000000000000000

    forward = model_func(initial_inputs, wavelengths)

    # same wavelength normalisation (to between -1 and 1) as model_func
    a = 2/(np.max(wavelengths)-np.min(wavelengths))
    b = 1 - a*np.max(wavelengths)

    residuals = (data_spec_in - np.min(data_spec_in))/(np.max(data_spec_in)-np.min(data_spec_in)) - (forward - np.min(forward))/(np.max(forward)-np.min(forward))

    ### finds consecutive sections where at least pix_chunk points deviate by more than dev_perc - these are masked
    deviant = (abs(residuals)>dev_perc/100)
    run_start = None
    for pixel, is_deviant in enumerate(deviant):
        if is_deviant:
            if run_start is None:
                run_start = pixel
        elif run_start is not None:
            if pixel-run_start>=pix_chunk:
                data_err[run_start:pixel] = chunk_error
            run_start = None
    # a run that is still open at the end of the spectrum
    if run_start is not None and len(deviant)-run_start>=pix_chunk:
        data_err[run_start:] = chunk_error

    ## masking tellurics: 21 km/s expressed in wavelength, plus 3 wavelength units, either side of the line
    for line in tell_lines:
        limit = (21/2.99792458e5)*line +3
        in_window = np.logical_and((line-limit)<=wavelengths, wavelengths<=(limit+line))
        data_err[in_window] = telluric_error

    # mask handed back to the caller - taken before the sigma clip below
    residual_masks = tuple([data_err>=telluric_error])

    ### sigma clip masking
    m = np.median(residuals)
    sigma = np.std(residuals)
    upper_clip = m+n_sig*sigma
    lower_clip = m-n_sig*sigma

    data_err[residuals<=lower_clip] = chunk_error
    data_err[residuals>=upper_clip] = chunk_error

    poly_inputs, norm_flux, norm_err, fit = continuumfit(data_spec_in, (wavelengths*a)+b, data_err, poly_ord)
    velocities1, profile, profile_err, alpha, continuum_waves, continuum_flux, no_line = LSD.LSD(wavelengths, norm_flux, norm_err, linelist, 'False', poly_ord, 100, 30, run_name, velocities)

    return data_err, np.concatenate((profile, poly_inputs)), residual_masks
390
+
391
def get_profiles(all_frames, order, poly_cos, continuum_error, counter):
    """Continuum-correct one frame with the fitted polynomial and store its LSD profile.

    Reads the module globals ``frames``, ``frame_errors``, ``frame_wavelengths``, ``sns``,
    ``mask_idx``, ``linelist``, ``poly_ord`` and ``velocities``.

    Args:
        all_frames (array): Output array; entry ``[counter, order]`` receives ``[profile, profile errors]``.
        order (int): Position along the second axis of ``all_frames`` to write to.
        poly_cos (list): Continuum coefficients from lowest to highest power, followed by the scale factor.
        continuum_error (array): Error on the continuum fit at each pixel.
        counter (int): Index of the frame to process.

    Returns:
        array: ``all_frames``, left unchanged if the frame has no positive flux.
    """
    frame_flux = frames[counter]
    frame_error = frame_errors[counter]
    wavelengths = frame_wavelengths[counter]
    sn = sns[counter]

    # continuum polynomial evaluated on wavelengths rescaled to between -1 and 1
    a = 2/(np.max(wavelengths)-np.min(wavelengths))
    b = 1 - a*np.max(wavelengths)
    x_norm = (a*wavelengths)+b

    continuum = 0
    for i in np.arange(0, len(poly_cos)-1):
        continuum = continuum+poly_cos[i]*x_norm**(i)
    continuum = continuum*poly_cos[-1]

    # the residual mask lives on the reference (highest S/N) grid; interpolate it onto this frame
    if len(frame_wavelengths)>1:
        reference_wave = frame_wavelengths[sns==max(sns)][0]
    else:
        reference_wave = frame_wavelengths[0]
    mask_pos = np.ones(reference_wave.shape)
    mask_pos[mask_idx]=10000000000000000000
    to_frame_grid = interp1d(reference_wave, mask_pos, bounds_error = False, fill_value = np.nan)
    masked_here = tuple([to_frame_grid(wavelengths)>=10000000000000000000])

    frame_error[masked_here]=10000000000000000000

    # correcting continuum: relative errors of the flux and of the continuum are added
    relative_error = (frame_error/frame_flux) + (continuum_error/continuum)
    frame_flux = frame_flux/continuum
    frame_error = frame_flux*relative_error

    # negative fluxes are replaced by the continuum level with a huge error
    negative = tuple([frame_flux<0])
    frame_flux[negative]=1.
    frame_error[negative]=10000000000000000000

    positive = tuple([frame_flux>0])
    if len(frame_flux[positive])==0:
        print('continuing... frame %s'%counter)
        return all_frames

    _vel, profile1, profile_errors, _alpha, _cont_waves, _cont_flux, _no_line = LSD.LSD(wavelengths, frame_flux, frame_error, linelist, 'False', poly_ord, sn, 10, 'test', velocities)

    # optical depth -> flux, propagating the errors through the exponential
    profile_f = np.exp(profile1)
    profile_errors_f = np.sqrt(profile_errors**2/profile_f**2)
    profile_f = profile_f-1

    all_frames[counter, order]=[profile_f, profile_errors_f]

    return all_frames
444
+
445
def combineprofiles(spectra, errors):
    """Combine several profiles into one weighted-mean profile.

    NaN values are first replaced by the mean of their two neighbours in the flattened array
    (or by 0 if that is still NaN). Profiles whose values sum to zero are skipped. Each remaining
    profile is weighted by the mean of ``1/error**2`` over its pixels.

    Args:
        spectra (array-like): Profiles, shape (number of profiles, number of pixels).
        errors (array-like): Errors of the profiles, same shape as ``spectra``.

    Returns:
        tuple: ``(spectrum, spec_errors)`` as lists with one entry per pixel. If only one profile
        is usable it is returned together with its own errors.

    Raises:
        ValueError: If no profile has a non-zero sum.
    """
    spectra = np.array(spectra)
    errors = np.array(errors)

    if np.isnan(spectra).any():
        shape_og = spectra.shape
        flat = spectra.ravel()
        # ``x == np.nan`` is never true, so NaNs have to be located with isnan
        for n in np.flatnonzero(np.isnan(flat)):
            neighbours = [flat[m] for m in (n-1, n+1) if 0 <= m < flat.size]
            filled = np.mean(neighbours) if neighbours else np.nan
            flat[n] = 0. if np.isnan(filled) else filled
        spectra = flat.reshape(shape_og)

    keep = [n for n in range(0, len(spectra)) if np.sum(spectra[n])!=0]
    if not keep:
        raise ValueError('no non-zero profiles to combine')

    spectra_to_combine = spectra[keep]
    kept_errors = errors[keep]

    # the scatter between profiles is undefined for a single profile, so pass it through
    if len(keep)==1:
        return list(spectra_to_combine[0]), list(kept_errors[0])

    weights = np.array([np.mean(1/row**2) for row in kept_errors])
    weights = weights/np.sum(weights)
    weight_norm = np.sqrt(np.sum(weights**2))

    spectrum = []
    spec_errors = []
    for n in range(0, spectra_to_combine.shape[1]):
        temp_spec = spectra_to_combine[:, n]
        spectrum.append(np.sum(weights*temp_spec)/np.sum(weights))
        # NOTE(review): this uses the sample variance (stdev**2), kept from the original -
        # confirm whether the standard deviation was intended
        spec_errors.append((stdev(temp_spec)**2)*weight_norm)

    return spectrum, spec_errors
485
+
486
def ACID(input_wavelengths, input_spectra, input_spectral_errors, line, frame_sns, vgrid, all_frames='default', poly_or=3, pix_chunk = 20, dev_perc = 25, n_sig=1, telluric_lines = [3820.33, 3933.66, 3968.47, 4327.74, 4307.90, 4383.55, 4861.34, 5183.62, 5270.39, 5889.95, 5895.92, 6562.81, 7593.70, 8226.96], order = 0):
    """Accurate Continuum fItting and Deconvolution

    Fits the continuum of the given spectra and performs LSD on the continuum corrected spectra, returning an LSD profile for each spectrum given.
    Spectra must cover a similar wavelength range.

    The function stores its working data in module globals (``velocities``, ``linelist``, ``poly_ord``, ``frames``, ``frame_wavelengths``, ``frame_errors``, ``sns``, ``alpha``, ``k_max``, ``mask_idx``, ``flat_samples``, ``poly_cos``), which the model, prior and profile functions of this module read.

    Args:
        input_wavelengths (list): Wavelengths for each frame (in Angstroms).
        input_spectra (list): Spectral frames (in flux).
        input_spectral_errors (list): Errors for each frame (in flux).
        line (str): Path to linelist. Takes VALD linelist in long or short format as input. Minimum line depth input into VALD must be less than 1/(3*SN) where SN is the highest signal-to-noise ratio of the spectra.
        frame_sns (list): Average signal-to-noise ratio for each frame (used to calculate minimum line depth to consider from line list).
        vgrid (array): Velocity grid for LSD profiles (in km/s).
        all_frames (str or array, optional): Output array for resulting profiles. Only necessary if looping ACID function over many wavelength regions or orders (in the case of echelle spectra). General shape needs to be (no. of frames, 1, 2, no. of velocity pixels).
        poly_or (int, optional): Order of polynomial to fit as the continuum.
        pix_chunk (int, optional): Size of 'bad' regions in pixels. 'bad' areas are identified by the residuals between an initial model and the data. If a residual deviates by a specified percentage (dev_perc) for a specified number of pixels (pix_chunk) it is masked. The smaller the region the less aggressive the masking applied will be.
        dev_perc (int, optional): Allowed deviation percentage. 'bad' areas are identified by the residuals between an initial model and the data. If a residual deviates by a specified percentage (dev_perc) for a specified number of pixels (pix_chunk) it is masked. The smaller the deviation percentage the less aggressive the masking applied will be.
        n_sig (int, optional): Number of sigma to clip in sigma clipping. Ill fitting lines are identified by sigma-clipping the residuals between an initial model and the data. The regions that are clipped from the residuals will be masked in the spectra. This masking is only applied to find the continuum fit and is removed when LSD is applied to obtain the final profiles.
        telluric_lines (list, optional): List of wavelengths (in Angstroms) of telluric lines to be masked. This can also include problematic lines/features that should be masked also. For each wavelength in the list ~3Å either side of the line is masked.
        order (int, optional): Only applicable if an all_frames output array has been provided as this is the order position in that array where the result should be input. i.e. if order = 5 the output profile and errors would be inserted in all_frames[:, 5].

    Returns:
        array: Resulting profiles and errors for spectra.
    """
    print('Initialising...')

    t0 = time.time()

    # settings are published as globals because the sampler callbacks and get_profiles read them
    global velocities
    velocities = vgrid.copy()
    global linelist
    linelist = line
    global poly_ord
    poly_ord = poly_or

    ## combines spectra from each frame (weighted by their errors), returns the S/N of the combined spectrum
    global frames
    global frame_wavelengths
    global frame_errors
    global sns
    frame_wavelengths = np.array(input_wavelengths)
    frames = np.array(input_spectra)
    frame_errors = np.array(input_spectral_errors)
    sns = np.array(frame_sns)

    # default output array: one order slot per frame, holding [profile, errors]
    if type(all_frames)!=np.ndarray:
        if all_frames=='default':
            all_frames = np.zeros((len(frames), 1, 2, len(velocities)))

    # combine_spec overwrites its inputs, so it is given copies and the globals stay as provided
    fw = frame_wavelengths.copy()
    f = frames.copy()
    fe = frame_errors.copy()
    s = sns.copy()

    if len(fw)>1:
        wavelengths, fluxes, flux_error_order, sn = combine_spec(fw, f, fe, s)
    else: wavelengths, fluxes, flux_error_order, sn = fw[0], f[0], fe[0], s[0]

    ### getting the initial polynomial coefficients (fitted on wavelengths rescaled to between -1 and 1)
    a = 2/(np.max(wavelengths)-np.min(wavelengths))
    b = 1 - a*np.max(wavelengths)
    poly_inputs, fluxes1, flux_error_order1, fit = continuumfit(fluxes, (wavelengths*a)+b, flux_error_order, poly_ord)

    # t2 = time.time()
    # print('Set up before LSD %s'%(t2-t0))
    #### getting the initial profile
    global alpha
    velocities, profile, profile_errors, alpha, continuum_waves, continuum_flux, no_line= LSD.LSD(wavelengths, fluxes1, flux_error_order1, linelist, 'False', poly_ord, sn, 30, run_name, velocities)

    # t3 = time.time()
    # print('LSD run takes: %s'%(t3-t2))

    ## Setting the number of points in vgrid (k_max)
    global k_max
    k_max = len(profile)
    # parameter vector for the sampler: profile points followed by the continuum coefficients
    model_inputs = np.concatenate((profile, poly_inputs))

    ## setting x, y, yerr for emcee
    x = wavelengths
    y = fluxes
    yerr = flux_error_order

    ## wavelength normalisation factors - also used in the figures below
    a = 2/(np.max(x)-np.min(x))
    b = 1 - a*np.max(x)

    # masking based off residuals
    global mask_idx

    # NOTE(review): model_inputs_resi is not used again in this function
    yerr, model_inputs_resi, mask_idx = residual_mask(x, y, yerr, model_inputs, poly_ord, linelist, pix_chunk=pix_chunk, dev_perc=dev_perc, tell_lines = telluric_lines, n_sig=n_sig)

    # t4 = time.time()
    # print('residual masking takes: %s' %(t4-t3))

    ## setting number of walkers and their start values(pos)
    ndim = len(model_inputs)
    nwalkers= ndim*3
    rng = np.random.default_rng()

    ### starting values of walkers with independent variation
    # profile points are scattered with a fixed sigma; the trailing continuum parameters are
    # scattered by a tenth of their own value rounded to one significant figure
    sigma = 0.8*0.005
    pos = []
    for i in range(0, ndim):
        if i <ndim-poly_ord-2:
            pos2 = rng.normal(model_inputs[i], sigma, (nwalkers, ))
        else:
            sigma = abs(round_sig(model_inputs[i], 1))/10
            pos2 = rng.normal(model_inputs[i], sigma, (nwalkers, ))
        pos.append(pos2)

    # transpose to the (nwalkers, ndim) layout
    pos = np.array(pos)
    pos = np.transpose(pos)

    ## the number of steps is how long it runs for - if it doesn't look like it's settling at a value try increasing the number of steps
    steps_no = 8000

    t1 = time.time()
    # print('MCMC set up takes: %s'%(t1-t4))
    # print('Initialised in %ss'%round((t1-t0), 2))

    print('Fitting the Continuum...')
    # sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(x, y, yerr))
    # sampler.run_mcmc(pos, steps_no, progress=True)

    # the sampler evaluates log_probability through a multiprocessing pool
    with Pool() as pool:
        sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(x, y, yerr), pool=pool)
        sampler.run_mcmc(pos, steps_no, progress=True)

    ## discarding all values except the last 1000 steps.
    dis_no = int(np.floor(steps_no-1000))

    global flat_samples
    ## combining all walkers together
    flat_samples = sampler.get_chain(discard=dis_no, flat=True)

    ## getting the final profile and continuum values - median of last 1000 steps
    profile = []
    global poly_cos
    poly_cos = []
    profile_err = []
    poly_cos_err = []

    for i in range(ndim):
        # NOTE(review): the median/std on the next two lines are overwritten straight away by
        # the percentile-based values
        mcmc = np.median(flat_samples[:, i])
        error = np.std(flat_samples[:, i])
        mcmc = np.percentile(flat_samples[:, i], [16, 50, 84])
        error = np.diff(mcmc)
        # the larger of the two percentile intervals is used as the error
        if i<k_max:
            profile.append(mcmc[1])
            profile_err.append(np.max(error))
        else:
            poly_cos.append(mcmc[1])
            poly_cos_err.append(np.max(error))

    profile = np.array(profile)
    profile_err = np.array(profile_err)

    # diagnostic figures are switched off; with fig_opt fixed to 'n' the block below never runs
    fig_opt = 'n'
    if fig_opt =='y':

        # plots random models from flat_samples - lets you see if it's converging
        plt.figure()
        inds = np.random.randint(len(flat_samples), size=100)
        for ind in inds:
            sample = flat_samples[ind]
            mdl = model_func(sample, x)
            mdl1 = 0
            for i in np.arange(k_max, len(sample)-1):
                mdl1 = mdl1+sample[i]*((a*x)+b)**(i-k_max)
            mdl1 = mdl1*sample[-1]
            plt.plot(x, mdl1, "C1", alpha=0.1)
            plt.plot(x, mdl, "g", alpha=0.1)
        plt.scatter(x, y, color = 'k', marker = '.', label = 'data')
        plt.xlabel("wavelengths")
        plt.ylabel("flux")
        plt.title('mcmc models and data')
        plt.savefig('figures/mcmc_and_data.png')

        prof_flux = np.exp(profile)-1

        # plots the mcmc profile - will have extra panel if it's for data
        fig, ax0 = plt.subplots()
        ax0.plot(velocities, profile, color = 'r', label = 'mcmc')
        zero_line = [0]*len(velocities)
        ax0.plot(velocities, zero_line)
        ax0.plot(velocities, model_inputs[:k_max], label = 'initial')
        ax0.fill_between(velocities, profile-profile_err, profile+profile_err, alpha = 0.3, color = 'r')
        ax0.set_xlabel('velocities')
        ax0.set_ylabel('optical depth')
        secax = ax0.secondary_yaxis('right', functions = (od2flux, flux2od))
        secax.set_ylabel('flux')
        ax0.legend()
        plt.savefig('figures/profile_%s'%(run_name))

        # plots mcmc continuum fit on top of data
        plt.figure('continuum fit from mcmc')
        plt.plot(x, y, color = 'k', label = 'data')
        mdl1 =0
        for i in np.arange(0, len(poly_cos)-1):
            mdl1 = mdl1+poly_cos[i]*((a*x)+b)**(i)
        mdl1 = mdl1*poly_cos[-1]
        plt.plot(x, mdl1, label = 'mcmc continuum fit')
        mdl1_poserr =0
        for i in np.arange(0, len(poly_cos)-1):
            mdl1_poserr = mdl1_poserr+(poly_cos[i]+poly_cos_err[i])*((a*x)+b)**(i)
        mdl1_poserr = mdl1_poserr*poly_cos[-1]
        mdl1_neg =0
        for i in np.arange(0, len(poly_cos)-1):
            mdl1_neg = mdl1_neg+(poly_cos[i]-poly_cos_err[i])*((a*x)+b)**(i)
        mdl1_neg = mdl1_neg*poly_cos[-1]
        plt.fill_between(x, mdl1_neg, mdl1_poserr, alpha = 0.3)
        mdl1_err =abs(mdl1-mdl1_neg)
        plt.legend()
        plt.title('continuum from mcmc')
        plt.xlabel("wavelengths")
        plt.ylabel("flux")
        plt.savefig('figures/cont_%s'%(run_name))

        mcmc_inputs = np.concatenate((profile, poly_cos))
        mcmc_mdl = model_func(mcmc_inputs, x)

        residuals_2 = (y+1) - (mcmc_mdl+1)

        fig, ax = plt.subplots(2,figsize=(16,9), gridspec_kw={'height_ratios': [2, 1]}, num = 'MCMC and true model', sharex = True)
        non_masked = tuple([yerr<10])
        #ax[0].plot(x, y+1, color = 'r', alpha = 0.3, label = 'data')
        #ax[0].plot(x[non_masked], mcmc_mdl[non_masked]+1, color = 'k', alpha = 0.3, label = 'mcmc spec')
        ax[1].scatter(x[non_masked], residuals_2[non_masked], marker = '.')
        ax[0].plot(x, y, 'r', alpha = 0.3, label = 'data')
        ax[0].plot(x, mcmc_mdl, 'k', alpha =0.3, label = 'mcmc spec')
        residual_masks = tuple([yerr>=100000000000000])

        #residual_masks = tuple([yerr>10])
        ax[0].scatter(x[residual_masks], y[residual_masks], label = 'masked', color = 'b', alpha = 0.3)
        ax[0].legend(loc = 'lower right')
        #ax[0].set_ylim(0, 1)
        #plotdepths = -np.array(line_depths)
        #ax[0].vlines(line_waves, plotdepths, 1, label = 'line list', color = 'c', alpha = 0.5)
        ax[1].plot(x, residuals_2, '.')
        #ax[1].scatter(x[residual_masks], residuals_2[residual_masks], label = 'masked', color = 'b', alpha = 0.3)
        z_line = [0]*len(x)
        ax[1].plot(x, z_line, '--')
        plt.savefig('figures/forward_%s'%(run_name))


        fig, ax0 = plt.subplots()
        ax0.plot(velocities, profile, color = 'r', label = 'mcmc')
        zero_line = [0]*len(velocities)
        ax0.plot(velocities, zero_line)
        ax0.plot(velocities, model_inputs[:k_max], label = 'initial')
        ax0.fill_between(velocities, profile-profile_err, profile+profile_err, alpha = 0.3, color = 'r')
        ax0.set_xlabel('velocities')
        ax0.set_ylabel('optical depth')
        ax0.legend()
        plt.savefig('figures/final_profile_%s'%(run_name))

    print('Getting the final profiles...')

    # finding error for the continuum fit: scatter of the continuum over 50 random posterior samples
    inds = np.random.randint(len(flat_samples), size=50)
    conts = []
    for ind in inds:
        sample = flat_samples[ind]
        mdl = model_func(sample, wavelengths)
        #mdl = model_func(sample, x)
        #mdl = mdl[idx]
        mdl1_temp = 0
        for i in np.arange(k_max, len(sample)-1):
            mdl1_temp = mdl1_temp+sample[i]*((a*wavelengths)+b)**(i-k_max)
        mdl1_temp = mdl1_temp*sample[-1]
        conts.append(mdl1_temp)

    continuum_error = np.std(np.array(conts), axis = 0)

    # one get_profiles call per frame; each pool worker returns its own copy of all_frames,
    # from which only the entry for its own frame is copied back
    task_part = partial(get_profiles, all_frames, order, poly_cos, continuum_error)
    if len(frames)>1:
        with mp.Pool(mp.cpu_count()) as pool:
            results=[pool.map(task_part, np.arange(len(frames)))]
        results = np.array(results[0])
        for i in range(len(frames)):
            all_frames[i]=results[i][i]
    # for counter in range(len(frames)):
    #     all_frames = get_profiles(all_frames, order, poly_cos, continuum_error, counter)
    else: all_frames = get_profiles(all_frames, order, poly_cos, continuum_error, 0)

    return all_frames
772
+
773
def ACID_HARPS(filelist, line, vgrid, poly_or=3, order_range=np.arange(10,70),
               save_path = './', file_type = 'e2ds', pix_chunk = 20,
               dev_perc = 25, n_sig=1,
               telluric_lines = [3820.33, 3933.66, 3968.47, 4327.74, 4307.90, 4383.55, 4861.34, 5183.62, 5270.39, 5889.95, 5895.92, 6562.81, 7593.70, 8226.96]):
    """Accurate Continuum fItting and Deconvolution for HARPS e2ds and s1d spectra (DRS pipeline 3.5)

    Fits the continuum of the given spectra and performs LSD on the continuum
    corrected spectra, returning an LSD profile for each file given. Files must
    all be kept in the same folder as well as their corresponding blaze files.
    If 's1d' files are being used their e2ds equivalents must also be in this
    folder. Result files containing profiles and associated errors for each
    order (or corresponding wavelength range in the case of 's1d' files) will
    be created and saved to a specified folder. It is recommended that this
    folder is separate from the input files.

    Args:
        filelist (list): List of files. Files must come from the same
            observation night as the continuum is fit for a combined spectrum
            of all frames. A profile and associated errors will be produced
            for each file specified.
        line (str): Path to linelist. Takes VALD linelist in long or short
            format as input. Minimum line depth input into VALD must be less
            than 1/(3*SN) where SN is the highest signal-to-noise ratio of the
            spectra.
        vgrid (array): Velocity grid for LSD profiles (in km/s).
        poly_or (int, optional): Order of polynomial to fit as the continuum.
        order_range (array, optional): Orders to be included in the final
            profiles. If s1d files are input, the corresponding wavelengths
            will be considered. The orders do not need to be contiguous.
        save_path (str, optional): Path to folder that result files will be
            saved to. It is used as a plain string prefix of the output file
            name, so it should end with a path separator. Pass 'no save' to
            skip writing result files.
        file_type (str, optional): 'e2ds' or 's1d'.
        pix_chunk (int, optional): Size of 'bad' regions in pixels. 'bad'
            areas are identified by the residuals between an initial model and
            the data. If a residual deviates by a specified percentage
            (dev_perc) for a specified number of pixels (pix_chunk) it is
            masked. The smaller the region the less aggressive the masking
            applied will be.
        dev_perc (int, optional): Allowed deviation percentage. 'bad' areas
            are identified by the residuals between an initial model and the
            data. If a residual deviates by a specified percentage (dev_perc)
            for a specified number of pixels (pix_chunk) it is masked. The
            smaller the deviation percentage the less aggressive the masking
            applied will be.
        n_sig (int, optional): Number of sigma to clip in sigma clipping. Ill
            fitting lines are identified by sigma-clipping the residuals
            between an initial model and the data. The regions that are
            clipped from the residuals will be masked in the spectra. This
            masking is only applied to find the continuum fit and is removed
            when LSD is applied to obtain the final profiles.
        telluric_lines (list, optional): List of wavelengths of telluric lines
            to be masked in Angstroms. This can also include problematic
            lines/features that should be masked. For each wavelength in the
            list ~3Å either side of the line is masked.

    Returns:
        list: Barycentric Julian Date for files
        list: Profiles (in normalised flux)
        list: Profile Errors (in normalised flux)

    Raises:
        ValueError: If order_range is empty.
    """

    # These module-level names are (re)bound on every call; they are kept
    # global exactly as before so other functions in this file can read them.
    global velocities, all_frames, linelist, poly_ord
    global frames, frame_wavelengths, frame_errors, sns

    # len() rather than truthiness: order_range is usually a numpy array,
    # whose truth value is ambiguous.
    n_orders = len(order_range)
    if n_orders == 0:
        raise ValueError('order_range must contain at least one order')

    velocities = vgrid.copy()
    # axes: (frame, position of order within order_range, [profile, error], velocity)
    all_frames = np.zeros((len(filelist), n_orders, 2, len(velocities)))
    linelist = line
    poly_ord = poly_or

    # Orders are stored by their position in order_range (not by
    # `order - min(order_range)`), so non-contiguous order lists stay within
    # the bounds of all_frames. For a contiguous ascending range the two
    # indexing schemes are identical.
    for order_idx, order in enumerate(order_range):

        print('Running for order %s/%s...'%(order_idx+1, n_orders))

        frame_wavelengths, frames, frame_errors, sns, _telluric_spec = read_in_frames(order, filelist, file_type)

        all_frames = ACID(frame_wavelengths, frames, frame_errors, linelist, sns, velocities, all_frames, poly_or, pix_chunk, dev_perc, n_sig, telluric_lines, order = order_idx)

    # adding into fits files for each frame
    BJDs = []
    profiles = []
    errors = []
    for frame_no in range(len(frames)):
        # Only the BJD keyword is needed from the input file; the context
        # manager closes the file handle as soon as it has been read.
        with fits.open(filelist[frame_no]) as fits_file:
            bjd = fits_file[0].header['ESO DRS BJD']
        BJDs.append(bjd)

        hdu = fits.HDUList()
        hdr = fits.Header()

        # one HDU per order, holding [profile, profile error]
        for order_idx, order in enumerate(order_range):
            hdr['ORDER'] = order
            hdr['BJD'] = bjd
            hdr['CRVAL1'] = np.min(velocities)
            hdr['CDELT1'] = velocities[1]-velocities[0]

            profile = all_frames[frame_no, order_idx, 0]
            profile_err = all_frames[frame_no, order_idx, 1]

            hdu.append(fits.PrimaryHDU(data = [profile, profile_err], header = hdr))

        if save_path != 'no save':
            hdu.writeto('%s%s_%s_%s.fits'%(save_path, month, frame_no, run_name), output_verify = 'fix', overwrite = True)

        # collapse the per-order profiles of this frame into a single profile
        result1, result2 = combineprofiles(all_frames[frame_no, :, 0], all_frames[frame_no, :, 1])
        profiles.append(result1)
        errors.append(result2)

    return BJDs, profiles, errors
851
+
852
+