PyPI - piratical - Versions diffs - 0.1.0__py2.py3-none-any.whl - Mend

piratical 0.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

piratical/__init__.py +1 -0
piratical/piratical.py +853 -0
piratical-0.1.0.dist-info/METADATA +27 -0
piratical-0.1.0.dist-info/RECORD +6 -0
piratical-0.1.0.dist-info/WHEEL +6 -0
piratical-0.1.0.dist-info/top_level.txt +1 -0

piratical/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .piratical import piratical_pipeline

piratical/piratical.py ADDED Viewed

@@ -0,0 +1,853 @@
+import numpy as np
+import glob
+import os
+import logging as log
+import pandas as pd
+import jwst
+import datetime
+import warnings
+import matplotlib.pyplot as plt
+import matplotlib as mpl
+from scipy.stats import median_abs_deviation
+from scipy.optimize import minimize
+from astropy.io import fits
+from astropy.table import Table
+from mastquery import jwst as jwst_mq
+from astropy.time import Time
+from jwst import datamodels
+from jwst.pipeline import Detector1Pipeline
+from jwst.pipeline import Spec2Pipeline
+from jwst.pipeline import Spec3Pipeline
+class piratical_pipeline:
+    """
+    Code to return reduced version of all NIRSpec MSA data for an input
+    source catalogue.
+    Parameters
+    ----------
+    catalogue : str or astropy Table or pandas DataFrame
+        Catalogue of sources to search for NIRSpec MSA data on
+    force_mast_update : bool, optional
+        Forces the code to re-query MAST for exposures and re-download
+        MSA files rather than using previously saved versions.
+    skip_l1_pipeline : bool, optional
+        Whether to skip the Level 1 pipeline processing and just download
+        the rate files from MAST. Much faster and uses less disk space
+        but the MAST rate files don't have the flicker noise correction
+        applied. I'd recommend skipping L1 for a first look at the data,
+        then re-running with the L1 pipeline to get final science spectra
+    gratings : list or "all", optional
+        Can be used to restrict which optical elements to pull data for.
+        Defaults to all, but can be set to a list of gratings, e.g.,
+        ["PRISM", "G140M"] - note the required use of capital letters.
+    match_radius : float, optional
+        Radius within which to match input catalogue sources to NIRSpec
+        sources in arcseconds. Default value is 0.2 arcsec.
+    """
+    def __init__(self, catalogue_name, force_mast_update=False,
+                 skip_l1_pipeline=False, gratings="all", match_radius=0.2):
+        self.skip_l1_pipeline = skip_l1_pipeline
+        self.force_mast_update = force_mast_update
+        self.catalogue_name = catalogue_name
+        self.gratings = gratings
+        self.match_radius = match_radius
+        self.catalogue = Table.read(self.catalogue_name)
+        # Change id column to ID if needed for later matching
+        if ("id" in self.catalogue.columns
+            and not "ID" in self.catalogue.columns):
+            self.catalogue.rename_column("id", "ID")
+        if force_mast_update:
+            os.system("rm all_exposures.fits nirspec_msa_all_objects.fits")
+        folders = ["msa_files", "uncal_files", "rate_files", "cal_files",
+                   "s2d_files", "x1d_files", "plots"]
+        for folder in folders:
+            if not os.path.exists(folder):
+                os.mkdir(folder)
+        self.get_all_exposures()
+        self.pull_msa_files()
+        out_cat_name = (self.catalogue_name.replace(".fits", "")
+                                + "_nirspec_observations.fits")
+        # Match with exposure table to find exposures object is in
+        os.system("stilts tmatch2 in1=" + self.catalogue_name
+                  + " in2=NIRSpec_all_observations_all_objects.fits out="
+                  + out_cat_name + " matcher=sky values1='ra dec'"
+                  + " values2='ra dec' params=" + str(self.match_radius)
+                  + " join=1and2 find=all")
+        matched = Table.read(out_cat_name).to_pandas()
+        matched["msa_file_met_id"] = matched["msa_file_met_id"].str.decode("utf-8")
+        matched["grating"] = matched["grating"].str.decode("utf-8")
+        if self.gratings != "all":
+            grating_mask = np.isin(matched["grating"], self.gratings)
+            matched = matched.groupby(grating_mask).get_group(True)
+        self.matched = matched
+        self.all_exp["msa_file_met_id"] = self.all_exp["msa_file_met_id"].str.decode("utf-8")
+        self.all_exp["fileSetName"] = self.all_exp["fileSetName"].str.decode("utf-8")
+        print("\nTable of NIRSpec observations for input catalogue has been"
+              + " saved to", out_cat_name, "To download and reduce the"
+              + " corresponding data,execute the run_pipeline() method.\n")
+    def get_all_exposures(self):
+        # Don't remake the nirspec exposures catalogue if it exists
+        if os.path.exists("all_exposures.fits"):
+            print("\n")
+            print("all_exposures.fits exists, skipping MAST query.")
+            print("To pull new MAST obs delete this file and run again.\n")
+            self.all_exp = Table.read("all_exposures.fits").to_pandas()
+            return
+        print("\n")
+        print("Querying MAST for all NIRSpec exposures...\n")
+        # Controls what's printed to terminal whilst the query is running
+        fmt_str = "%(module)s.%(funcName)s : %(levelname)s : %(message)s"
+        log.basicConfig(level=log.INFO,
+                        handlers=[log.StreamHandler(),
+                                  log.FileHandler('/tmp/mastquery.log')],
+                        format=fmt_str)
+        # Set up query params
+        filters = []
+        filters += jwst_mq.make_query_filter('productLevel', values=['2b'])
+        filters += jwst_mq.make_query_filter('exp_type', text='NRS_MSASPEC')
+        # Extra example filters
+        """
+        # Just guided exposures
+        filters += jwst_mq.make_query_filter('pcs_mode', values=['FINEGUIDE'])
+        filters += jwst_mq.make_program_filter([3543])
+        filters += jwst_mq.make_query_filter('filename',
+                                             text='%_[cr]a%[le].fits')
+        filters += jwst_mq.make_query_filter('expstart',
+                                             range=[59744.5, 59764.5])
+        """
+        # Run query with mastquery
+        res = jwst_mq.query_all_jwst(filters=filters, columns='*').to_pandas()
+        Table.from_pandas(res).write("all_uncal_files.fits", overwrite=True)
+        # Drop files that have not yet been publicly released
+        now = datetime.datetime.now(datetime.timezone.utc)
+        mjd = Time(now, scale='utc').mjd
+        res = res.groupby(res["t_obs_release"] < mjd).get_group(True)
+        # Cut to a sensible number of columns, the whole table is huge
+        res = res[["fileSetName", "msametfl", "msametid", "grating", "title",
+                   "proposal_pi", "filter-pupil", "asntable"]]
+        # Swap out any msametfl not ending with 01_msa.fits (identical)
+        res["orig_msametfl"] = res["msametfl"]
+        for i in range(len(res)):
+            if not res["msametfl"].iloc[i].endswith("01_msa.fits"):
+                res["msametfl"].iloc[i] = (res["msametfl"].iloc[i][:-11]
+                                           + "01_msa.fits")
+        # Create joined column of msametfl + msa meta id for later merge
+        res["msa_file_met_id"] = (res["msametfl"] + "_"
+                                  + res["msametid"].astype(str))
+        # Save to file and load again to make formatting identical
+        Table.from_pandas(res).write("all_exposures.fits",
+                                     overwrite=True)
+        res = Table.read("all_exposures.fits").to_pandas()
+        self.all_exp = res
+    def pull_msa_files(self):
+        if os.path.exists("nirspec_msa_all_objects.fits"):
+            print("\n")
+            print("nirspec_msa_all_objects.fits exists, skipping MSA download.")
+            print("To re-process MSA files delete this file and run again.\n")
+            return
+        # Get list of unique msa metafiles
+        msa_files = self.all_exp["msametfl"]
+        msa_files = msa_files.drop_duplicates().str.decode('utf-8').values
+        print("\n")
+        print("Downloading MSA files from MAST...\n")
+        # Pull all msa metafiles from MAST
+        for i in range(len(msa_files)):
+            if os.path.exists("msa_files/" + msa_files[i]):
+                continue
+            print("Downloading metafile", i + 1, "of", len(msa_files))
+            os.system("curl -H --globoff --location-trusted -f --progress-bar "
+                      + "--output ./msa_files/" + msa_files[i]
+                      + " https://mast.stsci.edu/api/v0.1/Download/file"
+                      + "?uri=mast:JWST/product/" + msa_files[i])
+        msa_files = glob.glob("msa_files/jw*")
+        print("\n")
+        print("Processing MSA files...\n")
+        # Merge all msa files to one catalogue
+        for i in range(len(msa_files)):
+            print("Merging msa file", str(i + 1), "of", str(len(msa_files)))
+            if not os.path.exists(msa_files[i].split("/")[0]
+                                  + "/objects_with_meta_ids_"
+                                  + msa_files[i].split("/")[-1]):
+                # This next bit is maybe unnecessarily complicated?
+                # could maybe just pull the info from the shutter table
+                # ignoring the object table? Object coords are only in
+                # Object table though? Shutter coords in shutter table?
+                # Read in the objects table
+                file = Table.read(msa_files[i], hdu=3)
+                file = file.to_pandas()
+                # Add column containing msa file name object came from
+                file["msa_file"] = msa_files[i].split("/")[-1]
+                Table.from_pandas(file).write(msa_files[i].split("/")[0]
+                                              + "/objects_"
+                                              + msa_files[i].split("/")[-1],
+                                              overwrite=True)
+                # Read in the shutter table
+                shut_file = Table.read(msa_files[i], hdu=2).to_pandas()
+                # Create unique id for each source in each msa config
+                source_id_meta_id = (shut_file["source_id"].astype(str) + "_"
+                                     + shut_file["msa_metadata_id"].astype(str))
+                shut_file["source_id_meta_id"] = source_id_meta_id
+                # Create table containing each object in each msa config
+                col_list = ["source_id_meta_id"]
+                unique_shut_file = shut_file.drop_duplicates(subset=col_list)
+                unique_shut_file = unique_shut_file[["source_id",
+                                                     "msa_metadata_id"]]
+                # Save to file
+                tosave = Table.from_pandas(unique_shut_file)
+                tosave.write(msa_files[i].split("/")[0] + "/shutters_"
+                             + msa_files[i].split("/")[-1], overwrite=True)
+                # Match object and shutter tables to find which objects
+                # are observed in which msa config
+                os.system("stilts tmatch2 in1=" + msa_files[i].split("/")[0]
+                          + "/objects_" + msa_files[i].split("/")[-1]
+                          + " in2=" + msa_files[i].split("/")[0] + "/shutters_"
+                          + msa_files[i].split("/")[-1] + " out="
+                          + msa_files[i].split("/")[0]
+                          + "/objects_with_meta_ids_"
+                          + msa_files[i].split("/")[-1]
+                          + " matcher=exact values1=source_id"
+                          + " values2=source_id join=1and2 find=all")
+            if i == 0:
+                msametfl_table = Table.read(msa_files[i].split("/")[0]
+                                            + "/objects_with_meta_ids_"
+                                            + msa_files[i].split("/")[-1])
+                msametfl_table = msametfl_table.to_pandas()
+            else:
+                single_table = Table.read(msa_files[i].split("/")[0]
+                                          + "/objects_with_meta_ids_"
+                                          + msa_files[i].split("/")[-1])
+                single_table = single_table.to_pandas()
+                msametfl_table = pd.concat([msametfl_table, single_table])
+        # rename and cut down columns
+        msametfl_table["source_id"] = msametfl_table["source_id_1"]
+        msametfl_table = msametfl_table[["source_id", "program", "ra", "dec",
+                                         "msa_metadata_id", "msa_file"]]
+        # Unique ID for each source + msa config to merge with exp table
+        msa_file_met_id = (msametfl_table["msa_file"].str.decode('utf-8') + "_"
+                           + msametfl_table["msa_metadata_id"].astype(str))
+        msametfl_table["msa_file_met_id"] = msa_file_met_id
+        Table.from_pandas(msametfl_table).write("nirspec_msa_all_objects.fits",
+                                                overwrite=True)
+        # Merge table of objects+meta ids with table of exposures
+        os.system("stilts tmatch2 in1=nirspec_msa_all_objects.fits"
+                  + " in2=all_exposures.fits"
+                  + " out=all_objects_all_exposures.fits matcher=exact"
+                  + " values1=msa_file_met_id values2=msa_file_met_id"
+                  + " join=1and2 find=all")
+        # Cut merged table to unique obs+grating combos for each object
+        final = Table.read("all_objects_all_exposures.fits").to_pandas()
+        final["unique"] = (final["source_id"].astype(str) + "_"
+                           + final["msametfl"].str.decode('utf-8') + "_"
+                           + final["grating"].str.decode('utf-8'))
+        final.drop_duplicates(subset="unique", inplace=True)
+        # Cut back to key columns and save to file
+        final["msa_file_met_id"] = final["msa_file_met_id_1"]
+        final = final[["source_id", "program", "ra", "dec", "msa_metadata_id",
+                       "msa_file", "fileSetName", "grating", "title",
+                       "proposal_pi", "filter-pupil", "msa_file_met_id",
+                       "orig_msametfl", "asntable"]]
+        tosave = Table.from_pandas(final)
+        tosave.write("NIRSpec_all_observations_all_objects.fits",
+                     overwrite=True)
+        self.all_obs = final
+    def run_pipeline(self):
+        self.pull_uncal_files()
+        self.run_l1_pipeline()
+        self.run_l2_pipeline()
+        self.run_l3_pipeline()
+        self.post_pipeline_analysis()
+    def pull_uncal_files(self):
+        mask = np.isin(self.all_exp["msa_file_met_id"].str.strip().values,
+                       self.matched["msa_file_met_id"].str.strip().values)
+        fileset = self.all_exp[mask]["fileSetName"]
+        self.uncal_files = []
+        self.req_msa_files = []
+        self.req_l2_asn_files = []
+        for i in range(np.sum(mask)):
+            self.uncal_files.append(fileset.values[i].strip()
+                                    + "_nrs1_uncal.fits")
+            self.uncal_files.append(fileset.values[i].strip()
+                                    + "_nrs2_uncal.fits")
+        self.req_msa_files = self.all_exp[mask]["orig_msametfl"]
+        self.req_msa_files = self.req_msa_files.str.decode("utf-8").values
+        self.req_l2_asn_files = self.all_exp[mask]["asntable"]
+        self.req_l2_asn_files = self.req_l2_asn_files.str.decode("utf-8").values
+        # Download uncal files for each exposure
+        for i in range(len(self.uncal_files)):
+            if not self.skip_l1_pipeline:
+                if os.path.exists("uncal_files/" + self.uncal_files[i]):
+                    continue
+                print("Downloading uncal file", i + 1, "of",
+                      len(self.uncal_files))
+                os.system("curl -H --globoff --location-trusted -f "
+                          + "--progress-bar --output ./uncal_files/"
+                          + self.uncal_files[i]
+                          + " https://mast.stsci.edu/api/v0.1/Download/file"
+                          + "?uri=mast:JWST/product/" + self.uncal_files[i])
+            else:
+                if os.path.exists("rate_files/" + self.uncal_files[i][:-10]
+                                  + "rate.fits"):
+                    continue
+                print("Downloading rate file", i + 1,"of",
+                      len(self.uncal_files))
+                print(self.uncal_files[i][:-10] + "rate.fits")
+                os.system("curl -H --globoff --location-trusted -f "
+                          + "--progress-bar --output ./rate_files/"
+                          + self.uncal_files[i][:-10] + "rate.fits"
+                          + " https://mast.stsci.edu/api/v0.1/Download/file"
+                          + "?uri=mast:JWST/product/"
+                          + self.uncal_files[i][:-10] + "rate.fits")
+    def run_l1_pipeline(self):
+        msa_files = self.req_msa_files
+        # msa files not ending with 01_msa.fits are identical to those that do
+        # so just copy the 01_msa.fits files and rename them for the pipeline
+        for i in range(len(msa_files)):
+            os.system("cp msa_files/" + msa_files[i][:-11] + "01_msa.fits"
+                        + " uncal_files/" + msa_files[i])
+        if self.skip_l1_pipeline:
+            return
+        print("\n")
+        print("Running level 1 pipeline...\n")
+        for file in self.uncal_files:
+            if os.path.exists("rate_files/" + file[:-10] + "rate.fits"):
+                continue
+            print(file)
+            pipe = Detector1Pipeline()
+            pipe.save_results = True
+            pipe.output_dir = "./rate_files"
+            pipe.output_file = file[:-11]
+            pipe.jump.expand_large_events = True
+            pipe.jump.max_cores = 16
+            pipe.clean_flicker_noise.skip = False
+            pipe.clean_flicker_noise.fit_method = 'median'
+            pipe.clean_flicker_noise.mask_science_regions = True
+            pipe.clean_flicker_noise.background_method = None
+            pipe.clean_flicker_noise.n_sigma = 2
+            pipe.ramp_fit.maximum_cores = "all"
+            pipe.run("uncal_files/" + file)
+        os.system("rm uncal_files/*rateints.fits")
+    def run_l2_pipeline(self):
+        # Download level 2 association files
+        l2_asn_files = self.req_l2_asn_files
+        for l2_asn_file in l2_asn_files:
+            if os.path.exists("rate_files/" + l2_asn_file):
+                continue
+            os.system("curl -H --globoff --location-trusted -f --progress-bar "
+                      + "--output ./rate_files/" + l2_asn_file
+                      + " https://mast.stsci.edu/api/v0.1/Download/file"
+                      + "?uri=mast:JWST/product/" + l2_asn_file)
+        msa_files = self.req_msa_files
+        for i in range(len(msa_files)):
+            # Chop the msa files down to just the slitlets we want
+            msam = fits.open("uncal_files/" + msa_files[i])
+            shut = Table.read("uncal_files/" + msa_files[i], hdu=2)
+            shut_pd = shut.to_pandas()
+            unique = (shut_pd["source_id"].astype(str).values
+                      + "_" + shut_pd["msa_metadata_id"].astype(str).values
+                      + "_" + msa_files[i])
+            newcol = fits.Column(name="id_msam_msafile", format="50A",
+                                 array=(unique))
+            msam[2] = fits.BinTableHDU.from_columns(newcol + msam[2].columns,
+                                                    name="SHUTTER_INFO")
+            unique = (self.matched["source_id"].astype(str)+ "_"
+                      + self.matched["msa_metadata_id"].astype(str) + "_"
+                      + self.matched["orig_msametfl"].str.decode("utf-8"))
+            self.matched["id_msam_msafile"] = unique
+            mask = np.isin(msam[2].data["id_msam_msafile"],
+                           self.matched["id_msam_msafile"].values)
+            slit_ids = np.unique(msam[2].data["slitlet_id"][mask])
+            mask2 = np.isin(msam[2].data["slitlet_id"], slit_ids)
+            msam[2].data = msam[2].data[mask2]
+            msam.writeto("rate_files/" + msa_files[i], overwrite=True)
+        print("\n")
+        print("Running level 2 pipeline...\n")
+        # Load up association files
+        asc2_list = glob.glob("rate_files/*_spec2_*_asn.json")
+        # Run level 2 pipeline
+        for asc in asc2_list:
+            print(asc)
+            spec2 = Spec2Pipeline()
+            spec2.save_results = True
+            spec2.output_dir = "cal_files"
+            try:
+                result = spec2.run(asc)
+            except jwst.assign_wcs.util.NoDataOnDetectorError:
+                pass
+    def run_l3_pipeline(self):
+        # Load up list of level 2b products
+        cal_files = glob.glob("cal_files/*_cal.fits")
+        # Find unique datasets to run level 3 pipeline on
+        sep = pd.Series(cal_files).str.split("_")
+        cal_files_base = sep.str[0] + "_" + sep.str[1] + "_" + sep.str[2]
+        unique = cal_files_base.drop_duplicates().values
+        # L2 pipeline refuses to reduce if no slits open on detector
+        # L3 pipeline refuses to run unless nrs1 and nrs2 files exist
+        # So make dummy files, add to asn file, delete after L3 pipe
+        cal_files_no_dummy = np.copy(np.array(cal_files))
+        for i in range(len(unique)):
+            mask = np.isin(cal_files_base, unique[i])
+            for file in cal_files_no_dummy[mask]:
+                if file.endswith("nrs1_cal.fits"):
+                    mirror_file = file.replace("nrs1_cal.fits", "nrs2_cal.fits")
+                elif file.endswith("nrs2_cal.fits"):
+                    mirror_file = file.replace("nrs2_cal.fits", "nrs1_cal.fits")
+                if not np.max(np.isin(cal_files_no_dummy[mask], mirror_file)):
+                    cal_files.append(mirror_file[:-5] + "_dummy.fits")
+                    os.system("cp " + file + " " + mirror_file[:-5]
+                              + "_dummy.fits")
+        # Find unique datasets to run level 3 pipeline on (re-do with dummies)
+        sep = pd.Series(cal_files).str.split("_")
+        cal_files_base = sep.str[0] + "_" + sep.str[1] + "_" + sep.str[2]
+        unique = cal_files_base.drop_duplicates().values
+        # Make level 3 association files
+        for i in range(len(unique)):
+            mask = np.isin(cal_files_base, unique[i])
+            if os.path.exists(unique[i] + "_spec3_asn.json"):
+                os.system("rm " + unique[i] + "_spec3_asn.json")
+            f = open(unique[i] + "_spec3_asn.json", "w")
+            f.write('{"asn_type": "spec3",\n')
+            f.write('"asn_pool": "flubflubflub",\n')
+            f.write('"products": [\n')
+            f.write('{"name": "' + unique[i] + '_{source_id}",\n')
+            f.write('"members": [\n')
+            for j in range(np.sum(mask)-1):
+                f.write('{"expname": "'
+                        + np.array(cal_files)[mask][j].split("/")[1]
+                        + '", "exptype": "science"},\n')
+            f.write('{"expname": "'
+                    + np.array(cal_files)[mask][-1].split("/")[1]
+                    + '", "exptype": "science"}]}]}\n')
+            f.close()
+        print("\n")
+        print("Running level 3 pipeline...\n")
+        # Load up L3 association files
+        asc3_list = glob.glob("cal_files/*_spec3_asn.json")
+        # Run level 3 pipeline
+        for asc in asc3_list:
+            print(asc)
+            spec3 = Spec3Pipeline()
+            spec3.save_results = True
+            spec3.output_dir = "s2d_files"
+            result = spec3.run(asc)
+        for i in range(len(self.matched)):
+            split = self.matched["fileSetName"].str.decode("utf-8").str.split("_")
+            filebase = (split.str[0] + "_" + split.str[1]).values
+            prod = glob.glob("s2d_files/" + filebase[i] + "*"
+                             + self.matched["source_id"].iloc[i].astype(str)
+                             + "_s2d.fits")
+            print(prod[0],
+                  self.matched["ID"].iloc[i].astype(str),
+                  filebase[i],
+                  self.matched["grating"].iloc[i])
+            os.system("mv " + prod[0] + " s2d_files/"
+                      + self.matched["ID"].iloc[i].astype(str) + "_"
+                      + filebase[i] + "_"
+                      + self.matched["grating"].iloc[i]
+                      + "_s2d.fits")
+        os.system("rm s2d_files/*crf.fits s2d_files/*x1d.fits"
+                   + " s2d_files/*cal.fits")
+        os.system("rm cal_files/*_dummy.fits")
+    def _get_wavs(self, reduced):
+        reducedsci = reduced.data
+        wcsobj = reduced.meta.wcs
+        y, x = np.mgrid[:reducedsci.shape[0], : reducedsci.shape[1]]
+        det2sky = wcsobj.get_transform('detector', 'world')
+        reducedra, reduceddec, reducedwave = det2sky(x, y)
+        return reducedwave[0, :]
+    def _model(self, param, x_vals):
+        return param[0]*np.exp(-0.5*(x_vals-param[1])**2/param[2]**2)
+    def _chisq(self, x, args):
+        x_vals = args[0]
+        y_vals = args[1]
+        y_start = args[2]
+        mod = self._model(x, x_vals)
+        # Controls how far the centroid can stray from input position
+        if np.abs(y_start - x[1]) > self.y_tolerance:
+            return 9.9*10**99
+        return np.nansum((mod - y_vals)**2)
+    def rolling_extraction(self, wavs, spec2d, spec2d_err, full_result,
+                           half_width_pix,  weights_collapsed):
+        # the range in spec2d we want to keep, all other weights set to 0
+        clip_range_pix = [int(np.round(full_result['x'][1]
+                                       - 3*full_result['x'][2])),
+                          int(np.round(full_result['x'][1]
+                                       + 3*full_result['x'][2]))]
+        weights = np.zeros(spec2d.shape)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", category=RuntimeWarning)
+            for i in range(spec2d.shape[1]):
+                left = np.max([i-half_width_pix, 0])
+                right = np.min([spec2d.shape[1], i+half_width_pix+1])
+                if np.isnan(spec2d[:,left:right]).all():
+                    weights[:,i] = 0
+                else:
+                    # check if the SNR is too low for this slice
+                    SNR = (np.nansum(spec2d[clip_range_pix[0]:clip_range_pix[1]+1,i])/
+                           np.sqrt(np.nansum(spec2d_err[clip_range_pix[0]:clip_range_pix[1]+1,i]**2)))
+                    if SNR < 5 or np.isnan(SNR):
+                        # set weights with the collapsed weights
+                        weights[:,i] = weights_collapsed
+                    else:
+                        profile = 10**20*np.nanmedian(spec2d[:,left:right],
+                                                      axis=1)
+                        profile[profile < 0] = 0.
+                        profile[np.isnan(list(profile))] = 0.
+                        # remove outside source range
+                        profile[:clip_range_pix[0]] = 0.
+                        profile[clip_range_pix[1]+1:] = 0.
+                        if np.sum(profile) == 0:
+                            weights[:,i] = weights_collapsed
+                        else:
+                            profile /= np.sum(profile)
+                            weights[:,i] = profile
+        # Do optimal extraction
+        extr = np.nansum(weights*spec2d/spec2d_err**2, axis=0)
+        extr /= np.nansum(weights**2/spec2d_err**2, axis=0)
+        extr_err = np.sqrt(1./np.nansum(weights**2/spec2d_err**2, axis=0))
+        spec1d = np.c_[wavs, extr, extr_err]
+        return spec1d
+    def extract_1d(self, s2d_file, y_centroid=None, y_tolerance=2,
+                   y_max_width=2, width_pix=51):
+        print(s2d_file)
+        reduced = datamodels.open(s2d_file)
+        self.y_tolerance = y_tolerance
+        # Units are inconsistent between output files, fix to cgs
+        photmjsr = reduced.meta.photometry.conversion_megajanskys
+        wavs = self._get_wavs(reduced)*10000
+        spec2d = reduced.data*10**-17*2.9979*10**18/wavs**2/photmjsr
+        spec2d_err = reduced.err*10**-17*2.9979*10**18/wavs**2/photmjsr
+        # Weirdly in some cases all the errors are zero, not sure why
+        if np.nansum(spec2d_err) == 0:
+            spec2d_err += 1.
+        else:
+            spec2d_err[spec2d_err == 0] = np.nan
+        x = np.arange(spec2d.shape[0])
+        # Fit the full profile to get centroids
+        profile = 10**20*np.nanmedian(spec2d, axis=1)
+        profile[profile < 0] = 0.
+        # This is where you specify the y centroid for the 1D extraction
+        #y_start = spec2d.shape[0]/2
+        if y_centroid is None:
+            y_start = np.nanargmax(profile[3:-3]) + 3
+        else:
+            y_start = y_centroid
+        """
+        # Attempt to figure out y centroids from msa metafile
+        msametfl = reduced.meta.instrument.msa_metadata_file
+        msametid = reduced.meta.instrument.msa_metadata_id
+        shut = Table.read("rate_files/" + msametfl, hdu=2).to_pandas()
+        shut = shut.groupby(shut["dither_point_index"] == 1).get_group(True)
+        shut = shut.groupby(shut["msa_metadata_id"] == msametid).get_group(True)
+        print(shut["primary_source"].values)
+        filebase = s2d_file.split("_")[2] + "_" + s2d_file.split("_")[3]
+        mask = self.matched["fileSetName"].str.decode("utf-8").str.contains(filebase)
+        mask2 = shut["source_id"] == self.matched["source_id"][mask].values[0]
+        source_shut = shut.groupby(mask2).get_group(True)
+        if len(source_shut) > 1:
+            primary_mask = (source_shut["primary_source"].str.decode("utf-8") == "Y")
+            source_shut = source_shut.groupby(primary_mask).get_group(True)
+        obj_shut_col = source_shut["shutter_column"].values[0]
+        shut_col_min = np.min(shut["shutter_column"].values)
+        shutter_height_pix = (0.46+0.07)/0.1
+        n_shut = (obj_shut_col - shut_col_min)
+        if len(shut) < 3:
+            n_shut += 3 - len(shut)
+        #          + source_shut["estimated_source_in_shutter_y"].values[0]
+        print(n_shut, source_shut["estimated_source_in_shutter_y"].values[0])
+        y_start = spec2d.shape[0] - 2 - (n_shut*shutter_height_pix + 0.7 + source_shut["estimated_source_in_shutter_y"].values[0]*4.6)
+        print("Initial guess for y centroid:", y_start)
+        input()
+        """
+        # Fit the 1D extraction profile
+        result = minimize(self._chisq, [1., y_start, 1.],
+                          args=[x, profile, y_start],
+                          bounds=[(None, None), (y_start-5, y_start+5),
+                                  (0, y_max_width)])
+        y_mod = self._model(result["x"], x)
+        weights_collapsed = y_mod/np.sum(y_mod)
+        spec1d = self.rolling_extraction(wavs, spec2d, spec2d_err, result,
+                                         int((width_pix-1)/2),
+                                         weights_collapsed)
+        spec1d_mask = np.invert(np.isnan(spec1d[:, 1]))
+        spec1d = spec1d[spec1d_mask, :]
+        # Simplest 1D extraction from Adam's old code
+        #weights = np.expand_dims(y_mod/np.sum(y_mod), axis=1)
+        #extr = np.nansum(weights*spec2d/spec2d_err**2, axis=0)
+        #extr /= np.nansum(weights**2/spec2d_err**2, axis=0)
+        #extr_err = np.sqrt(1./np.nansum(weights**2/spec2d_err**2, axis=0))
+        #mask = (extr != 0.)
+        #spec1d = np.c_[wavs, extr, extr_err]
+        #spec1d = spec1d[mask, :]
+        return spec1d, result, profile
+    def post_pipeline_analysis(self):
+        s2d_file_list = glob.glob("s2d_files/*_s2d.fits")
+        for s2d_file in s2d_file_list:
+            # check if already done
+            if os.path.isfile("x1d_files/" + s2d_file.split('/')[-1][:-8]
+                              + "x1d.txt"):
+                continue
+            # Do 1D extraction
+            spec1d, result, profile = self.extract_1d(s2d_file)
+            # Make plot
+            reduced = datamodels.open(s2d_file)
+            # Units are inconsistent between output files, fix to cgs
+            photmjsr = reduced.meta.photometry.conversion_megajanskys
+            wavs = self._get_wavs(reduced)*10000
+            spec2d = reduced.data*10**-17*2.9979*10**18/wavs**2/photmjsr
+            spec2d_err = reduced.err*10**-17*2.9979*10**18/wavs**2/photmjsr
+            spec2d_err[spec2d_err == 0] = np.nan
+            x = np.arange(spec2d.shape[0])
+            plt.figure(figsize=(12, 5))
+            gs = mpl.gridspec.GridSpec(3, 7, hspace=0.1, wspace=0.1)
+            # profile plot in bottom right
+            profile = profile.astype('float64')
+            profile[np.isnan(profile)] = 0
+            ax_profile = plt.subplot(gs[-1,6])
+            ax_profile.stairs(profile, edges=[0.5]+list(x+0.5), color='k',
+                              orientation='horizontal')
+            ax_profile.axhline(result["x"][1], color="blue", lw=0.5, ls="--",
+                               label="actual extraction centroid")
+            ax_profile.set_yticklabels([])
+            # Save off 1D spectrum file
+            spec1d = spec1d[np.invert(np.isnan(spec1d[:, 1])), :]
+            np.savetxt("x1d_files/" + s2d_file.split('/')[1][:-8]+ "x1d.txt",
+                       spec1d)
+            # Plot 1D spectrum
+            ax = plt.subplot(gs[:-1,:6])
+            ax.plot(spec1d[:, 0], spec1d[:, 1]*10**19, color="dodgerblue")
+            ax.axhline(0, color='gray', ls='--', zorder=-1)
+            ax.fill_between(spec1d[:, 0], 0, spec1d[:, 2]*10**19,
+                            color='lightgray', zorder=-2)
+            ax.set_ylabel("$f_\lambda\ /\ \mathrm{10^{-19}"
+                          + "\ erg\ s^{-1}\ cm^{-2}\ \AA^{-1}}$")
+            ax.set_xlim(spec1d[0,0]-(spec1d[-1,0]-spec1d[0,0])*0.02,
+                        spec1d[-1,0]+(spec1d[-1,0]-spec1d[0,0])*0.02)
+            mask = ((spec1d[:, 1] < np.nanmedian(spec1d[:, 1])
+                     + 1.426*5*median_abs_deviation(spec1d[:, 1],
+                                                    nan_policy="omit"))
+                    & (spec1d[:, 1] > np.nanmedian(spec1d[:, 1])
+                       - 1.426*5*median_abs_deviation(spec1d[:, 1],
+                                                      nan_policy="omit")))
+            ymax = 1.2*10**19*np.nanmax(spec1d[mask, 1])
+            ax.set_ylim(-0.1*ymax, ymax)
+            ax.set_title(s2d_file.split('/')[-1])
+            ax.set_xticklabels([])
+            # 2D plot panel
+            ax2 = plt.subplot(gs[-1,:6])
+            vmin = -1.426*median_abs_deviation(spec2d.flatten(),
+                                               nan_policy="omit")
+            vmax = 1.426*3*median_abs_deviation(spec2d.flatten(),
+                                                nan_policy="omit")
+            ax2.pcolor(np.tile(wavs, (spec2d.shape[0],1)),
+                       np.tile(np.arange(spec2d.shape[0]),
+                               (spec2d.shape[1],1)).T, spec2d,
+                       vmin=vmin, vmax=vmax, cmap='hot')
+            ax2.set_xlabel("Wavelength / \AA")
+            ax2.set_xlim(ax.get_xlim())
+            ax2.axhline(result["x"][1], color="blue", lw=0.5, ls="--",
+                        label="actual extraction centroid")
+            ax2.set_facecolor('lightgray')
+            ax_profile.set_ylim(ax2.get_ylim())
+            plt.savefig("plots/" + s2d_file.split('/')[1][:-9] + ".pdf",
+                        bbox_inches="tight")

piratical-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,27 @@
+Metadata-Version: 2.4
+Name: piratical
+Version: 0.1.0
+Summary: Plunder spectra from the JWST NIRSpec archive
+Home-page: https://bagpipes.readthedocs.io
+Author: Adam Carnall, Ho-Hin Leung
+Author-email: adamc@roe.ac.uk
+Project-URL: GitHub, https://github.com/ACCarnall/piratical_pipeline
+Requires-Dist: numpy<=2.2
+Requires-Dist: pandas
+Requires-Dist: astropy
+Requires-Dist: matplotlib>=2.2.2
+Requires-Dist: scipy
+Requires-Dist: mastquery
+Requires-Dist: jwst
+Dynamic: author
+Dynamic: author-email
+Dynamic: description
+Dynamic: home-page
+Dynamic: project-url
+Dynamic: requires-dist
+Dynamic: summary
+NIRSpec Piratical Pipeline
+--------------------------
+Plunder spectra from the JWST archive.

piratical-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,6 @@
+piratical/__init__.py,sha256=NMLBUxJJapBBaC1KhootZcj-2hB5LJoruf8-ttvVOmc,41
+piratical/piratical.py,sha256=6Qqk_himX9ZeCYNlpRGBQMTi53uNobMmRuNIk2U1nWg,35198
+piratical-0.1.0.dist-info/METADATA,sha256=dE3ManXaq9cZOV0xPTdVqlkJDaN6i6wQjtp4M_Tkyg8,689
+piratical-0.1.0.dist-info/WHEEL,sha256=JNWh1Fm1UdwIQV075glCn4MVuCRs0sotJIq-J6rbxCU,109
+piratical-0.1.0.dist-info/top_level.txt,sha256=Puy2NkUhL3a-I6Izlc1u4sRDxwofL2OcvhkkrnA8KPA,10
+piratical-0.1.0.dist-info/RECORD,,

piratical-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.9.0)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any

piratical-0.1.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ piratical