isgri 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
isgri/__version__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.6.1"
+ __version__ = "0.7.0"
isgri/catalog/__init__.py CHANGED
@@ -1,3 +1,4 @@
  from .scwquery import ScwQuery
+ from .builder import CatalogBuilder
 
- __all__ = ["ScwQuery"]
+ __all__ = ["ScwQuery", "CatalogBuilder"]
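With 0.7.0, CatalogBuilder is re-exported from the isgri.catalog package alongside ScwQuery, so both entry points can be imported from one place. A minimal sketch of the new import path:

# CatalogBuilder is now exported at package level (see __all__ above);
# importing it from isgri.catalog.builder continues to work as before.
from isgri.catalog import CatalogBuilder, ScwQuery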
isgri/catalog/builder.py CHANGED
@@ -1,19 +1,124 @@
+ """ISGRI Catalog Builder
+ ======================
+
+ Tools for building and updating INTEGRAL/ISGRI science window catalogs.
+
+ Classes
+ -------
+ CatalogBuilder : Main catalog builder class
+
+ Examples
+ --------
+ >>> from isgri.catalog.builder import CatalogBuilder
+ >>>
+ >>> # Create builder instance
+ >>> builder = CatalogBuilder(
+ ...     archive_path="/path/to/archive",
+ ...     catalog_path="/path/to/catalog.fits",
+ ...     lightcurve_cache="/path/to/cache",
+ ...     n_cores=8
+ ... )
+ >>>
+ >>> # Update catalog with new science windows
+ >>> builder.update_catalog()
+ """
+
  from isgri.utils import LightCurve, QualityMetrics
+ from ..config import Config
  import numpy as np
- import os, subprocess
+ import os, subprocess, glob
  from typing import Optional
  from joblib import Parallel, delayed # type: ignore
  import multiprocessing
+ from collections import defaultdict
+ from astropy.table import Table, vstack
+ from pathlib import Path
+
+ new_catalog_names = [
+     "REVOL",
+     "SWID",
+     "TSTART",
+     "ONTIME",
+     "TSTOP",
+     "RA_SCX",
+     "DEC_SCX",
+     "RA_SCZ",
+     "DEC_SCZ",
+     "NoEVTS",
+     "CHI",
+     "CUT_CHI",
+     "GTI_CHI",
+ ]
+ new_catalog_dtypes = ["i8", "S12", "f8", "f8", "f8", "f8", "f8", "f8", "f8", "i8", "f8", "f8", "f8"]
 
 
  class CatalogBuilder:
+     """ISGRI catalog builder and updater.
+
+     Processes INTEGRAL/ISGRI science windows to build catalogs containing
+     quality metrics, pointing information, and light curve data.
+
+     Parameters
+     ----------
+     archive_path : str, optional
+         Path to INTEGRAL archive directory. If None, uses config file.
+     catalog_path : str, optional
+         Path to catalog FITS file. If None, uses config file.
+     lightcurve_cache : str, optional
+         Path to directory for caching light curve arrays. If None, no caching.
+     n_cores : int, optional
+         Number of CPU cores for parallel processing. If None, uses all available cores.
+
+     Attributes
+     ----------
+     archive_path : str
+         Path to INTEGRAL archive
+     catalog_path : str
+         Path to catalog file
+     lightcurve_cache : str or None
+         Path to light curve cache directory
+     n_cores : int
+         Number of parallel workers
+     catalog : astropy.table.Table
+         Loaded catalog table
+
+     Examples
+     --------
+     >>> builder = CatalogBuilder(
+     ...     archive_path="/data/integral",
+     ...     catalog_path="catalog.fits",
+     ...     n_cores=4
+     ... )
+
+     >>> # Update catalog with new observations
+     >>> builder.update_catalog()
+
+     >>> # Find all science windows
+     >>> swids, paths = builder.find_scws()
+     >>> print(f"Found {len(swids)} science windows")
+
+     See Also
+     --------
+     ScwQuery : Query and filter catalog data
+     LightCurve : Light curve analysis
+     QualityMetrics : Quality metric computation
+     """
+
      def __init__(
          self,
-         archive_path: str,
-         catalog_path: str,
+         archive_path: Optional[str] = None,
+         catalog_path: Optional[str] = None,
          lightcurve_cache: Optional[str] = None,
          n_cores: Optional[int] = None,
      ):
+         if archive_path is None or catalog_path is None:
+             cfg = Config()
+             if archive_path is None:
+                 archive_path = cfg.archive_path
+             if catalog_path is None:
+                 catalog_path = cfg.catalog_path
+             if catalog_path is None:
+                 raise FileNotFoundError("Catalog path must be specified either in arguments or config file.")
          self.archive_path = archive_path
          self.catalog_path = catalog_path
          self.lightcurve_cache = lightcurve_cache
@@ -21,25 +126,122 @@ class CatalogBuilder:
          self.catalog = self._load_catalog()
 
      def _load_catalog(self):
-         if not os.path.exists(self.catalog_path):
-             empty_structure = CatalogStructure.get_empty_structure()
-             return empty_structure
+         """Load existing catalog or create new empty catalog.
+
+         Returns
+         -------
+         astropy.table.Table
+             Loaded catalog table or new empty table.
+
+         Raises
+         ------
+         FileNotFoundError
+             If catalog directory does not exist.
+         """
+         catalog_path = Path(self.catalog_path)
+         if catalog_path.is_file():
+             return Table.read(catalog_path)
+         elif catalog_path.parent.is_dir():
+             print("Catalog file not found, creating new catalog.")
+             return Table(names=new_catalog_names, dtype=new_catalog_dtypes)
          else:
-             catalog = CatalogStructure.load_from_fits(self.catalog_path)
-             return catalog
+             raise FileNotFoundError(f"Directory for catalog does not exist: {catalog_path.parent}")
+
+     def _add_catalog_data(self, table_data_rows: list[dict]):
+         """Add new rows to catalog and save to disk.
+
+         Parameters
+         ----------
+         table_data_rows : list of dict
+             List of dictionaries containing catalog row data.
+             Each dict must have keys matching catalog column names.
+
+         Notes
+         -----
+         Updates are written atomically using a temporary file to prevent corruption.
+         The catalog is sorted by TSTART after adding new data.
+         """
+         new_data = Table(rows=table_data_rows, names=new_catalog_names, dtype=new_catalog_dtypes)
+         self.catalog = vstack([self.catalog, new_data])
+         self.catalog.sort("TSTART")
+
+         temp_catalog_path = Path(self.catalog_path).with_suffix(".tmp")
+         self.catalog.write(temp_catalog_path, overwrite=True, format="fits")
+         os.replace(temp_catalog_path, self.catalog_path)
+
+     def _add_array_data(self, rev: str, array_data: np.ndarray):
+         """Add light curve array data to cache for a revolution.
+
+         Parameters
+         ----------
+         rev : str
+             Revolution number (4-digit string, e.g., '0011').
+         array_data : ndarray
+             Structured array containing SWID, TIME, COUNTS, MODULE_COUNTS, and GTIS.
+
+         Raises
+         ------
+         ValueError
+             If lightcurve_cache path is not set.
+
+         Notes
+         -----
+         Merges new data with existing revolution data if present.
+         Saves as NumPy .npy file named by revolution number.
+         """
+         if self.lightcurve_cache is None:
+             raise ValueError("Lightcurve cache path is not set.")
+         file_path = Path(self.lightcurve_cache) / f"{int(rev):0>4}.npy"
+
+         if file_path.exists():
+             old_data = np.load(file_path, allow_pickle=True)
+             mask = ~np.isin(old_data["SWID"], array_data["SWID"])
+             array_data = np.concatenate([old_data[mask], array_data])
+
+         np.save(file_path, array_data)
 
      def _process_scw(self, path) -> tuple[dict, list]:
+         """Process a single science window and compute quality metrics.
+
+         Parameters
+         ----------
+         path : str
+             Path to ISGRI events FITS file.
+
+         Returns
+         -------
+         table_data : dict
+             Catalog row data containing metadata and quality metrics.
+         array_data : dict
+             Light curve data (time, counts, modules, GTIs).
+
+         Notes
+         -----
+         Computes three quality metrics:
+         - CHI: Raw chi-squared
+         - CUT_CHI: Sigma-clipped chi-squared
+         - GTI_CHI: GTI-filtered chi-squared (NaN if insufficient GTI coverage)
+
+         Light curves are binned at 1 second resolution in 15-1000 keV band.
+         """
+         event_file = os.path.join(path, "isgri_events.fits.gz")
+         if not os.path.exists(event_file):
+             return None, None
+
          lc = LightCurve.load_data(path)
 
          time, full_counts = lc.rebin(1, emin=15, emax=1000, local_time=False)
          _, module_counts = lc.rebin_by_modules(1, emin=15, emax=1000, local_time=False)
          module_counts.insert(0, full_counts)
          module_counts = np.array(module_counts)
-         quality = QualityMetrics.compute(lc)
+         quality = QualityMetrics(lc)
          quality.module_data = {"time": time, "counts": module_counts[1:]}
          raw_chisq = quality.raw_chi_squared()
          clipped_chisq = quality.sigma_clip_chi_squared()
-         gti_chisq = quality.gti_chi_squared()
+         try:
+             gti_chisq = quality.gti_chi_squared()
+         except ValueError:
+             gti_chisq = np.nan
 
          # cnames = [
          # ("REVOL", int),
@@ -73,18 +275,158 @@ class CatalogBuilder:
              "CUT_CHI": clipped_chisq,
              "GTI_CHI": gti_chisq,
          }
-         array_data = [lc.metadata["SWID"], time, module_counts, lc.gti]
+         array_data = {
+             "SWID": lc.metadata["SWID"],
+             "TIME": time,
+             "COUNTS": full_counts,
+             "MODULE_COUNTS": module_counts[1:],
+             "GTIS": lc.gtis,
+         }
          return table_data, array_data
 
      def _process_rev(self, rev_paths: list[str]) -> tuple[list[dict], list[list]]:
+         """Process all science windows in a revolution in parallel.
+
+         Parameters
+         ----------
+         rev_paths : list of str
+             Paths to event files for all ScWs in revolution.
+
+         Returns
+         -------
+         table_data_list : list of dict
+             Catalog rows for all processed ScWs.
+         array_data : ndarray
+             Structured array of light curve data for all ScWs.
+
+         Notes
+         -----
+         Uses joblib for parallel processing across n_cores workers.
+         """
          data = Parallel(n_jobs=self.n_cores, backend="multiprocessing")(
              delayed(self._process_scw)(path) for path in rev_paths
          )
-         table_data_list, array_data_list = zip(*data)
-         return table_data_list, array_data_list
+         table_data_list, array_data_dicts = zip(*[d for d in data if d[0] is not None])
+
+         dtype = [("SWID", "U16"), ("TIME", "O"), ("COUNTS", "O"), ("MODULE_COUNTS", "O"), ("GTIS", "O")]
+         array_data = np.empty(len(array_data_dicts), dtype=dtype)
+         for i, d in enumerate(array_data_dicts):
+             array_data[i] = (d["SWID"], d["TIME"], d["COUNTS"], d["MODULE_COUNTS"], d["GTIS"])
+         return table_data_list, array_data
 
-     def _find_scws(self) -> tuple[np.ndarray[str], np.ndarray[str]]:
+     def find_scws(self) -> tuple[np.ndarray[str], np.ndarray[str]]:
+         """Find all science windows in the archive.
+
+         Returns
+         -------
+         swids : ndarray of str
+             Array of SWID identifiers (12 characters).
+         swid_paths : ndarray of str
+             Array of corresponding directory paths.
+
+         Notes
+         -----
+         Only includes ScWs matching pattern with '0.0' (Pointings, slews are omitted) in directory name.
+         Scans all revolution directories in archive_path.
+         """
          # Find all SCW files in the archive
-         scws_files = subprocess.run(
-             ["ls", f"{self.archive_path}/*", "|", "isgri_events.fits.gz"], capture_output=True, text=True
+         revolutions = os.scandir(self.archive_path)
+         swids, swid_paths = [], []
+         for rev in revolutions:
+             if not rev.is_dir():
+                 continue
+             for scw in os.scandir(rev.path):
+                 swid = scw.name
+                 path = scw.path
+                 if len(swid) == 16 and "0.0" in swid:
+                     swids.append(swid.split(".")[0])
+                     swid_paths.append(path)
+         return np.array(swids), np.array(swid_paths)
+
+     def find_event_files(
+         self, swids: np.ndarray[str], swid_paths: np.ndarray[str]
+     ) -> tuple[np.ndarray[str], np.ndarray[str]]:
+         """Filter science windows to those with event files.
+
+         Parameters
+         ----------
+         swids : ndarray of str
+             Array of SWID identifiers.
+         swid_paths : ndarray of str
+             Array of ScW directory paths.
+
+         Returns
+         -------
+         valid_swids : ndarray of str
+             SWIDs with existing event files.
+         valid_paths : ndarray of str
+             Paths to corresponding isgri_events.fits.gz files.
+
+         Notes
+         -----
+         Checks for existence of 'isgri_events.fits.gz' in each ScW directory.
+         """
+
+         def check_file(swid, path):
+             event_file = os.path.join(path, "isgri_events.fits.gz")
+             return (swid, event_file) if os.path.exists(event_file) else None
+
+         print("Checking for event files...")
+         results = Parallel(n_jobs=self.n_cores, backend="threading")(
+             delayed(check_file)(swid, path) for swid, path in zip(swids, swid_paths)
          )
+
+         valid_data = [r for r in results if r is not None]
+         if valid_data:
+             valid_swids, valid_paths = zip(*valid_data)
+             return np.array(valid_swids), np.array(valid_paths)
+         return np.array([]), np.array([])
+
+     def update_catalog(self):
+         """Update catalog with new science windows from archive.
+
+         Scans archive for new ScWs not present in catalog, processes them
+         in parallel by revolution, and adds results to catalog and cache.
+
+         Notes
+         -----
+         Processing workflow:
+         1. Find all ScWs in archive
+         2. Identify new ScWs not in catalog
+         3. Filter to ScWs with event files
+         4. Process by revolution in parallel
+         5. Add to catalog and light curve cache
+
+         Only ScWs with isgri_events.fits.gz files are processed.
+         Progress is printed for each revolution.
+
+         Examples
+         --------
+         >>> builder = CatalogBuilder()
+         >>> builder.update_catalog()
+         """
+         print("Looking for ScWs in archive...")
+         scws_in_archive, scws_paths = self.find_scws()
+         print(f"Found {len(scws_in_archive)} ScWs in archive.")
+         scws_in_catalog = np.array(self.catalog["SWID"], dtype=str)
+         mask = np.isin(scws_in_archive, scws_in_catalog, invert=True)
+         to_process_scws = scws_in_archive[mask]
+         to_process_paths = scws_paths[mask]
+         print(f"{len(to_process_scws)} ScWs not in catalog.")
+         # to_process_scws, to_process_paths = self.find_event_files(new_scws, new_paths)
+         # print(f"{len(to_process_scws)} ScWs have event files and will be processed.")
+         # if len(to_process_scws) == 0:
+         # print("Exiting.")
+         # return
+
+         revolutions = defaultdict(list)
+         for swid, path in zip(to_process_scws, to_process_paths):
+             revolutions[swid[:4]].append(path)
+         revolutions = dict(sorted(revolutions.items()))
+         for revolution, rev_paths in revolutions.items():
+             print(f"Processing revolution {revolution} with {len(rev_paths)} ScWs...")
+             table_data_rows, array_data_list = self._process_rev(rev_paths)
+             print(f"Adding {len(table_data_rows)} ScWs from revolution {revolution} to catalog.")
+             self._add_catalog_data(table_data_rows)
+             if self.lightcurve_cache is not None:
+                 self._add_array_data(revolution, array_data_list)
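Taken together, builder.py now builds the catalog straight from the archive tree: find_scws() walks the revolution directories with os.scandir, _process_rev() fans the per-ScW processing out over joblib workers, _add_catalog_data() appends rows and rewrites the FITS file atomically via a .tmp file, and update_catalog() drives the whole loop. A minimal usage sketch, assuming the paths below exist (they are placeholders) and that Config() can supply any argument left as None:

from isgri.catalog import CatalogBuilder

# All paths here are placeholders for illustration.
builder = CatalogBuilder(
    archive_path="/data/integral/archive",
    catalog_path="/data/integral/isgri_catalog.fits",
    lightcurve_cache="/data/integral/lc_cache",  # optional per-revolution .npy cache
    n_cores=8,
)

# Scan the archive, process science windows not yet in the catalog revolution by
# revolution, and append rows (REVOL, SWID, TSTART, ..., CHI, CUT_CHI, GTI_CHI).
builder.update_catalog()

# The archive scan can also be run on its own:
swids, paths = builder.find_scws()
print(f"{len(swids)} pointings found in the archive")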
isgri/catalog/scwquery.py CHANGED
@@ -130,7 +130,7 @@ class ScwQuery:
          max_chi : float, optional
              Maximum chi-squared value to accept
          chi_type : str, default "CHI"
-             Column name: "CHI", "CUT_CHI", or "GTI_CHI"
+             Column name: "CHI", "CUT", or "GTI"
 
          Returns
          -------
@@ -140,9 +140,12 @@ class ScwQuery:
          Examples
          --------
          >>> query.quality(max_chi=2.0) # High quality data
-         >>> query.quality(max_chi=5.0, chi_type="CUT_CHI") # Alternative metric
+         >>> query.quality(max_chi=5.0, chi_type="CUT") # Alternative metric
 
          """
+         column_names = {"CHI": "CHI", "CUT": "CUT_CHI", "GTI": "GTI_CHI"}
+         chi_type = column_names.get(chi_type.upper(), chi_type)
+
          if chi_type not in self.catalog.colnames:
              raise ValueError(f"Column {chi_type} not found in catalog")
 
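The quality() filter in ScwQuery now accepts the short aliases "CUT" and "GTI" (matched case-insensitively) and maps them onto the CUT_CHI and GTI_CHI catalog columns; unrecognised values fall through unchanged, so the previous full column names keep working. A brief sketch, assuming an existing ScwQuery instance named query:

# All three calls filter on the sigma-clipped chi-squared column.
query.quality(max_chi=5.0, chi_type="CUT")      # new short alias
query.quality(max_chi=5.0, chi_type="cut")      # aliases are upper-cased before lookup
query.quality(max_chi=5.0, chi_type="CUT_CHI")  # old spelling still passes through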
isgri/cli/__init__.py CHANGED
@@ -1 +1 @@
- from .main import main
+ from .main import main