PyPI - jacscanomaly - Versions diffs - 0.1.0__py3-none-any.whl - Mend

jacscanomaly 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

jacscanomaly/__init__.py +20 -0
jacscanomaly/config.py +85 -0
jacscanomaly/extract.py +134 -0
jacscanomaly/finder.py +225 -0
jacscanomaly/models.py +107 -0
jacscanomaly/plot.py +500 -0
jacscanomaly/pspl.py +158 -0
jacscanomaly/runner.py +267 -0
jacscanomaly/seasons.py +126 -0
jacscanomaly/utils.py +115 -0
jacscanomaly-0.1.0.dist-info/METADATA +180 -0
jacscanomaly-0.1.0.dist-info/RECORD +15 -0
jacscanomaly-0.1.0.dist-info/WHEEL +5 -0
jacscanomaly-0.1.0.dist-info/licenses/LICENSE +21 -0
jacscanomaly-0.1.0.dist-info/top_level.txt +1 -0

jacscanomaly/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+# jacscanomaly/__init__.py
+# Package entry point: configures JAX and re-exports the public API.
+from __future__ import annotations
+from jax import config as jax_config
+# Switch JAX to 64-bit types. This statement runs before the submodules below
+# are imported, so any JAX code they execute already sees the x64 setting.
+jax_config.update("jax_enable_x64", True)
+from .config import FinderConfig
+from .finder import Finder
+from .plot import AnomalyPlotter
+from .pspl import PSPLFitter, PSPLFitResult
+# Names exported by `from jacscanomaly import *`.
+__all__ = [
+    "FinderConfig",
+    "Finder",
+    "AnomalyPlotter",
+    "PSPLFitter",
+    "PSPLFitResult",
+]
+# Package version string.
+__version__ = "0.1.0"

jacscanomaly/config.py ADDED Viewed

@@ -0,0 +1,85 @@
+from __future__ import annotations
+from dataclasses import dataclass
+@dataclass(frozen=True)
+class FinderConfig:
+    """
+    Immutable hyperparameter bundle consumed by :class:`jacscanomaly.finder.Finder`.
+    Only plain Python scalars live here: the module imports nothing beyond
+    ``dataclasses`` (no NumPy/JAX), and the dataclass is frozen so that a
+    configuration cannot be altered after it has been created.
+    The fields follow the four stages of the anomaly search:
+    1) Season splitting   -- cut the time series wherever a large time gap occurs.
+    2) Grid construction  -- lay out a (t0, teff) grid for every season.
+    3) Grid scan          -- evaluate delta-chi2 on the grid within a local window.
+    4) Cluster extraction -- group overlapping candidates and keep the best per cluster.
+    """
+    # --------------------------------------------------
+    # 1) Season splitting
+    # --------------------------------------------------
+    # Time gap threshold: a new season starts whenever dt > gap.
+    gap: float = 100.0
+    # --------------------------------------------------
+    # 2) Grid construction (t0, teff)
+    # --------------------------------------------------
+    # First (initial) teff value of the geometric series that forms the grid.
+    teff_init: float = 0.03
+    # Common ratio between consecutive teff values of that series.
+    common_ratio: float = 4.0 / 3.0
+    # Number of teff values in the grid.
+    teff_grid_n: int = 5
+    # Spacing coefficient of the t0 grid: dt0 = dt0_coeff * teff.
+    dt0_coeff: float = 0.17
+    # --------------------------------------------------
+    # 3) Grid scan (local evaluation window)
+    # --------------------------------------------------
+    # Threshold used when counting per-point chi2 improvement
+    # (retained for compatibility with the original `n_out` logic).
+    sigma: float = 3.0
+    # Half-width of the evaluation window in units of teff:
+    #     window = [t0 - teff_coeff*teff, t0 + teff_coeff*teff]
+    teff_coeff: float = 3.0
+    # A grid point is evaluated only if at least this many data points
+    # fall inside its window.
+    min_pts_in_window: int = 4
+    # --------------------------------------------------
+    # 4) Cluster extraction
+    # --------------------------------------------------
+    # Multiplier of the overlap test that groups nearby grid points:
+    #     |t0_i - t0_j| < overlap_sigma * (teff_i + teff_j)
+    overlap_sigma: float = 3.0
+    # Cluster extraction stops once fewer grid points than this remain.
+    min_cluster_points: int = 3

jacscanomaly/extract.py ADDED Viewed

@@ -0,0 +1,134 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import List, Tuple
+import numpy as np
+@dataclass
+class ResultExtractor:
+    """
+    Cluster extractor for grid-scan candidates.
+    Given arrays of (t0, teff, delta_chi2) evaluated on a grid, this class groups
+    overlapping candidates and returns one representative (the maximum
+    delta_chi2 point) per cluster.
+    Overlap definition
+    ------------------
+    Two candidates i and j are considered overlapping if:
+        |t0_i - t0_j| < sigma_overlap * (teff_i + teff_j)
+    Attributes
+    ----------
+    sigma_overlap : float
+        Multiplier in the overlap criterion above.
+    min_points : int
+        Extraction stops once fewer than this many candidates remain unassigned.
+    Notes
+    -----
+    - This operates on CPU / NumPy arrays (no JAX).
+    - NaN delta_chi2 values are never selected as a cluster peak; they can still
+      be absorbed into the cluster of an overlapping peak.
+    - Returned `clusters` always has shape (K, 3) with rows
+      [t0_best, teff_best, dchi2_best], including K == 0.
+    """
+    sigma_overlap: float = 3.0
+    min_points: int = 3
+    def _overlap_with_max(
+        self,
+        t0: np.ndarray,
+        teff: np.ndarray,
+        dchi2: np.ndarray,
+    ) -> Tuple[np.ndarray, int]:
+        """
+        Compute the overlap mask around the current maximum dchi2 point.
+        NaN entries of `dchi2` are ignored when locating the maximum.
+        Returns
+        -------
+        overlap_mask : np.ndarray of bool, shape (N,)
+            Mask selecting points overlapping with the maximum point.
+        i_max : int
+            Index of the maximum point within the provided arrays.
+        Raises
+        ------
+        ValueError
+            If `dchi2` is empty or contains only NaN (raised by ``np.nanargmax``).
+        """
+        i_max = int(np.nanargmax(dchi2))
+        reach = self.sigma_overlap * (teff + teff[i_max])
+        overlap_mask = np.abs(t0 - t0[i_max]) < reach
+        return overlap_mask, i_max
+    def iterative_anomaly_extraction(
+        self,
+        t0_list,
+        teff_list,
+        dchi2_list,
+    ) -> np.ndarray:
+        """
+        Iteratively extract non-overlapping clusters from grid results.
+        On every pass the best remaining candidate is taken as a cluster peak,
+        all remaining candidates overlapping with it are assigned to that
+        cluster, and the whole cluster is removed from the pool.
+        Parameters
+        ----------
+        t0_list, teff_list, dchi2_list
+            1D arrays (or array-like) of equal length.
+        Returns
+        -------
+        clusters : np.ndarray, shape (K, 3)
+            Each row is [t0, teff, dchi2] for the best (max dchi2) point
+            in each extracted cluster, in order of extraction.
+            An empty result has shape (0, 3).
+        Raises
+        ------
+        ValueError
+            If the (non-empty) input arrays do not share the same shape.
+        Stopping conditions
+        -------------------
+        - No remaining candidates.
+        - The best remaining candidate is non-finite (NaN is ranked like -inf).
+        - Remaining candidate count drops below `min_points`.
+        """
+        t0 = np.asarray(t0_list, dtype=float)
+        teff = np.asarray(teff_list, dtype=float)
+        dchi2 = np.asarray(dchi2_list, dtype=float)
+        if t0.size == 0:
+            return np.zeros((0, 3), dtype=float)
+        if not (t0.shape == teff.shape == dchi2.shape):
+            raise ValueError(
+                f"Input arrays must have the same shape, got "
+                f"t0={t0.shape}, teff={teff.shape}, dchi2={dchi2.shape}"
+            )
+        # np.argmax treats NaN as the maximum, which would make a single NaN
+        # abort the whole extraction. Rank NaN like -inf instead.
+        score = np.where(np.isnan(dchi2), -np.inf, dchi2)
+        clusters: List[List[float]] = []
+        remaining = np.ones_like(dchi2, dtype=bool)
+        while remaining.any():
+            # best candidate that has not been assigned to a cluster yet
+            i_best = int(np.argmax(np.where(remaining, score, -np.inf)))
+            if not np.isfinite(score[i_best]):
+                break
+            # overlap test on the compressed pool, then expanded to full length
+            pool = np.flatnonzero(remaining)
+            overlap_mask, _ = self._overlap_with_max(t0[pool], teff[pool], score[pool])
+            members = np.zeros_like(remaining)
+            members[pool[overlap_mask]] = True
+            # The strict inequality can exclude the peak itself (e.g. teff == 0);
+            # including it explicitly guarantees a non-empty cluster and progress.
+            members[i_best] = True
+            # the peak is by construction the best point of its own cluster
+            clusters.append([float(t0[i_best]), float(teff[i_best]), float(dchi2[i_best])])
+            remaining &= ~members
+            if int(remaining.sum()) < self.min_points:
+                break
+        # reshape keeps the documented (K, 3) layout even when K == 0
+        return np.asarray(clusters, dtype=float).reshape(-1, 3)

jacscanomaly/finder.py ADDED Viewed

@@ -0,0 +1,225 @@
+# jacscanomaly/finder.py
+from __future__ import annotations
+import logging
+from dataclasses import dataclass, field
+from typing import Optional
+import numpy as np
+import jax
+import jax.numpy as jnp
+from .config import FinderConfig
+from .pspl import PSPLFitter
+from .plot import AnomalyPlotter
+from .seasons import SeasonSplitter
+from .extract import ResultExtractor
+from .runner import SeasonGridRunner
+from .models import AnomalyResult, BestCandidate
+@dataclass
+class Finder:
+    """
+    Main entry point of jacscanomaly.
+    Finder performs:
+      1) PSPL fit on (time, flux, ferr)
+      2) season splitting
+      3) grid scan on PSPL residuals
+      4) cluster extraction
+      5) selection of the best anomaly candidate
+    Users typically call :meth:`run` and then pass the returned
+    :class:`jacscanomaly.models.AnomalyResult` to
+    :class:`jacscanomaly.plot.AnomalyPlotter`, or use the ``plot_*`` shortcuts,
+    which replay the result of the most recent :meth:`run`.
+    Attributes
+    ----------
+    config : FinderConfig
+        Hyperparameters of the search. Each Finder gets its own default instance.
+    fitter : PSPLFitter, optional
+        PSPL fitter; a default ``PSPLFitter()`` is created when None.
+    plotter : AnomalyPlotter, optional
+        Plot helper; a default ``AnomalyPlotter()`` is created when None.
+    runner : SeasonGridRunner
+        Built in ``__post_init__`` from ``config``; performs steps 2-4.
+    """
+    # default_factory creates a fresh FinderConfig for every Finder
+    config: FinderConfig = field(default_factory=FinderConfig)
+    # Allow dependency injection, but create defaults if None
+    fitter: Optional[PSPLFitter] = None
+    plotter: Optional[AnomalyPlotter] = None
+    def __post_init__(self) -> None:
+        """Create default collaborators, build the runner and reset the result cache."""
+        if self.fitter is None:
+            self.fitter = PSPLFitter()
+        if self.plotter is None:
+            self.plotter = AnomalyPlotter()
+        splitter = SeasonSplitter(gap=self.config.gap)
+        extractor = ResultExtractor(
+            sigma_overlap=self.config.overlap_sigma,
+            min_points=self.config.min_cluster_points,
+        )
+        self.runner = SeasonGridRunner(
+            splitter=splitter,
+            extractor=extractor,
+            config=self.config,
+        )
+        # Must exist before the first run(): _require_result() tests it against
+        # None to raise its RuntimeError. (A bare annotated name here would only
+        # create a local variable and leave the attribute undefined.)
+        self._last_result: Optional[AnomalyResult] = None
+    # ----------------------------
+    # Public APIs
+    # ----------------------------
+    def fit_pspl(self, time, flux, ferr, p0):
+        """
+        Convenience method: run PSPL fit only.
+        Inputs are validated and converted exactly as in :meth:`run`.
+        Returns
+        -------
+        PSPLFitResult
+            The PSPL fitting result (JAX arrays inside).
+        Raises
+        ------
+        ValueError
+            If time/flux/ferr fail the checks of :meth:`_to_arrays`.
+        """
+        # only the JAX views are needed for the fit
+        time_j, flux_j, ferr_j, p0_j = self._to_arrays(time, flux, ferr, p0)[:4]
+        return self.fitter.fit(time_j, flux_j, ferr_j, p0_j)
+    def run(
+        self,
+        time,
+        flux,
+        ferr,
+        p0,
+        *,
+        verbose: bool = True,
+        log: Optional[logging.Logger] = None,
+    ) -> AnomalyResult:
+        """
+        Run the full anomaly finding pipeline.
+        Parameters
+        ----------
+        time, flux, ferr : array-like
+            1D arrays. Stored in the output as NumPy arrays on CPU for fast plotting.
+        p0 : array-like
+            Initial PSPL parameters (t0, tE, u0).
+        verbose : bool, keyword-only
+            Forwarded unchanged to the season runner.
+        log : logging.Logger, optional, keyword-only
+            Forwarded unchanged to the season runner.
+        Returns
+        -------
+        AnomalyResult
+            Includes PSPL fit, residuals, per-season cluster summaries,
+            flattened clusters, and the best candidate (if any). The same
+            object is cached for the ``plot_*`` shortcuts.
+        Raises
+        ------
+        ValueError
+            If time/flux/ferr fail the checks of :meth:`_to_arrays`.
+        """
+        time_j, flux_j, ferr_j, p0_j, time_np, flux_np, ferr_np = self._to_arrays(time, flux, ferr, p0)
+        # 1) PSPL fit (JAX)
+        fit = self.fitter.fit(time_j, flux_j, ferr_j, p0_j)
+        residual_j = fit.residual
+        model_flux_j = fit.model_flux
+        # bring to CPU for plotting/analysis
+        residual_np, model_flux_np, chi2_dof = jax.device_get((residual_j, model_flux_j, fit.chi2_dof))
+        residual_np = np.asarray(residual_np, dtype=float)
+        model_flux_np = np.asarray(model_flux_np, dtype=float)
+        chi2_dof = float(chi2_dof)
+        # 2-4) season loop & grid scan & extraction
+        seasons, clusters_all = self.runner.run(
+            time_j=time_j,
+            residual_j=residual_j,
+            ferr_j=ferr_j,
+            time_np=time_np,
+            verbose=verbose,
+            log=log,
+        )
+        # 5) best candidate selection
+        best_obj = self._pick_best_candidate(clusters_all)
+        result = AnomalyResult(
+            time=time_np,
+            flux=flux_np,
+            ferr=ferr_np,
+            fit=fit,
+            residual=residual_np,
+            model_flux=model_flux_np,
+            chi2_dof=chi2_dof,
+            seasons=seasons,
+            clusters_all=clusters_all,
+            best=best_obj,
+        )
+        # cache for the plot sugar APIs
+        self._last_result = result
+        return result
+    # ----------------------------
+    # Internal helpers
+    # ----------------------------
+    def _to_arrays(self, time, flux, ferr, p0):
+        """
+        Convert inputs into both NumPy (CPU) and JAX arrays.
+        Returns
+        -------
+        tuple
+            ``(time_j, flux_j, ferr_j, p0_j, time_np, flux_np, ferr_np)``; the
+            ``*_j`` entries are JAX arrays, the ``*_np`` entries NumPy float arrays.
+            ``p0_j`` is cast to the dtype of ``time_j``.
+        Raises
+        ------
+        ValueError
+            If time/flux/ferr are not 1D, differ in length, contain non-finite
+            values, or if any ferr value is not strictly positive.
+        """
+        time_np = np.asarray(time, dtype=float)
+        flux_np = np.asarray(flux, dtype=float)
+        ferr_np = np.asarray(ferr, dtype=float)
+        columns = (time_np, flux_np, ferr_np)
+        if any(col.ndim != 1 for col in columns):
+            raise ValueError("time/flux/ferr must be 1D arrays.")
+        if not (len(time_np) == len(flux_np) == len(ferr_np)):
+            raise ValueError("time/flux/ferr must have the same length.")
+        if not all(np.isfinite(col).all() for col in columns):
+            raise ValueError("time/flux/ferr must be finite.")
+        if np.any(ferr_np <= 0):
+            raise ValueError("ferr must be positive.")
+        time_j = jnp.asarray(time_np)
+        flux_j = jnp.asarray(flux_np)
+        ferr_j = jnp.asarray(ferr_np)
+        p0_j = jnp.asarray(p0, dtype=time_j.dtype)
+        return time_j, flux_j, ferr_j, p0_j, time_np, flux_np, ferr_np
+    def _pick_best_candidate(self, clusters_all: np.ndarray) -> Optional[BestCandidate]:
+        """
+        Pick the single best candidate from flattened clusters and compute a standardized score.
+        Parameters
+        ----------
+        clusters_all : np.ndarray
+            Rows of [t0, teff, dchi2]; may be None or empty.
+        Returns
+        -------
+        BestCandidate or None
+            None when there is no cluster at all. Otherwise the row with the
+            largest dchi2. Its score is (dchi2_best - median) / std over the
+            *other* rows: NaN (together with the median and std) when fewer
+            than two other rows exist, and inf when their std is zero.
+        """
+        if clusters_all is None or clusters_all.size == 0 or clusters_all.shape[0] < 1:
+            return None
+        # clusters_all rows: [t0, teff, dchi2]
+        max_ind = int(np.argmax(clusters_all[:, 2]))
+        best = clusters_all[max_ind]
+        others = np.delete(clusters_all, max_ind, axis=0)
+        if others.shape[0] >= 2:
+            med = float(np.median(others[:, 2]))
+            std = float(np.std(others[:, 2]))
+            score = float((best[2] - med) / std) if std > 0 else float("inf")
+        else:
+            # too few comparison points for a meaningful median/std
+            med, std, score = float("nan"), float("nan"), float("nan")
+        return BestCandidate(
+            t0=float(best[0]),
+            teff=float(best[1]),
+            dchi2=float(best[2]),
+            med_others=med,
+            std_others=std,
+            score=score,
+        )
+    # ----------------------------
+    # Plot sugar APIs
+    # ----------------------------
+    def _require_result(self) -> AnomalyResult:
+        """Return the cached result of the last :meth:`run`.
+        Raises
+        ------
+        RuntimeError
+            If :meth:`run` has not been called on this Finder yet.
+        """
+        if self._last_result is None:
+            raise RuntimeError("Finder.run() has not been called yet.")
+        return self._last_result
+    def plot_lc(self, **kwargs):
+        """
+        Plot light curve with PSPL model using the last result.
+        Keyword arguments are passed through to ``AnomalyPlotter.plot_lc``.
+        """
+        result = self._require_result()
+        return self.plotter.plot_lc(result, **kwargs)
+    def plot_residual(self, **kwargs):
+        """
+        Plot residuals using the last result.
+        Keyword arguments are passed through to ``AnomalyPlotter.plot_residual``.
+        """
+        result = self._require_result()
+        return self.plotter.plot_residual(result, **kwargs)
+    def plot_anomaly_window(self, **kwargs):
+        """
+        Plot residuals around the best anomaly window.
+        Keyword arguments are passed through to ``AnomalyPlotter.plot_anomaly_window``.
+        """
+        result = self._require_result()
+        return self.plotter.plot_anomaly_window(result, **kwargs)
+    def plot_result(self, **kwargs):
+        """
+        Full 3-panel diagnostic plot.
+        Keyword arguments are passed through to ``AnomalyPlotter.plot_result``.
+        """
+        result = self._require_result()
+        return self.plotter.plot_result(result, **kwargs)

jacscanomaly/models.py ADDED Viewed

@@ -0,0 +1,107 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import List, Optional
+import numpy as np
+from .pspl import PSPLFitResult
+@dataclass(frozen=True)
+class BestCandidate:
+    """
+    The strongest anomaly candidate among all extracted clusters (immutable).
+    The first three fields describe the candidate itself; the last three put its
+    dchi2 in context of every other candidate:
+        score = (dchi2 - med_others) / std_others
+    The score may be NaN or inf, depending on the number of candidates and on
+    std_others.
+    """
+    # --- the candidate itself ---
+    t0: float  # center time
+    teff: float  # effective timescale
+    dchi2: float  # chi-square improvement, chi2_null - chi2_anom (larger is better)
+    # --- statistics over all other candidates (the best one excluded) ---
+    med_others: float  # median of their dchi2
+    std_others: float  # standard deviation of their dchi2
+    score: float  # standardized score of the best candidate
+@dataclass(frozen=True)
+class SeasonSummary:
+    """
+    Outcome of the anomaly scan for one season (immutable record).
+    `clusters` holds the clusters extracted in this season as an array of
+    shape (K, 3) whose rows are [t0, teff, dchi2].
+    """
+    season_idx: int  # 0-based index of the season
+    t_start: float  # start of the season's time range
+    t_end: float  # end of the season's time range
+    n_grid: int  # number of grid points evaluated in this season
+    clusters: np.ndarray  # extracted clusters, shape (K, 3)
+@dataclass(frozen=True)
+class AnomalyResult:
+    """
+    Immutable result object returned by :meth:`jacscanomaly.finder.Finder.run`.
+    It bundles everything needed for plotting and downstream analysis; all
+    array fields are NumPy arrays kept on CPU. The fields fall into four groups:
+    input light curve, PSPL fit, per-season grid/cluster results, and the
+    overall best candidate.
+    """
+    # ---- input light curve (1D CPU arrays, convenient for fast plotting) ----
+    time: np.ndarray
+    flux: np.ndarray
+    ferr: np.ndarray
+    # ---- PSPL fit ----
+    # Full fitting result (params, fs, fb, chi2, model_flux, residual, etc.).
+    fit: PSPLFitResult
+    # Flux residuals on CPU: flux - model_flux.
+    residual: np.ndarray
+    # PSPL model flux on CPU.
+    model_flux: np.ndarray
+    # Reduced chi-square of the PSPL fit.
+    chi2_dof: float
+    # ---- grid / clusters ----
+    # One summary per season, each carrying that season's clusters.
+    seasons: List[SeasonSummary]
+    # Clusters of all seasons flattened together, shape (N, 3), rows [t0, teff, dchi2].
+    clusters_all: np.ndarray
+    # ---- best candidate ----
+    # Best candidate over all clusters, or None if no candidate exists.
+    best: Optional[BestCandidate]