isgri 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
isgri/utils/quality.py CHANGED
@@ -1,182 +1,389 @@
1
- import numpy as np
2
- from .lightcurve import LightCurve
3
-
4
-
5
- class QualityMetrics:
6
- """
7
- A class for computing statistical quality metrics for ISGRI lightcurves.
8
-
9
- Attributes:
10
- lightcurve (LightCurve): The LightCurve instance to analyze.
11
- binsize (float): The bin size in seconds.
12
- emin (float): The minimum energy value in keV.
13
- emax (float): The maximum energy value in keV.
14
- local_time (bool): If True, uses local time. If False, uses IJD time. GTIs are always in IJD.
15
- module_data (dict): Cached rebinned data for all modules.
16
-
17
- Methods:
18
- raw_chi_squared: Computes raw reduced chi-squared.
19
- sigma_clip_chi_squared: Computes sigma-clipped reduced chi-squared.
20
- gti_chi_squared: Computes GTI-filtered reduced chi-squared.
21
- """
22
-
23
- def __init__(self, lightcurve: LightCurve | None = None, binsize=1.0, emin=1.0, emax=1000.0, local_time=False):
24
- """
25
- Initialize QualityMetrics instance.
26
-
27
- Args:
28
- lightcurve (LightCurve, optional): The LightCurve instance to analyze. Defaults to None.
29
- binsize (float, optional): The bin size in seconds. Defaults to 1.0.
30
- emin (float, optional): The minimum energy value in keV. Defaults to 1.0.
31
- emax (float, optional): The maximum energy value in keV. Defaults to 1000.0.
32
- local_time (bool, optional): If True, uses local time. If False, uses IJD time. Defaults to False.
33
-
34
- Raises:
35
- TypeError: If lightcurve is not a LightCurve instance or None.
36
- """
37
- if type(lightcurve) not in [LightCurve, type(None)]:
38
- raise TypeError("lightcurve must be an instance of LightCurve or None")
39
- self.lightcurve = lightcurve
40
- self.binsize = binsize
41
- self.emin = emin
42
- self.emax = emax
43
- self.local_time = local_time
44
- self.module_data = None
45
-
46
- def _compute_counts(self):
47
- """
48
- Compute or retrieve cached rebinned counts for all modules.
49
-
50
- Args:
51
- None
52
- Returns:
53
- dict: Dictionary with 'time' and 'counts' arrays.
54
-
55
- Raises:
56
- ValueError: If lightcurve is not set.
57
- """
58
- if self.lightcurve is None:
59
- raise ValueError("Lightcurve is not set.")
60
- if self.module_data is not None:
61
- return self.module_data
62
- time, counts = self.lightcurve.rebin_by_modules(
63
- binsize=self.binsize, emin=self.emin, emax=self.emax, local_time=self.local_time
64
- )
65
- module_data = {"time": time, "counts": np.asarray(counts)}
66
- self.module_data = module_data
67
- return module_data
68
-
69
- def _compute_chi_squared_red(self, counts, return_all=False):
70
- """
71
- Compute reduced chi-squared for count data.
72
-
73
- Args:
74
- counts (ndarray): Count array(s) to analyze. Shape: (n_modules, n_bins) or (n_bins,)
75
- return_all (bool, optional): If True, returns detailed results. If False, returns weighted mean. Defaults to False.
76
-
77
- Returns:
78
- If return_all=False:
79
- float: Weighted mean chi-squared (weighted by total counts per module)
80
- If return_all=True:
81
- tuple: (chi_squared, dof, no_counts) where:
82
- - chi_squared: Raw chi-squared values per module
83
- - dof: Degrees of freedom per module (n_bins - 1 excluding NaN)
84
- - no_counts: Total counts per module
85
- """
86
- counts = np.asarray(counts)
87
- counts = np.where(counts == 0, np.nan, counts)
88
- mean_counts = np.nanmean(counts, axis=-1, keepdims=True)
89
- chi_squared = np.nansum((counts - mean_counts) ** 2 / mean_counts, axis=-1)
90
-
91
- # DOF = number of non-empty bins minus 1
92
- nan_mask = ~np.isnan(counts)
93
- dof = np.sum(nan_mask, axis=-1) - 1
94
- no_counts = np.nansum(counts, axis=-1)
95
-
96
- if return_all:
97
- return chi_squared, dof, no_counts
98
-
99
- if np.sum(no_counts) == 0 or np.all(dof <= 0):
100
- return np.nan
101
-
102
- # Weight by total counts per module
103
- return np.average(chi_squared / dof, weights=no_counts)
104
-
105
- def raw_chi_squared(self, counts=None, return_all=False):
106
- """
107
- Computes raw reduced chi-squared for lightcurve data.
108
-
109
- Args:
110
- counts (ndarray, optional): Count array(s) to analyze. If None, uses cached module data. Defaults to None.
111
- return_all (bool, optional): If True, returns chi-squared for each module. If False, returns mean. Defaults to False.
112
-
113
- Returns:
114
- float or ndarray: Reduced chi-squared value(s).
115
-
116
- Examples:
117
- >>> qm = QualityMetrics(lc, binsize=1.0, emin=30, emax=300)
118
- >>> chi = qm.raw_chi_squared()
119
- >>> chi_all_modules = qm.raw_chi_squared(return_all=True)
120
- """
121
- if counts is None:
122
- counts = self._compute_counts()["counts"]
123
- return self._compute_chi_squared_red(counts, return_all=return_all)
124
-
125
- def sigma_clip_chi_squared(self, sigma=1.0, counts=None, return_all=False):
126
- """
127
- Computes sigma-clipped reduced chi-squared for lightcurve data.
128
-
129
- Args:
130
- sigma (float, optional): Sigma clipping threshold in standard deviations. Defaults to 1.0.
131
- counts (ndarray, optional): Count array(s) to analyze. If None, uses cached module data. Defaults to None.
132
- return_all (bool, optional): If True, returns chi-squared for each module. If False, returns mean. Defaults to False.
133
-
134
- Returns:
135
- float or ndarray: Reduced chi-squared value(s) after sigma clipping.
136
-
137
- Examples:
138
- >>> qm = QualityMetrics(lc, binsize=1.0, emin=30, emax=300)
139
- >>> chi = qm.sigma_clip_chi_squared(sigma=3.0)
140
- """
141
- if counts is None:
142
- counts = self._compute_counts()["counts"]
143
- mean_count = np.mean(counts, axis=-1, keepdims=True)
144
- std_count = np.std(counts, axis=-1, keepdims=True)
145
- mask = np.abs(counts - mean_count) < sigma * std_count
146
- filtered_counts = np.where(mask, counts, np.nan)
147
- return self._compute_chi_squared_red(filtered_counts, return_all=return_all)
148
-
149
- def gti_chi_squared(self, time=None, counts=None, gtis=None, return_all=False):
150
- """
151
- Computes GTI-filtered reduced chi-squared for lightcurve data.
152
-
153
- Args:
154
- time (ndarray, optional): Time array. If None, uses cached module data. Defaults to None.
155
- counts (ndarray, optional): Count array(s) to analyze. If None, uses cached module data. Defaults to None.
156
- gtis (ndarray, optional): Good Time Intervals (N, 2) array. If None, uses lightcurve GTIs. Defaults to None.
157
- return_all (bool, optional): If True, returns chi-squared for each module. If False, returns mean. Defaults to False.
158
-
159
- Returns:
160
- float or ndarray: Reduced chi-squared value(s) within GTIs only.
161
-
162
- Raises:
163
- ValueError: If no overlap between GTIs and lightcurve time range.
164
-
165
- Examples:
166
- >>> qm = QualityMetrics(lc, binsize=1.0, emin=30, emax=300)
167
- >>> chi = qm.gti_chi_squared()
168
- """
169
- if counts is None or time is None:
170
- data = self._compute_counts()
171
- time, counts = data["time"], data["counts"]
172
- if gtis is None:
173
- gtis = self.lightcurve.gtis
174
- if gtis[0, 0] > time[-1] or gtis[-1, 1] < time[0]:
175
- raise ValueError(
176
- "No overlap between GTIs and lightcurve time. If Lightcurve is set, verify time is in IJD."
177
- )
178
- gti_mask = np.zeros_like(time, dtype=bool)
179
- for gti_start, gti_stop in gtis:
180
- gti_mask |= (time >= gti_start) & (time <= gti_stop)
181
- filtered_counts = np.where(gti_mask, counts, np.nan)
182
- return self._compute_chi_squared_red(filtered_counts, return_all=return_all)
1
+ """
2
+ ISGRI Data Quality Metrics
3
+ ===========================
4
+
5
+ Statistical quality metrics for INTEGRAL/ISGRI light curves.
6
+
7
+ The main metric is reduced chi-squared (chisq/dof), which tests whether
8
+ count rates are consistent with Poisson statistics. Values near 1.0
9
+ indicate stable background and no variable sources.
10
+
11
+ Classes
12
+ -------
13
+ QualityMetrics : Compute chi-squared metrics for light curves
14
+
15
+ Examples
16
+ --------
17
+ >>> from isgri.utils import LightCurve, QualityMetrics
18
+ >>>
19
+ >>> # Load light curve
20
+ >>> lc = LightCurve.load_data("events.fits")
21
+ >>>
22
+ >>> # Compute quality metrics
23
+ >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
24
+ >>> chi = qm.raw_chi_squared()
25
+ >>> print(f"Raw chisq/dof = {chi:.2f}")
26
+ >>>
27
+ >>> # Sigma-clipped (removes outliers)
28
+ >>> chi_clip = qm.sigma_clip_chi_squared(sigma=3.0)
29
+ >>> print(f"Clipped chisq/dof = {chi_clip:.2f}")
30
+ >>>
31
+ >>> # GTI-filtered (only good time intervals)
32
+ >>> chi_gti = qm.gti_chi_squared()
33
+ >>> print(f"GTI chisq/dof = {chi_gti:.2f}")
34
+
35
+ """
36
+
37
+ import numpy as np
38
+ from numpy.typing import NDArray
39
+ from typing import Optional, Tuple, Union
40
+ from .lightcurve import LightCurve
41
+
42
+
43
class QualityMetrics:
    """
    Compute statistical quality metrics for ISGRI light curves.

    The metric is the reduced chi-squared (chisq/dof) of each module's binned
    counts against that module's own mean. Per-module values are combined
    into one number as a mean weighted by the total counts of each module.

    Parameters
    ----------
    lightcurve : LightCurve, optional
        LightCurve instance to analyze
    binsize : float, default 1.0
        Bin size in seconds
    emin : float, default 1.0
        Minimum energy in keV
    emax : float, default 1000.0
        Maximum energy in keV
    local_time : bool, default False
        If True, use local time (seconds from T0).
        If False, use IJD time. GTIs are always in IJD.

    Attributes
    ----------
    module_data : dict or None
        Cached rebinned data {'time': array, 'counts': array}. The cache is
        rebuilt when the light curve or any binning setting has changed
        since it was filled.

    Raises
    ------
    TypeError
        If lightcurve is neither None nor a LightCurve instance
    ValueError
        If binsize is not positive, emin is not below emax, or emin is negative

    Examples
    --------
    >>> lc = LightCurve.load_data("events.fits")
    >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
    >>>
    >>> raw_chi = qm.raw_chi_squared()
    >>> clip_chi = qm.sigma_clip_chi_squared(sigma=3.0)
    >>> gti_chi = qm.gti_chi_squared()
    >>>
    >>> print(f"Raw: {raw_chi:.2f}, Clipped: {clip_chi:.2f}, GTI: {gti_chi:.2f}")

    See Also
    --------
    raw_chi_squared : Basic chi-squared test
    sigma_clip_chi_squared : Remove outliers before testing
    gti_chi_squared : Test only good time intervals
    """

    def __init__(
        self,
        lightcurve: Optional["LightCurve"] = None,
        binsize: float = 1.0,
        emin: float = 1.0,
        emax: float = 1000.0,
        local_time: bool = False,
    ) -> None:
        """Validate the settings and store them; no data is rebinned yet."""
        if lightcurve is not None and not isinstance(lightcurve, LightCurve):
            raise TypeError(f"lightcurve must be LightCurve instance or None, got {type(lightcurve)}")

        # The checks are written as "not (good condition)" so that NaN,
        # which fails every comparison, is rejected as well.
        if not binsize > 0:
            raise ValueError(f"binsize must be positive, got {binsize}")

        if not emin < emax:
            raise ValueError(f"emin ({emin}) must be less than emax ({emax})")

        if emin < 0:
            raise ValueError(f"emin must be non-negative, got {emin}")

        self.lightcurve = lightcurve
        self.binsize = binsize
        self.emin = emin
        self.emax = emax
        self.local_time = local_time
        self.module_data: Optional[dict] = None
        # (lightcurve, settings) the cache was built from; None = unknown.
        self._cache_signature: Optional[tuple] = None

    def __repr__(self) -> str:
        """Return a short summary of the binning settings."""
        # Identity test: a light curve object may define __len__/__bool__,
        # and a falsy-but-present light curve must still show up as "set".
        lc_state = "set" if self.lightcurve is not None else "None"
        return (
            f"QualityMetrics(binsize={self.binsize}s, "
            f"energy=({self.emin:.1f}-{self.emax:.1f}) keV, "
            f"lightcurve={lc_state})"
        )

    def _compute_counts(self) -> dict:
        """
        Compute or retrieve cached rebinned counts for all modules.

        Returns
        -------
        dict
            Dictionary with 'time' (ndarray) and 'counts' (ndarray, shape (8, n_bins))

        Raises
        ------
        ValueError
            If lightcurve is not set
        """
        if self.lightcurve is None:
            raise ValueError("Lightcurve must be set before computing counts")

        settings = (self.binsize, self.emin, self.emax, self.local_time)

        # Reuse the cache only while it still describes the current light
        # curve and settings. A missing signature (module_data assigned by
        # hand, or __init__ bypassed) is trusted, as it always was.
        built_from = getattr(self, "_cache_signature", None)
        cache_is_current = built_from is None or (
            built_from[0] is self.lightcurve and built_from[1] == settings
        )
        if self.module_data is not None and cache_is_current:
            return self.module_data

        time, counts = self.lightcurve.rebin_by_modules(
            binsize=self.binsize,
            emin=self.emin,
            emax=self.emax,
            local_time=self.local_time,
        )

        self.module_data = {
            "time": time,
            "counts": np.asarray(counts),  # Shape: (8, n_bins)
        }
        self._cache_signature = (self.lightcurve, settings)

        return self.module_data

    def _compute_chi_squared_red(
        self,
        counts: NDArray[np.float64],
        return_all: bool = False,
    ) -> Union[float, Tuple[NDArray[np.float64], NDArray[np.int64], NDArray[np.float64]]]:
        """
        Compute reduced chi-squared for count data.

        Parameters
        ----------
        counts : array_like
            Count array(s). Shape: (n_modules, n_bins) or (n_bins,)
        return_all : bool, default False
            If True, return (chi_squared, dof, total_counts) per module.
            If False, return weighted mean chi-squared.

        Returns
        -------
        chi_squared_red : float
            Weighted mean of chisq/dof across modules (if return_all=False).
            NaN when there are no counts at all or no module has dof > 0.
        chi_squared, dof, total_counts : tuple of ndarrays
            Per-module statistics (if return_all=True)

        Notes
        -----
        - Empty bins (counts=0) and NaN bins are excluded
        - DOF = (number of non-empty bins) - 1
        - Modules with DOF <= 0 are left out of the weighted mean
        - Weighting by total counts gives more influence to active modules
        """
        counts = np.asarray(counts, dtype=float)

        # Keep only bins holding a real, non-zero count; everything else
        # becomes NaN so the nan-aware reductions below skip it.
        filled = ~np.isnan(counts) & (counts != 0)
        counts = np.where(filled, counts, np.nan)

        n_filled = np.sum(filled, axis=-1)
        total_counts = np.nansum(counts, axis=-1)
        dof = n_filled - 1

        # The mean is total / n_filled (what nanmean computes). Doing the
        # division here lets errstate silence the 0/0 of a module with no
        # filled bins, instead of nanmean warning about an empty slice.
        with np.errstate(divide="ignore", invalid="ignore"):
            mean_counts = np.expand_dims(total_counts / n_filled, axis=-1)
            chi_squared = np.nansum((counts - mean_counts) ** 2 / mean_counts, axis=-1)

        if return_all:
            return chi_squared, dof, total_counts

        # atleast_1d makes the 1-D input case (scalar statistics) follow
        # the same masking path as the per-module case.
        usable = np.atleast_1d(dof > 0)
        if np.sum(total_counts) == 0 or not usable.any():
            return np.nan

        reduced = np.atleast_1d(chi_squared)[usable] / np.atleast_1d(dof)[usable]
        weights = np.atleast_1d(total_counts)[usable]

        return float(np.average(reduced, weights=weights))

    def raw_chi_squared(
        self,
        counts: Optional[NDArray[np.float64]] = None,
        return_all: bool = False,
    ) -> Union[float, Tuple[NDArray[np.float64], NDArray[np.int64], NDArray[np.float64]]]:
        """
        Compute raw reduced chi-squared (no filtering).

        Parameters
        ----------
        counts : array_like, optional
            Count array(s) to analyze. If None, uses cached module data.
        return_all : bool, default False
            If True, return per-module results. If False, return weighted mean.

        Returns
        -------
        chi_squared_red : float
            Reduced chi-squared (chisq/dof), if return_all=False
        chi_squared, dof, total_counts : tuple
            Per-module statistics, if return_all=True

        Raises
        ------
        ValueError
            If counts is None and no lightcurve is set

        Examples
        --------
        >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
        >>> chi = qm.raw_chi_squared()
        >>> print(f"chisq/dof = {chi:.2f}")

        >>> # Get per-module results
        >>> chi_vals, dof, counts = qm.raw_chi_squared(return_all=True)
        >>> for i, (c, d) in enumerate(zip(chi_vals, dof)):
        ...     print(f"Module {i}: chisq = {c:.1f}, dof = {d}")
        """
        if counts is None:
            counts = self._compute_counts()["counts"]

        return self._compute_chi_squared_red(counts, return_all=return_all)

    def sigma_clip_chi_squared(
        self,
        sigma: float = 3.0,
        counts: Optional[NDArray[np.float64]] = None,
        return_all: bool = False,
    ) -> Union[float, Tuple[NDArray[np.float64], NDArray[np.int64], NDArray[np.float64]]]:
        """
        Compute sigma-clipped reduced chi-squared.

        Bins lying more than ``sigma`` standard deviations from their
        module's mean are dropped (single pass) before chi-squared is
        computed on what remains.

        Parameters
        ----------
        sigma : float, default 3.0
            Sigma clipping threshold in standard deviations
        counts : array_like, optional
            Count array(s) to analyze. If None, uses cached module data.
        return_all : bool, default False
            If True, return per-module results.

        Returns
        -------
        chi_squared_red : float
            Reduced chi-squared after clipping outliers, if return_all=False
        chi_squared, dof, total_counts : tuple
            Per-module statistics, if return_all=True

        Raises
        ------
        ValueError
            If sigma is not positive, or counts is None and no lightcurve is set

        Examples
        --------
        >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
        >>> chi_3sig = qm.sigma_clip_chi_squared(sigma=3.0)
        >>> chi_1sig = qm.sigma_clip_chi_squared(sigma=1.0)
        >>> print(f"3sigma: {chi_3sig:.2f}, 1sigma: {chi_1sig:.2f}")

        Notes
        -----
        Zero-count bins still take part in the clipping mean and standard
        deviation here, although they are excluded from the chi-squared
        computed afterwards.
        """
        if not sigma > 0:  # also rejects NaN
            raise ValueError(f"sigma must be positive, got {sigma}")

        if counts is None:
            counts = self._compute_counts()["counts"]
        counts = np.asarray(counts, dtype=float)

        # Clipping statistics per module
        mean_count = np.nanmean(counts, axis=-1, keepdims=True)
        std_count = np.nanstd(counts, axis=-1, keepdims=True)

        # "<=" rather than "<": with a constant light curve std is 0 and a
        # strict comparison would throw away every bin, yielding NaN
        # instead of a chi-squared of 0.
        keep = np.abs(counts - mean_count) <= sigma * std_count
        filtered_counts = np.where(keep, counts, np.nan)

        return self._compute_chi_squared_red(filtered_counts, return_all=return_all)

    def gti_chi_squared(
        self,
        time: Optional[NDArray[np.float64]] = None,
        counts: Optional[NDArray[np.float64]] = None,
        gtis: Optional[NDArray[np.float64]] = None,
        return_all: bool = False,
    ) -> Union[float, Tuple[NDArray[np.float64], NDArray[np.int64], NDArray[np.float64]]]:
        """
        Compute GTI-filtered reduced chi-squared.

        Only bins whose time stamp falls inside a Good Time Interval
        (bounds inclusive) contribute to the statistic.

        Parameters
        ----------
        time : array_like, optional
            Time array. If None, uses cached module data.
        counts : array_like, optional
            Count array(s). If None, uses cached module data.
            time and counts must be given together; if either is None,
            both are taken from the cached module data.
        gtis : array_like, optional
            Good Time Intervals, shape (N, 2), in IJD. A single
            (start, stop) pair is accepted too. Order does not matter.
            If None, uses lightcurve.gtis.
        return_all : bool, default False
            If True, return per-module results.

        Returns
        -------
        chi_squared_red : float
            Reduced chi-squared within GTIs only, if return_all=False
        chi_squared, dof, total_counts : tuple
            Per-module statistics, if return_all=True

        Raises
        ------
        ValueError
            If data or GTIs are needed but no lightcurve is set, if time is
            empty, if gtis is not a non-empty (N, 2) array, or if the GTIs
            and the time range do not overlap

        Examples
        --------
        >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
        >>> chi_gti = qm.gti_chi_squared()
        >>>
        >>> custom_gtis = np.array([[3000.0, 3100.0], [3200.0, 3300.0]])
        >>> chi_custom = qm.gti_chi_squared(gtis=custom_gtis)

        Notes
        -----
        GTIs are always in IJD format, regardless of local_time setting.
        Time array must be converted to IJD for comparison.
        """
        if counts is None or time is None:
            data = self._compute_counts()
            time, counts = data["time"], data["counts"]

        if gtis is None:
            if self.lightcurve is None:
                raise ValueError("Must provide gtis or set lightcurve")
            gtis = self.lightcurve.gtis

        # Normalise inputs so lists, a lone (start, stop) pair and
        # unsorted intervals all behave the same way.
        time = np.asarray(time, dtype=float)
        gtis = np.atleast_2d(np.asarray(gtis, dtype=float))

        if time.size == 0:
            raise ValueError("time array is empty")
        if gtis.ndim != 2 or gtis.shape[0] == 0 or gtis.shape[1] != 2:
            raise ValueError(f"gtis must be a non-empty (N, 2) array, got shape {gtis.shape}")

        # Compare envelopes; min/max instead of first/last element so the
        # check does not depend on the inputs being sorted.
        gti_first, gti_last = gtis[:, 0].min(), gtis[:, 1].max()
        t_first, t_last = time.min(), time.max()
        if gti_first > t_last or gti_last < t_first:
            raise ValueError(
                f"No overlap between GTIs ({gti_first:.1f}-{gti_last:.1f}) "
                f"and time range ({t_first:.1f}-{t_last:.1f}). "
                "Verify time is in IJD format."
            )

        # A bin is good when it lies inside at least one interval.
        gti_mask = np.zeros(time.shape, dtype=bool)
        for gti_start, gti_stop in gtis:
            gti_mask |= (time >= gti_start) & (time <= gti_stop)

        # Bins outside every GTI become NaN and are ignored downstream.
        filtered_counts = np.where(gti_mask, counts, np.nan)

        return self._compute_chi_squared_red(filtered_counts, return_all=return_all)