isgri 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
isgri/utils/quality.py CHANGED
@@ -1,166 +1,389 @@
1
+ """
2
+ ISGRI Data Quality Metrics
3
+ ===========================
4
+
5
+ Statistical quality metrics for INTEGRAL/ISGRI light curves.
6
+
7
+ The main metric is reduced chi-squared (chisq/dof), which tests whether
8
+ count rates are consistent with Poisson statistics. Values near 1.0
9
+ indicate stable background and no variable sources.
10
+
11
+ Classes
12
+ -------
13
+ QualityMetrics : Compute chi-squared metrics for light curves
14
+
15
+ Examples
16
+ --------
17
+ >>> from isgri.utils import LightCurve, QualityMetrics
18
+ >>>
19
+ >>> # Load light curve
20
+ >>> lc = LightCurve.load_data("events.fits")
21
+ >>>
22
+ >>> # Compute quality metrics
23
+ >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
24
+ >>> chi = qm.raw_chi_squared()
25
+ >>> print(f"Raw chisq/dof = {chi:.2f}")
26
+ >>>
27
+ >>> # Sigma-clipped (removes outliers)
28
+ >>> chi_clip = qm.sigma_clip_chi_squared(sigma=3.0)
29
+ >>> print(f"Clipped chisq/dof = {chi_clip:.2f}")
30
+ >>>
31
+ >>> # GTI-filtered (only good time intervals)
32
+ >>> chi_gti = qm.gti_chi_squared()
33
+ >>> print(f"GTI chisq/dof = {chi_gti:.2f}")
34
+
35
+ """
36
+
1
37
  import numpy as np
38
+ from numpy.typing import NDArray
39
+ from typing import Optional, Tuple, Union
2
40
  from .lightcurve import LightCurve
3
41
 
4
42
 
5
43
  class QualityMetrics:
6
44
  """
7
- A class for computing statistical quality metrics for ISGRI lightcurves.
8
-
9
- Attributes:
10
- lightcurve (LightCurve): The LightCurve instance to analyze.
11
- binsize (float): The bin size in seconds.
12
- emin (float): The minimum energy value in keV.
13
- emax (float): The maximum energy value in keV.
14
- local_time (bool): If True, uses local time. If False, uses IJD time. GTIs are always in IJD.
15
- module_data (dict): Cached rebinned data for all modules.
16
-
17
- Methods:
18
- raw_chi_squared: Computes raw reduced chi-squared.
19
- sigma_clip_chi_squared: Computes sigma-clipped reduced chi-squared.
20
- gti_chi_squared: Computes GTI-filtered reduced chi-squared.
45
+ Compute statistical quality metrics for ISGRI light curves.
46
+
47
+ Uses module-by-module light curves to compute chi-squared statistics.
48
+ Results are weighted by total counts per module.
49
+
50
+ Parameters
51
+ ----------
52
+ lightcurve : LightCurve, optional
53
+ LightCurve instance to analyze
54
+ binsize : float, default 1.0
55
+ Bin size in seconds
56
+ emin : float, default 1.0
57
+ Minimum energy in keV
58
+ emax : float, default 1000.0
59
+ Maximum energy in keV
60
+ local_time : bool, default False
61
+ If True, use local time (seconds from T0).
62
+ If False, use IJD time. GTIs are always in IJD.
63
+
64
+ Attributes
65
+ ----------
66
+ module_data : dict or None
67
+ Cached rebinned data {'time': array, 'counts': array}
68
+
69
+ Examples
70
+ --------
71
+ >>> lc = LightCurve.load_data("events.fits")
72
+ >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
73
+ >>>
74
+ >>> # Compute various chi-squared metrics
75
+ >>> raw_chi = qm.raw_chi_squared()
76
+ >>> clip_chi = qm.sigma_clip_chi_squared(sigma=3.0)
77
+ >>> gti_chi = qm.gti_chi_squared()
78
+ >>>
79
+ >>> print(f"Raw: {raw_chi:.2f}, Clipped: {clip_chi:.2f}, GTI: {gti_chi:.2f}")
80
+
81
+ See Also
82
+ --------
83
+ raw_chi_squared : Basic chi-squared test
84
+ sigma_clip_chi_squared : Remove outliers before testing
85
+ gti_chi_squared : Test only good time intervals
21
86
  """
22
87
 
23
- def __init__(self, lightcurve: LightCurve | None = None, binsize=1.0, emin=1.0, emax=1000.0, local_time=False):
24
- """
25
- Initialize QualityMetrics instance.
88
+ def __init__(
89
+ self,
90
+ lightcurve: Optional[LightCurve] = None,
91
+ binsize: float = 1.0,
92
+ emin: float = 1.0,
93
+ emax: float = 1000.0,
94
+ local_time: bool = False,
95
+ ) -> None:
96
+ """Initialize QualityMetrics instance."""
97
+ if lightcurve is not None and not isinstance(lightcurve, LightCurve):
98
+ raise TypeError(f"lightcurve must be LightCurve instance or None, got {type(lightcurve)}")
26
99
 
27
- Args:
28
- lightcurve (LightCurve, optional): The LightCurve instance to analyze. Defaults to None.
29
- binsize (float, optional): The bin size in seconds. Defaults to 1.0.
30
- emin (float, optional): The minimum energy value in keV. Defaults to 1.0.
31
- emax (float, optional): The maximum energy value in keV. Defaults to 1000.0.
32
- local_time (bool, optional): If True, uses local time. If False, uses IJD time. Defaults to False.
100
+ if binsize <= 0:
101
+ raise ValueError(f"binsize must be positive, got {binsize}")
102
+
103
+ if emin >= emax:
104
+ raise ValueError(f"emin ({emin}) must be less than emax ({emax})")
105
+
106
+ if emin < 0:
107
+ raise ValueError(f"emin must be non-negative, got {emin}")
33
108
 
34
- Raises:
35
- TypeError: If lightcurve is not a LightCurve instance or None.
36
- """
37
- if type(lightcurve) not in [LightCurve, type(None)]:
38
- raise TypeError("lightcurve must be an instance of LightCurve or None")
39
109
  self.lightcurve = lightcurve
40
110
  self.binsize = binsize
41
111
  self.emin = emin
42
112
  self.emax = emax
43
113
  self.local_time = local_time
44
- self.module_data = None
114
+ self.module_data: Optional[dict] = None
45
115
 
46
- def _compute_counts(self):
116
+ def __repr__(self) -> str:
117
+ """String representation."""
118
+ return (
119
+ f"QualityMetrics(binsize={self.binsize}s, "
120
+ f"energy=({self.emin:.1f}-{self.emax:.1f}) keV, "
121
+ f"lightcurve={'set' if self.lightcurve else 'None'})"
122
+ )
123
+
124
+ def _compute_counts(self) -> dict:
47
125
  """
48
126
  Compute or retrieve cached rebinned counts for all modules.
49
127
 
50
- Args:
51
- None
52
- Returns:
53
- dict: Dictionary with 'time' and 'counts' arrays.
128
+ Returns
129
+ -------
130
+ dict
131
+ Dictionary with 'time' (ndarray) and 'counts' (ndarray, shape (8, n_bins))
54
132
 
55
- Raises:
56
- ValueError: If lightcurve is not set.
133
+ Raises
134
+ ------
135
+ ValueError
136
+ If lightcurve is not set
57
137
  """
58
138
  if self.lightcurve is None:
59
- raise ValueError("Lightcurve is not set.")
139
+ raise ValueError("Lightcurve must be set before computing counts")
140
+
60
141
  if self.module_data is not None:
61
142
  return self.module_data
143
+
62
144
  time, counts = self.lightcurve.rebin_by_modules(
63
- binsize=self.binsize, emin=self.emin, emax=self.emax, local_time=self.local_time
145
+ binsize=self.binsize,
146
+ emin=self.emin,
147
+ emax=self.emax,
148
+ local_time=self.local_time,
64
149
  )
65
- module_data = {"time": time, "counts": counts}
66
- self.module_data = module_data
67
- return module_data
68
150
 
69
- def _compute_chi_squared_red(self, counts, return_all=False):
151
+ self.module_data = {
152
+ "time": time,
153
+ "counts": np.asarray(counts), # Shape: (8, n_bins)
154
+ }
155
+
156
+ return self.module_data
157
+
158
+ def _compute_chi_squared_red(
159
+ self,
160
+ counts: NDArray[np.float64],
161
+ return_all: bool = False,
162
+ ) -> Union[float, Tuple[NDArray[np.float64], NDArray[np.int64], NDArray[np.float64]]]:
70
163
  """
71
164
  Compute reduced chi-squared for count data.
72
165
 
73
- Args:
74
- counts (ndarray): Count array(s) to analyze.
75
- return_all (bool, optional): If True, returns chi-squared for each array. If False, returns mean. Defaults to False.
166
+ Parameters
167
+ ----------
168
+ counts : ndarray
169
+ Count array(s). Shape: (n_modules, n_bins) or (n_bins,)
170
+ return_all : bool, default False
171
+ If True, return (chi_squared, dof, total_counts) per module.
172
+ If False, return weighted mean chi-squared.
76
173
 
77
- Returns:
78
- float or ndarray: Reduced chi-squared value(s).
174
+ Returns
175
+ -------
176
+ chi_squared_red : float
177
+ Weighted mean of chisq/dof across modules (if return_all=False)
178
+ chi_squared, dof, total_counts : tuple of ndarrays
179
+ Per-module statistics (if return_all=True)
180
+
181
+ Notes
182
+ -----
183
+ - Empty bins (counts=0) are treated as NaN and excluded
184
+ - DOF = (number of non-empty bins) - 1
185
+ - Weighting by total counts gives more influence to active modules
79
186
  """
80
187
  counts = np.asarray(counts)
188
+
189
+ # Replace zeros with NaN (exclude empty bins)
81
190
  counts = np.where(counts == 0, np.nan, counts)
191
+
192
+ # Compute mean and chi-squared per module
82
193
  mean_counts = np.nanmean(counts, axis=-1, keepdims=True)
83
194
  chi_squared = np.nansum((counts - mean_counts) ** 2 / mean_counts, axis=-1)
84
- dof = counts.shape[-1] - 1
195
+
196
+ # DOF = number of non-empty bins minus 1
197
+ nan_mask = ~np.isnan(counts)
198
+ dof = np.sum(nan_mask, axis=-1) - 1
199
+ total_counts = np.nansum(counts, axis=-1)
200
+
85
201
  if return_all:
86
- return chi_squared / dof
87
- return np.nanmean(chi_squared / dof)
202
+ return chi_squared, dof, total_counts
203
+
204
+ # Return weighted mean
205
+ if np.sum(total_counts) == 0 or np.all(dof <= 0):
206
+ return np.nan
207
+
208
+ # Weight by total counts (more counts = more reliable chi-squared)
209
+ valid_mask = dof > 0
210
+ chi_squared_red = chi_squared[valid_mask] / dof[valid_mask]
211
+ weights = total_counts[valid_mask]
212
+
213
+ return np.average(chi_squared_red, weights=weights)
88
214
 
89
- def raw_chi_squared(self, counts=None, return_all=False):
215
+ def raw_chi_squared(
216
+ self,
217
+ counts: Optional[NDArray[np.float64]] = None,
218
+ return_all: bool = False,
219
+ ) -> Union[float, Tuple[NDArray[np.float64], NDArray[np.int64], NDArray[np.float64]]]:
90
220
  """
91
- Computes raw reduced chi-squared for lightcurve data.
221
+ Compute raw reduced chi-squared (no filtering).
92
222
 
93
- Args:
94
- counts (ndarray, optional): Count array(s) to analyze. If None, uses cached module data. Defaults to None.
95
- return_all (bool, optional): If True, returns chi-squared for each module. If False, returns mean. Defaults to False.
223
+ Tests whether count rates are consistent with Poisson statistics.
224
+ Values near 1.0 indicate stable, constant background.
96
225
 
97
- Returns:
98
- float or ndarray: Reduced chi-squared value(s).
226
+ Parameters
227
+ ----------
228
+ counts : ndarray, optional
229
+ Count array(s) to analyze. If None, uses cached module data.
230
+ return_all : bool, default False
231
+ If True, return per-module results. If False, return weighted mean.
99
232
 
100
- Examples:
101
- >>> qm = QualityMetrics(lc, binsize=1.0, emin=30, emax=300)
102
- >>> chi = qm.raw_chi_squared()
103
- >>> chi_all_modules = qm.raw_chi_squared(return_all=True)
233
+ Returns
234
+ -------
235
+ chi_squared_red : float
236
+ Reduced chi-squared (chisq/dof)
237
+
238
+ Examples
239
+ --------
240
+ >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
241
+ >>> chi = qm.raw_chi_squared()
242
+ >>> print(f"chisq/dof = {chi:.2f}")
243
+
244
+ >>> # Get per-module results
245
+ >>> chi_vals, dof, counts = qm.raw_chi_squared(return_all=True)
246
+ >>> for i, (c, d) in enumerate(zip(chi_vals, dof)):
247
+ ... print(f"Module {i}: chisq = {c:.1f}, dof = {d}")
104
248
  """
105
249
  if counts is None:
106
250
  counts = self._compute_counts()["counts"]
251
+
107
252
  return self._compute_chi_squared_red(counts, return_all=return_all)
108
253
 
109
- def sigma_clip_chi_squared(self, sigma=1.0, counts=None, return_all=False):
254
+ def sigma_clip_chi_squared(
255
+ self,
256
+ sigma: float = 3.0,
257
+ counts: Optional[NDArray[np.float64]] = None,
258
+ return_all: bool = False,
259
+ ) -> Union[float, Tuple[NDArray[np.float64], NDArray[np.int64], NDArray[np.float64]]]:
110
260
  """
111
- Computes sigma-clipped reduced chi-squared for lightcurve data.
261
+ Compute sigma-clipped reduced chi-squared.
262
+
263
+ Removes outlier bins (>sigma standard deviations from mean)
264
+ before computing chi-squared. Useful for detecting transient
265
+ flares or background instabilities.
266
+
267
+ Parameters
268
+ ----------
269
+ sigma : float, default 3.0
270
+ Sigma clipping threshold in standard deviations
271
+ counts : ndarray, optional
272
+ Count array(s) to analyze. If None, uses cached module data.
273
+ return_all : bool, default False
274
+ If True, return per-module results.
112
275
 
113
- Args:
114
- sigma (float, optional): Sigma clipping threshold in standard deviations. Defaults to 1.0.
115
- counts (ndarray, optional): Count array(s) to analyze. If None, uses cached module data. Defaults to None.
116
- return_all (bool, optional): If True, returns chi-squared for each module. If False, returns mean. Defaults to False.
276
+ Returns
277
+ -------
278
+ chi_squared_red : float
279
+ Reduced chi-squared after clipping outliers
117
280
 
118
- Returns:
119
- float or ndarray: Reduced chi-squared value(s) after sigma clipping.
281
+ Examples
282
+ --------
283
+ >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
284
+ >>>
285
+ >>> # Conservative clipping (remove extreme outliers)
286
+ >>> chi_3sig = qm.sigma_clip_chi_squared(sigma=3.0)
287
+ >>>
288
+ >>> # Aggressive clipping (remove moderate outliers)
289
+ >>> chi_1sig = qm.sigma_clip_chi_squared(sigma=1.0)
290
+ >>>
291
+ >>> print(f"3sigma: {chi_3sig:.2f}, 1sigma: {chi_1sig:.2f}")
120
292
 
121
- Examples:
122
- >>> qm = QualityMetrics(lc, binsize=1.0, emin=30, emax=300)
123
- >>> chi = qm.sigma_clip_chi_squared(sigma=3.0)
293
+ Notes
294
+ -----
295
+ Lower chi-squared after clipping indicates presence of outliers
296
+ (flares, background jumps, etc.)
124
297
  """
298
+ if sigma <= 0:
299
+ raise ValueError(f"sigma must be positive, got {sigma}")
300
+
125
301
  if counts is None:
126
302
  counts = self._compute_counts()["counts"]
127
- mean_count = np.mean(counts, axis=-1, keepdims=True)
128
- std_count = np.std(counts, axis=-1, keepdims=True)
303
+
304
+ # Compute mean and std per module
305
+ mean_count = np.nanmean(counts, axis=-1, keepdims=True)
306
+ std_count = np.nanstd(counts, axis=-1, keepdims=True)
307
+
308
+ # Mask outliers
129
309
  mask = np.abs(counts - mean_count) < sigma * std_count
130
310
  filtered_counts = np.where(mask, counts, np.nan)
311
+
131
312
  return self._compute_chi_squared_red(filtered_counts, return_all=return_all)
132
313
 
133
- def gti_chi_squared(self, time=None, counts=None, gtis=None, return_all=False):
314
+ def gti_chi_squared(
315
+ self,
316
+ time: Optional[NDArray[np.float64]] = None,
317
+ counts: Optional[NDArray[np.float64]] = None,
318
+ gtis: Optional[NDArray[np.float64]] = None,
319
+ return_all: bool = False,
320
+ ) -> Union[float, Tuple[NDArray[np.float64], NDArray[np.int64], NDArray[np.float64]]]:
134
321
  """
135
- Computes GTI-filtered reduced chi-squared for lightcurve data.
322
+ Compute GTI-filtered reduced chi-squared.
323
+
324
+ Only uses bins within Good Time Intervals (GTIs).
325
+ Useful for excluding known bad data periods.
326
+
327
+ Parameters
328
+ ----------
329
+ time : ndarray, optional
330
+ Time array. If None, uses cached module data.
331
+ counts : ndarray, optional
332
+ Count array(s). If None, uses cached module data.
333
+ gtis : ndarray, optional
334
+ Good Time Intervals (N, 2) array in IJD.
335
+ If None, uses lightcurve.gtis.
336
+ return_all : bool, default False
337
+ If True, return per-module results.
136
338
 
137
- Args:
138
- time (ndarray, optional): Time array. If None, uses cached module data. Defaults to None.
139
- counts (ndarray, optional): Count array(s) to analyze. If None, uses cached module data. Defaults to None.
140
- gtis (ndarray, optional): Good Time Intervals (N, 2) array. If None, uses lightcurve GTIs. Defaults to None.
141
- return_all (bool, optional): If True, returns chi-squared for each module. If False, returns mean. Defaults to False.
339
+ Returns
340
+ -------
341
+ chi_squared_red : float
342
+ Reduced chi-squared within GTIs only
142
343
 
143
- Returns:
144
- float or ndarray: Reduced chi-squared value(s) within GTIs only.
344
+ Raises
345
+ ------
346
+ ValueError
347
+ If no overlap between GTIs and time range
145
348
 
146
- Raises:
147
- ValueError: If no overlap between GTIs and lightcurve time range.
349
+ Examples
350
+ --------
351
+ >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
352
+ >>> chi_gti = qm.gti_chi_squared()
353
+ >>> print(f"GTI-filtered chisq/dof = {chi_gti:.2f}")
354
+ >>>
355
+ >>> # Use custom GTIs
356
+ >>> custom_gtis = np.array([[3000.0, 3100.0], [3200.0, 3300.0]])
357
+ >>> chi_custom = qm.gti_chi_squared(gtis=custom_gtis)
148
358
 
149
- Examples:
150
- >>> qm = QualityMetrics(lc, binsize=1.0, emin=30, emax=300)
151
- >>> chi = qm.gti_chi_squared()
359
+ Notes
360
+ -----
361
+ GTIs are always in IJD format, regardless of local_time setting.
362
+ Time array must be converted to IJD for comparison.
152
363
  """
153
364
  if counts is None or time is None:
154
365
  data = self._compute_counts()
155
366
  time, counts = data["time"], data["counts"]
367
+
156
368
  if gtis is None:
369
+ if self.lightcurve is None:
370
+ raise ValueError("Must provide gtis or set lightcurve")
157
371
  gtis = self.lightcurve.gtis
372
+
373
+ # Check for overlap
158
374
  if gtis[0, 0] > time[-1] or gtis[-1, 1] < time[0]:
159
375
  raise ValueError(
160
- "No overlap between GTIs and lightcurve time. If Lightcurve is set, verify time is in IJD."
376
+ f"No overlap between GTIs ({gtis[0,0]:.1f}-{gtis[-1,1]:.1f}) "
377
+ f"and time range ({time[0]:.1f}-{time[-1]:.1f}). "
378
+ "Verify time is in IJD format."
161
379
  )
380
+
381
+ # Create GTI mask
162
382
  gti_mask = np.zeros_like(time, dtype=bool)
163
383
  for gti_start, gti_stop in gtis:
164
384
  gti_mask |= (time >= gti_start) & (time <= gti_stop)
385
+
386
+ # Apply mask (set non-GTI bins to NaN)
165
387
  filtered_counts = np.where(gti_mask, counts, np.nan)
388
+
166
389
  return self._compute_chi_squared_red(filtered_counts, return_all=return_all)