isgri 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
isgri/utils/quality.py CHANGED
@@ -1,182 +1,389 @@
1
+ """
2
+ ISGRI Data Quality Metrics
3
+ ===========================
4
+
5
+ Statistical quality metrics for INTEGRAL/ISGRI light curves.
6
+
7
+ The main metric is reduced chi-squared (chisq/dof), which tests whether
8
+ count rates are consistent with Poisson statistics. Values near 1.0
9
+ indicate stable background and no variable sources.
10
+
11
+ Classes
12
+ -------
13
+ QualityMetrics : Compute chi-squared metrics for light curves
14
+
15
+ Examples
16
+ --------
17
+ >>> from isgri.utils import LightCurve, QualityMetrics
18
+ >>>
19
+ >>> # Load light curve
20
+ >>> lc = LightCurve.load_data("events.fits")
21
+ >>>
22
+ >>> # Compute quality metrics
23
+ >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
24
+ >>> chi = qm.raw_chi_squared()
25
+ >>> print(f"Raw chisq/dof = {chi:.2f}")
26
+ >>>
27
+ >>> # Sigma-clipped (removes outliers)
28
+ >>> chi_clip = qm.sigma_clip_chi_squared(sigma=3.0)
29
+ >>> print(f"Clipped chisq/dof = {chi_clip:.2f}")
30
+ >>>
31
+ >>> # GTI-filtered (only good time intervals)
32
+ >>> chi_gti = qm.gti_chi_squared()
33
+ >>> print(f"GTI chisq/dof = {chi_gti:.2f}")
34
+
35
+ """
36
+
1
37
  import numpy as np
38
+ from numpy.typing import NDArray
39
+ from typing import Optional, Tuple, Union
2
40
  from .lightcurve import LightCurve
3
41
 
4
42
 
5
43
class QualityMetrics:
    """
    Compute statistical quality metrics for ISGRI light curves.

    Uses module-by-module light curves to compute chi-squared statistics.
    Results are weighted by total counts per module.

    Parameters
    ----------
    lightcurve : LightCurve, optional
        LightCurve instance to analyze
    binsize : float, default 1.0
        Bin size in seconds
    emin : float, default 1.0
        Minimum energy in keV
    emax : float, default 1000.0
        Maximum energy in keV
    local_time : bool, default False
        If True, use local time (seconds from T0).
        If False, use IJD time. GTIs are always in IJD.

    Attributes
    ----------
    module_data : dict or None
        Cached rebinned data {'time': array, 'counts': array}. The cache is
        recomputed automatically when binsize, emin, emax, local_time or
        lightcurve change after it was filled.

    Examples
    --------
    >>> lc = LightCurve.load_data("events.fits")
    >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
    >>>
    >>> # Compute various chi-squared metrics
    >>> raw_chi = qm.raw_chi_squared()
    >>> clip_chi = qm.sigma_clip_chi_squared(sigma=3.0)
    >>> gti_chi = qm.gti_chi_squared()
    >>>
    >>> print(f"Raw: {raw_chi:.2f}, Clipped: {clip_chi:.2f}, GTI: {gti_chi:.2f}")

    See Also
    --------
    raw_chi_squared : Basic chi-squared test
    sigma_clip_chi_squared : Remove outliers before testing
    gti_chi_squared : Test only good time intervals
    """

    def __init__(
        self,
        lightcurve: Optional["LightCurve"] = None,
        binsize: float = 1.0,
        emin: float = 1.0,
        emax: float = 1000.0,
        local_time: bool = False,
    ) -> None:
        """
        Initialize QualityMetrics instance.

        Raises
        ------
        TypeError
            If lightcurve is neither a LightCurve instance nor None
        ValueError
            If binsize is not positive, emin >= emax, or emin is negative
        """
        if lightcurve is not None and not isinstance(lightcurve, LightCurve):
            raise TypeError(f"lightcurve must be LightCurve instance or None, got {type(lightcurve)}")

        if binsize <= 0:
            raise ValueError(f"binsize must be positive, got {binsize}")

        if emin >= emax:
            raise ValueError(f"emin ({emin}) must be less than emax ({emax})")

        if emin < 0:
            raise ValueError(f"emin must be non-negative, got {emin}")

        self.lightcurve = lightcurve
        self.binsize = binsize
        self.emin = emin
        self.emax = emax
        self.local_time = local_time
        self.module_data: Optional[dict] = None
        # Settings the cached module_data was computed with (None = no record).
        self._module_data_key: Optional[tuple] = None

    def __repr__(self) -> str:
        """Return a short summary of the binning, energy range and light curve state."""
        # Identity check, not truthiness: a light curve object that defines
        # __len__/__bool__ must still be reported as 'set'.
        lightcurve_state = "set" if self.lightcurve is not None else "None"
        return (
            f"QualityMetrics(binsize={self.binsize}s, "
            f"energy=({self.emin:.1f}-{self.emax:.1f}) keV, "
            f"lightcurve={lightcurve_state})"
        )

    def _compute_counts(self) -> dict:
        """
        Compute or retrieve cached rebinned counts for all modules.

        The cache is reused only while binsize, emin, emax, local_time and the
        lightcurve object are the ones it was computed with; otherwise the
        light curve is rebinned again. A ``module_data`` dict assigned from
        outside (no recorded settings) is returned as-is.

        Returns
        -------
        dict
            Dictionary with 'time' and 'counts' entries; 'counts' is the
            per-module result of ``lightcurve.rebin_by_modules`` as an ndarray.

        Raises
        ------
        ValueError
            If lightcurve is not set
        """
        if self.lightcurve is None:
            raise ValueError("Lightcurve must be set before computing counts")

        current_key = (
            self.binsize,
            self.emin,
            self.emax,
            self.local_time,
            id(self.lightcurve),
        )
        cached_key = getattr(self, "_module_data_key", None)

        if self.module_data is not None and (cached_key is None or cached_key == current_key):
            return self.module_data

        time, counts = self.lightcurve.rebin_by_modules(
            binsize=self.binsize,
            emin=self.emin,
            emax=self.emax,
            local_time=self.local_time,
        )

        self.module_data = {
            "time": time,
            "counts": np.asarray(counts),
        }
        self._module_data_key = current_key

        return self.module_data

    def _compute_chi_squared_red(
        self,
        counts: NDArray[np.float64],
        return_all: bool = False,
    ) -> Union[float, Tuple[NDArray[np.float64], NDArray[np.int64], NDArray[np.float64]]]:
        """
        Compute reduced chi-squared for count data.

        Parameters
        ----------
        counts : ndarray
            Count array(s). Shape: (n_modules, n_bins) or (n_bins,)
        return_all : bool, default False
            If True, return (chi_squared, dof, total_counts) per module.
            If False, return weighted mean chi-squared.

        Returns
        -------
        chi_squared_red : float
            Weighted mean of chisq/dof across modules (if return_all=False).
            NaN when there are no counts or no module has dof > 0.
        chi_squared, dof, total_counts : tuple of ndarrays
            Per-module statistics (if return_all=True)

        Notes
        -----
        - Empty bins (counts=0) are treated as NaN and excluded
        - DOF = (number of non-empty bins) - 1
        - Weighting by total counts gives more influence to active modules
        - Modules with dof <= 0 are left out of the weighted mean
        """
        counts = np.asarray(counts, dtype=float)

        # Replace zeros with NaN (exclude empty bins)
        counts = np.where(counts == 0, np.nan, counts)

        n_valid = np.sum(~np.isnan(counts), axis=-1)
        total_counts = np.nansum(counts, axis=-1)

        # Mean = sum / count of non-empty bins. Done by hand instead of
        # np.nanmean so fully empty modules give NaN without a RuntimeWarning.
        with np.errstate(divide="ignore", invalid="ignore"):
            mean_counts = np.expand_dims(total_counts / n_valid, axis=-1)
            chi_squared = np.nansum((counts - mean_counts) ** 2 / mean_counts, axis=-1)

        # DOF = number of non-empty bins minus 1
        dof = n_valid - 1

        if return_all:
            return chi_squared, dof, total_counts

        # Promote scalars (1-D input) so the masking below is plain array indexing.
        chi_squared = np.atleast_1d(chi_squared)
        dof = np.atleast_1d(dof)
        total_counts = np.atleast_1d(total_counts)

        if np.sum(total_counts) == 0 or np.all(dof <= 0):
            return np.nan

        # Weight by total counts (more counts = more reliable chi-squared)
        valid_modules = dof > 0
        chi_squared_red = chi_squared[valid_modules] / dof[valid_modules]

        return np.average(chi_squared_red, weights=total_counts[valid_modules])

    def raw_chi_squared(
        self,
        counts: Optional[NDArray[np.float64]] = None,
        return_all: bool = False,
    ) -> Union[float, Tuple[NDArray[np.float64], NDArray[np.int64], NDArray[np.float64]]]:
        """
        Compute raw reduced chi-squared (no filtering).

        Tests whether count rates are consistent with Poisson statistics.
        Values near 1.0 indicate stable, constant background.

        Parameters
        ----------
        counts : ndarray, optional
            Count array(s) to analyze. If None, uses cached module data.
        return_all : bool, default False
            If True, return per-module results. If False, return weighted mean.

        Returns
        -------
        chi_squared_red : float
            Reduced chi-squared (chisq/dof)

        Raises
        ------
        ValueError
            If counts is None and no lightcurve is set

        Examples
        --------
        >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
        >>> chi = qm.raw_chi_squared()
        >>> print(f"chisq/dof = {chi:.2f}")

        >>> # Get per-module results
        >>> chi_vals, dof, counts = qm.raw_chi_squared(return_all=True)
        >>> for i, (c, d) in enumerate(zip(chi_vals, dof)):
        ...     print(f"Module {i}: chisq = {c:.1f}, dof = {d}")
        """
        if counts is None:
            counts = self._compute_counts()["counts"]

        return self._compute_chi_squared_red(counts, return_all=return_all)

    def sigma_clip_chi_squared(
        self,
        sigma: float = 3.0,
        counts: Optional[NDArray[np.float64]] = None,
        return_all: bool = False,
    ) -> Union[float, Tuple[NDArray[np.float64], NDArray[np.int64], NDArray[np.float64]]]:
        """
        Compute sigma-clipped reduced chi-squared.

        Removes outlier bins (more than ``sigma`` standard deviations from
        the per-module mean) before computing chi-squared. Useful for
        detecting transient flares or background instabilities.

        Parameters
        ----------
        sigma : float, default 3.0
            Sigma clipping threshold in standard deviations
        counts : ndarray, optional
            Count array(s) to analyze. If None, uses cached module data.
        return_all : bool, default False
            If True, return per-module results.

        Returns
        -------
        chi_squared_red : float
            Reduced chi-squared after clipping outliers

        Raises
        ------
        ValueError
            If sigma is not positive

        Examples
        --------
        >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
        >>>
        >>> # Conservative clipping (remove extreme outliers)
        >>> chi_3sig = qm.sigma_clip_chi_squared(sigma=3.0)
        >>>
        >>> # Aggressive clipping (remove moderate outliers)
        >>> chi_1sig = qm.sigma_clip_chi_squared(sigma=1.0)
        >>>
        >>> print(f"3sigma: {chi_3sig:.2f}, 1sigma: {chi_1sig:.2f}")

        Notes
        -----
        Lower chi-squared after clipping indicates presence of outliers
        (flares, background jumps, etc.)

        Bins exactly at the clipping bound are kept, so a perfectly constant
        series (std = 0) is not discarded and yields 0.0 rather than NaN.
        """
        if sigma <= 0:
            raise ValueError(f"sigma must be positive, got {sigma}")

        if counts is None:
            counts = self._compute_counts()["counts"]
        counts = np.asarray(counts, dtype=float)

        # Compute mean and std per module
        mean_count = np.nanmean(counts, axis=-1, keepdims=True)
        std_count = np.nanstd(counts, axis=-1, keepdims=True)

        # Keep bins within the bound (inclusive); NaN bins compare False and stay NaN.
        keep = np.abs(counts - mean_count) <= sigma * std_count
        filtered_counts = np.where(keep, counts, np.nan)

        return self._compute_chi_squared_red(filtered_counts, return_all=return_all)

    def gti_chi_squared(
        self,
        time: Optional[NDArray[np.float64]] = None,
        counts: Optional[NDArray[np.float64]] = None,
        gtis: Optional[NDArray[np.float64]] = None,
        return_all: bool = False,
    ) -> Union[float, Tuple[NDArray[np.float64], NDArray[np.int64], NDArray[np.float64]]]:
        """
        Compute GTI-filtered reduced chi-squared.

        Only uses bins within Good Time Intervals (GTIs).
        Useful for excluding known bad data periods.

        Parameters
        ----------
        time : ndarray, optional
            Time array. If None, uses cached module data.
        counts : ndarray, optional
            Count array(s). If None, uses cached module data.
            If only one of time/counts is given, both are taken from the
            cached module data.
        gtis : array-like, optional
            Good Time Intervals as an (N, 2) array of [start, stop] in IJD;
            a single [start, stop] pair is also accepted. Rows may be in
            any order. If None, uses lightcurve.gtis.
        return_all : bool, default False
            If True, return per-module results.

        Returns
        -------
        chi_squared_red : float
            Reduced chi-squared within GTIs only

        Raises
        ------
        ValueError
            If gtis is None and no lightcurve is set, if gtis is not a
            non-empty (N, 2) array, if time is empty, or if there is no
            overlap between GTIs and time range

        Examples
        --------
        >>> qm = QualityMetrics(lc, binsize=1.0, emin=20, emax=100)
        >>> chi_gti = qm.gti_chi_squared()
        >>> print(f"GTI-filtered chisq/dof = {chi_gti:.2f}")
        >>>
        >>> # Use custom GTIs
        >>> custom_gtis = np.array([[3000.0, 3100.0], [3200.0, 3300.0]])
        >>> chi_custom = qm.gti_chi_squared(gtis=custom_gtis)

        Notes
        -----
        GTIs are always in IJD format, regardless of local_time setting.
        Time array must be converted to IJD for comparison.
        """
        if counts is None or time is None:
            data = self._compute_counts()
            time, counts = data["time"], data["counts"]

        if gtis is None:
            if self.lightcurve is None:
                raise ValueError("Must provide gtis or set lightcurve")
            gtis = self.lightcurve.gtis

        time = np.asarray(time, dtype=float)
        gtis = np.atleast_2d(np.asarray(gtis, dtype=float))

        if gtis.ndim != 2 or gtis.shape[1] != 2 or gtis.shape[0] == 0:
            raise ValueError(f"gtis must be a non-empty (N, 2) array, got shape {gtis.shape}")
        if time.size == 0:
            raise ValueError("time array is empty")

        # Check for overlap using the full GTI envelope, so unsorted GTI rows
        # cannot trigger a false "no overlap" error.
        gti_first, gti_last = gtis[:, 0].min(), gtis[:, 1].max()
        time_first, time_last = time.min(), time.max()
        if gti_first > time_last or gti_last < time_first:
            raise ValueError(
                f"No overlap between GTIs ({gti_first:.1f}-{gti_last:.1f}) "
                f"and time range ({time_first:.1f}-{time_last:.1f}). "
                "Verify time is in IJD format."
            )

        # Create GTI mask (interval bounds are inclusive)
        gti_mask = np.zeros_like(time, dtype=bool)
        for gti_start, gti_stop in gtis:
            gti_mask |= (time >= gti_start) & (time <= gti_stop)

        # Apply mask (set non-GTI bins to NaN)
        filtered_counts = np.where(gti_mask, counts, np.nan)

        return self._compute_chi_squared_red(filtered_counts, return_all=return_all)