isgri 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
isgri/__version__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.6.1"
+ __version__ = "0.7.0"
isgri/catalog/__init__.py CHANGED
@@ -1,3 +1,4 @@
  from .scwquery import ScwQuery
+ from .builder import CatalogBuilder
 
- __all__ = ["ScwQuery"]
+ __all__ = ["ScwQuery", "CatalogBuilder"]
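With 0.7.0, CatalogBuilder is re-exported from the isgri.catalog package alongside ScwQuery, so both entry points can be imported from one place. A minimal sketch of the new import path:

# CatalogBuilder is now exported at package level (see __all__ above);
# importing it from isgri.catalog.builder continues to work as before.
from isgri.catalog import CatalogBuilder, ScwQuery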
isgri/catalog/builder.py CHANGED
@@ -1,19 +1,124 @@
+ """ISGRI Catalog Builder
+ ======================
+
+ Tools for building and updating INTEGRAL/ISGRI science window catalogs.
+
+ Classes
+ -------
+ CatalogBuilder : Main catalog builder class
+
+ Examples
+ --------
+ >>> from isgri.catalog.builder import CatalogBuilder
+ >>>
+ >>> # Create builder instance
+ >>> builder = CatalogBuilder(
+ ...     archive_path="/path/to/archive",
+ ...     catalog_path="/path/to/catalog.fits",
+ ...     lightcurve_cache="/path/to/cache",
+ ...     n_cores=8
+ ... )
+ >>>
+ >>> # Update catalog with new science windows
+ >>> builder.update_catalog()
+ """
+
  from isgri.utils import LightCurve, QualityMetrics
+ from ..config import Config
  import numpy as np
- import os, subprocess
+ import os, subprocess, glob
  from typing import Optional
  from joblib import Parallel, delayed # type: ignore
  import multiprocessing
+ from collections import defaultdict
+ from astropy.table import Table, vstack
+ from pathlib import Path
+
+ new_catalog_names = [
+     "REVOL",
+     "SWID",
+     "TSTART",
+     "ONTIME",
+     "TSTOP",
+     "RA_SCX",
+     "DEC_SCX",
+     "RA_SCZ",
+     "DEC_SCZ",
+     "NoEVTS",
+     "CHI",
+     "CUT_CHI",
+     "GTI_CHI",
+ ]
+ new_catalog_dtypes = ["i8", "S12", "f8", "f8", "f8", "f8", "f8", "f8", "f8", "i8", "f8", "f8", "f8"]
 
 
  class CatalogBuilder:
+     """ISGRI catalog builder and updater.
+
+     Processes INTEGRAL/ISGRI science windows to build catalogs containing
+     quality metrics, pointing information, and light curve data.
+
+     Parameters
+     ----------
+     archive_path : str, optional
+         Path to INTEGRAL archive directory. If None, uses config file.
+     catalog_path : str, optional
+         Path to catalog FITS file. If None, uses config file.
+     lightcurve_cache : str, optional
+         Path to directory for caching light curve arrays. If None, no caching.
+     n_cores : int, optional
+         Number of CPU cores for parallel processing. If None, uses all available cores.
+
+     Attributes
+     ----------
+     archive_path : str
+         Path to INTEGRAL archive
+     catalog_path : str
+         Path to catalog file
+     lightcurve_cache : str or None
+         Path to light curve cache directory
+     n_cores : int
+         Number of parallel workers
+     catalog : astropy.table.Table
+         Loaded catalog table
+
+     Examples
+     --------
+     >>> builder = CatalogBuilder(
+     ...     archive_path="/data/integral",
+     ...     catalog_path="catalog.fits",
+     ...     n_cores=4
+     ... )
+
+     >>> # Update catalog with new observations
+     >>> builder.update_catalog()
+
+     >>> # Find all science windows
+     >>> swids, paths = builder.find_scws()
+     >>> print(f"Found {len(swids)} science windows")
+
+     See Also
+     --------
+     ScwQuery : Query and filter catalog data
+     LightCurve : Light curve analysis
+     QualityMetrics : Quality metric computation
+     """
+
      def __init__(
          self,
-         archive_path: str,
-         catalog_path: str,
+         archive_path: Optional[str] = None,
+         catalog_path: Optional[str] = None,
          lightcurve_cache: Optional[str] = None,
          n_cores: Optional[int] = None,
      ):
+         if archive_path is None or catalog_path is None:
+             cfg = Config()
+             if archive_path is None:
+                 archive_path = cfg.archive_path
+             if catalog_path is None:
+                 catalog_path = cfg.catalog_path
+             if catalog_path is None:
+                 raise FileNotFoundError("Catalog path must be specified either in arguments or config file.")
          self.archive_path = archive_path
          self.catalog_path = catalog_path
          self.lightcurve_cache = lightcurve_cache
@@ -21,25 +126,122 @@ class CatalogBuilder:
          self.catalog = self._load_catalog()
 
      def _load_catalog(self):
-         if not os.path.exists(self.catalog_path):
-             empty_structure = CatalogStructure.get_empty_structure()
-             return empty_structure
+         """Load existing catalog or create new empty catalog.
+
+         Returns
+         -------
+         astropy.table.Table
+             Loaded catalog table or new empty table.
+
+         Raises
+         ------
+         FileNotFoundError
+             If catalog directory does not exist.
+         """
+         catalog_path = Path(self.catalog_path)
+         if catalog_path.is_file():
+             return Table.read(catalog_path)
+         elif catalog_path.parent.is_dir():
+             print("Catalog file not found, creating new catalog.")
+             return Table(names=new_catalog_names, dtype=new_catalog_dtypes)
          else:
-             catalog = CatalogStructure.load_from_fits(self.catalog_path)
-             return catalog
+             raise FileNotFoundError(f"Directory for catalog does not exist: {catalog_path.parent}")
+
+     def _add_catalog_data(self, table_data_rows: list[dict]):
+         """Add new rows to catalog and save to disk.
+
+         Parameters
+         ----------
+         table_data_rows : list of dict
+             List of dictionaries containing catalog row data.
+             Each dict must have keys matching catalog column names.
+
+         Notes
+         -----
+         Updates are written atomically using a temporary file to prevent corruption.
+         The catalog is sorted by TSTART after adding new data.
+         """
+         new_data = Table(rows=table_data_rows, names=new_catalog_names, dtype=new_catalog_dtypes)
+         self.catalog = vstack([self.catalog, new_data])
+         self.catalog.sort("TSTART")
+
+         temp_catalog_path = Path(self.catalog_path).with_suffix(".tmp")
+         self.catalog.write(temp_catalog_path, overwrite=True, format="fits")
+         os.replace(temp_catalog_path, self.catalog_path)
+
+     def _add_array_data(self, rev: str, array_data: np.ndarray):
+         """Add light curve array data to cache for a revolution.
+
+         Parameters
+         ----------
+         rev : str
+             Revolution number (4-digit string, e.g., '0011').
+         array_data : ndarray
+             Structured array containing SWID, TIME, COUNTS, MODULE_COUNTS, and GTIS.
+
+         Raises
+         ------
+         ValueError
+             If lightcurve_cache path is not set.
+
+         Notes
+         -----
+         Merges new data with existing revolution data if present.
+         Saves as NumPy .npy file named by revolution number.
+         """
+         if self.lightcurve_cache is None:
+             raise ValueError("Lightcurve cache path is not set.")
+         file_path = Path(self.lightcurve_cache) / f"{int(rev):0>4}.npy"
+
+         if file_path.exists():
+             old_data = np.load(file_path, allow_pickle=True)
+             mask = ~np.isin(old_data["SWID"], array_data["SWID"])
+             array_data = np.concatenate([old_data[mask], array_data])
+
+         np.save(file_path, array_data)
 
      def _process_scw(self, path) -> tuple[dict, list]:
+         """Process a single science window and compute quality metrics.
+
+         Parameters
+         ----------
+         path : str
+             Path to ISGRI events FITS file.
+
+         Returns
+         -------
+         table_data : dict
+             Catalog row data containing metadata and quality metrics.
+         array_data : dict
+             Light curve data (time, counts, modules, GTIs).
+
+         Notes
+         -----
+         Computes three quality metrics:
+         - CHI: Raw chi-squared
+         - CUT_CHI: Sigma-clipped chi-squared
+         - GTI_CHI: GTI-filtered chi-squared (NaN if insufficient GTI coverage)
+
+         Light curves are binned at 1 second resolution in 15-1000 keV band.
+         """
+         event_file = os.path.join(path, "isgri_events.fits.gz")
+         if not os.path.exists(event_file):
+             return None, None
+
          lc = LightCurve.load_data(path)
 
          time, full_counts = lc.rebin(1, emin=15, emax=1000, local_time=False)
          _, module_counts = lc.rebin_by_modules(1, emin=15, emax=1000, local_time=False)
          module_counts.insert(0, full_counts)
          module_counts = np.array(module_counts)
-         quality = QualityMetrics.compute(lc)
+         quality = QualityMetrics(lc)
          quality.module_data = {"time": time, "counts": module_counts[1:]}
          raw_chisq = quality.raw_chi_squared()
          clipped_chisq = quality.sigma_clip_chi_squared()
-         gti_chisq = quality.gti_chi_squared()
+         try:
+             gti_chisq = quality.gti_chi_squared()
+         except ValueError:
+             gti_chisq = np.nan
 
          # cnames = [
          # ("REVOL", int),
@@ -73,18 +275,158 @@ class CatalogBuilder:
              "CUT_CHI": clipped_chisq,
              "GTI_CHI": gti_chisq,
          }
-         array_data = [lc.metadata["SWID"], time, module_counts, lc.gti]
+         array_data = {
+             "SWID": lc.metadata["SWID"],
+             "TIME": time,
+             "COUNTS": full_counts,
+             "MODULE_COUNTS": module_counts[1:],
+             "GTIS": lc.gtis,
+         }
          return table_data, array_data
 
      def _process_rev(self, rev_paths: list[str]) -> tuple[list[dict], list[list]]:
+         """Process all science windows in a revolution in parallel.
+
+         Parameters
+         ----------
+         rev_paths : list of str
+             Paths to event files for all ScWs in revolution.
+
+         Returns
+         -------
+         table_data_list : list of dict
+             Catalog rows for all processed ScWs.
+         array_data : ndarray
+             Structured array of light curve data for all ScWs.
+
+         Notes
+         -----
+         Uses joblib for parallel processing across n_cores workers.
+         """
          data = Parallel(n_jobs=self.n_cores, backend="multiprocessing")(
              delayed(self._process_scw)(path) for path in rev_paths
          )
-         table_data_list, array_data_list = zip(*data)
-         return table_data_list, array_data_list
+         table_data_list, array_data_dicts = zip(*[d for d in data if d[0] is not None])
+
+         dtype = [("SWID", "U16"), ("TIME", "O"), ("COUNTS", "O"), ("MODULE_COUNTS", "O"), ("GTIS", "O")]
+         array_data = np.empty(len(array_data_dicts), dtype=dtype)
+         for i, d in enumerate(array_data_dicts):
+             array_data[i] = (d["SWID"], d["TIME"], d["COUNTS"], d["MODULE_COUNTS"], d["GTIS"])
+         return table_data_list, array_data
 
-     def _find_scws(self) -> tuple[np.ndarray[str], np.ndarray[str]]:
+     def find_scws(self) -> tuple[np.ndarray[str], np.ndarray[str]]:
+         """Find all science windows in the archive.
+
+         Returns
+         -------
+         swids : ndarray of str
+             Array of SWID identifiers (12 characters).
+         swid_paths : ndarray of str
+             Array of corresponding directory paths.
+
+         Notes
+         -----
+         Only includes ScWs matching pattern with '0.0' (Pointings, slews are omitted) in directory name.
+         Scans all revolution directories in archive_path.
+         """
          # Find all SCW files in the archive
-         scws_files = subprocess.run(
-             ["ls", f"{self.archive_path}/*", "|", "isgri_events.fits.gz"], capture_output=True, text=True
+         revolutions = os.scandir(self.archive_path)
+         swids, swid_paths = [], []
+         for rev in revolutions:
+             if not rev.is_dir():
+                 continue
+             for scw in os.scandir(rev.path):
+                 swid = scw.name
+                 path = scw.path
+                 if len(swid) == 16 and "0.0" in swid:
+                     swids.append(swid.split(".")[0])
+                     swid_paths.append(path)
+         return np.array(swids), np.array(swid_paths)
+
+     def find_event_files(
+         self, swids: np.ndarray[str], swid_paths: np.ndarray[str]
+     ) -> tuple[np.ndarray[str], np.ndarray[str]]:
+         """Filter science windows to those with event files.
+
+         Parameters
+         ----------
+         swids : ndarray of str
+             Array of SWID identifiers.
+         swid_paths : ndarray of str
+             Array of ScW directory paths.
+
+         Returns
+         -------
+         valid_swids : ndarray of str
+             SWIDs with existing event files.
+         valid_paths : ndarray of str
+             Paths to corresponding isgri_events.fits.gz files.
+
+         Notes
+         -----
+         Checks for existence of 'isgri_events.fits.gz' in each ScW directory.
+         """
+
+         def check_file(swid, path):
+             event_file = os.path.join(path, "isgri_events.fits.gz")
+             return (swid, event_file) if os.path.exists(event_file) else None
+
+         print("Checking for event files...")
+         results = Parallel(n_jobs=self.n_cores, backend="threading")(
+             delayed(check_file)(swid, path) for swid, path in zip(swids, swid_paths)
          )
+
+         valid_data = [r for r in results if r is not None]
+         if valid_data:
+             valid_swids, valid_paths = zip(*valid_data)
+             return np.array(valid_swids), np.array(valid_paths)
+         return np.array([]), np.array([])
+
+     def update_catalog(self):
+         """Update catalog with new science windows from archive.
+
+         Scans archive for new ScWs not present in catalog, processes them
+         in parallel by revolution, and adds results to catalog and cache.
+
+         Notes
+         -----
+         Processing workflow:
+         1. Find all ScWs in archive
+         2. Identify new ScWs not in catalog
+         3. Filter to ScWs with event files
+         4. Process by revolution in parallel
+         5. Add to catalog and light curve cache
+
+         Only ScWs with isgri_events.fits.gz files are processed.
+         Progress is printed for each revolution.
+
+         Examples
+         --------
+         >>> builder = CatalogBuilder()
+         >>> builder.update_catalog()
+         """
+         print("Looking for ScWs in archive...")
+         scws_in_archive, scws_paths = self.find_scws()
+         print(f"Found {len(scws_in_archive)} ScWs in archive.")
+         scws_in_catalog = np.array(self.catalog["SWID"], dtype=str)
+         mask = np.isin(scws_in_archive, scws_in_catalog, invert=True)
+         to_process_scws = scws_in_archive[mask]
+         to_process_paths = scws_paths[mask]
+         print(f"{len(to_process_scws)} ScWs not in catalog.")
+         # to_process_scws, to_process_paths = self.find_event_files(new_scws, new_paths)
+         # print(f"{len(to_process_scws)} ScWs have event files and will be processed.")
+         # if len(to_process_scws) == 0:
+         # print("Exiting.")
+         # return
+
+         revolutions = defaultdict(list)
+         for swid, path in zip(to_process_scws, to_process_paths):
+             revolutions[swid[:4]].append(path)
+         revolutions = dict(sorted(revolutions.items()))
+         for revolution, rev_paths in revolutions.items():
+             print(f"Processing revolution {revolution} with {len(rev_paths)} ScWs...")
+             table_data_rows, array_data_list = self._process_rev(rev_paths)
+             print(f"Adding {len(table_data_rows)} ScWs from revolution {revolution} to catalog.")
+             self._add_catalog_data(table_data_rows)
+             if self.lightcurve_cache is not None:
+                 self._add_array_data(revolution, array_data_list)
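Taken together, builder.py now builds the catalog straight from the archive tree: find_scws() walks the revolution directories with os.scandir, _process_rev() fans the per-ScW processing out over joblib workers, _add_catalog_data() appends rows and rewrites the FITS file atomically via a .tmp file, and update_catalog() drives the whole loop. A minimal usage sketch, assuming the paths below exist (they are placeholders) and that Config() can supply any argument left as None:

from isgri.catalog import CatalogBuilder

# All paths here are placeholders for illustration.
builder = CatalogBuilder(
    archive_path="/data/integral/archive",
    catalog_path="/data/integral/isgri_catalog.fits",
    lightcurve_cache="/data/integral/lc_cache",  # optional per-revolution .npy cache
    n_cores=8,
)

# Scan the archive, process science windows not yet in the catalog revolution by
# revolution, and append rows (REVOL, SWID, TSTART, ..., CHI, CUT_CHI, GTI_CHI).
builder.update_catalog()

# The archive scan can also be run on its own:
swids, paths = builder.find_scws()
print(f"{len(swids)} pointings found in the archive")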
isgri/catalog/scwquery.py CHANGED
@@ -130,7 +130,7 @@ class ScwQuery:
          max_chi : float, optional
              Maximum chi-squared value to accept
          chi_type : str, default "CHI"
-             Column name: "CHI", "CUT_CHI", or "GTI_CHI"
+             Column name: "CHI", "CUT", or "GTI"
 
          Returns
          -------
@@ -140,9 +140,12 @@ class ScwQuery:
          Examples
          --------
          >>> query.quality(max_chi=2.0) # High quality data
-         >>> query.quality(max_chi=5.0, chi_type="CUT_CHI") # Alternative metric
+         >>> query.quality(max_chi=5.0, chi_type="CUT") # Alternative metric
 
          """
+         column_names = {"CHI": "CHI", "CUT": "CUT_CHI", "GTI": "GTI_CHI"}
+         chi_type = column_names.get(chi_type.upper(), chi_type)
+
          if chi_type not in self.catalog.colnames:
              raise ValueError(f"Column {chi_type} not found in catalog")
 
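The quality() filter in ScwQuery now accepts the short aliases "CUT" and "GTI" (matched case-insensitively) and maps them onto the CUT_CHI and GTI_CHI catalog columns; unrecognised values fall through unchanged, so the previous full column names keep working. A brief sketch, assuming an existing ScwQuery instance named query:

# All three calls filter on the sigma-clipped chi-squared column.
query.quality(max_chi=5.0, chi_type="CUT")      # new short alias
query.quality(max_chi=5.0, chi_type="cut")      # aliases are upper-cased before lookup
query.quality(max_chi=5.0, chi_type="CUT_CHI")  # old spelling still passes through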
isgri/cli/__init__.py CHANGED
@@ -1 +1 @@
- from .main import main
+ from .main import main