deepextractor 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepextractor-0.1.0/.gitattributes +10 -0
- deepextractor-0.1.0/.github/workflows/publish.yml +45 -0
- deepextractor-0.1.0/.gitignore +62 -0
- deepextractor-0.1.0/.readthedocs.yaml +17 -0
- deepextractor-0.1.0/CHANGELOG.md +22 -0
- deepextractor-0.1.0/LICENSE +21 -0
- deepextractor-0.1.0/PKG-INFO +200 -0
- deepextractor-0.1.0/README.md +125 -0
- deepextractor-0.1.0/assets/cdvgan/fingerprint.pb +3 -0
- deepextractor-0.1.0/assets/cdvgan/keras_metadata.pb +3 -0
- deepextractor-0.1.0/assets/cdvgan/saved_model.pb +3 -0
- deepextractor-0.1.0/assets/cdvgan/variables/variables.data-00000-of-00001 +3 -0
- deepextractor-0.1.0/assets/cdvgan/variables/variables.index +3 -0
- deepextractor-0.1.0/assets/data_o3a_sample.csv +171 -0
- deepextractor-0.1.0/assets/glitch_dict_example.pkl +3 -0
- deepextractor-0.1.0/assets/scaler.pkl +3 -0
- deepextractor-0.1.0/assets/scaler_bilby.pkl +3 -0
- deepextractor-0.1.0/docs/_static/custom.css +120 -0
- deepextractor-0.1.0/docs/conf.py +85 -0
- deepextractor-0.1.0/docs/guides/installation.rst +45 -0
- deepextractor-0.1.0/docs/guides/overview.rst +74 -0
- deepextractor-0.1.0/docs/guides/quickstart.rst +103 -0
- deepextractor-0.1.0/docs/guides/training.rst +79 -0
- deepextractor-0.1.0/docs/index.rst +37 -0
- deepextractor-0.1.0/notebooks/deepextractor_example.ipynb +1121 -0
- deepextractor-0.1.0/notebooks/deepextractor_minimal.ipynb +220 -0
- deepextractor-0.1.0/notebooks/glitch_reconstruction_tutorial.ipynb +370 -0
- deepextractor-0.1.0/notebooks/training_tutorial.ipynb +1301 -0
- deepextractor-0.1.0/notebooks/visualizations_and_results.ipynb +1441 -0
- deepextractor-0.1.0/pretrained/DeepExtractor_257/checkpoint_best_bilby_noise_base.pth.tar +3 -0
- deepextractor-0.1.0/pretrained/DeepExtractor_257/checkpoint_best_real_noise_base.pth.tar +3 -0
- deepextractor-0.1.0/pyproject.toml +103 -0
- deepextractor-0.1.0/setup.cfg +4 -0
- deepextractor-0.1.0/src/deepextractor/__init__.py +28 -0
- deepextractor-0.1.0/src/deepextractor/_version.py +24 -0
- deepextractor-0.1.0/src/deepextractor/api.py +60 -0
- deepextractor-0.1.0/src/deepextractor/generation/__init__.py +1 -0
- deepextractor-0.1.0/src/deepextractor/generation/generate_spectrograms.py +158 -0
- deepextractor-0.1.0/src/deepextractor/generation/generate_timeseries.py +162 -0
- deepextractor-0.1.0/src/deepextractor/generation/glitch_functions.py +145 -0
- deepextractor-0.1.0/src/deepextractor/model.py +155 -0
- deepextractor-0.1.0/src/deepextractor/models/__init__.py +19 -0
- deepextractor-0.1.0/src/deepextractor/models/architectures.py +314 -0
- deepextractor-0.1.0/src/deepextractor/py.typed +0 -0
- deepextractor-0.1.0/src/deepextractor/training/__init__.py +1 -0
- deepextractor-0.1.0/src/deepextractor/training/train_fn.py +60 -0
- deepextractor-0.1.0/src/deepextractor/training/trainer.py +265 -0
- deepextractor-0.1.0/src/deepextractor/utils/__init__.py +45 -0
- deepextractor-0.1.0/src/deepextractor/utils/checkpoints.py +102 -0
- deepextractor-0.1.0/src/deepextractor/utils/io.py +165 -0
- deepextractor-0.1.0/src/deepextractor/utils/metrics.py +63 -0
- deepextractor-0.1.0/src/deepextractor/utils/signal.py +85 -0
- deepextractor-0.1.0/src/deepextractor/utils/stft.py +64 -0
- deepextractor-0.1.0/src/deepextractor/utils/visualization.py +121 -0
- deepextractor-0.1.0/src/deepextractor.egg-info/PKG-INFO +200 -0
- deepextractor-0.1.0/src/deepextractor.egg-info/SOURCES.txt +66 -0
- deepextractor-0.1.0/src/deepextractor.egg-info/dependency_links.txt +1 -0
- deepextractor-0.1.0/src/deepextractor.egg-info/entry_points.txt +5 -0
- deepextractor-0.1.0/src/deepextractor.egg-info/requires.txt +34 -0
- deepextractor-0.1.0/src/deepextractor.egg-info/top_level.txt +1 -0
- deepextractor-0.1.0/tests/__init__.py +0 -0
- deepextractor-0.1.0/tests/conftest.py +53 -0
- deepextractor-0.1.0/tests/test_datasets.py +42 -0
- deepextractor-0.1.0/tests/test_deepextractor_model.py +123 -0
- deepextractor-0.1.0/tests/test_glitch_functions.py +60 -0
- deepextractor-0.1.0/tests/test_metrics.py +69 -0
- deepextractor-0.1.0/tests/test_models.py +55 -0
- deepextractor-0.1.0/tests/test_signal_utils.py +57 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Pretrained model weights are hosted on Hugging Face Hub (tomdooney/deepextractor).
|
|
2
|
+
# They are downloaded automatically on first use and cached locally in pretrained/.
|
|
3
|
+
# Do NOT track pretrained/ in git — add it to .gitignore instead.
|
|
4
|
+
|
|
5
|
+
# Track bundled data assets with git-lfs
|
|
6
|
+
assets/*.pkl filter=lfs diff=lfs merge=lfs -text
|
|
7
|
+
assets/cdvgan/** filter=lfs diff=lfs merge=lfs -text
|
|
8
|
+
|
|
9
|
+
# Strip notebook outputs automatically on commit
|
|
10
|
+
*.ipynb filter=nbstripout
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
with:
|
|
14
|
+
fetch-depth: 0 # needed for setuptools-scm to read git tags
|
|
15
|
+
|
|
16
|
+
- uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: "3.11"
|
|
19
|
+
|
|
20
|
+
- name: Build distributions
|
|
21
|
+
run: |
|
|
22
|
+
pip install build
|
|
23
|
+
python -m build
|
|
24
|
+
|
|
25
|
+
- name: Upload build artifacts
|
|
26
|
+
uses: actions/upload-artifact@v4
|
|
27
|
+
with:
|
|
28
|
+
name: dist
|
|
29
|
+
path: dist/
|
|
30
|
+
|
|
31
|
+
publish:
|
|
32
|
+
needs: build
|
|
33
|
+
runs-on: ubuntu-latest
|
|
34
|
+
environment: pypi
|
|
35
|
+
permissions:
|
|
36
|
+
id-token: write # required for Trusted Publisher (OIDC)
|
|
37
|
+
steps:
|
|
38
|
+
- name: Download build artifacts
|
|
39
|
+
uses: actions/download-artifact@v4
|
|
40
|
+
with:
|
|
41
|
+
name: dist
|
|
42
|
+
path: dist/
|
|
43
|
+
|
|
44
|
+
- name: Publish to PyPI
|
|
45
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.pyo
|
|
5
|
+
*.pyd
|
|
6
|
+
.Python
|
|
7
|
+
*.egg-info/
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
.eggs/
|
|
11
|
+
*.egg
|
|
12
|
+
*.whl
|
|
13
|
+
|
|
14
|
+
# Virtual environments
|
|
15
|
+
.venv/
|
|
16
|
+
venv/
|
|
17
|
+
env/
|
|
18
|
+
.env
|
|
19
|
+
|
|
20
|
+
# Setuptools-scm generated version file
|
|
21
|
+
src/deepextractor/_version.py
|
|
22
|
+
|
|
23
|
+
# Testing
|
|
24
|
+
.coverage
|
|
25
|
+
.pytest_cache/
|
|
26
|
+
htmlcov/
|
|
27
|
+
coverage.xml
|
|
28
|
+
|
|
29
|
+
# Type checking
|
|
30
|
+
.mypy_cache/
|
|
31
|
+
|
|
32
|
+
# Linting
|
|
33
|
+
.ruff_cache/
|
|
34
|
+
|
|
35
|
+
# Jupyter
|
|
36
|
+
notebooks/.ipynb_checkpoints/
|
|
37
|
+
.ipynb_checkpoints/
|
|
38
|
+
|
|
39
|
+
# Large generated data files — regenerate with the CLI scripts
|
|
40
|
+
data/
|
|
41
|
+
*.npy
|
|
42
|
+
losses/
|
|
43
|
+
evaluation/
|
|
44
|
+
saved_predictions/
|
|
45
|
+
|
|
46
|
+
# pretrained/ is a local cache for HuggingFace weights — do not commit
|
|
47
|
+
pretrained/
|
|
48
|
+
|
|
49
|
+
# Track bundled assets (scalers, cdvgan weights) via git-lfs (see .gitattributes)
|
|
50
|
+
!assets/**
|
|
51
|
+
|
|
52
|
+
# IDE
|
|
53
|
+
.vscode/
|
|
54
|
+
.idea/
|
|
55
|
+
*.swp
|
|
56
|
+
*.swo
|
|
57
|
+
.DS_Store
|
|
58
|
+
|
|
59
|
+
# Docs build
|
|
60
|
+
docs/_build/
|
|
61
|
+
docs/api/_autosummary/
|
|
62
|
+
docs/notebooks/
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0] - 2026-04-01
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Initial pip-installable package structure
|
|
12
|
+
- `deepextractor.models` — U-Net 1D/2D, DnCNN 1D, Autoencoder 1D/2D architectures
|
|
13
|
+
- `deepextractor.data` — PyTorch Dataset classes for time-series and spectrogram data
|
|
14
|
+
- `deepextractor.training` — training loop and trainer CLI entry point
|
|
15
|
+
- `deepextractor.generation` — synthetic glitch signal generators and data generation scripts
|
|
16
|
+
- `deepextractor.evaluation` — simulated evaluation metrics
|
|
17
|
+
- `deepextractor.utils` — checkpoints, signal processing, metrics, I/O, visualization
|
|
18
|
+
- Sphinx documentation with Furo dark theme
|
|
19
|
+
- CLI entry points: `deepextractor-train`, `deepextractor-generate`, `deepextractor-specgen`, `deepextractor-evaluate`
|
|
20
|
+
- Pretrained checkpoint weights hosted on Hugging Face Hub, downloaded automatically on first use
|
|
21
|
+
- Bundled GravitySpy O3a sample dataset (`assets/data_o3a_sample.csv`) — 170 high-confidence glitch examples across 17 classes
|
|
22
|
+
- Jupyter notebook tutorials: minimal example, glitch reconstruction, training from scratch
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Tom Dooney
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: deepextractor
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Deep learning framework for reconstructing gravitational wave signals and glitches from LIGO detector data
|
|
5
|
+
Author: Tom Dooney
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Tom Dooney
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Repository, https://github.com/tomdooney95/deepextractor
|
|
29
|
+
Project-URL: Documentation, https://deepextractor.readthedocs.io/
|
|
30
|
+
Project-URL: Paper, https://link.aps.org/doi/10.1103/s91m-c2jw
|
|
31
|
+
Keywords: gravitational-waves,LIGO,deep-learning,glitch,signal-processing
|
|
32
|
+
Classifier: Development Status :: 4 - Beta
|
|
33
|
+
Classifier: Intended Audience :: Science/Research
|
|
34
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
35
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
36
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
37
|
+
Classifier: Programming Language :: Python :: 3
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
41
|
+
Requires-Python: >=3.10
|
|
42
|
+
Description-Content-Type: text/markdown
|
|
43
|
+
License-File: LICENSE
|
|
44
|
+
Requires-Dist: numpy<2.0,>=1.26
|
|
45
|
+
Requires-Dist: torch>=2.1
|
|
46
|
+
Requires-Dist: matplotlib>=3.9
|
|
47
|
+
Requires-Dist: scienceplots>=2.1
|
|
48
|
+
Requires-Dist: pandas>=2.2
|
|
49
|
+
Requires-Dist: tqdm>=4.67
|
|
50
|
+
Requires-Dist: scikit-learn>=1.5
|
|
51
|
+
Requires-Dist: gwpy>=3.0
|
|
52
|
+
Requires-Dist: pycbc>=2.7
|
|
53
|
+
Requires-Dist: scipy>=1.11
|
|
54
|
+
Requires-Dist: huggingface_hub>=0.20
|
|
55
|
+
Requires-Dist: bilby>=2.0
|
|
56
|
+
Provides-Extra: generative
|
|
57
|
+
Requires-Dist: tensorflow>=2.14; extra == "generative"
|
|
58
|
+
Requires-Dist: gengli>=0.2; extra == "generative"
|
|
59
|
+
Provides-Extra: dev
|
|
60
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
61
|
+
Requires-Dist: pytest-cov>=5; extra == "dev"
|
|
62
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
63
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
64
|
+
Requires-Dist: nbstripout>=0.7; extra == "dev"
|
|
65
|
+
Provides-Extra: docs
|
|
66
|
+
Requires-Dist: sphinx>=7; extra == "docs"
|
|
67
|
+
Requires-Dist: sphinx-autoapi>=3; extra == "docs"
|
|
68
|
+
Requires-Dist: furo>=2024.1.29; extra == "docs"
|
|
69
|
+
Requires-Dist: myst-parser>=3; extra == "docs"
|
|
70
|
+
Requires-Dist: nbsphinx>=0.9; extra == "docs"
|
|
71
|
+
Requires-Dist: ipykernel; extra == "docs"
|
|
72
|
+
Provides-Extra: all
|
|
73
|
+
Requires-Dist: deepextractor[dev,docs,generative]; extra == "all"
|
|
74
|
+
Dynamic: license-file
|
|
75
|
+
|
|
76
|
+
# DeepExtractor
|
|
77
|
+
|
|
78
|
+
Deep learning framework for reconstructing gravitational-wave signals and glitches from LIGO detector data.
|
|
79
|
+
|
|
80
|
+
Built for LIGO's O3 observing run (Hanford and Livingston detectors). Described in the paper:
|
|
81
|
+
|
|
82
|
+
> **Time-domain reconstruction of signals and glitches in gravitational wave data with deep learning**
|
|
83
|
+
> Dooney, Narola, Bromuri, Curier, Van Den Broeck, Caudill, Tan — *Phys. Rev. D* 112, 044022 (2025)
|
|
84
|
+
> [10.1103/s91m-c2jw](https://link.aps.org/doi/10.1103/s91m-c2jw)
|
|
85
|
+
|
|
86
|
+
**[Documentation](https://deepextractor.readthedocs.io/)** | **[GitHub](https://github.com/tomdooney95/deepextractor)** | **[PyPI](https://pypi.org/project/deepextractor/)**
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## How it works
|
|
91
|
+
|
|
92
|
+
LIGO strain data contains both astrophysical signals and instrumental glitches — short-duration noise transients that can mimic or obscure gravitational-wave events. DeepExtractor frames glitch reconstruction as a supervised denoising problem:
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
Input: h(t) = background noise + glitch/signal
|
|
96
|
+
Output: n̂(t) = predicted background
|
|
97
|
+
Result: ĝ(t) = h(t) − n̂(t) ← reconstructed glitch or signal
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
The model is a **U-Net** operating on STFT spectrograms (magnitude + phase). The default model, `DeepExtractor_257`, uses a 4-level U-Net with feature maps `[64, 128, 256, 512]`.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Installation
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
pip install deepextractor
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Requires Python ≥ 3.10 and PyTorch ≥ 2.1. Pretrained weights are downloaded automatically from Hugging Face Hub on first use — no manual step required.
|
|
111
|
+
|
|
112
|
+
**Install from source:**
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
git clone https://github.com/tomdooney95/deepextractor.git
|
|
116
|
+
cd deepextractor
|
|
117
|
+
pip install -e ".[dev]"
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## Quickstart
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
import numpy as np
|
|
126
|
+
import deepextractor
|
|
127
|
+
|
|
128
|
+
# Load model (bilby noise variant by default)
|
|
129
|
+
model = deepextractor.DeepExtractorModel()
|
|
130
|
+
|
|
131
|
+
# Reconstruct — extract the transient from noisy strain
|
|
132
|
+
noisy_strain = np.random.randn(8192) # replace with real data
|
|
133
|
+
reconstructed = model.reconstruct(noisy_strain) # extracted signal
|
|
134
|
+
background = model.background(noisy_strain) # noise estimate
|
|
135
|
+
|
|
136
|
+
# One-liner convenience function
|
|
137
|
+
reconstructed = deepextractor.reconstruct(noisy_strain)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Two pretrained variants are available:
|
|
141
|
+
|
|
142
|
+
| Variant | Use case |
|
|
143
|
+
|---|---|
|
|
144
|
+
| `bilby_noise` (default) | Simulated LIGO/Virgo noise, injection studies |
|
|
145
|
+
| `real_noise` | Real LIGO O3 detector data |
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## Bundled dataset
|
|
150
|
+
|
|
151
|
+
The package ships a sample of the [GravitySpy LIGO O3a high-confidence catalogue](https://doi.org/10.5281/zenodo.1476551) at `assets/data_o3a_sample.csv` — 10 H1 examples per glitch class (17 classes, 170 rows total), SNR > 15.
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
import pandas as pd
|
|
155
|
+
import importlib.resources as resources
|
|
156
|
+
|
|
157
|
+
with resources.path("deepextractor", "assets") as assets:
|
|
158
|
+
df = pd.read_csv(assets / "data_o3a_sample.csv")
|
|
159
|
+
|
|
160
|
+
print(df["label"].value_counts())
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## CLI tools
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
# Train a model
|
|
169
|
+
deepextractor-train --model DeepExtractor_257 --data-dir data/spectrogram_domain/
|
|
170
|
+
|
|
171
|
+
# Generate training data
|
|
172
|
+
deepextractor-generate --output-dir data/ --num-train 250000
|
|
173
|
+
|
|
174
|
+
# Convert time-domain data to spectrograms
|
|
175
|
+
deepextractor-specgen --input-dir data/time_domain/ --output-dir data/spectrogram_domain/
|
|
176
|
+
|
|
177
|
+
# Evaluate a trained model
|
|
178
|
+
deepextractor-evaluate --model DeepExtractor_257 --checkpoint-dir checkpoints/ --data-dir data/
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Citation
|
|
184
|
+
|
|
185
|
+
```bibtex
|
|
186
|
+
@article{s91m-c2jw,
|
|
187
|
+
title = {Time-domain reconstruction of signals and glitches in gravitational wave data with deep learning},
|
|
188
|
+
author = {Dooney, Tom and Narola, Harsh and Bromuri, Stefano and Curier, R. Lyana and Van Den Broeck, Chris and Caudill, Sarah and Tan, Daniel Stanley},
|
|
189
|
+
journal = {Phys. Rev. D},
|
|
190
|
+
volume = {112},
|
|
191
|
+
issue = {4},
|
|
192
|
+
pages = {044022},
|
|
193
|
+
numpages = {24},
|
|
194
|
+
year = {2025},
|
|
195
|
+
month = {Aug},
|
|
196
|
+
publisher = {American Physical Society},
|
|
197
|
+
doi = {10.1103/s91m-c2jw},
|
|
198
|
+
url = {https://link.aps.org/doi/10.1103/s91m-c2jw}
|
|
199
|
+
}
|
|
200
|
+
```
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# DeepExtractor
|
|
2
|
+
|
|
3
|
+
Deep learning framework for reconstructing gravitational-wave signals and glitches from LIGO detector data.
|
|
4
|
+
|
|
5
|
+
Built for LIGO's O3 observing run (Hanford and Livingston detectors). Described in the paper:
|
|
6
|
+
|
|
7
|
+
> **Time-domain reconstruction of signals and glitches in gravitational wave data with deep learning**
|
|
8
|
+
> Dooney, Narola, Bromuri, Curier, Van Den Broeck, Caudill, Tan — *Phys. Rev. D* 112, 044022 (2025)
|
|
9
|
+
> [10.1103/s91m-c2jw](https://link.aps.org/doi/10.1103/s91m-c2jw)
|
|
10
|
+
|
|
11
|
+
**[Documentation](https://deepextractor.readthedocs.io/)** | **[GitHub](https://github.com/tomdooney95/deepextractor)** | **[PyPI](https://pypi.org/project/deepextractor/)**
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## How it works
|
|
16
|
+
|
|
17
|
+
LIGO strain data contains both astrophysical signals and instrumental glitches — short-duration noise transients that can mimic or obscure gravitational-wave events. DeepExtractor frames glitch reconstruction as a supervised denoising problem:
|
|
18
|
+
|
|
19
|
+
```
|
|
20
|
+
Input: h(t) = background noise + glitch/signal
|
|
21
|
+
Output: n̂(t) = predicted background
|
|
22
|
+
Result: ĝ(t) = h(t) − n̂(t) ← reconstructed glitch or signal
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
The model is a **U-Net** operating on STFT spectrograms (magnitude + phase). The default model, `DeepExtractor_257`, uses a 4-level U-Net with feature maps `[64, 128, 256, 512]`.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install deepextractor
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Requires Python ≥ 3.10 and PyTorch ≥ 2.1. Pretrained weights are downloaded automatically from Hugging Face Hub on first use — no manual step required.
|
|
36
|
+
|
|
37
|
+
**Install from source:**
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
git clone https://github.com/tomdooney95/deepextractor.git
|
|
41
|
+
cd deepextractor
|
|
42
|
+
pip install -e ".[dev]"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Quickstart
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
import numpy as np
|
|
51
|
+
import deepextractor
|
|
52
|
+
|
|
53
|
+
# Load model (bilby noise variant by default)
|
|
54
|
+
model = deepextractor.DeepExtractorModel()
|
|
55
|
+
|
|
56
|
+
# Reconstruct — extract the transient from noisy strain
|
|
57
|
+
noisy_strain = np.random.randn(8192) # replace with real data
|
|
58
|
+
reconstructed = model.reconstruct(noisy_strain) # extracted signal
|
|
59
|
+
background = model.background(noisy_strain) # noise estimate
|
|
60
|
+
|
|
61
|
+
# One-liner convenience function
|
|
62
|
+
reconstructed = deepextractor.reconstruct(noisy_strain)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Two pretrained variants are available:
|
|
66
|
+
|
|
67
|
+
| Variant | Use case |
|
|
68
|
+
|---|---|
|
|
69
|
+
| `bilby_noise` (default) | Simulated LIGO/Virgo noise, injection studies |
|
|
70
|
+
| `real_noise` | Real LIGO O3 detector data |
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Bundled dataset
|
|
75
|
+
|
|
76
|
+
The package ships a sample of the [GravitySpy LIGO O3a high-confidence catalogue](https://doi.org/10.5281/zenodo.1476551) at `assets/data_o3a_sample.csv` — 10 H1 examples per glitch class (17 classes, 170 rows total), SNR > 15.
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
import pandas as pd
|
|
80
|
+
import importlib.resources as resources
|
|
81
|
+
|
|
82
|
+
with resources.path("deepextractor", "assets") as assets:
|
|
83
|
+
df = pd.read_csv(assets / "data_o3a_sample.csv")
|
|
84
|
+
|
|
85
|
+
print(df["label"].value_counts())
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## CLI tools
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
# Train a model
|
|
94
|
+
deepextractor-train --model DeepExtractor_257 --data-dir data/spectrogram_domain/
|
|
95
|
+
|
|
96
|
+
# Generate training data
|
|
97
|
+
deepextractor-generate --output-dir data/ --num-train 250000
|
|
98
|
+
|
|
99
|
+
# Convert time-domain data to spectrograms
|
|
100
|
+
deepextractor-specgen --input-dir data/time_domain/ --output-dir data/spectrogram_domain/
|
|
101
|
+
|
|
102
|
+
# Evaluate a trained model
|
|
103
|
+
deepextractor-evaluate --model DeepExtractor_257 --checkpoint-dir checkpoints/ --data-dir data/
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## Citation
|
|
109
|
+
|
|
110
|
+
```bibtex
|
|
111
|
+
@article{s91m-c2jw,
|
|
112
|
+
title = {Time-domain reconstruction of signals and glitches in gravitational wave data with deep learning},
|
|
113
|
+
author = {Dooney, Tom and Narola, Harsh and Bromuri, Stefano and Curier, R. Lyana and Van Den Broeck, Chris and Caudill, Sarah and Tan, Daniel Stanley},
|
|
114
|
+
journal = {Phys. Rev. D},
|
|
115
|
+
volume = {112},
|
|
116
|
+
issue = {4},
|
|
117
|
+
pages = {044022},
|
|
118
|
+
numpages = {24},
|
|
119
|
+
year = {2025},
|
|
120
|
+
month = {Aug},
|
|
121
|
+
publisher = {American Physical Society},
|
|
122
|
+
doi = {10.1103/s91m-c2jw},
|
|
123
|
+
url = {https://link.aps.org/doi/10.1103/s91m-c2jw}
|
|
124
|
+
}
|
|
125
|
+
```
|