deepextractor 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. deepextractor-0.1.0/.gitattributes +10 -0
  2. deepextractor-0.1.0/.github/workflows/publish.yml +45 -0
  3. deepextractor-0.1.0/.gitignore +62 -0
  4. deepextractor-0.1.0/.readthedocs.yaml +17 -0
  5. deepextractor-0.1.0/CHANGELOG.md +22 -0
  6. deepextractor-0.1.0/LICENSE +21 -0
  7. deepextractor-0.1.0/PKG-INFO +200 -0
  8. deepextractor-0.1.0/README.md +125 -0
  9. deepextractor-0.1.0/assets/cdvgan/fingerprint.pb +3 -0
  10. deepextractor-0.1.0/assets/cdvgan/keras_metadata.pb +3 -0
  11. deepextractor-0.1.0/assets/cdvgan/saved_model.pb +3 -0
  12. deepextractor-0.1.0/assets/cdvgan/variables/variables.data-00000-of-00001 +3 -0
  13. deepextractor-0.1.0/assets/cdvgan/variables/variables.index +3 -0
  14. deepextractor-0.1.0/assets/data_o3a_sample.csv +171 -0
  15. deepextractor-0.1.0/assets/glitch_dict_example.pkl +3 -0
  16. deepextractor-0.1.0/assets/scaler.pkl +3 -0
  17. deepextractor-0.1.0/assets/scaler_bilby.pkl +3 -0
  18. deepextractor-0.1.0/docs/_static/custom.css +120 -0
  19. deepextractor-0.1.0/docs/conf.py +85 -0
  20. deepextractor-0.1.0/docs/guides/installation.rst +45 -0
  21. deepextractor-0.1.0/docs/guides/overview.rst +74 -0
  22. deepextractor-0.1.0/docs/guides/quickstart.rst +103 -0
  23. deepextractor-0.1.0/docs/guides/training.rst +79 -0
  24. deepextractor-0.1.0/docs/index.rst +37 -0
  25. deepextractor-0.1.0/notebooks/deepextractor_example.ipynb +1121 -0
  26. deepextractor-0.1.0/notebooks/deepextractor_minimal.ipynb +220 -0
  27. deepextractor-0.1.0/notebooks/glitch_reconstruction_tutorial.ipynb +370 -0
  28. deepextractor-0.1.0/notebooks/training_tutorial.ipynb +1301 -0
  29. deepextractor-0.1.0/notebooks/visualizations_and_results.ipynb +1441 -0
  30. deepextractor-0.1.0/pretrained/DeepExtractor_257/checkpoint_best_bilby_noise_base.pth.tar +3 -0
  31. deepextractor-0.1.0/pretrained/DeepExtractor_257/checkpoint_best_real_noise_base.pth.tar +3 -0
  32. deepextractor-0.1.0/pyproject.toml +103 -0
  33. deepextractor-0.1.0/setup.cfg +4 -0
  34. deepextractor-0.1.0/src/deepextractor/__init__.py +28 -0
  35. deepextractor-0.1.0/src/deepextractor/_version.py +24 -0
  36. deepextractor-0.1.0/src/deepextractor/api.py +60 -0
  37. deepextractor-0.1.0/src/deepextractor/generation/__init__.py +1 -0
  38. deepextractor-0.1.0/src/deepextractor/generation/generate_spectrograms.py +158 -0
  39. deepextractor-0.1.0/src/deepextractor/generation/generate_timeseries.py +162 -0
  40. deepextractor-0.1.0/src/deepextractor/generation/glitch_functions.py +145 -0
  41. deepextractor-0.1.0/src/deepextractor/model.py +155 -0
  42. deepextractor-0.1.0/src/deepextractor/models/__init__.py +19 -0
  43. deepextractor-0.1.0/src/deepextractor/models/architectures.py +314 -0
  44. deepextractor-0.1.0/src/deepextractor/py.typed +0 -0
  45. deepextractor-0.1.0/src/deepextractor/training/__init__.py +1 -0
  46. deepextractor-0.1.0/src/deepextractor/training/train_fn.py +60 -0
  47. deepextractor-0.1.0/src/deepextractor/training/trainer.py +265 -0
  48. deepextractor-0.1.0/src/deepextractor/utils/__init__.py +45 -0
  49. deepextractor-0.1.0/src/deepextractor/utils/checkpoints.py +102 -0
  50. deepextractor-0.1.0/src/deepextractor/utils/io.py +165 -0
  51. deepextractor-0.1.0/src/deepextractor/utils/metrics.py +63 -0
  52. deepextractor-0.1.0/src/deepextractor/utils/signal.py +85 -0
  53. deepextractor-0.1.0/src/deepextractor/utils/stft.py +64 -0
  54. deepextractor-0.1.0/src/deepextractor/utils/visualization.py +121 -0
  55. deepextractor-0.1.0/src/deepextractor.egg-info/PKG-INFO +200 -0
  56. deepextractor-0.1.0/src/deepextractor.egg-info/SOURCES.txt +66 -0
  57. deepextractor-0.1.0/src/deepextractor.egg-info/dependency_links.txt +1 -0
  58. deepextractor-0.1.0/src/deepextractor.egg-info/entry_points.txt +5 -0
  59. deepextractor-0.1.0/src/deepextractor.egg-info/requires.txt +34 -0
  60. deepextractor-0.1.0/src/deepextractor.egg-info/top_level.txt +1 -0
  61. deepextractor-0.1.0/tests/__init__.py +0 -0
  62. deepextractor-0.1.0/tests/conftest.py +53 -0
  63. deepextractor-0.1.0/tests/test_datasets.py +42 -0
  64. deepextractor-0.1.0/tests/test_deepextractor_model.py +123 -0
  65. deepextractor-0.1.0/tests/test_glitch_functions.py +60 -0
  66. deepextractor-0.1.0/tests/test_metrics.py +69 -0
  67. deepextractor-0.1.0/tests/test_models.py +55 -0
  68. deepextractor-0.1.0/tests/test_signal_utils.py +57 -0
@@ -0,0 +1,10 @@
1
+ # Pretrained model weights are hosted on Hugging Face Hub (tomdooney/deepextractor).
2
+ # They are downloaded automatically on first use and cached locally in pretrained/.
3
+ # Do NOT track pretrained/ in git — add it to .gitignore instead.
4
+
5
+ # Track bundled data assets with git-lfs
6
+ assets/*.pkl filter=lfs diff=lfs merge=lfs -text
7
+ assets/cdvgan/** filter=lfs diff=lfs merge=lfs -text
8
+
9
+ # Strip notebook outputs automatically on commit
10
+ *.ipynb filter=nbstripout
@@ -0,0 +1,45 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ with:
14
+ fetch-depth: 0 # needed for setuptools-scm to read git tags
15
+
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.11"
19
+
20
+ - name: Build distributions
21
+ run: |
22
+ pip install build
23
+ python -m build
24
+
25
+ - name: Upload build artifacts
26
+ uses: actions/upload-artifact@v4
27
+ with:
28
+ name: dist
29
+ path: dist/
30
+
31
+ publish:
32
+ needs: build
33
+ runs-on: ubuntu-latest
34
+ environment: pypi
35
+ permissions:
36
+ id-token: write # required for Trusted Publisher (OIDC)
37
+ steps:
38
+ - name: Download build artifacts
39
+ uses: actions/download-artifact@v4
40
+ with:
41
+ name: dist
42
+ path: dist/
43
+
44
+ - name: Publish to PyPI
45
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,62 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ *.pyd
6
+ .Python
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ .eggs/
11
+ *.egg
12
+ *.whl
13
+
14
+ # Virtual environments
15
+ .venv/
16
+ venv/
17
+ env/
18
+ .env
19
+
20
+ # Setuptools-scm generated version file
21
+ src/deepextractor/_version.py
22
+
23
+ # Testing
24
+ .coverage
25
+ .pytest_cache/
26
+ htmlcov/
27
+ coverage.xml
28
+
29
+ # Type checking
30
+ .mypy_cache/
31
+
32
+ # Linting
33
+ .ruff_cache/
34
+
35
+ # Jupyter
36
+ notebooks/.ipynb_checkpoints/
37
+ .ipynb_checkpoints/
38
+
39
+ # Large generated data files — regenerate with the CLI scripts
40
+ data/
41
+ *.npy
42
+ losses/
43
+ evaluation/
44
+ saved_predictions/
45
+
46
+ # pretrained/ is a local cache for Hugging Face Hub weights — do not commit
47
+ pretrained/
48
+
49
+ # Never ignore bundled assets (scalers, cdvgan weights), even if a pattern above matches; they are tracked via git-lfs (see .gitattributes)
50
+ !assets/**
51
+
52
+ # IDE
53
+ .vscode/
54
+ .idea/
55
+ *.swp
56
+ *.swo
57
+ .DS_Store
58
+
59
+ # Docs build
60
+ docs/_build/
61
+ docs/api/_autosummary/
62
+ docs/notebooks/
@@ -0,0 +1,17 @@
1
+ version: 2
2
+
3
+ build:
4
+ os: ubuntu-22.04
5
+ tools:
6
+ python: "3.12"
7
+ apt_packages:
8
+ - pandoc
9
+
10
+ python:
11
+ install:
12
+ - method: pip
13
+ path: .
14
+ extra_requirements: [docs]
15
+
16
+ sphinx:
17
+ configuration: docs/conf.py
@@ -0,0 +1,22 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] - 2026-04-01
9
+
10
+ ### Added
11
+ - Initial pip-installable package structure
12
+ - `deepextractor.models` — U-Net 1D/2D, DnCNN 1D, Autoencoder 1D/2D architectures
13
+ - `deepextractor.data` — PyTorch Dataset classes for time-series and spectrogram data
14
+ - `deepextractor.training` — training loop and trainer CLI entry point
15
+ - `deepextractor.generation` — synthetic glitch signal generators and data generation scripts
16
+ - `deepextractor.evaluation` — simulated evaluation metrics
17
+ - `deepextractor.utils` — checkpoints, signal processing, metrics, I/O, visualization
18
+ - Sphinx documentation with Furo dark theme
19
+ - CLI entry points: `deepextractor-train`, `deepextractor-generate`, `deepextractor-specgen`, `deepextractor-evaluate`
20
+ - Pretrained checkpoint weights hosted on Hugging Face Hub, downloaded automatically on first use
21
+ - Bundled GravitySpy O3a sample dataset (`assets/data_o3a_sample.csv`) — 170 high-confidence glitch examples across 17 classes
22
+ - Jupyter notebook tutorials: minimal example, glitch reconstruction, training from scratch
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Tom Dooney
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,200 @@
1
+ Metadata-Version: 2.4
2
+ Name: deepextractor
3
+ Version: 0.1.0
4
+ Summary: Deep learning framework for reconstructing gravitational wave signals and glitches from LIGO detector data
5
+ Author: Tom Dooney
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Tom Dooney
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Repository, https://github.com/tomdooney95/deepextractor
29
+ Project-URL: Documentation, https://deepextractor.readthedocs.io/
30
+ Project-URL: Paper, https://link.aps.org/doi/10.1103/s91m-c2jw
31
+ Keywords: gravitational-waves,LIGO,deep-learning,glitch,signal-processing
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Intended Audience :: Science/Research
34
+ Classifier: Topic :: Scientific/Engineering :: Physics
35
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
36
+ Classifier: License :: OSI Approved :: MIT License
37
+ Classifier: Programming Language :: Python :: 3
38
+ Classifier: Programming Language :: Python :: 3.10
39
+ Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Programming Language :: Python :: 3.12
41
+ Requires-Python: >=3.10
42
+ Description-Content-Type: text/markdown
43
+ License-File: LICENSE
44
+ Requires-Dist: numpy<2.0,>=1.26
45
+ Requires-Dist: torch>=2.1
46
+ Requires-Dist: matplotlib>=3.9
47
+ Requires-Dist: scienceplots>=2.1
48
+ Requires-Dist: pandas>=2.2
49
+ Requires-Dist: tqdm>=4.67
50
+ Requires-Dist: scikit-learn>=1.5
51
+ Requires-Dist: gwpy>=3.0
52
+ Requires-Dist: pycbc>=2.7
53
+ Requires-Dist: scipy>=1.11
54
+ Requires-Dist: huggingface_hub>=0.20
55
+ Requires-Dist: bilby>=2.0
56
+ Provides-Extra: generative
57
+ Requires-Dist: tensorflow>=2.14; extra == "generative"
58
+ Requires-Dist: gengli>=0.2; extra == "generative"
59
+ Provides-Extra: dev
60
+ Requires-Dist: pytest>=8; extra == "dev"
61
+ Requires-Dist: pytest-cov>=5; extra == "dev"
62
+ Requires-Dist: ruff>=0.4; extra == "dev"
63
+ Requires-Dist: mypy>=1.10; extra == "dev"
64
+ Requires-Dist: nbstripout>=0.7; extra == "dev"
65
+ Provides-Extra: docs
66
+ Requires-Dist: sphinx>=7; extra == "docs"
67
+ Requires-Dist: sphinx-autoapi>=3; extra == "docs"
68
+ Requires-Dist: furo>=2024.1.29; extra == "docs"
69
+ Requires-Dist: myst-parser>=3; extra == "docs"
70
+ Requires-Dist: nbsphinx>=0.9; extra == "docs"
71
+ Requires-Dist: ipykernel; extra == "docs"
72
+ Provides-Extra: all
73
+ Requires-Dist: deepextractor[dev,docs,generative]; extra == "all"
74
+ Dynamic: license-file
75
+
76
+ # DeepExtractor
77
+
78
+ Deep learning framework for reconstructing gravitational-wave signals and glitches from LIGO detector data.
79
+
80
+ Built for LIGO's O3 observing run (Hanford and Livingston detectors). Described in the paper:
81
+
82
+ > **Time-domain reconstruction of signals and glitches in gravitational wave data with deep learning**
83
+ > Dooney, Narola, Bromuri, Curier, Van Den Broeck, Caudill, Tan — *Phys. Rev. D* 112, 044022 (2025)
84
+ > [10.1103/s91m-c2jw](https://link.aps.org/doi/10.1103/s91m-c2jw)
85
+
86
+ **[Documentation](https://deepextractor.readthedocs.io/)** | **[GitHub](https://github.com/tomdooney95/deepextractor)** | **[PyPI](https://pypi.org/project/deepextractor/)**
87
+
88
+ ---
89
+
90
+ ## How it works
91
+
92
+ LIGO strain data contains both astrophysical signals and instrumental glitches — short-duration noise transients that can mimic or obscure gravitational-wave events. DeepExtractor frames glitch reconstruction as a supervised denoising problem:
93
+
94
+ ```
95
+ Input: h(t) = background noise + glitch/signal
96
+ Output: n̂(t) = predicted background
97
+ Result: ĝ(t) = h(t) − n̂(t) ← reconstructed glitch or signal
98
+ ```
99
+
100
+ The model is a **U-Net** operating on STFT spectrograms (magnitude + phase). The default model, `DeepExtractor_257`, uses a 4-level U-Net with feature maps `[64, 128, 256, 512]`.
101
+
102
+ ---
103
+
104
+ ## Installation
105
+
106
+ ```bash
107
+ pip install deepextractor
108
+ ```
109
+
110
+ Requires Python ≥ 3.10 and PyTorch ≥ 2.1. Pretrained weights are downloaded automatically from Hugging Face Hub on first use — no manual step required.
111
+
112
+ **Install from source:**
113
+
114
+ ```bash
115
+ git clone https://github.com/tomdooney95/deepextractor.git
116
+ cd deepextractor
117
+ pip install -e ".[dev]"
118
+ ```
119
+
120
+ ---
121
+
122
+ ## Quickstart
123
+
124
+ ```python
125
+ import numpy as np
126
+ import deepextractor
127
+
128
+ # Load model (bilby noise variant by default)
129
+ model = deepextractor.DeepExtractorModel()
130
+
131
+ # Reconstruct — extract the transient from noisy strain
132
+ noisy_strain = np.random.randn(8192) # replace with real data
133
+ reconstructed = model.reconstruct(noisy_strain) # extracted signal
134
+ background = model.background(noisy_strain) # noise estimate
135
+
136
+ # One-liner convenience function
137
+ reconstructed = deepextractor.reconstruct(noisy_strain)
138
+ ```
139
+
140
+ Two pretrained variants are available:
141
+
142
+ | Variant | Use case |
143
+ |---|---|
144
+ | `bilby_noise` (default) | Simulated LIGO/Virgo noise, injection studies |
145
+ | `real_noise` | Real LIGO O3 detector data |
146
+
147
+ ---
148
+
149
+ ## Bundled dataset
150
+
151
+ The package ships a sample of the [GravitySpy LIGO O3a high-confidence catalogue](https://doi.org/10.5281/zenodo.1476551) at `assets/data_o3a_sample.csv` — 10 H1 examples per glitch class (17 classes, 170 rows total), SNR > 15.
152
+
153
+ ```python
154
+ import pandas as pd
155
+ import importlib.resources as resources
156
+
157
+ with resources.path("deepextractor", "assets") as assets:
158
+ df = pd.read_csv(assets / "data_o3a_sample.csv")
159
+
160
+ print(df["label"].value_counts())
161
+ ```
162
+
163
+ ---
164
+
165
+ ## CLI tools
166
+
167
+ ```bash
168
+ # Train a model
169
+ deepextractor-train --model DeepExtractor_257 --data-dir data/spectrogram_domain/
170
+
171
+ # Generate training data
172
+ deepextractor-generate --output-dir data/ --num-train 250000
173
+
174
+ # Convert time-domain data to spectrograms
175
+ deepextractor-specgen --input-dir data/time_domain/ --output-dir data/spectrogram_domain/
176
+
177
+ # Evaluate a trained model
178
+ deepextractor-evaluate --model DeepExtractor_257 --checkpoint-dir checkpoints/ --data-dir data/
179
+ ```
180
+
181
+ ---
182
+
183
+ ## Citation
184
+
185
+ ```bibtex
186
+ @article{s91m-c2jw,
187
+ title = {Time-domain reconstruction of signals and glitches in gravitational wave data with deep learning},
188
+ author = {Dooney, Tom and Narola, Harsh and Bromuri, Stefano and Curier, R. Lyana and Van Den Broeck, Chris and Caudill, Sarah and Tan, Daniel Stanley},
189
+ journal = {Phys. Rev. D},
190
+ volume = {112},
191
+ issue = {4},
192
+ pages = {044022},
193
+ numpages = {24},
194
+ year = {2025},
195
+ month = {Aug},
196
+ publisher = {American Physical Society},
197
+ doi = {10.1103/s91m-c2jw},
198
+ url = {https://link.aps.org/doi/10.1103/s91m-c2jw}
199
+ }
200
+ ```
@@ -0,0 +1,125 @@
1
+ # DeepExtractor
2
+
3
+ Deep learning framework for reconstructing gravitational-wave signals and glitches from LIGO detector data.
4
+
5
+ Built for LIGO's O3 observing run (Hanford and Livingston detectors). Described in the paper:
6
+
7
+ > **Time-domain reconstruction of signals and glitches in gravitational wave data with deep learning**
8
+ > Dooney, Narola, Bromuri, Curier, Van Den Broeck, Caudill, Tan — *Phys. Rev. D* 112, 044022 (2025)
9
+ > [10.1103/s91m-c2jw](https://link.aps.org/doi/10.1103/s91m-c2jw)
10
+
11
+ **[Documentation](https://deepextractor.readthedocs.io/)** | **[GitHub](https://github.com/tomdooney95/deepextractor)** | **[PyPI](https://pypi.org/project/deepextractor/)**
12
+
13
+ ---
14
+
15
+ ## How it works
16
+
17
+ LIGO strain data contains both astrophysical signals and instrumental glitches — short-duration noise transients that can mimic or obscure gravitational-wave events. DeepExtractor frames glitch reconstruction as a supervised denoising problem:
18
+
19
+ ```
20
+ Input: h(t) = background noise + glitch/signal
21
+ Output: n̂(t) = predicted background
22
+ Result: ĝ(t) = h(t) − n̂(t) ← reconstructed glitch or signal
23
+ ```
24
+
25
+ The model is a **U-Net** operating on STFT spectrograms (magnitude + phase). The default model, `DeepExtractor_257`, uses a 4-level U-Net with feature maps `[64, 128, 256, 512]`.
26
+
27
+ ---
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install deepextractor
33
+ ```
34
+
35
+ Requires Python ≥ 3.10 and PyTorch ≥ 2.1. Pretrained weights are downloaded automatically from Hugging Face Hub on first use — no manual step required.
36
+
37
+ **Install from source:**
38
+
39
+ ```bash
40
+ git clone https://github.com/tomdooney95/deepextractor.git
41
+ cd deepextractor
42
+ pip install -e ".[dev]"
43
+ ```
44
+
45
+ ---
46
+
47
+ ## Quickstart
48
+
49
+ ```python
50
+ import numpy as np
51
+ import deepextractor
52
+
53
+ # Load model (bilby noise variant by default)
54
+ model = deepextractor.DeepExtractorModel()
55
+
56
+ # Reconstruct — extract the transient from noisy strain
57
+ noisy_strain = np.random.randn(8192) # replace with real data
58
+ reconstructed = model.reconstruct(noisy_strain) # extracted signal
59
+ background = model.background(noisy_strain) # noise estimate
60
+
61
+ # One-liner convenience function
62
+ reconstructed = deepextractor.reconstruct(noisy_strain)
63
+ ```
64
+
65
+ Two pretrained variants are available:
66
+
67
+ | Variant | Use case |
68
+ |---|---|
69
+ | `bilby_noise` (default) | Simulated LIGO/Virgo noise, injection studies |
70
+ | `real_noise` | Real LIGO O3 detector data |
71
+
72
+ ---
73
+
74
+ ## Bundled dataset
75
+
76
+ The package ships a sample of the [GravitySpy LIGO O3a high-confidence catalogue](https://doi.org/10.5281/zenodo.1476551) at `assets/data_o3a_sample.csv` — 10 H1 examples per glitch class (17 classes, 170 rows total), SNR > 15.
77
+
78
+ ```python
79
+ import pandas as pd
80
+ import importlib.resources as resources
81
+
82
+ with resources.path("deepextractor", "assets") as assets:
83
+ df = pd.read_csv(assets / "data_o3a_sample.csv")
84
+
85
+ print(df["label"].value_counts())
86
+ ```
87
+
88
+ ---
89
+
90
+ ## CLI tools
91
+
92
+ ```bash
93
+ # Train a model
94
+ deepextractor-train --model DeepExtractor_257 --data-dir data/spectrogram_domain/
95
+
96
+ # Generate training data
97
+ deepextractor-generate --output-dir data/ --num-train 250000
98
+
99
+ # Convert time-domain data to spectrograms
100
+ deepextractor-specgen --input-dir data/time_domain/ --output-dir data/spectrogram_domain/
101
+
102
+ # Evaluate a trained model
103
+ deepextractor-evaluate --model DeepExtractor_257 --checkpoint-dir checkpoints/ --data-dir data/
104
+ ```
105
+
106
+ ---
107
+
108
+ ## Citation
109
+
110
+ ```bibtex
111
+ @article{s91m-c2jw,
112
+ title = {Time-domain reconstruction of signals and glitches in gravitational wave data with deep learning},
113
+ author = {Dooney, Tom and Narola, Harsh and Bromuri, Stefano and Curier, R. Lyana and Van Den Broeck, Chris and Caudill, Sarah and Tan, Daniel Stanley},
114
+ journal = {Phys. Rev. D},
115
+ volume = {112},
116
+ issue = {4},
117
+ pages = {044022},
118
+ numpages = {24},
119
+ year = {2025},
120
+ month = {Aug},
121
+ publisher = {American Physical Society},
122
+ doi = {10.1103/s91m-c2jw},
123
+ url = {https://link.aps.org/doi/10.1103/s91m-c2jw}
124
+ }
125
+ ```
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0e9cdf6b7687679797dbdf33a4939f7880c4a7871bde4acce723202002e2a67
3
+ size 56
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40577bc6dd914de81866af03dde5e5c52f6defc53232fc6af67c88d993661d9a
3
+ size 48191
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95d23909fc92fb6006edb6b0f17b29f309b18e12722f2212d464dfc2571d3b48
3
+ size 450480
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66837c1d272bddcc679b59003e49e5f34f5d9dc21fb1f3570673a884d7eeb5ee
3
+ size 14145257
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a915bd7e7a654bec1d25b1c7a2978291a4cf4b974fa864bb9133ba0ddd77b860
3
+ size 1605