cooper-beta 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cooper_beta-0.1.0/LICENSE +21 -0
- cooper_beta-0.1.0/MANIFEST.in +8 -0
- cooper_beta-0.1.0/PKG-INFO +203 -0
- cooper_beta-0.1.0/README.md +167 -0
- cooper_beta-0.1.0/environment.yml +7 -0
- cooper_beta-0.1.0/pyproject.toml +86 -0
- cooper_beta-0.1.0/scripts/setup_env.sh +166 -0
- cooper_beta-0.1.0/setup.cfg +4 -0
- cooper_beta-0.1.0/src/cooper_beta/__init__.py +66 -0
- cooper_beta-0.1.0/src/cooper_beta/__main__.py +6 -0
- cooper_beta-0.1.0/src/cooper_beta/alignment.py +75 -0
- cooper_beta-0.1.0/src/cooper_beta/analysis_utils.py +254 -0
- cooper_beta-0.1.0/src/cooper_beta/analyzer.py +472 -0
- cooper_beta-0.1.0/src/cooper_beta/bootstrap.py +11 -0
- cooper_beta-0.1.0/src/cooper_beta/cli.py +164 -0
- cooper_beta-0.1.0/src/cooper_beta/conf/__init__.py +1 -0
- cooper_beta-0.1.0/src/cooper_beta/conf/analyzer/default.yaml +104 -0
- cooper_beta-0.1.0/src/cooper_beta/conf/config.yaml +11 -0
- cooper_beta-0.1.0/src/cooper_beta/conf/input/default.yaml +8 -0
- cooper_beta-0.1.0/src/cooper_beta/conf/output/default.yaml +2 -0
- cooper_beta-0.1.0/src/cooper_beta/conf/runtime/default.yaml +10 -0
- cooper_beta-0.1.0/src/cooper_beta/conf/slicer/default.yaml +2 -0
- cooper_beta-0.1.0/src/cooper_beta/config.py +631 -0
- cooper_beta-0.1.0/src/cooper_beta/constants.py +88 -0
- cooper_beta-0.1.0/src/cooper_beta/ellipse.py +235 -0
- cooper_beta-0.1.0/src/cooper_beta/evaluation/__init__.py +10 -0
- cooper_beta-0.1.0/src/cooper_beta/evaluation/__main__.py +6 -0
- cooper_beta-0.1.0/src/cooper_beta/evaluation/app.py +271 -0
- cooper_beta-0.1.0/src/cooper_beta/evaluation/metrics.py +150 -0
- cooper_beta-0.1.0/src/cooper_beta/evaluation/runner.py +171 -0
- cooper_beta-0.1.0/src/cooper_beta/exceptions.py +29 -0
- cooper_beta-0.1.0/src/cooper_beta/loader.py +323 -0
- cooper_beta-0.1.0/src/cooper_beta/models.py +234 -0
- cooper_beta-0.1.0/src/cooper_beta/pipeline.py +301 -0
- cooper_beta-0.1.0/src/cooper_beta/pipeline_workers.py +995 -0
- cooper_beta-0.1.0/src/cooper_beta/prepare_cache.py +106 -0
- cooper_beta-0.1.0/src/cooper_beta/results.py +248 -0
- cooper_beta-0.1.0/src/cooper_beta/runtime.py +51 -0
- cooper_beta-0.1.0/src/cooper_beta/slicer.py +163 -0
- cooper_beta-0.1.0/src/cooper_beta.egg-info/PKG-INFO +203 -0
- cooper_beta-0.1.0/src/cooper_beta.egg-info/SOURCES.txt +50 -0
- cooper_beta-0.1.0/src/cooper_beta.egg-info/dependency_links.txt +1 -0
- cooper_beta-0.1.0/src/cooper_beta.egg-info/entry_points.txt +3 -0
- cooper_beta-0.1.0/src/cooper_beta.egg-info/requires.txt +19 -0
- cooper_beta-0.1.0/src/cooper_beta.egg-info/top_level.txt +1 -0
- cooper_beta-0.1.0/tests/test_ellipse_fit.py +93 -0
- cooper_beta-0.1.0/tests/test_geometric_rules.py +130 -0
- cooper_beta-0.1.0/tests/test_loader.py +41 -0
- cooper_beta-0.1.0/tests/test_pipeline_results.py +807 -0
- cooper_beta-0.1.0/tests/test_prepare_cache.py +96 -0
- cooper_beta-0.1.0/tests/test_runtime_pipeline.py +243 -0
- cooper_beta-0.1.0/tests/test_smoke.py +95 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Shuyu Zhong
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cooper-beta
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Beta-barrel-like chain detector for PDB/mmCIF structures.
|
|
5
|
+
Author: Shuyu Zhong
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/GeraltZeroZhong/Cooper-Beta
|
|
8
|
+
Project-URL: Source, https://github.com/GeraltZeroZhong/Cooper-Beta
|
|
9
|
+
Project-URL: Issues, https://github.com/GeraltZeroZhong/Cooper-Beta/issues
|
|
10
|
+
Keywords: bioinformatics,beta-barrel,protein-structure,pdb,mmcif
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: numpy>=1.23
|
|
20
|
+
Requires-Dist: scipy>=1.10
|
|
21
|
+
Requires-Dist: biopython>=1.81
|
|
22
|
+
Requires-Dist: opencv-python-headless>=4.8
|
|
23
|
+
Requires-Dist: hydra-core>=1.3
|
|
24
|
+
Requires-Dist: tqdm>=4.0
|
|
25
|
+
Provides-Extra: eval
|
|
26
|
+
Requires-Dist: pandas>=1.5; extra == "eval"
|
|
27
|
+
Provides-Extra: full
|
|
28
|
+
Requires-Dist: pandas>=1.5; extra == "full"
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
31
|
+
Requires-Dist: ruff>=0.5; extra == "dev"
|
|
32
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
33
|
+
Requires-Dist: check-manifest>=0.49; extra == "dev"
|
|
34
|
+
Requires-Dist: twine>=5.0; extra == "dev"
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
|
|
37
|
+
# Cooper-Beta
|
|
38
|
+
|
|
39
|
+
Cooper-Beta detects beta-barrel-like protein chains in PDB, CIF, and mmCIF
|
|
40
|
+
structures. It parses structures with Biopython, runs DSSP, slices beta-sheet
|
|
41
|
+
C-alpha coordinates, fits ellipses to cross sections, applies geometric
|
|
42
|
+
consistency rules, and returns chain-level results.
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
|
|
46
|
+
Cooper-Beta requires Python 3.10 or newer and a DSSP executable (`mkdssp` or
|
|
47
|
+
`dssp`) on `PATH`.
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install cooper-beta
|
|
51
|
+
cooper-beta --check-env
|
|
52
|
+
cooper-beta path/to/structures --out cooper_beta_results.csv
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
If DSSP is installed outside `PATH`, pass its location as a configuration
|
|
56
|
+
override:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
cooper-beta path/to/structures runtime.dssp_bin_path=/absolute/path/to/mkdssp
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Installation
|
|
63
|
+
|
|
64
|
+
Install the detector:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pip install cooper-beta
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Install optional tools:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install "cooper-beta[eval]" # pandas for evaluation helpers
|
|
74
|
+
pip install "cooper-beta[full]" # all optional extras
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
For development from a source checkout:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
pip install -e ".[full,dev]"
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
The repository also includes `environment.yml` and `scripts/setup_env.sh` for a
|
|
84
|
+
Conda or Mamba environment that installs DSSP from `conda-forge`:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
bash scripts/setup_env.sh --dev
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Command Line
|
|
91
|
+
|
|
92
|
+
Run Cooper-Beta on a single file or a directory:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
cooper-beta path/to/structure.cif --out results.csv
|
|
96
|
+
cooper-beta path/to/structures --workers 8 --prepare-workers 8 --out results.csv
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Useful options:
|
|
100
|
+
|
|
101
|
+
- `--check-env`: print the Python executable and resolved DSSP executable.
|
|
102
|
+
- `--workers`: number of analysis worker processes.
|
|
103
|
+
- `--prepare-workers`: number of structure-preparation worker processes.
|
|
104
|
+
- `--out`: output CSV path.
|
|
105
|
+
- `--version`: print the installed Cooper-Beta version.
|
|
106
|
+
|
|
107
|
+
Advanced configuration uses Hydra-style `KEY=VALUE` overrides:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
cooper-beta path/to/structures \
|
|
111
|
+
runtime.dssp_bin_path=/absolute/path/to/mkdssp \
|
|
112
|
+
analyzer.rules.angle.max_gap_deg=160 \
|
|
113
|
+
output.summary_limit=-1
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Python API
|
|
117
|
+
|
|
118
|
+
The recommended Python entry point is `detect`, which returns a structured
|
|
119
|
+
`PipelineRunResult`. CSV output is written only when `output` is provided or
|
|
120
|
+
`write_csv=True`.
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
from cooper_beta import detect
|
|
124
|
+
|
|
125
|
+
run = detect(
|
|
126
|
+
"path/to/structures",
|
|
127
|
+
workers=4,
|
|
128
|
+
output="results.csv",
|
|
129
|
+
overrides={"runtime.dssp_bin_path": "/usr/bin/mkdssp"},
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
print(run.result_counts)
|
|
133
|
+
for row in run.rows:
|
|
134
|
+
print(row.filename, row.chain, row.result, row.reason)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Public interfaces:
|
|
138
|
+
|
|
139
|
+
- `cooper_beta.detect(...)`: run detection and return structured results.
|
|
140
|
+
- `cooper_beta.main(...)`: backward-compatible entry point returning row dicts.
|
|
141
|
+
- `cooper_beta.build_config(...)`: build an `AppConfig` from overrides.
|
|
142
|
+
- `cooper_beta.PipelineRunResult`: complete run result with `rows`,
|
|
143
|
+
`input_files`, `output_path`, and `result_counts`.
|
|
144
|
+
- `cooper_beta.DetectionResult`: one chain-level result row.
|
|
145
|
+
- `cooper_beta.ProteinLoader`: parse structures and collect per-chain C-alpha
|
|
146
|
+
and DSSP annotations.
|
|
147
|
+
- `cooper_beta.PCAAligner`, `ProteinSlicer`, and `BarrelAnalyzer`: lower-level
|
|
148
|
+
analysis components for custom workflows.
|
|
149
|
+
|
|
150
|
+
User-facing failures raise Cooper-Beta exceptions such as
|
|
151
|
+
`InputValidationError`, `DsspNotFoundError`, `DsspError`, `StructureParseError`,
|
|
152
|
+
and `ChainNotFoundError`.
|
|
153
|
+
|
|
154
|
+
## Output
|
|
155
|
+
|
|
156
|
+
The result CSV includes one row per chain. Core columns include:
|
|
157
|
+
|
|
158
|
+
- `filename` and `chain`
|
|
159
|
+
- `result`: `BARREL`, `NON_BARREL`, `FILTERED_OUT`, or `ERROR`
|
|
160
|
+
- `result_stage` and `reason`
|
|
161
|
+
- `decision_score`, `decision_basis`, and `decision_threshold`
|
|
162
|
+
- `score_raw` and `score_adjust`
|
|
163
|
+
- `valid_layers`, `scored_layers`, `total_layers`, `junk_layers`, and
|
|
164
|
+
`invalid_layers`
|
|
165
|
+
- `chain_residues`, `sheet_residues`, and `informative_slices`
|
|
166
|
+
|
|
167
|
+
Large directory runs use bounded prepare and analysis batches, and the CLI writes
|
|
168
|
+
the CSV incrementally. The console summary is capped by default; set
|
|
169
|
+
`output.summary_limit=-1` to print every row.
|
|
170
|
+
|
|
171
|
+
## Evaluation Helpers
|
|
172
|
+
|
|
173
|
+
Evaluation utilities are available after installing `cooper-beta[eval]`:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
cooper-beta-eval \
|
|
177
|
+
--positives path/to/positive-structures \
|
|
178
|
+
--negatives path/to/negative-structures \
|
|
179
|
+
--save-dir evaluation-results
|
|
180
|
+
python -m cooper_beta.evaluation \
|
|
181
|
+
--positives path/to/positive-structures \
|
|
182
|
+
--negatives path/to/negative-structures \
|
|
183
|
+
--ablation
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
The GitHub repository also contains helper scripts for local datasets and
|
|
187
|
+
Cooper-Beta CSV outputs. Local structure datasets, manual review notes, and
|
|
188
|
+
research-only helper scripts are intentionally excluded from the package
|
|
189
|
+
artifacts.
|
|
190
|
+
|
|
191
|
+
## Changelog
|
|
192
|
+
|
|
193
|
+
### 0.1.0
|
|
194
|
+
|
|
195
|
+
- Initial public release.
|
|
196
|
+
- CLI and Python API for PDB/CIF/mmCIF beta-barrel-like chain detection.
|
|
197
|
+
- DSSP-backed secondary-structure parsing.
|
|
198
|
+
- Ellipse fitting, PCA axis search, geometric rules, and CSV output.
|
|
199
|
+
- Evaluation helpers and ablation utilities.
|
|
200
|
+
|
|
201
|
+
## License
|
|
202
|
+
|
|
203
|
+
Cooper-Beta is released under the MIT License. See `LICENSE`.
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# Cooper-Beta
|
|
2
|
+
|
|
3
|
+
Cooper-Beta detects beta-barrel-like protein chains in PDB, CIF, and mmCIF
|
|
4
|
+
structures. It parses structures with Biopython, runs DSSP, slices beta-sheet
|
|
5
|
+
C-alpha coordinates, fits ellipses to cross sections, applies geometric
|
|
6
|
+
consistency rules, and returns chain-level results.
|
|
7
|
+
|
|
8
|
+
## Quick Start
|
|
9
|
+
|
|
10
|
+
Cooper-Beta requires Python 3.10 or newer and a DSSP executable (`mkdssp` or
|
|
11
|
+
`dssp`) on `PATH`.
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install cooper-beta
|
|
15
|
+
cooper-beta --check-env
|
|
16
|
+
cooper-beta path/to/structures --out cooper_beta_results.csv
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
If DSSP is installed outside `PATH`, pass its location as a configuration
|
|
20
|
+
override:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
cooper-beta path/to/structures runtime.dssp_bin_path=/absolute/path/to/mkdssp
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
Install the detector:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install cooper-beta
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Install optional tools:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install "cooper-beta[eval]" # pandas for evaluation helpers
|
|
38
|
+
pip install "cooper-beta[full]" # all optional extras
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
For development from a source checkout:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install -e ".[full,dev]"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
The repository also includes `environment.yml` and `scripts/setup_env.sh` for a
|
|
48
|
+
Conda or Mamba environment that installs DSSP from `conda-forge`:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
bash scripts/setup_env.sh --dev
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Command Line
|
|
55
|
+
|
|
56
|
+
Run Cooper-Beta on a single file or a directory:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
cooper-beta path/to/structure.cif --out results.csv
|
|
60
|
+
cooper-beta path/to/structures --workers 8 --prepare-workers 8 --out results.csv
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Useful options:
|
|
64
|
+
|
|
65
|
+
- `--check-env`: print the Python executable and resolved DSSP executable.
|
|
66
|
+
- `--workers`: number of analysis worker processes.
|
|
67
|
+
- `--prepare-workers`: number of structure-preparation worker processes.
|
|
68
|
+
- `--out`: output CSV path.
|
|
69
|
+
- `--version`: print the installed Cooper-Beta version.
|
|
70
|
+
|
|
71
|
+
Advanced configuration uses Hydra-style `KEY=VALUE` overrides:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
cooper-beta path/to/structures \
|
|
75
|
+
runtime.dssp_bin_path=/absolute/path/to/mkdssp \
|
|
76
|
+
analyzer.rules.angle.max_gap_deg=160 \
|
|
77
|
+
output.summary_limit=-1
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Python API
|
|
81
|
+
|
|
82
|
+
The recommended Python entry point is `detect`, which returns a structured
|
|
83
|
+
`PipelineRunResult`. CSV output is written only when `output` is provided or
|
|
84
|
+
`write_csv=True`.
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from cooper_beta import detect
|
|
88
|
+
|
|
89
|
+
run = detect(
|
|
90
|
+
"path/to/structures",
|
|
91
|
+
workers=4,
|
|
92
|
+
output="results.csv",
|
|
93
|
+
overrides={"runtime.dssp_bin_path": "/usr/bin/mkdssp"},
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
print(run.result_counts)
|
|
97
|
+
for row in run.rows:
|
|
98
|
+
print(row.filename, row.chain, row.result, row.reason)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Public interfaces:
|
|
102
|
+
|
|
103
|
+
- `cooper_beta.detect(...)`: run detection and return structured results.
|
|
104
|
+
- `cooper_beta.main(...)`: backward-compatible entry point returning row dicts.
|
|
105
|
+
- `cooper_beta.build_config(...)`: build an `AppConfig` from overrides.
|
|
106
|
+
- `cooper_beta.PipelineRunResult`: complete run result with `rows`,
|
|
107
|
+
`input_files`, `output_path`, and `result_counts`.
|
|
108
|
+
- `cooper_beta.DetectionResult`: one chain-level result row.
|
|
109
|
+
- `cooper_beta.ProteinLoader`: parse structures and collect per-chain C-alpha
|
|
110
|
+
and DSSP annotations.
|
|
111
|
+
- `cooper_beta.PCAAligner`, `ProteinSlicer`, and `BarrelAnalyzer`: lower-level
|
|
112
|
+
analysis components for custom workflows.
|
|
113
|
+
|
|
114
|
+
User-facing failures raise Cooper-Beta exceptions such as
|
|
115
|
+
`InputValidationError`, `DsspNotFoundError`, `DsspError`, `StructureParseError`,
|
|
116
|
+
and `ChainNotFoundError`.
|
|
117
|
+
|
|
118
|
+
## Output
|
|
119
|
+
|
|
120
|
+
The result CSV includes one row per chain. Core columns include:
|
|
121
|
+
|
|
122
|
+
- `filename` and `chain`
|
|
123
|
+
- `result`: `BARREL`, `NON_BARREL`, `FILTERED_OUT`, or `ERROR`
|
|
124
|
+
- `result_stage` and `reason`
|
|
125
|
+
- `decision_score`, `decision_basis`, and `decision_threshold`
|
|
126
|
+
- `score_raw` and `score_adjust`
|
|
127
|
+
- `valid_layers`, `scored_layers`, `total_layers`, `junk_layers`, and
|
|
128
|
+
`invalid_layers`
|
|
129
|
+
- `chain_residues`, `sheet_residues`, and `informative_slices`
|
|
130
|
+
|
|
131
|
+
Large directory runs use bounded prepare and analysis batches, and the CLI writes
|
|
132
|
+
the CSV incrementally. The console summary is capped by default; set
|
|
133
|
+
`output.summary_limit=-1` to print every row.
|
|
134
|
+
|
|
135
|
+
## Evaluation Helpers
|
|
136
|
+
|
|
137
|
+
Evaluation utilities are available after installing `cooper-beta[eval]`:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
cooper-beta-eval \
|
|
141
|
+
--positives path/to/positive-structures \
|
|
142
|
+
--negatives path/to/negative-structures \
|
|
143
|
+
--save-dir evaluation-results
|
|
144
|
+
python -m cooper_beta.evaluation \
|
|
145
|
+
--positives path/to/positive-structures \
|
|
146
|
+
--negatives path/to/negative-structures \
|
|
147
|
+
--ablation
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
The GitHub repository also contains helper scripts for local datasets and
|
|
151
|
+
Cooper-Beta CSV outputs. Local structure datasets, manual review notes, and
|
|
152
|
+
research-only helper scripts are intentionally excluded from the package
|
|
153
|
+
artifacts.
|
|
154
|
+
|
|
155
|
+
## Changelog
|
|
156
|
+
|
|
157
|
+
### 0.1.0
|
|
158
|
+
|
|
159
|
+
- Initial public release.
|
|
160
|
+
- CLI and Python API for PDB/CIF/mmCIF beta-barrel-like chain detection.
|
|
161
|
+
- DSSP-backed secondary-structure parsing.
|
|
162
|
+
- Ellipse fitting, PCA axis search, geometric rules, and CSV output.
|
|
163
|
+
- Evaluation helpers and ablation utilities.
|
|
164
|
+
|
|
165
|
+
## License
|
|
166
|
+
|
|
167
|
+
Cooper-Beta is released under the MIT License. See `LICENSE`.
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "cooper-beta"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Beta-barrel-like chain detector for PDB/mmCIF structures."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
authors = [
|
|
14
|
+
{ name = "Shuyu Zhong" },
|
|
15
|
+
]
|
|
16
|
+
keywords = ["bioinformatics", "beta-barrel", "protein-structure", "pdb", "mmcif"]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 3 - Alpha",
|
|
19
|
+
"Intended Audience :: Science/Research",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
22
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
23
|
+
]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"numpy>=1.23",
|
|
26
|
+
"scipy>=1.10",
|
|
27
|
+
"biopython>=1.81",
|
|
28
|
+
"opencv-python-headless>=4.8",
|
|
29
|
+
"hydra-core>=1.3",
|
|
30
|
+
"tqdm>=4.0",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.optional-dependencies]
|
|
34
|
+
eval = [
|
|
35
|
+
"pandas>=1.5",
|
|
36
|
+
]
|
|
37
|
+
full = [
|
|
38
|
+
"pandas>=1.5",
|
|
39
|
+
]
|
|
40
|
+
dev = [
|
|
41
|
+
"pytest>=7.0",
|
|
42
|
+
"ruff>=0.5",
|
|
43
|
+
"build>=1.2",
|
|
44
|
+
"check-manifest>=0.49",
|
|
45
|
+
"twine>=5.0",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.urls]
|
|
49
|
+
Homepage = "https://github.com/GeraltZeroZhong/Cooper-Beta"
|
|
50
|
+
Source = "https://github.com/GeraltZeroZhong/Cooper-Beta"
|
|
51
|
+
Issues = "https://github.com/GeraltZeroZhong/Cooper-Beta/issues"
|
|
52
|
+
|
|
53
|
+
[project.scripts]
|
|
54
|
+
cooper-beta = "cooper_beta.cli:main"
|
|
55
|
+
cooper-beta-eval = "cooper_beta.evaluation.app:main"
|
|
56
|
+
|
|
57
|
+
[tool.setuptools]
|
|
58
|
+
package-dir = {"" = "src"}
|
|
59
|
+
|
|
60
|
+
[tool.setuptools.packages.find]
|
|
61
|
+
where = ["src"]
|
|
62
|
+
include = ["cooper_beta*"]
|
|
63
|
+
|
|
64
|
+
[tool.setuptools.package-data]
|
|
65
|
+
cooper_beta = ["conf/**/*.yaml"]
|
|
66
|
+
|
|
67
|
+
[tool.ruff]
|
|
68
|
+
line-length = 100
|
|
69
|
+
target-version = "py310"
|
|
70
|
+
extend-exclude = ["data/scripts"]
|
|
71
|
+
|
|
72
|
+
[tool.ruff.lint]
|
|
73
|
+
select = ["E", "F", "I", "UP", "B"]
|
|
74
|
+
ignore = ["E501"]
|
|
75
|
+
|
|
76
|
+
[tool.pytest.ini_options]
|
|
77
|
+
testpaths = ["tests"]
|
|
78
|
+
|
|
79
|
+
[tool.check-manifest]
|
|
80
|
+
ignore = [
|
|
81
|
+
".github/**",
|
|
82
|
+
".gitignore",
|
|
83
|
+
"data/scripts/**",
|
|
84
|
+
"main.py",
|
|
85
|
+
"scripts/*.py",
|
|
86
|
+
]
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
#!/usr/bin/env bash
set -euo pipefail

# Defaults; the command-line flags parsed further down may override these.
DRY_RUN=0
WITH_DEV=0
ENV_NAME=cooperbeta

# Repository root is the parent of the directory that holds this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
ENV_FILE="${ROOT_DIR}/environment.yml"
|
|
9
|
+
|
|
10
|
+
# Print the command-line help text to stdout.
usage() {
  local -a help_lines=(
    'Usage:'
    'bash scripts/setup_env.sh [--name ENV_NAME] [--dev] [--dry-run]'
    ''
    'What it does:'
    '1. Prefer mamba/micromamba/conda to create an environment from environment.yml'
    'so DSSP is installed automatically.'
    '2. Install this repository in editable mode inside that environment.'
    '3. If no conda-like tool is available but apt-get exists, fall back to:'
    '- install DSSP from apt'
    '- create .venv'
    '- pip install the project'
    ''
    'Options:'
    '--name ENV_NAME Override the conda environment name (default: cooperbeta)'
    "--dev Also install the project's dev dependencies"
    '--dry-run Print commands without executing them'
    '-h, --help Show this help message'
  )
  printf '%s\n' "${help_lines[@]}"
}
|
|
31
|
+
|
|
32
|
+
# Succeed (exit 0) when `command -v` can resolve $1; prints nothing.
# Arguments: $1 - command name to look up
have_cmd() {
  local tool="$1"
  command -v "$tool" &>/dev/null
}
|
|
35
|
+
|
|
36
|
+
# Print a command as a shell-quoted "+ ..." trace line, then execute it.
# Globals:   DRY_RUN (read) - when 1, the command is traced but not executed
# Arguments: the command and its arguments
# Returns:   0 in dry-run mode, otherwise the exit status of the command
run_cmd() {
  local trace
  # %q quotes each word so the trace line can be pasted back into a shell.
  printf -v trace ' %q' "$@"
  printf '+%s\n' "$trace"
  [[ "$DRY_RUN" -eq 1 ]] && return 0
  "$@"
}
|
|
45
|
+
|
|
46
|
+
# Print the first available conda-compatible tool, trying mamba, then
# micromamba, then conda.
# Outputs: the chosen tool name on stdout
# Returns: 0 when a tool was found, 1 otherwise
choose_conda_frontend() {
  local tool
  for tool in mamba micromamba conda; do
    have_cmd "$tool" || continue
    printf '%s\n' "$tool"
    return 0
  done
  return 1
}
|
|
56
|
+
|
|
57
|
+
# Report whether a named environment appears in the frontend's `env list`.
# Arguments: $1 - conda-compatible executable (mamba, micromamba, or conda)
#            $2 - environment name to look for
# Returns:   0 when a non-comment line's first field equals $2 exactly,
#            non-zero otherwise (including when `env list` itself fails)
conda_env_exists() {
  local frontend="$1"
  local env_name="$2"
  # The match is done inside awk, which always consumes its whole input.
  # A short-circuiting `grep -q` could close the pipe early, killing an
  # upstream stage with SIGPIPE; under `set -o pipefail` that makes the
  # pipeline fail even though the environment was found.
  # Appending "" forces a string (not numeric) comparison.
  "$frontend" env list 2>/dev/null \
    | awk -v target="$env_name" '
        NF && $1 !~ /^#/ && ($1 "") == (target "") { found = 1 }
        END { exit !found }
      '
}
|
|
62
|
+
|
|
63
|
+
# Print the pip requirement used for the editable install of this checkout.
# Globals: ROOT_DIR (read), WITH_DEV (read)
# Outputs: "<ROOT_DIR>[full,dev]" when WITH_DEV is 1, otherwise "<ROOT_DIR>[full]"
install_spec() {
  local extras="full"
  if [[ "$WITH_DEV" -eq 1 ]]; then
    extras="full,dev"
  fi
  # Braces stop "[...]" from reading as an array subscript (ShellCheck SC1087).
  printf '%s\n' "${ROOT_DIR}[${extras}]"
}
|
|
70
|
+
|
|
71
|
+
# Create or update the conda environment, then install this project into it.
# Globals:   ENV_NAME (read), ENV_FILE (read)
# Arguments: $1 - conda-compatible executable (mamba, micromamba, or conda)
# Outputs:   whatever run_cmd prints, followed by an activation hint
setup_with_conda() {
  local frontend="$1"
  local env_action="create"
  local -a env_args=(--yes --name "$ENV_NAME" --file "$ENV_FILE")

  # An environment that already exists is updated (with --prune) instead.
  if conda_env_exists "$frontend" "$ENV_NAME"; then
    env_action="update"
    env_args+=(--prune)
  fi
  run_cmd "$frontend" env "$env_action" "${env_args[@]}"

  run_cmd "$frontend" run -n "$ENV_NAME" python -m pip install -e "$(install_spec)"

  printf '\n%s\n' "Environment is ready."
  case "$frontend" in
    micromamba)
      printf '%s\n' "Activate it with: micromamba activate $ENV_NAME"
      ;;
    *)
      printf '%s\n' "Activate it with: conda activate $ENV_NAME"
      ;;
  esac
}
|
|
90
|
+
|
|
91
|
+
# Fallback installer: install DSSP with apt-get, create a virtualenv at
# $ROOT_DIR/.venv, and install this project into it.
# Globals: ROOT_DIR (read), EUID (read)
# Outputs: whatever run_cmd prints, followed by an activation hint
setup_with_apt_venv() {
  local venv_path="$ROOT_DIR/.venv"
  local venv_python="$venv_path/bin/python"

  # Go through sudo for apt-get unless the script already runs as root.
  if [[ "$EUID" -eq 0 ]]; then
    run_cmd apt-get update
    run_cmd apt-get install -y dssp
  else
    run_cmd sudo apt-get update
    run_cmd sudo apt-get install -y dssp
  fi

  run_cmd python3 -m venv "$venv_path"
  run_cmd "$venv_python" -m pip install --upgrade pip setuptools wheel
  run_cmd "$venv_python" -m pip install -e "$(install_spec)"

  printf '\n%s\n' "Environment is ready."
  printf '%s\n' "Activate it with: source $venv_path/bin/activate"
}
|
|
110
|
+
|
|
111
|
+
# Parse command-line flags into ENV_NAME, WITH_DEV and DRY_RUN.
while (( $# > 0 )); do
  case "$1" in
    --help|-h)
      usage
      exit 0
      ;;
    --dry-run)
      DRY_RUN=1
      shift
      ;;
    --dev)
      WITH_DEV=1
      shift
      ;;
    --name)
      # The :? expansion aborts with this message when the value is missing
      # or empty.
      ENV_NAME="${2:?missing value for --name}"
      shift 2
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done
|
|
136
|
+
|
|
137
|
+
# The environment file is checked up front, before any installer is chosen.
[[ -f "$ENV_FILE" ]] || {
  echo "Missing environment file: $ENV_FILE" >&2
  exit 1
}

# Prefer a conda-compatible tool; otherwise fall back to apt-get plus venv.
if frontend="$(choose_conda_frontend)"; then
  setup_with_conda "$frontend"
  exit 0
elif have_cmd apt-get; then
  setup_with_apt_venv
  exit 0
fi

# Neither installer path is available: describe the manual route and fail.
cat >&2 <<'EOF'
No supported installer was found.

Recommended options:
1. Install mamba/conda, then rerun: bash scripts/setup_env.sh
2. Install DSSP manually and then run:
python3 -m venv .venv
source .venv/bin/activate
pip install -e ".[full]"

You can also point Cooper-Beta to a custom DSSP binary with
`runtime.dssp_bin_path=/absolute/path/to/mkdssp` (Hydra)
or `cooper_beta.config.Config.DSSP_BIN_PATH` (legacy Python API).
EOF
exit 1
|