protqc 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. protqc-0.1.0/LICENSE +21 -0
  2. protqc-0.1.0/PKG-INFO +247 -0
  3. protqc-0.1.0/README.md +205 -0
  4. protqc-0.1.0/protqc/__init__.py +23 -0
  5. protqc-0.1.0/protqc/chat.py +842 -0
  6. protqc-0.1.0/protqc/cli.py +908 -0
  7. protqc-0.1.0/protqc/config.py +100 -0
  8. protqc-0.1.0/protqc/pipeline.py +244 -0
  9. protqc-0.1.0/protqc/progress.py +127 -0
  10. protqc-0.1.0/protqc/report.py +674 -0
  11. protqc-0.1.0/protqc/scoring.py +206 -0
  12. protqc-0.1.0/protqc/tools/__init__.py +44 -0
  13. protqc-0.1.0/protqc/tools/_suppress.py +31 -0
  14. protqc-0.1.0/protqc/tools/fpocket_tool.py +206 -0
  15. protqc-0.1.0/protqc/tools/freesasa_tool.py +116 -0
  16. protqc-0.1.0/protqc/tools/hbond_tool.py +129 -0
  17. protqc-0.1.0/protqc/tools/openmm_tool.py +356 -0
  18. protqc-0.1.0/protqc/tools/ss_stability_tool.py +125 -0
  19. protqc-0.1.0/protqc/tools/structure_scorer.py +170 -0
  20. protqc-0.1.0/protqc/types.py +108 -0
  21. protqc-0.1.0/protqc.egg-info/PKG-INFO +247 -0
  22. protqc-0.1.0/protqc.egg-info/SOURCES.txt +40 -0
  23. protqc-0.1.0/protqc.egg-info/dependency_links.txt +1 -0
  24. protqc-0.1.0/protqc.egg-info/entry_points.txt +2 -0
  25. protqc-0.1.0/protqc.egg-info/requires.txt +28 -0
  26. protqc-0.1.0/protqc.egg-info/top_level.txt +1 -0
  27. protqc-0.1.0/pyproject.toml +63 -0
  28. protqc-0.1.0/setup.cfg +4 -0
  29. protqc-0.1.0/tests/test_chat.py +605 -0
  30. protqc-0.1.0/tests/test_cli.py +274 -0
  31. protqc-0.1.0/tests/test_config.py +108 -0
  32. protqc-0.1.0/tests/test_fpocket_tool.py +300 -0
  33. protqc-0.1.0/tests/test_freesasa_tool.py +102 -0
  34. protqc-0.1.0/tests/test_hbond_tool.py +54 -0
  35. protqc-0.1.0/tests/test_openmm_tool.py +107 -0
  36. protqc-0.1.0/tests/test_pipeline.py +125 -0
  37. protqc-0.1.0/tests/test_progress.py +231 -0
  38. protqc-0.1.0/tests/test_report.py +285 -0
  39. protqc-0.1.0/tests/test_scoring.py +275 -0
  40. protqc-0.1.0/tests/test_ss_stability_tool.py +58 -0
  41. protqc-0.1.0/tests/test_structure_scorer.py +191 -0
  42. protqc-0.1.0/tests/test_types.py +64 -0
protqc-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Omur Koray Guzel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
protqc-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,247 @@
1
+ Metadata-Version: 2.4
2
+ Name: protqc
3
+ Version: 0.1.0
4
+ Summary: Physics-based verification of AI-designed protein structures
5
+ Author: Ömür Koray Güzel
6
+ License: MIT
7
+ Keywords: protein,design,verification,physics,molecular-dynamics,AI
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
13
+ Requires-Python: >=3.11
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Requires-Dist: pyyaml>=6.0
17
+ Requires-Dist: numpy>=1.26.0
18
+ Requires-Dist: rich>=13.7.0
19
+ Requires-Dist: jinja2>=3.1.0
20
+ Provides-Extra: chat
21
+ Requires-Dist: litellm>=1.40.0; extra == "chat"
22
+ Provides-Extra: all
23
+ Requires-Dist: litellm>=1.40.0; extra == "all"
24
+ Requires-Dist: fair-esm>=2.0.0; extra == "all"
25
+ Requires-Dist: openmm>=8.1.0; extra == "all"
26
+ Requires-Dist: pdbfixer>=1.9; extra == "all"
27
+ Requires-Dist: mdtraj>=1.10.0; extra == "all"
28
+ Requires-Dist: MDAnalysis>=2.7.0; extra == "all"
29
+ Requires-Dist: freesasa>=2.2.0; extra == "all"
30
+ Requires-Dist: biopython>=1.84; extra == "all"
31
+ Requires-Dist: pandas>=2.2.0; extra == "all"
32
+ Requires-Dist: matplotlib>=3.9.0; extra == "all"
33
+ Requires-Dist: seaborn>=0.13.0; extra == "all"
34
+ Requires-Dist: scipy>=1.13.0; extra == "all"
35
+ Requires-Dist: scikit-learn>=1.5.0; extra == "all"
36
+ Requires-Dist: tqdm>=4.66.0; extra == "all"
37
+ Requires-Dist: requests>=2.32.0; extra == "all"
38
+ Provides-Extra: dev
39
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
40
+ Requires-Dist: pytest-cov>=5.0.0; extra == "dev"
41
+ Dynamic: license-file
42
+
43
+ # ProtQC
44
+
45
+ **Physics-based verification of AI-designed protein structures**
46
+
47
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
48
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
49
+
50
+ *Catches structural hallucinations before wet-lab*
51
+
52
+ ---
53
+
54
+ ## Why ProtQC?
55
+
56
+ AI protein design tools (AlphaFold, RFdiffusion, ProteinMPNN, BoltzGen) routinely produce structures with high confidence scores (pLDDT > 90) that still fail experimentally. A protein can look perfect by pLDDT yet harbor internal voids, unstable hydrogen bond networks, or thermodynamic instabilities that only surface in solution.
57
+
58
+ ProtQC combines six physics-based metrics into a composite risk score, catching high-pLDDT hallucinations that no single metric detects on its own.
59
+
60
+ ## Quick Start
61
+
62
+ ```bash
63
+ protqc analyze protein.pdb
64
+ ```
65
+
66
+ ## The 6 Metrics
67
+
68
+ | # | Metric | Source | What It Catches |
69
+ |---|--------|--------|-----------------|
70
+ | 1 | **pLDDT** | Structure prediction | Low confidence regions |
71
+ | 2 | **MD RMSD** | OpenMM | Backbone instability under simulation |
72
+ | 3 | **Cavity Volume** | fpocket | Internal voids and packing defects |
73
+ | 4 | **H-bond Persistence** | MDTraj | Weak hydrogen bond networks |
74
+ | 5 | **SS Preservation** | MDTraj DSSP | Secondary structure loss during MD |
75
+ | 6 | **SASA Polar Ratio** | FreeSASA | Abnormal surface accessibility |
76
+
77
+ Each metric produces a normalized 0–1 sub-score. The composite risk score is a weighted sum, mapped to a verdict:
78
+
79
+ - **PASS** (risk < 0.30) — Design is physically plausible
80
+ - **WARNING** (0.30 ≤ risk < 0.50) — Proceed with caution; review flagged metrics
81
+ - **FAIL** (risk ≥ 0.50) — Design has significant structural issues
82
+
83
+ ### Risk Scoring Weights
84
+
85
+ ```yaml
86
+ risk_weights:
87
+ plddt: 0.12
88
+ md_rmsd: 0.29
89
+ cavity: 0.12
90
+ hbond_persistence: 0.24
91
+ ss_preservation: 0.18
92
+ sasa_ratio: 0.05
93
+ ```
94
+
95
+ ## Validated Results
96
+
97
+ | Protein | Verdict | Risk Score |
98
+ |---------|---------|------------|
99
+ | Ubiquitin (1UBQ) | PASS | 0.257 |
100
+ | GFP (1EMA) | PASS | 0.281 |
101
+ | Alpha-synuclein (1XQ8) | FAIL | 0.555 |
102
+
103
+ ### Performance
104
+
105
+ | Protein | MD Duration | Wall Time | GPU |
106
+ |---------|-------------|-----------|-----|
107
+ | Ubiquitin (76 aa) | 10 ns | ~23 min | RTX 4070 |
108
+ | GFP (238 aa) | 10 ns | ~49 min | RTX 4070 |
109
+
110
+ ## Usage
111
+
112
+ ProtQC provides three usage modes:
113
+
114
+ ### CLI — Single Protein Analysis
115
+
116
+ ```bash
117
+ # Analyze a PDB file
118
+ protqc analyze protein.pdb
119
+
120
+ # Enter a PDB ID — auto-downloads from RCSB
121
+ protqc analyze 1UBQ
122
+
123
+ # Skip MD simulation for quick structural checks
124
+ protqc analyze protein.pdb --skip-md
125
+
126
+ # Set MD simulation length
127
+ protqc analyze protein.pdb --md-duration 10
128
+
129
+ # Use pre-computed MD trajectory
130
+ protqc analyze protein.pdb --trajectory md_output.csv
131
+
132
+ # Generate FastQC-style HTML report
133
+ protqc analyze protein.pdb --html report.html
134
+
135
+ # JSON output
136
+ protqc analyze protein.pdb --format json
137
+ ```
138
+
139
+ ### Interactive Mode
140
+
141
+ ```bash
142
+ # Launch interactive prompt — guides you through analysis
143
+ protqc
144
+ ```
145
+
146
+ ### AI Chat Assistant
147
+
148
+ ```bash
149
+ # Start AI-powered chat for interpreting results
150
+ protqc chat
151
+ ```
152
+
153
+ Chat supports 8 providers via LiteLLM: **OpenAI**, **Anthropic**, **Google**, **DeepSeek**, **OpenRouter**, **Moonshot**, **MiniMax**, **Zhipu**.
154
+
155
+ ## Installation
156
+
157
+ ### Docker (recommended — all platforms)
158
+
159
+ Docker is the easiest way to run ProtQC with all dependencies (OpenMM, CUDA, fpocket, FreeSASA, MDTraj):
160
+
161
+ ```bash
162
+ # Build the image
163
+ docker build -t protqc .
164
+
165
+ # Analyze a protein (GPU-accelerated)
166
+ docker run --gpus all -v $(pwd)/data:/app/data protqc analyze data/benchmark/ubiquitin.pdb
167
+
168
+ # Run with MD simulation
169
+ docker run --gpus all -v $(pwd)/data:/app/data protqc analyze data/benchmark/ubiquitin.pdb --md-duration 10
170
+
171
+ # CPU-only (MD will be slow)
172
+ docker run -v $(pwd)/data:/app/data -e CUDA_VISIBLE_DEVICES="" protqc analyze protein.pdb --skip-md
173
+ ```
174
+
175
+ **Docker Compose:**
176
+
177
+ ```bash
178
+ # GPU-accelerated
179
+ docker compose run protqc analyze data/benchmark/ubiquitin.pdb
180
+
181
+ # CPU-only variant
182
+ docker compose run protqc-cpu analyze data/benchmark/ubiquitin.pdb --skip-md
183
+ ```
184
+
185
+ > **Note:** GPU support requires the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). Without a GPU, MD simulations still work but are significantly slower (~10–50x). Use `--skip-md` for quick checks without MD.
186
+
187
+ ### Source install (Linux only)
188
+
189
+ ```bash
190
+ conda create -n protqc python=3.11
191
+ conda activate protqc
192
+
193
+ # OpenMM from conda-forge (includes CUDA support)
194
+ conda install -c conda-forge openmm
195
+
196
+ # ProtQC + all dependencies
197
+ pip install -e '.[all]'
198
+ ```
199
+
200
+ > **Platform Support:** Source installation requires Linux. OpenMM and fpocket have limited support on macOS/Windows. Use Docker on non-Linux platforms.
201
+
202
+ ## Configuration
203
+
204
+ All thresholds, weights, and verdict boundaries are defined in [`configs/thresholds.yaml`](configs/thresholds.yaml). Key tunables:
205
+
206
+ - **Intrinsically disordered proteins:** Increase `physics_verifier.md_rmsd_max_angstrom` (e.g., 8.0–10.0) since higher RMSD is expected
207
+ - **Membrane proteins:** Adjust `surface.sasa_polar_ratio_min/max` for transmembrane segments
208
+
209
+ ## Limitations
210
+
211
+ ProtQC is a rapid pre-screening tool, not a substitute for comprehensive computational or experimental validation:
212
+
213
+ - **MD simulation length.** The default 10 ns simulation is a rapid pre-screen that catches catastrophic failures (large RMSD drift, complete unfolding). Subtle instabilities — slow conformational changes, partial unfolding events, aggregation-prone intermediates — may require 100–500 ns simulations for reliable detection (Lindorff-Larsen et al. 2011; Ferruz et al. 2022). Treat a ProtQC PASS as "no obvious red flags," not "experimentally validated."
214
+
215
+ - **Cavity detection.** fpocket was designed for identifying druggable surface binding pockets, not for internal void quality control (Le Guilloux et al. 2009). The suspicious cavity flagging (volume > 800 Å³, druggability < 0.4) is a literature-informed heuristic (Schmidtke et al. 2010), not a validated structural defect detector. Combine with packing density metrics or Voronoi-based tools for higher confidence.
216
+
217
+ - **Risk score weights.** The current weights are expert estimates based on published benchmarks (Dauparas et al. 2022; Ferruz et al. 2022) and will be refined through calibration on larger, more diverse protein sets. Different protein families (membrane proteins, IDPs, repeat proteins) may need substantially different weight profiles.
218
+
219
+ ## Related Tools
220
+
221
+ | Tool | Focus |
222
+ |------|-------|
223
+ | [CHAPERONg](https://github.com/paulshamrat/CHAPERONg) | Automated GROMACS MD analysis |
224
+ | [MolProbity](https://github.com/rlabduke/MolProbity) | Stereochemistry validation |
225
+ | [QMEAN](https://swissmodel.expasy.org/qmean/) | Statistical potential scoring |
226
+ | [VoroMQA](https://bioinformatics.lt/wtsam/voromqa) | Voronoi tessellation quality |
227
+ | [ProSA](https://prosa.services.came.sbg.ac.at/prosa.php) | Statistical analysis of protein structures |
228
+ | [ProteinDJ](https://github.com/PapenfussLab/proteindj) | AI protein design evaluation |
229
+ | [BinderFlow](https://github.com/cryoEM-CNIO/BinderFlow) | Binder design pipeline |
230
+ | [OVO](https://github.com/MSDLLCpapers/ovo) | De novo protein design ecosystem |
231
+
232
+ ## Roadmap
233
+
234
+ **v0.2.0** — Benchmark dataset (25 proteins, Garcia/Hermosilla/Chevalier), Colab MCP integration, weight calibration, replica runs
235
+
236
+ **v0.3.0** — Thermal stability prediction, MultiQC-style batch reports, Nextflow/Snakemake templates, REST API
237
+
238
+ ## License
239
+
240
+ MIT
241
+
242
+ ## Citation
243
+
244
+ ```
245
+ Güzel, Ö.K. (2026). ProtQC: Physics-based verification of AI-designed protein structures.
246
+ github.com/korayguzel/protqc
247
+ ```
protqc-0.1.0/README.md ADDED
@@ -0,0 +1,205 @@
1
+ # ProtQC
2
+
3
+ **Physics-based verification of AI-designed protein structures**
4
+
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
7
+
8
+ *Catches structural hallucinations before wet-lab*
9
+
10
+ ---
11
+
12
+ ## Why ProtQC?
13
+
14
+ AI protein design tools (AlphaFold, RFdiffusion, ProteinMPNN, BoltzGen) routinely produce structures with high confidence scores (pLDDT > 90) that still fail experimentally. A protein can look perfect by pLDDT yet harbor internal voids, unstable hydrogen bond networks, or thermodynamic instabilities that only surface in solution.
15
+
16
+ ProtQC combines six physics-based metrics into a composite risk score, catching high-pLDDT hallucinations that no single metric detects on its own.
17
+
18
+ ## Quick Start
19
+
20
+ ```bash
21
+ protqc analyze protein.pdb
22
+ ```
23
+
24
+ ## The 6 Metrics
25
+
26
+ | # | Metric | Source | What It Catches |
27
+ |---|--------|--------|-----------------|
28
+ | 1 | **pLDDT** | Structure prediction | Low confidence regions |
29
+ | 2 | **MD RMSD** | OpenMM | Backbone instability under simulation |
30
+ | 3 | **Cavity Volume** | fpocket | Internal voids and packing defects |
31
+ | 4 | **H-bond Persistence** | MDTraj | Weak hydrogen bond networks |
32
+ | 5 | **SS Preservation** | MDTraj DSSP | Secondary structure loss during MD |
33
+ | 6 | **SASA Polar Ratio** | FreeSASA | Abnormal surface accessibility |
34
+
35
+ Each metric produces a normalized 0–1 sub-score. The composite risk score is a weighted sum, mapped to a verdict:
36
+
37
+ - **PASS** (risk < 0.30) — Design is physically plausible
38
+ - **WARNING** (0.30 ≤ risk < 0.50) — Proceed with caution; review flagged metrics
39
+ - **FAIL** (risk ≥ 0.50) — Design has significant structural issues
40
+
41
+ ### Risk Scoring Weights
42
+
43
+ ```yaml
44
+ risk_weights:
45
+ plddt: 0.12
46
+ md_rmsd: 0.29
47
+ cavity: 0.12
48
+ hbond_persistence: 0.24
49
+ ss_preservation: 0.18
50
+ sasa_ratio: 0.05
51
+ ```
52
+
53
+ ## Validated Results
54
+
55
+ | Protein | Verdict | Risk Score |
56
+ |---------|---------|------------|
57
+ | Ubiquitin (1UBQ) | PASS | 0.257 |
58
+ | GFP (1EMA) | PASS | 0.281 |
59
+ | Alpha-synuclein (1XQ8) | FAIL | 0.555 |
60
+
61
+ ### Performance
62
+
63
+ | Protein | MD Duration | Wall Time | GPU |
64
+ |---------|-------------|-----------|-----|
65
+ | Ubiquitin (76 aa) | 10 ns | ~23 min | RTX 4070 |
66
+ | GFP (238 aa) | 10 ns | ~49 min | RTX 4070 |
67
+
68
+ ## Usage
69
+
70
+ ProtQC provides three usage modes:
71
+
72
+ ### CLI — Single Protein Analysis
73
+
74
+ ```bash
75
+ # Analyze a PDB file
76
+ protqc analyze protein.pdb
77
+
78
+ # Enter a PDB ID — auto-downloads from RCSB
79
+ protqc analyze 1UBQ
80
+
81
+ # Skip MD simulation for quick structural checks
82
+ protqc analyze protein.pdb --skip-md
83
+
84
+ # Set MD simulation length
85
+ protqc analyze protein.pdb --md-duration 10
86
+
87
+ # Use pre-computed MD trajectory
88
+ protqc analyze protein.pdb --trajectory md_output.csv
89
+
90
+ # Generate FastQC-style HTML report
91
+ protqc analyze protein.pdb --html report.html
92
+
93
+ # JSON output
94
+ protqc analyze protein.pdb --format json
95
+ ```
96
+
97
+ ### Interactive Mode
98
+
99
+ ```bash
100
+ # Launch interactive prompt — guides you through analysis
101
+ protqc
102
+ ```
103
+
104
+ ### AI Chat Assistant
105
+
106
+ ```bash
107
+ # Start AI-powered chat for interpreting results
108
+ protqc chat
109
+ ```
110
+
111
+ Chat supports 8 providers via LiteLLM: **OpenAI**, **Anthropic**, **Google**, **DeepSeek**, **OpenRouter**, **Moonshot**, **MiniMax**, **Zhipu**.
112
+
113
+ ## Installation
114
+
115
+ ### Docker (recommended — all platforms)
116
+
117
+ Docker is the easiest way to run ProtQC with all dependencies (OpenMM, CUDA, fpocket, FreeSASA, MDTraj):
118
+
119
+ ```bash
120
+ # Build the image
121
+ docker build -t protqc .
122
+
123
+ # Analyze a protein (GPU-accelerated)
124
+ docker run --gpus all -v $(pwd)/data:/app/data protqc analyze data/benchmark/ubiquitin.pdb
125
+
126
+ # Run with MD simulation
127
+ docker run --gpus all -v $(pwd)/data:/app/data protqc analyze data/benchmark/ubiquitin.pdb --md-duration 10
128
+
129
+ # CPU-only (MD will be slow)
130
+ docker run -v $(pwd)/data:/app/data -e CUDA_VISIBLE_DEVICES="" protqc analyze protein.pdb --skip-md
131
+ ```
132
+
133
+ **Docker Compose:**
134
+
135
+ ```bash
136
+ # GPU-accelerated
137
+ docker compose run protqc analyze data/benchmark/ubiquitin.pdb
138
+
139
+ # CPU-only variant
140
+ docker compose run protqc-cpu analyze data/benchmark/ubiquitin.pdb --skip-md
141
+ ```
142
+
143
+ > **Note:** GPU support requires the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). Without a GPU, MD simulations still work but are significantly slower (~10–50x). Use `--skip-md` for quick checks without MD.
144
+
145
+ ### Source install (Linux only)
146
+
147
+ ```bash
148
+ conda create -n protqc python=3.11
149
+ conda activate protqc
150
+
151
+ # OpenMM from conda-forge (includes CUDA support)
152
+ conda install -c conda-forge openmm
153
+
154
+ # ProtQC + all dependencies
155
+ pip install -e '.[all]'
156
+ ```
157
+
158
+ > **Platform Support:** Source installation requires Linux. OpenMM and fpocket have limited support on macOS/Windows. Use Docker on non-Linux platforms.
159
+
160
+ ## Configuration
161
+
162
+ All thresholds, weights, and verdict boundaries are defined in [`configs/thresholds.yaml`](configs/thresholds.yaml). Key tunables:
163
+
164
+ - **Intrinsically disordered proteins:** Increase `physics_verifier.md_rmsd_max_angstrom` (e.g., 8.0–10.0) since higher RMSD is expected
165
+ - **Membrane proteins:** Adjust `surface.sasa_polar_ratio_min/max` for transmembrane segments
166
+
167
+ ## Limitations
168
+
169
+ ProtQC is a rapid pre-screening tool, not a substitute for comprehensive computational or experimental validation:
170
+
171
+ - **MD simulation length.** The default 10 ns simulation is a rapid pre-screen that catches catastrophic failures (large RMSD drift, complete unfolding). Subtle instabilities — slow conformational changes, partial unfolding events, aggregation-prone intermediates — may require 100–500 ns simulations for reliable detection (Lindorff-Larsen et al. 2011; Ferruz et al. 2022). Treat a ProtQC PASS as "no obvious red flags," not "experimentally validated."
172
+
173
+ - **Cavity detection.** fpocket was designed for identifying druggable surface binding pockets, not for internal void quality control (Le Guilloux et al. 2009). The suspicious cavity flagging (volume > 800 Å³, druggability < 0.4) is a literature-informed heuristic (Schmidtke et al. 2010), not a validated structural defect detector. Combine with packing density metrics or Voronoi-based tools for higher confidence.
174
+
175
+ - **Risk score weights.** The current weights are expert estimates based on published benchmarks (Dauparas et al. 2022; Ferruz et al. 2022) and will be refined through calibration on larger, more diverse protein sets. Different protein families (membrane proteins, IDPs, repeat proteins) may need substantially different weight profiles.
176
+
177
+ ## Related Tools
178
+
179
+ | Tool | Focus |
180
+ |------|-------|
181
+ | [CHAPERONg](https://github.com/paulshamrat/CHAPERONg) | Automated GROMACS MD analysis |
182
+ | [MolProbity](https://github.com/rlabduke/MolProbity) | Stereochemistry validation |
183
+ | [QMEAN](https://swissmodel.expasy.org/qmean/) | Statistical potential scoring |
184
+ | [VoroMQA](https://bioinformatics.lt/wtsam/voromqa) | Voronoi tessellation quality |
185
+ | [ProSA](https://prosa.services.came.sbg.ac.at/prosa.php) | Statistical analysis of protein structures |
186
+ | [ProteinDJ](https://github.com/PapenfussLab/proteindj) | AI protein design evaluation |
187
+ | [BinderFlow](https://github.com/cryoEM-CNIO/BinderFlow) | Binder design pipeline |
188
+ | [OVO](https://github.com/MSDLLCpapers/ovo) | De novo protein design ecosystem |
189
+
190
+ ## Roadmap
191
+
192
+ **v0.2.0** — Benchmark dataset (25 proteins, Garcia/Hermosilla/Chevalier), Colab MCP integration, weight calibration, replica runs
193
+
194
+ **v0.3.0** — Thermal stability prediction, MultiQC-style batch reports, Nextflow/Snakemake templates, REST API
195
+
196
+ ## License
197
+
198
+ MIT
199
+
200
+ ## Citation
201
+
202
+ ```
203
+ Güzel, Ö.K. (2026). ProtQC: Physics-based verification of AI-designed protein structures.
204
+ github.com/korayguzel/protqc
205
+ ```
protqc-0.1.0/protqc/__init__.py ADDED
@@ -0,0 +1,23 @@
1
+ """
2
+ ProtQC — Physics-based verification of AI-generated protein designs.
3
+
4
+ A multi-agent framework that catches structural hallucinations before wet-lab.
5
+ """
6
+
7
+ __version__ = "0.1.0"
8
+
9
+ from protqc.types import (
10
+ ToolResult,
11
+ VerificationMetrics,
12
+ RiskVerdict,
13
+ PipelineResult,
14
+ )
15
+ from protqc.config import load_config
16
+
17
+ __all__ = [
18
+ "ToolResult",
19
+ "VerificationMetrics",
20
+ "RiskVerdict",
21
+ "PipelineResult",
22
+ "load_config",
23
+ ]