quantumflow-sdk 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/main.py +34 -3
- api/models.py +41 -0
- api/routes/algorithm_routes.py +1029 -0
- api/routes/chat_routes.py +565 -0
- api/routes/pipeline_routes.py +578 -0
- db/models.py +357 -0
- quantumflow/algorithms/machine_learning/__init__.py +14 -2
- quantumflow/algorithms/machine_learning/vqe.py +355 -3
- quantumflow/core/__init__.py +10 -1
- quantumflow/core/quantum_compressor.py +379 -1
- quantumflow/integrations/domain_agents.py +617 -0
- quantumflow/pipeline/__init__.py +29 -0
- quantumflow/pipeline/anomaly_detector.py +521 -0
- quantumflow/pipeline/base_pipeline.py +602 -0
- quantumflow/pipeline/checkpoint_manager.py +587 -0
- quantumflow/pipeline/finance/__init__.py +5 -0
- quantumflow/pipeline/finance/portfolio_optimization.py +595 -0
- quantumflow/pipeline/healthcare/__init__.py +5 -0
- quantumflow/pipeline/healthcare/protein_folding.py +994 -0
- quantumflow/pipeline/temporal_memory.py +577 -0
- {quantumflow_sdk-0.2.1.dist-info → quantumflow_sdk-0.4.0.dist-info}/METADATA +3 -3
- {quantumflow_sdk-0.2.1.dist-info → quantumflow_sdk-0.4.0.dist-info}/RECORD +25 -12
- {quantumflow_sdk-0.2.1.dist-info → quantumflow_sdk-0.4.0.dist-info}/WHEEL +0 -0
- {quantumflow_sdk-0.2.1.dist-info → quantumflow_sdk-0.4.0.dist-info}/entry_points.txt +0 -0
- {quantumflow_sdk-0.2.1.dist-info → quantumflow_sdk-0.4.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,994 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Protein Folding Pipeline with VQE.
|
|
3
|
+
|
|
4
|
+
Uses Variational Quantum Eigensolver for energy minimization with:
|
|
5
|
+
- AMBER-like force field (bonds, angles, dihedrals, LJ, electrostatics)
|
|
6
|
+
- PDB structure loading for reference comparison
|
|
7
|
+
- Proper benchmarks: GDT-TS, TM-score, RMSD
|
|
8
|
+
- Secondary structure propensity (helix, sheet, coil)
|
|
9
|
+
- Auto-rollback on folding divergence
|
|
10
|
+
|
|
11
|
+
Benchmarks:
|
|
12
|
+
- CASP (Critical Assessment of protein Structure Prediction)
|
|
13
|
+
- GDT-TS: Global Distance Test (0-100, >50 = good)
|
|
14
|
+
- TM-score: Template Modeling score (>0.5 = same fold, ~0.17 = random-pair level)
|
|
15
|
+
- RMSD: Root Mean Square Deviation in Angstroms (<2Å = excellent)
|
|
16
|
+
|
|
17
|
+
Example:
|
|
18
|
+
pipeline = ProteinFoldingPipeline(
|
|
19
|
+
name="Hemoglobin Folding",
|
|
20
|
+
sequence="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSH",
|
|
21
|
+
pdb_id="1HHO", # Fetch reference from PDB
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
result = pipeline.run(total_steps=100)
|
|
25
|
+
print(f"GDT-TS: {result.final_state.metrics['gdt_ts']}")
|
|
26
|
+
print(f"TM-score: {result.final_state.metrics['tm_score']}")
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
import math
|
|
30
|
+
import random
|
|
31
|
+
import logging
|
|
32
|
+
from dataclasses import dataclass, field
|
|
33
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
34
|
+
from enum import Enum
|
|
35
|
+
|
|
36
|
+
from quantumflow.pipeline.base_pipeline import (
|
|
37
|
+
BasePipeline,
|
|
38
|
+
PipelineConfig,
|
|
39
|
+
PipelineState,
|
|
40
|
+
)
|
|
41
|
+
from quantumflow.pipeline.anomaly_detector import (
|
|
42
|
+
AnomalyDetector,
|
|
43
|
+
create_energy_spike_detector,
|
|
44
|
+
create_rmsd_detector,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
logger = logging.getLogger(__name__)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ============================================================
|
|
51
|
+
# Amino Acid Properties
|
|
52
|
+
# ============================================================
|
|
53
|
+
|
|
54
|
+
class SecondaryStructure(str, Enum):
    """Secondary structure types.

    The enum also derives from ``str``, so every member compares equal to
    its lowercase string value (for example ``SecondaryStructure.HELIX ==
    "helix"``).
    """
    HELIX = "helix"  # Alpha helix
    SHEET = "sheet"  # Beta sheet
    COIL = "coil"  # Random coil/loop
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Amino acid properties for force field.
#
# Keys are upper-case one-letter residue codes. Each value is a 5-tuple whose
# positions are read by index elsewhere in this module:
#   [0] mass
#   [1] charge             (read by the electrostatic term)
#   [2] radius             (read by the Lennard-Jones term to build sigma)
#   [3] helix_propensity   (compared with [4] by the dihedral term)
#   [4] sheet_propensity
# NOTE(review): units are not stated in this file; the radius is combined with
# distances that are documented in Å, so it is presumably Å as well - confirm.
AMINO_ACID_PROPS = {
    # (mass, charge, radius, helix_propensity, sheet_propensity)
    'A': (89.1, 0.0, 1.8, 1.42, 0.83),    # Alanine - helix former
    'R': (174.2, 1.0, 2.5, 0.98, 0.93),   # Arginine
    'N': (132.1, 0.0, 2.2, 0.67, 0.89),   # Asparagine
    'D': (133.1, -1.0, 2.2, 1.01, 0.54),  # Aspartic acid
    'C': (121.2, 0.0, 2.0, 0.70, 1.19),   # Cysteine
    'E': (147.1, -1.0, 2.3, 1.51, 0.37),  # Glutamic acid - helix former
    'Q': (146.2, 0.0, 2.3, 1.11, 1.10),   # Glutamine
    'G': (75.1, 0.0, 1.6, 0.57, 0.75),    # Glycine - helix breaker
    'H': (155.2, 0.5, 2.3, 1.00, 0.87),   # Histidine
    'I': (131.2, 0.0, 2.2, 1.08, 1.60),   # Isoleucine - sheet former
    'L': (131.2, 0.0, 2.2, 1.21, 1.30),   # Leucine - helix former
    'K': (146.2, 1.0, 2.4, 1.16, 0.74),   # Lysine
    'M': (149.2, 0.0, 2.2, 1.45, 1.05),   # Methionine - helix former
    'F': (165.2, 0.0, 2.4, 1.13, 1.38),   # Phenylalanine
    'P': (115.1, 0.0, 2.0, 0.57, 0.55),   # Proline - helix breaker
    'S': (105.1, 0.0, 1.9, 0.77, 0.75),   # Serine
    'T': (119.1, 0.0, 2.0, 0.83, 1.19),   # Threonine
    'W': (204.2, 0.0, 2.6, 1.08, 1.37),   # Tryptophan
    'Y': (181.2, 0.0, 2.5, 0.69, 1.47),   # Tyrosine - sheet former
    'V': (117.1, 0.0, 2.1, 1.06, 1.70),   # Valine - sheet former
}

# Default for unknown amino acids (same tuple layout as AMINO_ACID_PROPS);
# used as the fallback of every AMINO_ACID_PROPS.get(...) lookup in this file.
DEFAULT_AA_PROPS = (120.0, 0.0, 2.0, 1.0, 1.0)

# Ideal bond lengths and angles (AMBER-like)
IDEAL_BOND_LENGTH = 3.8  # Å (Cα-Cα distance)
IDEAL_BOND_ANGLE = 111.0  # degrees (Cα-Cα-Cα)
IDEAL_PHI = -57.0  # degrees (alpha helix)
IDEAL_PSI = -47.0  # degrees (alpha helix)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ============================================================
|
|
97
|
+
# Configuration and State
|
|
98
|
+
# ============================================================
|
|
99
|
+
|
|
100
|
+
@dataclass
class ProteinConfig(PipelineConfig):
    """Configuration for protein folding pipeline.

    Extends ``PipelineConfig`` with the protein description, VQE sizing,
    force-field term weights, quality thresholds and annealing settings.
    Every field has a default, so ``ProteinConfig()`` is a valid fallback.
    """

    # Protein settings
    sequence: str = ""  # One-letter amino acid codes
    pdb_id: Optional[str] = None  # PDB ID for reference structure
    # Reference Cα coordinates as a list of [x, y, z] triples, if known.
    reference_structure: Optional[List[List[float]]] = None

    # VQE settings
    # n_qubits * ansatz_depth * 2 random parameters are drawn when the
    # pipeline state is initialised.
    n_qubits: int = 8
    ansatz_depth: int = 2
    optimizer: str = "COBYLA"
    max_iterations: int = 100

    # Force field weights: multipliers applied to the individual energy
    # terms when the force field sums them into the total energy.
    bond_weight: float = 100.0
    angle_weight: float = 40.0
    dihedral_weight: float = 1.0
    lj_weight: float = 1.0
    electrostatic_weight: float = 332.0  # Coulomb constant in kcal·Å/mol·e²
    # NOTE(review): no hydrogen-bond term is summed by the force field code
    # visible in this module; confirm where hbond_weight is consumed.
    hbond_weight: float = 2.0

    # Folding thresholds
    max_rmsd: float = 10.0  # Angstroms; handed to the RMSD anomaly detector
    target_gdt_ts: float = 50.0  # Good prediction threshold
    target_tm_score: float = 0.5  # Same fold threshold
    energy_convergence: float = 1e-4
    steric_clash_distance: float = 2.0  # Angstroms

    # Learning rate
    learning_rate: float = 0.01

    # Temperature for simulated annealing
    initial_temperature: float = 300.0  # Kelvin; copied into the initial state
    cooling_rate: float = 0.99
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
@dataclass
class ProteinState(PipelineState):
    """Mutable snapshot of a folding run.

    Holds the Cα trace, backbone angles, VQE parameters, the energy
    breakdown, structure-quality scores, the secondary-structure
    assignment and the annealing temperature, and can round-trip itself
    through a plain dictionary.
    """

    # Protein coordinates (Cα atoms)
    coordinates: List[List[float]] = field(default_factory=list)

    # Backbone angles
    phi_angles: List[float] = field(default_factory=list)  # φ angles
    psi_angles: List[float] = field(default_factory=list)  # ψ angles

    # VQE state
    vqe_parameters: List[float] = field(default_factory=list)

    # Energy components
    energy: float = 0.0
    bond_energy: float = 0.0
    angle_energy: float = 0.0
    dihedral_energy: float = 0.0
    lj_energy: float = 0.0
    electrostatic_energy: float = 0.0
    energy_history: List[float] = field(default_factory=list)

    # Quality metrics
    rmsd: float = 0.0
    gdt_ts: float = 0.0  # Global Distance Test
    gdt_ha: float = 0.0  # GDT High Accuracy
    tm_score: float = 0.0  # Template Modeling score
    rmsd_history: List[float] = field(default_factory=list)

    # Secondary structure
    secondary_structure: List[str] = field(default_factory=list)
    helix_content: float = 0.0
    sheet_content: float = 0.0

    # Steric clashes
    steric_clashes: int = 0

    # Temperature (for simulated annealing)
    temperature: float = 300.0

    def to_dict(self) -> Dict[str, Any]:
        """Return the base-class dictionary extended with the protein fields."""
        payload = super().to_dict()
        exported = (
            "coordinates",
            "phi_angles",
            "psi_angles",
            "vqe_parameters",
            "energy",
            "bond_energy",
            "angle_energy",
            "dihedral_energy",
            "lj_energy",
            "electrostatic_energy",
            "energy_history",
            "rmsd",
            "gdt_ts",
            "gdt_ha",
            "tm_score",
            "rmsd_history",
            "secondary_structure",
            "helix_content",
            "sheet_content",
            "steric_clashes",
            "temperature",
        )
        for key in exported:
            payload[key] = getattr(self, key)
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ProteinState":
        """Build a state from a dictionary, defaulting any missing key."""
        restored = cls()
        # The table is rebuilt on every call so each container default is a
        # fresh object and is never shared between states.
        fallbacks = (
            ("step", 0),
            ("data", {}),
            ("metrics", {}),
            ("gradient_history", []),
            ("coordinates", []),
            ("phi_angles", []),
            ("psi_angles", []),
            ("vqe_parameters", []),
            ("energy", 0.0),
            ("bond_energy", 0.0),
            ("angle_energy", 0.0),
            ("dihedral_energy", 0.0),
            ("lj_energy", 0.0),
            ("electrostatic_energy", 0.0),
            ("energy_history", []),
            ("rmsd", 0.0),
            ("gdt_ts", 0.0),
            ("gdt_ha", 0.0),
            ("tm_score", 0.0),
            ("rmsd_history", []),
            ("secondary_structure", []),
            ("helix_content", 0.0),
            ("sheet_content", 0.0),
            ("steric_clashes", 0),
            ("temperature", 300.0),
        )
        for key, fallback in fallbacks:
            setattr(restored, key, data.get(key, fallback))
        return restored
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
# ============================================================
|
|
240
|
+
# PDB Utilities
|
|
241
|
+
# ============================================================
|
|
242
|
+
|
|
243
|
+
def _extract_ca_coordinates(lines) -> List[List[float]]:
    """Collect one Cα position per residue from the first model of PDB text.

    Args:
        lines: Iterable of PDB record lines (trailing newlines are fine).

    Returns:
        List of [x, y, z] coordinates, possibly empty. Atoms of every chain
        in the first model are included, in file order.
    """
    coordinates = []
    previous_residue = None
    for line in lines:
        # Multi-model files (e.g. NMR ensembles) repeat every atom once per
        # model; only the first model is a usable reference structure.
        if line.startswith('ENDMDL'):
            break
        # Atom name lives in columns 13-16 of an ATOM record.
        if not line.startswith('ATOM') or line[12:16].strip() != 'CA':
            continue
        # Chain ID + residue number + insertion code identify the residue.
        residue = line[21:27]
        # A non-blank altLoc flag (column 17) on the residue we just stored
        # marks an alternate conformer of the same Cα, not a new residue.
        if line[16:17].strip() and residue == previous_residue:
            continue
        try:
            position = [
                float(line[30:38]),
                float(line[38:46]),
                float(line[46:54]),
            ]
        except ValueError:
            # Truncated or malformed coordinate columns: skip this atom.
            continue
        coordinates.append(position)
        previous_residue = residue
    return coordinates


def fetch_pdb_structure(pdb_id: str) -> Optional[List[List[float]]]:
    """
    Fetch Cα coordinates from PDB.

    Downloads ``<PDB_ID>.pdb`` from files.rcsb.org (10 second timeout) and
    keeps one Cα per residue from the first model. Alternate-location
    duplicates are dropped; all chains are kept.

    Args:
        pdb_id: 4-letter PDB ID (e.g., "1HHO")

    Returns:
        List of [x, y, z] coordinates for Cα atoms, or None when the
        download fails or the file contains no Cα atoms. Failures are
        logged as warnings and never raised.
    """
    try:
        import urllib.request

        url = f"https://files.rcsb.org/download/{pdb_id.upper()}.pdb"

        with urllib.request.urlopen(url, timeout=10) as response:
            pdb_data = response.read().decode('utf-8')

        coordinates = _extract_ca_coordinates(pdb_data.split('\n'))

        if coordinates:
            logger.info(f"Fetched {len(coordinates)} Cα atoms from PDB {pdb_id}")
            return coordinates
        else:
            logger.warning(f"No Cα atoms found in PDB {pdb_id}")
            return None

    except Exception as e:
        # Best effort by design: a missing reference must not abort the run.
        logger.warning(f"Failed to fetch PDB {pdb_id}: {e}")
        return None


def parse_pdb_file(filepath: str) -> Optional[List[List[float]]]:
    """Parse Cα coordinates from local PDB file.

    Applies the same selection as ``fetch_pdb_structure``: first model
    only, one Cα per residue, all chains.

    Args:
        filepath: Path of the PDB-format text file.

    Returns:
        List of [x, y, z] coordinates, or None when the file cannot be read
        or holds no Cα atoms. Read failures are logged as warnings.
    """
    try:
        with open(filepath, 'r') as f:
            coordinates = _extract_ca_coordinates(f)
        return coordinates if coordinates else None
    except Exception as e:
        # Best effort by design, mirroring fetch_pdb_structure.
        logger.warning(f"Failed to parse PDB file: {e}")
        return None
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
# ============================================================
|
|
305
|
+
# Structure Quality Metrics
|
|
306
|
+
# ============================================================
|
|
307
|
+
|
|
308
|
+
def compute_rmsd(coords1: List[List[float]], coords2: List[List[float]]) -> float:
    """Compute RMSD between two structures after centroid alignment.

    Both point sets are translated so their centroids coincide; no rotation
    is applied (a Kabsch fit would be needed for a true optimal
    superposition), so the value is an upper bound on the optimal RMSD.

    Args:
        coords1: First structure as a list of [x, y, z] points.
        coords2: Second structure, paired with ``coords1`` index by index.

    Returns:
        The RMSD, or ``inf`` when the inputs are empty or differ in length.
    """
    n = len(coords1)
    if n == 0 or n != len(coords2):
        return float('inf')

    axes = range(3)
    centroid1 = [sum(point[a] for point in coords1) / n for a in axes]
    centroid2 = [sum(point[a] for point in coords2) / n for a in axes]

    # Accumulate squared deviations point by point, axis by axis.
    total = 0.0
    for p, q in zip(coords1, coords2):
        for a in axes:
            total += ((p[a] - centroid1[a]) - (q[a] - centroid2[a])) ** 2

    return math.sqrt(total / n)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def compute_gdt_ts(coords1: List[List[float]], coords2: List[List[float]]) -> float:
    """
    Compute GDT-TS (Global Distance Test - Total Score).

    GDT-TS = (GDT_P1 + GDT_P2 + GDT_P4 + GDT_P8) / 4
    where GDT_Pn is % of residues within n Å of reference.

    Both structures are first translated to a common centroid, so the score
    does not depend on where each structure sits in space. No rotational
    fit is performed, so the result is a lower bound on the GDT-TS obtained
    with a full superposition search.

    Args:
        coords1: First structure as a list of [x, y, z] points.
        coords2: Second structure, paired with ``coords1`` index by index.

    Returns:
        GDT-TS score (0-100); 0.0 when the inputs are empty or differ in
        length.
    """
    n = len(coords1)
    if n == 0 or n != len(coords2):
        return 0.0

    thresholds = (1.0, 2.0, 4.0, 8.0)
    axes = range(3)

    # Remove the translational offset between the two coordinate frames.
    centroid1 = [sum(point[a] for point in coords1) / n for a in axes]
    centroid2 = [sum(point[a] for point in coords2) / n for a in axes]

    counts = [0] * len(thresholds)
    for p, q in zip(coords1, coords2):
        dist = math.sqrt(sum(
            ((p[a] - centroid1[a]) - (q[a] - centroid2[a])) ** 2 for a in axes
        ))
        for idx, cutoff in enumerate(thresholds):
            if dist <= cutoff:
                counts[idx] += 1

    return sum(c / n * 100 for c in counts) / 4
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def compute_gdt_ha(coords1: List[List[float]], coords2: List[List[float]]) -> float:
    """
    Compute GDT-HA (High Accuracy).

    Uses thresholds: 0.5, 1.0, 2.0, 4.0 Å

    Both structures are first translated to a common centroid, so the score
    does not depend on where each structure sits in space. No rotational
    fit is performed, so the result is a lower bound on the GDT-HA obtained
    with a full superposition search.

    Args:
        coords1: First structure as a list of [x, y, z] points.
        coords2: Second structure, paired with ``coords1`` index by index.

    Returns:
        GDT-HA score (0-100); 0.0 when the inputs are empty or differ in
        length.
    """
    n = len(coords1)
    if n == 0 or n != len(coords2):
        return 0.0

    thresholds = (0.5, 1.0, 2.0, 4.0)
    axes = range(3)

    # Remove the translational offset between the two coordinate frames.
    centroid1 = [sum(point[a] for point in coords1) / n for a in axes]
    centroid2 = [sum(point[a] for point in coords2) / n for a in axes]

    counts = [0] * len(thresholds)
    for p, q in zip(coords1, coords2):
        dist = math.sqrt(sum(
            ((p[a] - centroid1[a]) - (q[a] - centroid2[a])) ** 2 for a in axes
        ))
        for idx, cutoff in enumerate(thresholds):
            if dist <= cutoff:
                counts[idx] += 1

    return sum(c / n * 100 for c in counts) / 4
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def compute_tm_score(coords1: List[List[float]], coords2: List[List[float]]) -> float:
    """
    Compute TM-score (Template Modeling score).

    TM-score is length-normalized and less sensitive to local errors.
    - TM-score > 0.5: same fold
    - TM-score > 0.17: better than random

    Both structures are first translated to a common centroid, so the score
    does not depend on where each structure sits in space. No rotational
    fit is performed, so the result is a lower bound on the TM-score
    obtained with a full superposition search.

    Args:
        coords1: First structure as a list of [x, y, z] points.
        coords2: Second structure, paired with ``coords1`` index by index.

    Returns:
        TM-score (0-1); 0.0 when the inputs are empty or differ in length.
    """
    n = len(coords1)
    if n == 0 or n != len(coords2):
        return 0.0

    # Length-dependent distance scale, floored at 0.5 Å because the formula
    # is undefined or negative for short chains.
    d0 = 1.24 * (n - 15) ** (1/3) - 1.8 if n > 15 else 0.5
    d0 = max(d0, 0.5)

    axes = range(3)
    # Remove the translational offset between the two coordinate frames.
    centroid1 = [sum(point[a] for point in coords1) / n for a in axes]
    centroid2 = [sum(point[a] for point in coords2) / n for a in axes]

    tm_sum = 0.0
    for p, q in zip(coords1, coords2):
        dist = math.sqrt(sum(
            ((p[a] - centroid1[a]) - (q[a] - centroid2[a])) ** 2 for a in axes
        ))
        tm_sum += 1.0 / (1.0 + (dist / d0) ** 2)

    return tm_sum / n
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
# ============================================================
|
|
413
|
+
# Force Field
|
|
414
|
+
# ============================================================
|
|
415
|
+
|
|
416
|
+
class AMBERLikeForceField:
    """Coarse, AMBER-inspired potential evaluated on a Cα trace.

    Five terms are computed (bond, angle, dihedral, Lennard-Jones and
    electrostatic) and combined with the weights held by the config.
    """

    def __init__(self, config: ProteinConfig):
        self.config = config

    @staticmethod
    def _separation(point_a, point_b) -> float:
        """Euclidean distance between two [x, y, z] points."""
        return math.sqrt(sum(
            (point_b[k] - point_a[k]) ** 2 for k in range(3)
        ))

    @staticmethod
    def _per_residue(sequence: str, count: int, column: int) -> list:
        """Look up one AMINO_ACID_PROPS column for residues 0..count-1.

        Positions beyond the end of ``sequence`` are treated as alanine and
        unknown codes fall back to DEFAULT_AA_PROPS.
        """
        known = len(sequence)
        return [
            AMINO_ACID_PROPS.get(
                sequence[idx].upper() if idx < known else 'A',
                DEFAULT_AA_PROPS,
            )[column]
            for idx in range(count)
        ]

    def compute_total_energy(
        self,
        coords: List[List[float]],
        sequence: str,
        phi_angles: List[float],
        psi_angles: List[float],
    ) -> Dict[str, float]:
        """
        Evaluate every energy term and their weighted sum.

        E_total = w_bond*E_bond + w_angle*E_angle + w_dihedral*E_dihedral
                  + w_lj*E_LJ + w_elec*E_electrostatic

        Returns:
            Dict with the weighted "total" plus the unweighted "bond",
            "angle", "dihedral", "lj" and "electrostatic" terms.
        """
        terms = {
            # Harmonic Cα-Cα bonds
            "bond": self._compute_bond_energy(coords),
            # Harmonic Cα-Cα-Cα angles
            "angle": self._compute_angle_energy(coords),
            # Torsional preferences
            "dihedral": self._compute_dihedral_energy(phi_angles, psi_angles, sequence),
            # van der Waals
            "lj": self._compute_lj_energy(coords, sequence),
            # Coulomb
            "electrostatic": self._compute_electrostatic_energy(coords, sequence),
        }

        weights = self.config
        total = (
            weights.bond_weight * terms["bond"] +
            weights.angle_weight * terms["angle"] +
            weights.dihedral_weight * terms["dihedral"] +
            weights.lj_weight * terms["lj"] +
            weights.electrostatic_weight * terms["electrostatic"]
        )

        return {"total": total, **terms}

    def _compute_bond_energy(self, coords: List[List[float]]) -> float:
        """Harmonic bond potential: E = k(r - r0)² over consecutive Cα pairs."""
        k_bond = 200.0  # kcal/mol/Å²
        energy = 0.0

        for idx in range(len(coords) - 1):
            length = self._separation(coords[idx], coords[idx + 1])
            energy += k_bond * (length - IDEAL_BOND_LENGTH) ** 2

        return energy

    def _compute_angle_energy(self, coords: List[List[float]]) -> float:
        """Harmonic angle potential: E = k(θ - θ0)² over consecutive Cα triples."""
        k_angle = 50.0  # kcal/mol/rad²
        energy = 0.0
        axes = range(3)

        for idx in range(len(coords) - 2):
            before, vertex, after = coords[idx], coords[idx + 1], coords[idx + 2]

            # Arms of the angle, both pointing away from the middle atom
            arm_a = [before[k] - vertex[k] for k in axes]
            arm_b = [after[k] - vertex[k] for k in axes]

            dot = sum(arm_a[k] * arm_b[k] for k in axes)
            len_a = math.sqrt(sum(arm_a[k] ** 2 for k in axes))
            len_b = math.sqrt(sum(arm_b[k] ** 2 for k in axes))

            # Coincident atoms leave the angle undefined; skip the triple.
            if not (len_a > 0 and len_b > 0):
                continue

            # Clamp against rounding drift before acos.
            cos_angle = max(-1, min(1, dot / (len_a * len_b)))
            angle = math.degrees(math.acos(cos_angle))
            energy += k_angle * math.radians(angle - IDEAL_BOND_ANGLE) ** 2

        return energy

    def _compute_dihedral_energy(
        self,
        phi_angles: List[float],
        psi_angles: List[float],
        sequence: str,
    ) -> float:
        """Torsional penalty pulling each residue toward its preferred basin.

        Residues whose helix propensity exceeds their sheet propensity are
        scored against the helix angles, all others against the sheet angles.
        """
        energy = 0.0

        # zip stops at the shortest input, i.e. residues lacking either
        # angle are ignored.
        for aa, phi, psi in zip(sequence, phi_angles, psi_angles):
            props = AMINO_ACID_PROPS.get(aa.upper(), DEFAULT_AA_PROPS)

            if props[3] > props[4]:
                # Helix: φ ≈ -57°, ψ ≈ -47°
                phi_target, psi_target = IDEAL_PHI, IDEAL_PSI
            else:
                # Sheet: φ ≈ -120°, ψ ≈ +130°
                phi_target, psi_target = -120, 130

            energy += (1 - math.cos(math.radians(phi - phi_target)))
            energy += (1 - math.cos(math.radians(psi - psi_target)))

        return energy

    def _compute_lj_energy(
        self,
        coords: List[List[float]],
        sequence: str,
    ) -> float:
        """Lennard-Jones potential: E = 4ε[(σ/r)¹² - (σ/r)⁶]"""
        n = len(coords)
        radii = self._per_residue(sequence, n, 2)
        epsilon = 0.1  # kcal/mol
        energy = 0.0

        for i in range(n):
            for j in range(i + 3, n):  # Skip bonded neighbors
                # Floor the distance so overlapping atoms cannot overflow.
                dist = max(self._separation(coords[i], coords[j]), 0.1)

                sigma = (radii[i] + radii[j]) / 2
                r6 = (sigma / dist) ** 6
                r12 = r6 * r6

                energy += 4 * epsilon * (r12 - r6)

        return energy

    def _compute_electrostatic_energy(
        self,
        coords: List[List[float]],
        sequence: str,
    ) -> float:
        """Coulomb electrostatic: E = q1*q2/(ε*r)"""
        n = len(coords)
        charges = self._per_residue(sequence, n, 1)
        dielectric = 4.0  # Effective dielectric constant
        energy = 0.0

        for i in range(n):
            for j in range(i + 3, n):
                # Pairs with a neutral partner contribute nothing.
                if charges[i] == 0 or charges[j] == 0:
                    continue

                # Floor the distance to keep the term finite.
                dist = max(self._separation(coords[i], coords[j]), 1.0)
                energy += (charges[i] * charges[j]) / (dielectric * dist)

        return energy
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
# ============================================================
|
|
601
|
+
# Protein Folding Pipeline
|
|
602
|
+
# ============================================================
|
|
603
|
+
|
|
604
|
+
class ProteinFoldingPipeline(BasePipeline):
|
|
605
|
+
"""
|
|
606
|
+
Pipeline for protein structure prediction using VQE.
|
|
607
|
+
|
|
608
|
+
Features:
|
|
609
|
+
- AMBER-like force field energy minimization
|
|
610
|
+
- PDB reference structure loading
|
|
611
|
+
- Benchmarks: RMSD, GDT-TS, GDT-HA, TM-score
|
|
612
|
+
- Secondary structure prediction
|
|
613
|
+
- Auto-rollback on folding divergence
|
|
614
|
+
"""
|
|
615
|
+
|
|
616
|
+
def __init__(
    self,
    name: str,
    sequence: str,
    pdb_id: Optional[str] = None,
    reference_structure: Optional[List[List[float]]] = None,
    config: Optional[ProteinConfig] = None,
    **kwargs,
):
    """
    Initialize protein folding pipeline.

    Explicit arguments take precedence over values already stored on a
    supplied ``config``; when ``pdb_id`` or ``reference_structure`` is
    omitted, the value carried by ``config`` is kept and used.

    Args:
        name: Pipeline name
        sequence: Amino acid sequence (1-letter codes)
        pdb_id: PDB ID for reference structure (fetched online)
        reference_structure: Manual reference coordinates
        config: Pipeline configuration. Note that a supplied config is
            updated in place (sequence, pdb_id, reference_structure).
        **kwargs: Forwarded unchanged to the base pipeline constructor.
    """
    if config is None:
        config = ProteinConfig(
            sequence=sequence,
            pdb_id=pdb_id,
            reference_structure=reference_structure,
        )
    else:
        config.sequence = sequence
        # Only override the configured PDB ID when one was actually passed,
        # otherwise a config-provided ID would be wiped out.
        if pdb_id is not None:
            config.pdb_id = pdb_id
        if reference_structure:
            config.reference_structure = reference_structure

    super().__init__(name=name, config=config, **kwargs)

    self._sequence = sequence.upper()
    # Fall back to the reference carried by the config so that it is not
    # silently ignored when the argument is omitted.
    self._reference = reference_structure or getattr(
        config, "reference_structure", None
    )

    # Fetch PDB if provided
    effective_pdb_id = getattr(config, "pdb_id", None)
    if effective_pdb_id and not self._reference:
        self._reference = fetch_pdb_structure(effective_pdb_id)
        if self._reference:
            config.reference_structure = self._reference

    # Initialize force field
    self._force_field = AMBERLikeForceField(config)

    # Created lazily by _get_vqe()
    self._vqe = None

    # Setup anomaly detectors
    self._setup_anomaly_detectors()
|
|
665
|
+
|
|
666
|
+
@property
def pipeline_type(self) -> str:
    """Return the fixed type identifier of this pipeline, "protein_folding"."""
    return "protein_folding"
|
|
669
|
+
|
|
670
|
+
def _setup_anomaly_detectors(self):
    """Build the anomaly detector for this pipeline and install it.

    An energy-spike check is always registered; the RMSD-divergence check
    is added only when the active config is a ``ProteinConfig``, because
    its limit comes from ``max_rmsd``.
    """
    monitor = AnomalyDetector()

    spike_check = create_energy_spike_detector(threshold_multiplier=5.0)
    monitor.register_detector("energy_spike", spike_check)

    settings = self.config
    if isinstance(settings, ProteinConfig):
        rmsd_check = create_rmsd_detector(max_rmsd=settings.max_rmsd)
        monitor.register_detector("rmsd_divergence", rmsd_check)

    self.set_anomaly_detector(monitor)
|
|
687
|
+
|
|
688
|
+
def _get_vqe(self):
|
|
689
|
+
"""Get or create VQE instance."""
|
|
690
|
+
if self._vqe is None:
|
|
691
|
+
try:
|
|
692
|
+
from quantumflow.algorithms.machine_learning.vqe import QuantumVQE
|
|
693
|
+
|
|
694
|
+
config = self.config
|
|
695
|
+
n_qubits = config.n_qubits if isinstance(config, ProteinConfig) else 8
|
|
696
|
+
|
|
697
|
+
self._vqe = QuantumVQE(
|
|
698
|
+
n_qubits=n_qubits,
|
|
699
|
+
backend=self.config.backend,
|
|
700
|
+
)
|
|
701
|
+
except ImportError:
|
|
702
|
+
logger.warning("VQE not available, using gradient descent")
|
|
703
|
+
return self._vqe
|
|
704
|
+
|
|
705
|
+
def initialize(self) -> ProteinState:
    """Create the starting state: an extended zigzag chain with its scores.

    The chain is laid out along the x axis, backbone angles are set to the
    extended values, VQE parameters are drawn at random, and the energy
    breakdown, quality metrics and secondary-structure summary are
    computed for that starting conformation.
    """
    state = ProteinState()
    settings = self.config
    if not isinstance(settings, ProteinConfig):
        # Fall back to defaults when a generic config was supplied.
        settings = ProteinConfig()

    residue_count = len(self._sequence)

    # Initialize as extended chain (β-strand like): ~3.8Å between Cα atoms
    # along x, with a slight zigzag in y.
    half_turn = math.radians(180)
    x_dir = math.cos(half_turn)
    z_dir = math.sin(half_turn)
    state.coordinates = [
        [
            idx * IDEAL_BOND_LENGTH * x_dir,
            (idx % 2) * 1.0,
            idx * IDEAL_BOND_LENGTH * z_dir * 0.1,
        ]
        for idx in range(residue_count)
    ]

    # Initialize backbone angles (extended: φ=-120°, ψ=130°)
    state.phi_angles = [-120.0] * residue_count
    state.psi_angles = [130.0] * residue_count

    # Initialize VQE parameters uniformly in [-π, π]
    param_count = settings.n_qubits * settings.ansatz_depth * 2
    state.vqe_parameters = [
        random.uniform(-math.pi, math.pi) for _ in range(param_count)
    ]

    # Initial temperature
    state.temperature = settings.initial_temperature

    # Compute initial energy and copy each term onto the state
    energies = self._force_field.compute_total_energy(
        state.coordinates,
        self._sequence,
        state.phi_angles,
        state.psi_angles,
    )
    for attribute, key in (
        ("energy", "total"),
        ("bond_energy", "bond"),
        ("angle_energy", "angle"),
        ("dihedral_energy", "dihedral"),
        ("lj_energy", "lj"),
        ("electrostatic_energy", "electrostatic"),
    ):
        setattr(state, attribute, energies[key])
    state.energy_history.append(state.energy)

    # Compute initial metrics
    self._update_quality_metrics(state)

    # Predict secondary structure
    state.secondary_structure = self._predict_secondary_structure()
    self._compute_ss_content(state)

    return state
|
|
755
|
+
|
|
756
|
+
def execute_step(self, step: int, state: ProteinState) -> ProteinState:
    """Execute one folding step with simulated annealing.

    The step optionally refines the VQE parameters (best effort), proposes
    a new conformation via ``_minimize_step``, and then accepts or rejects
    that proposal with the Metropolis criterion at the current temperature.
    A rejected proposal is rolled back completely: coordinates *and*
    backbone angles return to their pre-step values and the stored energy
    terms are left untouched, so ``state.energy`` always describes the
    conformation actually held in ``state``.

    Args:
        step: Index of the current step (not used by this implementation).
        state: Folding state to advance; it is mutated in place.

    Returns:
        The updated state.
    """
    config = self.config
    if not isinstance(config, ProteinConfig):
        config = ProteinConfig()

    # Snapshot everything _minimize_step mutates so that a rejected move
    # can be undone in full (it perturbs the angles as well as the
    # coordinates).
    old_coords = [c.copy() for c in state.coordinates]
    old_phi = list(state.phi_angles)
    old_psi = list(state.psi_angles)
    old_energy = state.energy

    # Try VQE optimization. This is deliberately best effort: a failing
    # backend must not abort the classical folding step.
    vqe = self._get_vqe()
    if vqe:
        try:
            hamiltonian = self._create_protein_hamiltonian(state)
            vqe_result = vqe.find_ground_state(
                hamiltonian=hamiltonian,
                initial_params=state.vqe_parameters,
                max_iterations=1,
            )
            state.vqe_parameters = vqe_result.get("optimal_params", state.vqe_parameters)
        except Exception as e:
            logger.debug(f"VQE step skipped: {e}")

    # Propose a new conformation using gradient-based minimization
    state = self._minimize_step(state, config)

    # Energy of the proposed conformation
    energies = self._force_field.compute_total_energy(
        state.coordinates, self._sequence,
        state.phi_angles, state.psi_angles
    )
    new_energy = energies["total"]
    delta_e = new_energy - old_energy

    # Metropolis criterion: downhill moves are always accepted, uphill
    # moves with Boltzmann probability exp(-dE / kT).
    accepted = True
    if delta_e > 0:
        kT = 0.001987 * state.temperature  # kcal/mol
        prob = math.exp(-delta_e / kT) if kT > 0 else 0
        if random.random() > prob:
            accepted = False

    if accepted:
        state.energy = new_energy
        state.bond_energy = energies["bond"]
        state.angle_energy = energies["angle"]
        state.dihedral_energy = energies["dihedral"]
        state.lj_energy = energies["lj"]
        state.electrostatic_energy = energies["electrostatic"]
    else:
        # Reject move: restore the previous conformation. The component
        # energies already stored on the state belong to that previous
        # conformation, so they are intentionally not overwritten.
        state.coordinates = old_coords
        state.phi_angles = old_phi
        state.psi_angles = old_psi
        state.energy = old_energy
    state.energy_history.append(state.energy)

    # Cool down
    state.temperature *= config.cooling_rate

    # Check steric clashes
    state.steric_clashes = self._count_steric_clashes(
        state.coordinates, config.steric_clash_distance
    )

    # Update quality metrics
    self._update_quality_metrics(state)

    # Update secondary structure content
    self._compute_ss_content(state)

    # Update metrics dict
    state.update_metrics(
        energy=state.energy,
        bond_energy=state.bond_energy,
        angle_energy=state.angle_energy,
        lj_energy=state.lj_energy,
        rmsd=state.rmsd,
        gdt_ts=state.gdt_ts,
        tm_score=state.tm_score,
        helix_content=state.helix_content,
        sheet_content=state.sheet_content,
        steric_clashes=state.steric_clashes,
        temperature=state.temperature,
    )

    return state
|
|
843
|
+
|
|
844
|
+
def _minimize_step(self, state: ProteinState, config: ProteinConfig) -> ProteinState:
    """Perform one gradient-based minimization step, in place.

    Every coordinate component is updated in turn by a descent step along
    its central finite-difference energy derivative; updates made earlier
    in the sweep are visible to later derivative evaluations. Afterwards
    each backbone angle receives a Gaussian perturbation whose width
    scales with ``state.temperature`` and is wrapped back into
    [-180, 180).

    Args:
        state: Folding state whose coordinates and angles are modified.
        config: Supplies ``learning_rate`` for the descent update.

    Returns:
        The same ``state`` object, after modification.
    """
    epsilon = 0.01

    def total_energy() -> float:
        # Energy of the conformation currently held in ``state``.
        return self._force_field.compute_total_energy(
            state.coordinates, self._sequence,
            state.phi_angles, state.psi_angles
        )["total"]

    # Compute numerical gradient for each coordinate
    for point in state.coordinates:
        for axis in range(3):
            # Remember the exact value: undoing the probe arithmetically
            # (+eps, -2*eps, +eps) is not exact in floating point and
            # would leak round-off into the coordinate on every step.
            original = point[axis]

            # Forward
            point[axis] = original + epsilon
            e_plus = total_energy()

            # Backward
            point[axis] = original - epsilon
            e_minus = total_energy()

            # Gradient descent update, starting from the untouched value
            grad = (e_plus - e_minus) / (2 * epsilon)
            point[axis] = original - config.learning_rate * grad

    # Also update backbone angles; the perturbation width is the same for
    # every residue, so compute it once.
    sigma = 5 * (state.temperature / 300)
    for i in range(len(state.phi_angles)):
        phi = state.phi_angles[i] + random.gauss(0, sigma)
        psi = state.psi_angles[i] + random.gauss(0, sigma)

        # Keep in range
        state.phi_angles[i] = ((phi + 180) % 360) - 180
        state.psi_angles[i] = ((psi + 180) % 360) - 180

    return state
|
|
882
|
+
|
|
883
|
+
def _update_quality_metrics(self, state: ProteinState):
    """Refresh RMSD, GDT-TS, GDT-HA and TM-score on ``state``.

    The scores are computed against the stored reference when it is
    non-empty and has as many entries as ``state.coordinates``; otherwise
    all four are set to 0.0. The resulting RMSD is appended to
    ``state.rmsd_history`` in both cases.
    """
    reference = self._reference
    predicted = state.coordinates

    if not (reference and len(reference) == len(predicted)):
        # No comparable reference structure: zero out every score.
        state.rmsd = state.gdt_ts = state.gdt_ha = state.tm_score = 0.0
    else:
        state.rmsd = compute_rmsd(predicted, reference)
        state.gdt_ts = compute_gdt_ts(predicted, reference)
        state.gdt_ha = compute_gdt_ha(predicted, reference)
        state.tm_score = compute_tm_score(predicted, reference)

    state.rmsd_history.append(state.rmsd)
|
|
897
|
+
|
|
898
|
+
def _predict_secondary_structure(self) -> List[str]:
    """Assign a secondary-structure label to every residue of the sequence.

    Each residue is looked up (upper-cased) in ``AMINO_ACID_PROPS``,
    falling back to ``DEFAULT_AA_PROPS``. Entries 3 and 4 of the property
    record are used as helix and sheet propensities: a propensity above
    1.1 that also exceeds the other one selects helix or sheet; anything
    else is labelled coil.

    Returns:
        One label value per residue, in sequence order.
    """

    def classify(residue: str) -> str:
        record = AMINO_ACID_PROPS.get(residue.upper(), DEFAULT_AA_PROPS)
        helix, sheet = record[3], record[4]
        if helix > 1.1 and helix > sheet:
            return SecondaryStructure.HELIX.value
        if sheet > 1.1 and sheet > helix:
            return SecondaryStructure.SHEET.value
        return SecondaryStructure.COIL.value

    return [classify(residue) for residue in self._sequence]
|
|
914
|
+
|
|
915
|
+
def _compute_ss_content(self, state: ProteinState):
    """Store helix and sheet percentages of ``state.secondary_structure``.

    Writes ``state.helix_content`` and ``state.sheet_content`` as
    percentages of all labels. Does nothing when there are no labels.
    """
    labels = state.secondary_structure
    total = len(labels)
    if not total:
        return

    helix_label = SecondaryStructure.HELIX.value
    sheet_label = SecondaryStructure.SHEET.value

    # Tally both label kinds in a single pass.
    helices = 0
    sheets = 0
    for label in labels:
        if label == helix_label:
            helices += 1
        if label == sheet_label:
            sheets += 1

    state.helix_content = helices / total * 100
    state.sheet_content = sheets / total * 100
|
|
926
|
+
|
|
927
|
+
def _count_steric_clashes(
|
|
928
|
+
self, coordinates: List[List[float]], min_distance: float
|
|
929
|
+
) -> int:
|
|
930
|
+
"""Count steric clashes."""
|
|
931
|
+
clashes = 0
|
|
932
|
+
n = len(coordinates)
|
|
933
|
+
|
|
934
|
+
for i in range(n):
|
|
935
|
+
for j in range(i + 3, n): # Skip neighbors
|
|
936
|
+
dist = math.sqrt(sum(
|
|
937
|
+
(coordinates[j][k] - coordinates[i][k]) ** 2 for k in range(3)
|
|
938
|
+
))
|
|
939
|
+
if dist < min_distance:
|
|
940
|
+
clashes += 1
|
|
941
|
+
|
|
942
|
+
return clashes
|
|
943
|
+
|
|
944
|
+
def _create_protein_hamiltonian(self, state: ProteinState) -> Dict[str, Any]:
    """Describe the current structure as a Hamiltonian dict for VQE.

    The force field is evaluated on the state's coordinates and backbone
    angles; the result is packaged together with the total energy and the
    sequence length.

    Returns:
        A dict with the keys ``type``, ``total_energy``, ``components``
        and ``n_residues``.
    """
    components = self._force_field.compute_total_energy(
        state.coordinates,
        self._sequence,
        state.phi_angles,
        state.psi_angles,
    )

    hamiltonian: Dict[str, Any] = {"type": "protein_folding"}
    hamiltonian["total_energy"] = components["total"]
    hamiltonian["components"] = components
    hamiltonian["n_residues"] = len(self._sequence)
    return hamiltonian
|
|
957
|
+
|
|
958
|
+
def get_state_for_checkpoint(self, state: PipelineState) -> Dict[str, Any]:
    """Serialize ``state`` for checkpointing.

    Every state type is serialized the same way, through its own
    ``to_dict`` method.
    """
    return state.to_dict()
|
|
963
|
+
|
|
964
|
+
def restore_state_from_checkpoint(self, checkpoint_data: Dict[str, Any]) -> ProteinState:
    """Rebuild a ``ProteinState`` from checkpoint data.

    Args:
        checkpoint_data: Dict passed unchanged to ``ProteinState.from_dict``.

    Returns:
        The reconstructed state.
    """
    restored = ProteinState.from_dict(checkpoint_data)
    return restored
|
|
967
|
+
|
|
968
|
+
def should_stop(self, state: PipelineState) -> bool:
    """Decide whether folding can stop.

    Returns ``True`` when any of the following holds, checked in order:
    the energy recorded ten entries ago and the latest energy differ by
    less than ``config.energy_convergence``; ``state.gdt_ts`` has reached
    ``config.target_gdt_ts``; ``state.tm_score`` has reached
    ``config.target_tm_score``. Returns ``False`` otherwise, and also
    whenever ``state`` is not a ``ProteinState`` or the pipeline config is
    not a ``ProteinConfig``.
    """
    if not isinstance(state, ProteinState):
        return False

    config = self.config
    if not isinstance(config, ProteinConfig):
        return False

    # Energy convergence over a window of the ten most recent entries
    history = state.energy_history
    if len(history) >= 10:
        delta = abs(history[-1] - history[-10])
        if delta < config.energy_convergence:
            logger.info(f"Energy converged: delta={delta}")
            return True

    # Target quality reached?
    if state.gdt_ts >= config.target_gdt_ts:
        logger.info(f"Target GDT-TS reached: {state.gdt_ts:.1f}")
        finished = True
    elif state.tm_score >= config.target_tm_score:
        logger.info(f"Target TM-score reached: {state.tm_score:.3f}")
        finished = True
    else:
        finished = False

    return finished
|