cavefiller 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cavefiller/__init__.py +3 -0
- cavefiller/cavity_finder.py +114 -0
- cavefiller/cavity_selector.py +120 -0
- cavefiller/cli.py +145 -0
- cavefiller/water_filler.py +603 -0
- cavefiller-0.2.0.dist-info/METADATA +225 -0
- cavefiller-0.2.0.dist-info/RECORD +11 -0
- cavefiller-0.2.0.dist-info/WHEEL +5 -0
- cavefiller-0.2.0.dist-info/entry_points.txt +2 -0
- cavefiller-0.2.0.dist-info/licenses/LICENSE +201 -0
- cavefiller-0.2.0.dist-info/top_level.txt +1 -0
cavefiller/__init__.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Cavity detection using pyKVFinder."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import List, Dict, Tuple, Any
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
# Grid spacing for cavity detection (in Angstroms)
DEFAULT_GRID_STEP = 0.6


def find_cavities(
    protein_file: str,
    probe_in: float = 1.4,
    probe_out: float = 4.0,
    volume_cutoff: float = 5.0,
    output_dir: str = "./output",
) -> Tuple[List[Dict[str, Any]], Any]:
    """
    Find cavities in a protein structure using pyKVFinder.

    Args:
        protein_file: Path to the protein PDB file
        probe_in: Probe In radius for cavity detection (Å)
        probe_out: Probe Out radius for cavity detection (Å)
        volume_cutoff: Minimum cavity volume to consider (ų)
        output_dir: Directory to save cavity detection results. Currently
            not used by this function; kept so existing callers keep working.

    Returns:
        Tuple of (list of cavity dictionaries, cavity_data object). Each
        dictionary has the keys "id", "string_id", "volume" and "area", and
        the list is sorted by volume, largest first. The list is empty when
        the workflow result exposes no volume information.

    Raises:
        ImportError: If pyKVFinder cannot be imported.
    """
    # Imported lazily so the package can be imported without pyKVFinder.
    try:
        import pyKVFinder
    except ImportError as exc:
        raise ImportError(
            "pyKVFinder is not installed. Please install it with: pip install pykvfinder"
        ) from exc

    # Run KVFinder to detect cavities
    cavity_data = pyKVFinder.run_workflow(
        input=protein_file,
        probe_in=probe_in,
        probe_out=probe_out,
        step=DEFAULT_GRID_STEP,  # Grid step size
        volume_cutoff=volume_cutoff,
    )

    volumes = getattr(cavity_data, "volume", None)
    if volumes is None:
        return [], cavity_data

    # A missing or None area mapping means every cavity reports area 0.0.
    areas = getattr(cavity_data, "area", None) or {}

    # KVFinder uses string IDs like 'KAA', 'KAB', etc.; integer IDs are
    # assigned 1..N in the iteration order of the volume mapping, before the
    # volume filter, so a filtered-out cavity still consumes its number.
    # NOTE(review): pyKVFinder's documentation appears to reserve grid label 1
    # for empty space, with cavity labels starting at 2 -- confirm that these
    # IDs line up with the labels stored in cavity_data.cavities.
    cavities = [
        {
            "id": idx,
            "string_id": cavity_str_id,
            "volume": volume,
            "area": areas.get(cavity_str_id, 0.0),
        }
        for idx, (cavity_str_id, volume) in enumerate(volumes.items(), start=1)
        if volume >= volume_cutoff
    ]

    # Sort cavities by volume (largest first)
    cavities.sort(key=lambda cavity: cavity["volume"], reverse=True)

    return cavities, cavity_data
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_cavity_grid_points(cavity_data: Any, cavity_id: int) -> np.ndarray:
    """
    Get the grid points that belong to a specific cavity.

    Args:
        cavity_data: The cavity data object from pyKVFinder
        cavity_id: Integer ID of the cavity (1-indexed)

    Returns:
        Float array with one row per grid point whose label equals
        ``cavity_id``. Rows are real-space coordinates (origin + index * step)
        when an origin ``P1`` is found on ``cavity_data.surface`` or on
        ``cavity_data``; otherwise they are raw grid indices. An empty
        ``(0, 3)`` array is returned when no cavity grid is available.
    """
    cavity_grid = getattr(cavity_data, "cavities", None)
    if cavity_grid is None:
        # Same (n, 3) layout as every other return path, just with n == 0.
        return np.empty((0, 3), dtype=float)

    # Find all points belonging to this cavity.
    # NOTE(review): the grid labels are compared directly with cavity_id;
    # confirm the caller's integer IDs match the labels stored in the grid.
    points = np.argwhere(cavity_grid == cavity_id).astype(float)

    # Different pyKVFinder versions expose metadata on either
    # cavity_data.surface or cavity_data itself; the surface wins if both do.
    origin = None
    for holder in (getattr(cavity_data, "surface", None), cavity_data):
        if hasattr(holder, "P1"):
            origin = np.array([holder.P1[i] for i in range(3)], dtype=float)
            break

    if origin is None:
        # Fallback: return index-space points; downstream code will align to protein frame.
        return points

    # The step is only needed for the real-space conversion; a missing or
    # None step falls back to the module default.
    step = getattr(cavity_data, "step", None)
    if step is None:
        step = DEFAULT_GRID_STEP
    return origin + points * float(step)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Interactive cavity selection interface."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Dict, Any, Optional
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _default_water_count(volume: float) -> int:
    """Return the default water estimate for a cavity: volume / 30, at least 1."""
    return max(1, int(volume / 30))


def select_cavities(cavities: List[Dict[str, Any]], prompt_for_waters: bool = True) -> tuple:
    """
    Allow user to select which cavities to fill and how many waters per cavity.

    Args:
        cavities: List of cavity dictionaries with id, volume, and area
        prompt_for_waters: Whether to prompt for number of waters per cavity

    Returns:
        Tuple of (selected cavity dictionaries, waters_per_cavity dict).
        ``([], {})`` is returned when the user quits or interrupts the
        selection prompt. When stdin is exhausted, all cavities are selected
        and default water counts are used.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Available Cavities:")
    print(rule)
    # Volume is a cubic quantity (ų), area a squared one (Ų).
    print(f"{'ID':<6} {'Volume (ų)':<15} {'Area (Ų)':<15}")
    print("-" * 60)

    for cavity in cavities:
        print(
            f"{cavity['id']:<6} {cavity['volume']:<15.2f} {cavity['area']:<15.2f}"
        )

    print(rule)
    print("\nEnter cavity IDs to fill (comma-separated, e.g., '1,2,3')")
    print("Or enter 'all' to select all cavities")
    print("Or enter 'q' to quit")

    valid_ids = {c["id"] for c in cavities}
    selected_cavities = None
    while selected_cavities is None:
        try:
            user_input = input("\nYour selection: ").strip().lower()
        except EOFError:
            # Handle case when stdin is not available (e.g., in tests)
            print("\nNo input available. Selecting all cavities by default.")
            selected_cavities = cavities
            break
        except KeyboardInterrupt:
            print("\n\nSelection cancelled.")
            return [], {}

        if user_input == 'q':
            print("Selection cancelled.")
            return [], {}

        if user_input == 'all':
            print(f"Selected all {len(cavities)} cavities")
            selected_cavities = cavities
            break

        # Parse comma-separated IDs; blank tokens (e.g. a trailing comma)
        # are ignored rather than reported as a confusing int('') error.
        tokens = [tok.strip() for tok in user_input.split(",")]
        try:
            selected_ids = [int(tok) for tok in tokens if tok]
        except ValueError as e:
            print(f"Invalid input: {e}")
            print("Please enter comma-separated numbers, 'all', or 'q'")
            continue

        invalid_ids = [sid for sid in selected_ids if sid not in valid_ids]
        if invalid_ids:
            print(f"Invalid cavity IDs: {invalid_ids}")
            print(f"Valid IDs are: {sorted(valid_ids)}")
            continue

        # Keep the order of `cavities`, not the order the user typed.
        chosen = [c for c in cavities if c["id"] in selected_ids]
        if chosen:
            print(f"\nSelected {len(chosen)} cavities: {[c['id'] for c in chosen]}")
            selected_cavities = chosen
        else:
            print("No cavities selected. Please try again.")

    # Prompt for number of waters per cavity
    waters_per_cavity = {}
    if not (prompt_for_waters and selected_cavities):
        return selected_cavities, waters_per_cavity

    print("\n" + rule)
    print("Specify number of water molecules per cavity")
    print(rule)

    for cavity in selected_cavities:
        # Default estimate based on volume
        default_waters = _default_water_count(cavity['volume'])
        prompt = f"Cavity {cavity['id']} (volume: {cavity['volume']:.2f} ų) - waters [default: {default_waters}]: "

        while True:
            try:
                user_input = input(prompt).strip()
            except EOFError:
                # Use default
                waters_per_cavity[cavity['id']] = default_waters
                break
            except KeyboardInterrupt:
                print("\n\nCancelled. Using defaults for remaining cavities.")
                for remaining_cavity in selected_cavities:
                    if remaining_cavity['id'] not in waters_per_cavity:
                        waters_per_cavity[remaining_cavity['id']] = _default_water_count(
                            remaining_cavity['volume']
                        )
                return selected_cavities, waters_per_cavity

            if user_input == '':
                waters_per_cavity[cavity['id']] = default_waters
                break

            try:
                n_waters = int(user_input)
            except ValueError:
                print(" Error: Please enter a valid number")
                continue

            if n_waters < 0:
                print(" Error: Number of waters must be non-negative")
                continue

            waters_per_cavity[cavity['id']] = n_waters
            break

    return selected_cavities, waters_per_cavity
|
cavefiller/cli.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Command-line interface for CaveFiller using Typer."""
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional, List
|
|
6
|
+
from cavefiller.cavity_finder import find_cavities
|
|
7
|
+
from cavefiller.cavity_selector import select_cavities
|
|
8
|
+
from cavefiller.water_filler import fill_cavities_with_water
|
|
9
|
+
|
|
10
|
+
# Typer application object; commands are registered on it with @app.command().
app = typer.Typer(help="CaveFiller - Find and fill protein cavities with water molecules")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _parse_int_csv(raw: str, option_name: str) -> List[int]:
    """Parse a comma-separated integer list from a CLI option, exiting with code 1 on bad input."""
    try:
        return [int(token.strip()) for token in raw.split(",")]
    except ValueError:
        typer.echo(
            f"❌ {option_name} must be a comma-separated list of integers, got: {raw!r}",
            err=True,
        )
        raise typer.Exit(code=1) from None


@app.command()
def run(
    protein_file: Path = typer.Argument(
        ...,
        exists=True,
        help="Path to the protein PDB file",
    ),
    output_dir: Path = typer.Option(
        Path("./output"),
        help="Directory to save output files",
    ),
    probe_in: float = typer.Option(
        1.4,
        help="Probe In radius for cavity detection (Å)",
    ),
    probe_out: float = typer.Option(
        4.0,
        help="Probe Out radius for cavity detection (Å)",
    ),
    volume_cutoff: float = typer.Option(
        5.0,
        help="Minimum cavity volume to consider (ų)",
    ),
    auto_select: bool = typer.Option(
        False,
        help="Automatically select all cavities (no user interaction)",
    ),
    cavity_ids: Optional[str] = typer.Option(
        None,
        help="Comma-separated list of cavity IDs to fill (e.g., '1,2,3'). If not provided, user will be prompted.",
    ),
    waters_per_cavity: Optional[str] = typer.Option(
        None,
        help="Comma-separated list of water counts per cavity (e.g., '10,15,20'). Must match cavity_ids order.",
    ),
    optimize_mmff94: bool = typer.Option(
        True,
        "--optimize-mmff94/--no-optimize-mmff94",
        help="Run MMFF94 after placement with protein atoms fixed and waters movable.",
    ),
    mmff_max_iterations: int = typer.Option(
        300,
        help="Maximum MMFF94 iterations when optimization is enabled.",
    ),
):
    """
    Find cavities in a protein and fill them with explicit water molecules.

    This tool performs the following steps:
    1. Detects cavities in the protein using KVFinder
    2. Allows user to select which cavities to fill
    3. Uses cavity-grid Monte Carlo sampling to place water oxygens
    4. Optionally runs MMFF94 with protein fixed and waters movable
    5. Builds explicit RDKit H-O-H waters and writes a combined PDB
    """
    typer.echo(f"🔍 Analyzing protein: {protein_file}")

    # Create output directory
    output_dir.mkdir(parents=True, exist_ok=True)

    # Step 1: Find cavities using pykvfinder
    typer.echo("Step 1: Finding cavities with KVFinder...")
    cavities, cavity_data = find_cavities(
        str(protein_file),
        probe_in=probe_in,
        probe_out=probe_out,
        volume_cutoff=volume_cutoff,
        output_dir=str(output_dir),
    )

    if not cavities:
        typer.echo("❌ No cavities found in the protein.", err=True)
        raise typer.Exit(code=1)

    typer.echo(f"✅ Found {len(cavities)} cavities")

    # Step 2: Select cavities to fill
    typer.echo("\nStep 2: Selecting cavities to fill...")

    waters_dict = {}

    # Water counts are only consumed together with explicit cavity IDs; tell
    # the user instead of silently dropping the option.
    if waters_per_cavity and not cavity_ids:
        typer.echo(
            "⚠️  --waters-per-cavity is ignored unless --cavity-ids is also given",
            err=True,
        )

    if cavity_ids:
        # Parse cavity IDs from command line
        selected_ids = _parse_int_csv(cavity_ids, "--cavity-ids")
        requested = set(selected_ids)
        selected_cavities = [c for c in cavities if c["id"] in requested]
        if not selected_cavities:
            typer.echo(f"❌ No cavities found with IDs: {cavity_ids}", err=True)
            raise typer.Exit(code=1)

        # Requested IDs that match no detected cavity are skipped, as before,
        # but are now reported.
        known_ids = {c["id"] for c in cavities}
        unknown_ids = [sid for sid in selected_ids if sid not in known_ids]
        if unknown_ids:
            typer.echo(f"⚠️  Ignoring unknown cavity IDs: {unknown_ids}", err=True)

        # Parse waters per cavity if provided
        if waters_per_cavity:
            water_counts = _parse_int_csv(waters_per_cavity, "--waters-per-cavity")
            if len(water_counts) != len(selected_ids):
                typer.echo("❌ Number of water counts must match number of cavity IDs", err=True)
                raise typer.Exit(code=1)
            # Same rule the interactive selector enforces.
            if any(count < 0 for count in water_counts):
                typer.echo("❌ Water counts must be non-negative", err=True)
                raise typer.Exit(code=1)
            waters_dict = dict(zip(selected_ids, water_counts))

    elif auto_select:
        # Auto-select all cavities
        selected_cavities = cavities
        typer.echo(f"Auto-selecting all {len(cavities)} cavities")
        # Use default water counts
        for cavity in selected_cavities:
            waters_dict[cavity['id']] = max(1, int(cavity['volume'] / 30))
    else:
        # Interactive selection
        selected_cavities, waters_dict = select_cavities(cavities, prompt_for_waters=True)

    if not selected_cavities:
        typer.echo("❌ No cavities selected.", err=True)
        raise typer.Exit(code=1)

    typer.echo(f"✅ Selected {len(selected_cavities)} cavities")

    # Step 3: Fill cavities with water
    typer.echo("\nStep 3: Filling cavities with water using Monte Carlo sampling...")
    typer.echo(" (with clash detection, optional fixed-protein MMFF94, and RDKit HOH generation)")
    output_file = fill_cavities_with_water(
        str(protein_file),
        selected_cavities,
        cavity_data,
        str(output_dir),
        waters_per_cavity=waters_dict,
        optimize_mmff94=optimize_mmff94,
        mmff_max_iterations=mmff_max_iterations,
    )

    typer.echo(f"\n✅ Success! Output saved to: {output_file}")
    typer.echo("\n🎉 CaveFiller completed successfully!")
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# Invoke the Typer app when this module is executed as a script.
if __name__ == "__main__":
    app()
|