cavefiller 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cavefiller/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """CaveFiller - A tool to find and fill protein cavities with water molecules."""
2
+
3
+ __version__ = "0.2.0"
@@ -0,0 +1,114 @@
1
+ """Cavity detection using pyKVFinder."""
2
+
3
+ import os
4
+ from typing import List, Dict, Tuple, Any
5
+ import numpy as np
6
+
7
# Grid spacing for cavity detection (in Angstroms).
# Passed to pyKVFinder as the `step` argument and reused as the fallback
# spacing when converting cavity grid indices to coordinates.
DEFAULT_GRID_STEP = 0.6
9
+
10
+
11
def find_cavities(
    protein_file: str,
    probe_in: float = 1.4,
    probe_out: float = 4.0,
    volume_cutoff: float = 5.0,
    output_dir: str = "./output",
) -> Tuple[List[Dict[str, Any]], Any]:
    """
    Find cavities in a protein structure using pyKVFinder.

    Args:
        protein_file: Path to the protein PDB file
        probe_in: Probe In radius for cavity detection (Å)
        probe_out: Probe Out radius for cavity detection (Å)
        volume_cutoff: Minimum cavity volume to consider (ų)
        output_dir: Directory to save cavity detection results. This function
            does not currently read or write it; the parameter is kept so
            existing callers keep working.

    Returns:
        Tuple of (list of cavity dictionaries, cavity_data object). Each
        dictionary has the keys "id", "string_id", "volume" and "area", and
        the list is sorted by volume, largest first. The list is empty when
        the result object exposes no volumes.

    Raises:
        ImportError: If pyKVFinder cannot be imported.
    """
    try:
        import pyKVFinder
    except ImportError as err:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "pyKVFinder is not installed. Please install it with: pip install pykvfinder"
        ) from err

    # Run KVFinder to detect cavities
    cavity_data = pyKVFinder.run_workflow(
        input=protein_file,
        probe_in=probe_in,
        probe_out=probe_out,
        step=DEFAULT_GRID_STEP,  # Grid step size
        volume_cutoff=volume_cutoff,
    )

    volumes = getattr(cavity_data, "volume", None)
    if volumes is None:
        return [], cavity_data

    # A missing or empty area mapping means every cavity reports area 0.0.
    areas = getattr(cavity_data, "area", None) or {}

    # KVFinder uses string IDs like 'KAA', 'KAB', etc.; integer ids are assigned
    # here from the iteration order of the volume mapping, starting at 1, and
    # are assigned before filtering so a skipped cavity still consumes its id.
    # NOTE(review): pyKVFinder's documentation describes cavity labels in the
    # grid as starting at 2 -- confirm these 1-based ids match what the
    # grid lookups downstream expect.
    cavities = []
    for cavity_id, (cavity_str_id, volume) in enumerate(volumes.items(), start=1):
        if volume >= volume_cutoff:
            cavities.append(
                {
                    "id": cavity_id,
                    "string_id": cavity_str_id,
                    "volume": volume,
                    "area": areas.get(cavity_str_id, 0.0),
                }
            )

    # Sort cavities by volume (largest first)
    cavities.sort(key=lambda cavity: cavity["volume"], reverse=True)

    return cavities, cavity_data
76
+
77
+
78
def get_cavity_grid_points(cavity_data: Any, cavity_id: int) -> np.ndarray:
    """
    Get the grid points that belong to a specific cavity.

    Args:
        cavity_data: The cavity data object from pyKVFinder
        cavity_id: Integer label of the cavity in the ``cavities`` grid

    Returns:
        Float array with one row per matching grid point. When origin
        metadata (``P1``) is available the rows are ``origin + index * step``;
        otherwise they are the raw grid indices. When ``cavity_data`` has no
        cavity grid the result is an empty array of shape (0, 3), so callers
        always get a two-dimensional array.
    """
    cavity_grid = getattr(cavity_data, "cavities", None)
    if cavity_grid is None:
        return np.empty((0, 3), dtype=float)

    # Find all points belonging to this cavity.
    # NOTE(review): the label compared here is whatever the caller passes;
    # confirm it matches the numbering pyKVFinder uses inside the grid.
    points = np.argwhere(np.asarray(cavity_grid) == cavity_id).astype(float)

    # Different pyKVFinder versions may expose metadata on either cavity_data
    # or cavity_data.surface. Fall back to the default spacing when no step
    # is exposed (or it is None).
    step = getattr(cavity_data, "step", None)
    if step is None:
        step = DEFAULT_GRID_STEP

    surface = getattr(cavity_data, "surface", None)
    if hasattr(surface, "P1"):
        origin_source = surface.P1
    elif hasattr(cavity_data, "P1"):
        origin_source = cavity_data.P1
    else:
        # Fallback: return index-space points; downstream code will align to protein frame.
        return points

    origin = np.array([origin_source[axis] for axis in range(3)], dtype=float)
    return origin + points * float(step)
@@ -0,0 +1,120 @@
1
+ """Interactive cavity selection interface."""
2
+
3
+ from typing import List, Dict, Any, Optional
4
+ import sys
5
+
6
+
7
def _default_water_count(cavity: Dict[str, Any]) -> int:
    """Return the default water count for a cavity: volume / 30, at least 1."""
    return max(1, int(cavity["volume"] / 30))


def select_cavities(cavities: List[Dict[str, Any]], prompt_for_waters: bool = True) -> tuple:
    """
    Allow user to select which cavities to fill and how many waters per cavity.

    Args:
        cavities: List of cavity dictionaries with id, volume, and area
        prompt_for_waters: Whether to prompt for number of waters per cavity

    Returns:
        Tuple of (selected cavity dictionaries, waters_per_cavity dict).
        ``([], {})`` is returned when the user quits or interrupts the
        cavity selection. When stdin is exhausted, all cavities are selected
        and default water counts are used.
    """
    rule = "=" * 60

    print("\n" + rule)
    print("Available Cavities:")
    print(rule)
    # Volume is a cubic quantity (ų) and area a squared one (Ų).
    print(f"{'ID':<6} {'Volume (ų)':<15} {'Area (Ų)':<15}")
    print("-" * 60)

    for cavity in cavities:
        print(
            f"{cavity['id']:<6} {cavity['volume']:<15.2f} {cavity['area']:<15.2f}"
        )

    print(rule)
    print("\nEnter cavity IDs to fill (comma-separated, e.g., '1,2,3')")
    print("Or enter 'all' to select all cavities")
    print("Or enter 'q' to quit")

    valid_ids = {c["id"] for c in cavities}

    selected_cavities = None
    while selected_cavities is None:
        try:
            user_input = input("\nYour selection: ").strip().lower()

            if user_input == 'q':
                print("Selection cancelled.")
                return [], {}

            if user_input == 'all':
                print(f"Selected all {len(cavities)} cavities")
                selected_cavities = cavities
                continue

            # Parse comma-separated IDs; int() raises ValueError on bad tokens.
            selected_ids = [int(token.strip()) for token in user_input.split(",")]

            invalid_ids = [sid for sid in selected_ids if sid not in valid_ids]
            if invalid_ids:
                print(f"Invalid cavity IDs: {invalid_ids}")
                print(f"Valid IDs are: {sorted(valid_ids)}")
                continue

            # Keep the order of `cavities`, not the order the user typed.
            wanted = set(selected_ids)
            chosen = [c for c in cavities if c["id"] in wanted]

            if chosen:
                print(f"\nSelected {len(chosen)} cavities: {[c['id'] for c in chosen]}")
                selected_cavities = chosen
            else:
                print("No cavities selected. Please try again.")

        except ValueError as e:
            print(f"Invalid input: {e}")
            print("Please enter comma-separated numbers, 'all', or 'q'")
        except EOFError:
            # Handle case when stdin is not available (e.g., in tests)
            print("\nNo input available. Selecting all cavities by default.")
            selected_cavities = cavities
        except KeyboardInterrupt:
            print("\n\nSelection cancelled.")
            return [], {}

    waters_per_cavity = {}
    if not (prompt_for_waters and selected_cavities):
        return selected_cavities, waters_per_cavity

    print("\n" + rule)
    print("Specify number of water molecules per cavity")
    print(rule)

    for cavity in selected_cavities:
        default_waters = _default_water_count(cavity)
        prompt = (
            f"Cavity {cavity['id']} (volume: {cavity['volume']:.2f} ų) "
            f"- waters [default: {default_waters}]: "
        )

        while True:
            try:
                answer = input(prompt).strip()

                if answer == '':
                    waters_per_cavity[cavity['id']] = default_waters
                    break

                n_waters = int(answer)
                if n_waters < 0:
                    print("  Error: Number of waters must be non-negative")
                    continue
                waters_per_cavity[cavity['id']] = n_waters
                break

            except ValueError:
                print("  Error: Please enter a valid number")
            except EOFError:
                # No more input: use the default for this cavity.
                waters_per_cavity[cavity['id']] = default_waters
                break
            except KeyboardInterrupt:
                print("\n\nCancelled. Using defaults for remaining cavities.")
                # Keep answers already given; fill only the unanswered cavities.
                for remaining_cavity in selected_cavities:
                    waters_per_cavity.setdefault(
                        remaining_cavity['id'], _default_water_count(remaining_cavity)
                    )
                return selected_cavities, waters_per_cavity

    return selected_cavities, waters_per_cavity
cavefiller/cli.py ADDED
@@ -0,0 +1,145 @@
1
+ """Command-line interface for CaveFiller using Typer."""
2
+
3
+ import typer
4
+ from pathlib import Path
5
+ from typing import Optional, List
6
+ from cavefiller.cavity_finder import find_cavities
7
+ from cavefiller.cavity_selector import select_cavities
8
+ from cavefiller.water_filler import fill_cavities_with_water
9
+
10
# Typer application object; commands are registered on it via @app.command().
app = typer.Typer(help="CaveFiller - Find and fill protein cavities with water molecules")
11
+
12
+
13
def _parse_int_list(raw: str, option_name: str) -> List[int]:
    """Parse a comma-separated list of integers, exiting with code 1 on bad input."""
    try:
        return [int(token.strip()) for token in raw.split(",")]
    except ValueError:
        typer.echo(
            f"❌ {option_name} must be a comma-separated list of integers, got: {raw!r}",
            err=True,
        )
        raise typer.Exit(code=1) from None


@app.command()
def run(
    protein_file: Path = typer.Argument(
        ...,
        exists=True,
        help="Path to the protein PDB file",
    ),
    output_dir: Path = typer.Option(
        Path("./output"),
        help="Directory to save output files",
    ),
    probe_in: float = typer.Option(
        1.4,
        help="Probe In radius for cavity detection (Å)",
    ),
    probe_out: float = typer.Option(
        4.0,
        help="Probe Out radius for cavity detection (Å)",
    ),
    volume_cutoff: float = typer.Option(
        5.0,
        help="Minimum cavity volume to consider (ų)",
    ),
    auto_select: bool = typer.Option(
        False,
        help="Automatically select all cavities (no user interaction)",
    ),
    cavity_ids: Optional[str] = typer.Option(
        None,
        help="Comma-separated list of cavity IDs to fill (e.g., '1,2,3'). If not provided, user will be prompted.",
    ),
    waters_per_cavity: Optional[str] = typer.Option(
        None,
        help="Comma-separated list of water counts per cavity (e.g., '10,15,20'). Must match cavity_ids order.",
    ),
    optimize_mmff94: bool = typer.Option(
        True,
        "--optimize-mmff94/--no-optimize-mmff94",
        help="Run MMFF94 after placement with protein atoms fixed and waters movable.",
    ),
    mmff_max_iterations: int = typer.Option(
        300,
        help="Maximum MMFF94 iterations when optimization is enabled.",
    ),
):
    """
    Find cavities in a protein and fill them with explicit water molecules.

    This tool performs the following steps:
    1. Detects cavities in the protein using KVFinder
    2. Allows user to select which cavities to fill
    3. Uses cavity-grid Monte Carlo sampling to place water oxygens
    4. Optionally runs MMFF94 with protein fixed and waters movable
    5. Builds explicit RDKit H-O-H waters and writes a combined PDB
    """
    # The docstring above is kept verbatim because Typer shows it as help text.
    # Every failure path below reports on stderr and exits with code 1.
    typer.echo(f"🔍 Analyzing protein: {protein_file}")

    # Create output directory
    output_dir.mkdir(parents=True, exist_ok=True)

    # Step 1: Find cavities using pykvfinder
    typer.echo("Step 1: Finding cavities with KVFinder...")
    cavities, cavity_data = find_cavities(
        str(protein_file),
        probe_in=probe_in,
        probe_out=probe_out,
        volume_cutoff=volume_cutoff,
        output_dir=str(output_dir),
    )

    if not cavities:
        typer.echo("❌ No cavities found in the protein.", err=True)
        raise typer.Exit(code=1)

    typer.echo(f"✅ Found {len(cavities)} cavities")

    # Step 2: Select cavities to fill
    typer.echo("\nStep 2: Selecting cavities to fill...")

    waters_dict = {}

    if waters_per_cavity and not cavity_ids:
        # Water counts are paired with --cavity-ids by position, so they
        # cannot be applied without it.
        typer.echo(
            "⚠️  --waters-per-cavity is ignored unless --cavity-ids is given",
            err=True,
        )

    if cavity_ids:
        # Parse cavity IDs from command line
        selected_ids = _parse_int_list(cavity_ids, "--cavity-ids")
        selected_cavities = [c for c in cavities if c["id"] in selected_ids]
        if not selected_cavities:
            typer.echo(f"❌ No cavities found with IDs: {cavity_ids}", err=True)
            raise typer.Exit(code=1)

        # Requested IDs that match nothing are skipped; say so instead of
        # dropping them silently.
        known_ids = {c["id"] for c in cavities}
        unknown_ids = [sid for sid in selected_ids if sid not in known_ids]
        if unknown_ids:
            typer.echo(f"⚠️  Ignoring unknown cavity IDs: {unknown_ids}", err=True)

        # Parse waters per cavity if provided
        if waters_per_cavity:
            water_counts = _parse_int_list(waters_per_cavity, "--waters-per-cavity")
            if len(water_counts) != len(selected_ids):
                typer.echo("❌ Number of water counts must match number of cavity IDs", err=True)
                raise typer.Exit(code=1)
            if any(count < 0 for count in water_counts):
                typer.echo("❌ Water counts must be non-negative", err=True)
                raise typer.Exit(code=1)
            waters_dict = dict(zip(selected_ids, water_counts))

    elif auto_select:
        # Auto-select all cavities
        selected_cavities = cavities
        typer.echo(f"Auto-selecting all {len(cavities)} cavities")
        # Use default water counts
        for cavity in selected_cavities:
            waters_dict[cavity['id']] = max(1, int(cavity['volume'] / 30))
    else:
        # Interactive selection
        selected_cavities, waters_dict = select_cavities(cavities, prompt_for_waters=True)

    if not selected_cavities:
        typer.echo("❌ No cavities selected.", err=True)
        raise typer.Exit(code=1)

    typer.echo(f"✅ Selected {len(selected_cavities)} cavities")

    # Step 3: Fill cavities with water
    typer.echo("\nStep 3: Filling cavities with water using Monte Carlo sampling...")
    typer.echo("  (with clash detection, optional fixed-protein MMFF94, and RDKit HOH generation)")
    output_file = fill_cavities_with_water(
        str(protein_file),
        selected_cavities,
        cavity_data,
        str(output_dir),
        waters_per_cavity=waters_dict,
        optimize_mmff94=optimize_mmff94,
        mmff_max_iterations=mmff_max_iterations,
    )

    typer.echo(f"\n✅ Success! Output saved to: {output_file}")
    typer.echo("\n🎉 CaveFiller completed successfully!")
142
+
143
+
144
# Invoke the Typer app when this module is executed as a script.
if __name__ == "__main__":
    app()