cavefiller 0.2.1__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cavefiller
3
- Version: 0.2.1
3
+ Version: 0.3.1
4
4
  Summary: A tool to find and fill protein cavities with water molecules using KVFinder and Packmol
5
5
  Author: CaveFiller Contributors
6
6
  Requires-Python: >=3.8
@@ -11,6 +11,7 @@ Requires-Dist: pykvfinder>=0.6.0
11
11
  Requires-Dist: rdkit>=2022.9.1
12
12
  Requires-Dist: numpy>=1.20.0
13
13
  Requires-Dist: biopython>=1.79
14
+ Requires-Dist: tqdm>=4.67.3
14
15
  Provides-Extra: dev
15
16
  Requires-Dist: pytest>=7.0.0; extra == "dev"
16
17
  Requires-Dist: black>=22.0.0; extra == "dev"
@@ -74,18 +75,23 @@ cavefiller [PROTEIN_FILE] [OPTIONS]
74
75
 
75
76
  **Options:**
76
77
  - `--output-dir PATH`: Directory to save output files (default: `./output`)
78
+ - `--grid-step FLOAT`: Grid spacing for cavity detection in Ångströms (default: 0.6)
77
79
  - `--probe-in FLOAT`: Probe In radius for cavity detection in Ångströms (default: 1.4)
78
80
  - `--probe-out FLOAT`: Probe Out radius for cavity detection in Ångströms (default: 4.0)
81
+ - `--exterior-trim-distance FLOAT`: Exterior trim distance (KVFinder removal distance) in Ångströms (default: 2.4)
79
82
  - `--volume-cutoff FLOAT`: Minimum cavity volume to consider in ų (default: 5.0)
80
83
  - `--auto-select`: Automatically select all cavities without user interaction
81
84
  - `--cavity-ids TEXT`: Comma-separated list of cavity IDs to fill (e.g., '1,2,3')
82
85
  - `--waters-per-cavity TEXT`: Comma-separated list of water counts (e.g., '10,15,20'), must match cavity-ids order
83
86
  - `--optimize-mmff94 / --no-optimize-mmff94`: Enable/disable MMFF94 with protein fixed (default: enabled)
84
87
  - `--mmff-max-iterations INTEGER`: Max MMFF94 iterations (default: 300)
88
+ - `--remove-after-optim / --no-remove-after-optim`: After MMFF94, remove waters that fail post-checks (default: enabled)
89
+ - Also accepted: `--remove_after_optim / --no_remove_after_optim`
85
90
 
86
91
  Recommended usage:
87
92
  - Prefer interactive/manual cavity and water-count selection over `--auto-select`. Auto-selection often overfills cavities with too many waters.
88
93
  - Keep `--optimize-mmff94` enabled (recommended) to refine water placement after Monte Carlo sampling.
94
+ - Use `--no-remove-after-optim` if you want to keep all waters after MMFF94, even if they clash or move out of cavity bounds.
89
95
 
90
96
  ### Examples
91
97
 
@@ -106,7 +112,7 @@ cavefiller protein.pdb --cavity-ids "1,3,5" --waters-per-cavity "10,15,20"
106
112
 
107
113
  **Custom cavity detection parameters:**
108
114
  ```bash
109
- cavefiller protein.pdb --probe-in 1.2 --probe-out 5.0 --volume-cutoff 10.0
115
+ cavefiller protein.pdb --grid-step 0.6 --probe-in 1.4 --probe-out 4.0 --exterior-trim-distance 2.4 --volume-cutoff 5.0
110
116
  ```
111
117
 
112
118
  ## Workflow
@@ -180,7 +186,7 @@ This repository includes GitHub Actions workflow at `.github/workflows/ci-cd.yml
180
186
  - Runs `pytest` on every push to `main`
181
187
  - Runs `pytest` on every pull request targeting `main`
182
188
  - Builds package distributions after tests pass
183
- - Publishes to PyPI only when you push a version tag like `v0.1.1`
189
+ - Publishes to PyPI only on pushes to `main` where `pyproject.toml` `project.version` changed
184
190
 
185
191
  #### One-time setup for automatic PyPI publishing
186
192
 
@@ -200,14 +206,7 @@ No PyPI API token secret is needed when using Trusted Publishing.
200
206
  - `pyproject.toml` (`project.version`)
201
207
  - `cavefiller/__init__.py` (`__version__`)
202
208
  2. Commit and push to `main`.
203
- 3. Create and push a matching tag:
204
-
205
- ```bash
206
- git tag v0.1.1
207
- git push origin v0.1.1
208
- ```
209
-
210
- The workflow validates that the tag matches `pyproject.toml` (for example, tag `v0.1.1` must match version `0.1.1`) before publishing.
209
+ 3. CI will then publish the pushed version to PyPI automatically, but only if the `project.version` in `pyproject.toml` changed relative to the previous commit on `main`.
211
210
 
212
211
  ## License
213
212
 
@@ -55,18 +55,23 @@ cavefiller [PROTEIN_FILE] [OPTIONS]
55
55
 
56
56
  **Options:**
57
57
  - `--output-dir PATH`: Directory to save output files (default: `./output`)
58
+ - `--grid-step FLOAT`: Grid spacing for cavity detection in Ångströms (default: 0.6)
58
59
  - `--probe-in FLOAT`: Probe In radius for cavity detection in Ångströms (default: 1.4)
59
60
  - `--probe-out FLOAT`: Probe Out radius for cavity detection in Ångströms (default: 4.0)
61
+ - `--exterior-trim-distance FLOAT`: Exterior trim distance (KVFinder removal distance) in Ångströms (default: 2.4)
60
62
  - `--volume-cutoff FLOAT`: Minimum cavity volume to consider in ų (default: 5.0)
61
63
  - `--auto-select`: Automatically select all cavities without user interaction
62
64
  - `--cavity-ids TEXT`: Comma-separated list of cavity IDs to fill (e.g., '1,2,3')
63
65
  - `--waters-per-cavity TEXT`: Comma-separated list of water counts (e.g., '10,15,20'), must match cavity-ids order
64
66
  - `--optimize-mmff94 / --no-optimize-mmff94`: Enable/disable MMFF94 with protein fixed (default: enabled)
65
67
  - `--mmff-max-iterations INTEGER`: Max MMFF94 iterations (default: 300)
68
+ - `--remove-after-optim / --no-remove-after-optim`: After MMFF94, remove waters that fail post-checks (default: enabled)
69
+ - Also accepted: `--remove_after_optim / --no_remove_after_optim`
66
70
 
67
71
  Recommended usage:
68
72
  - Prefer interactive/manual cavity and water-count selection over `--auto-select`. Auto-selection often overfills cavities with too many waters.
69
73
  - Keep `--optimize-mmff94` enabled (recommended) to refine water placement after Monte Carlo sampling.
74
+ - Use `--no-remove-after-optim` if you want to keep all waters after MMFF94, even if they clash or move out of cavity bounds.
70
75
 
71
76
  ### Examples
72
77
 
@@ -87,7 +92,7 @@ cavefiller protein.pdb --cavity-ids "1,3,5" --waters-per-cavity "10,15,20"
87
92
 
88
93
  **Custom cavity detection parameters:**
89
94
  ```bash
90
- cavefiller protein.pdb --probe-in 1.2 --probe-out 5.0 --volume-cutoff 10.0
95
+ cavefiller protein.pdb --grid-step 0.6 --probe-in 1.4 --probe-out 4.0 --exterior-trim-distance 2.4 --volume-cutoff 5.0
91
96
  ```
92
97
 
93
98
  ## Workflow
@@ -161,7 +166,7 @@ This repository includes GitHub Actions workflow at `.github/workflows/ci-cd.yml
161
166
  - Runs `pytest` on every push to `main`
162
167
  - Runs `pytest` on every pull request targeting `main`
163
168
  - Builds package distributions after tests pass
164
- - Publishes to PyPI only when you push a version tag like `v0.1.1`
169
+ - Publishes to PyPI only on pushes to `main` where `pyproject.toml` `project.version` changed
165
170
 
166
171
  #### One-time setup for automatic PyPI publishing
167
172
 
@@ -181,14 +186,7 @@ No PyPI API token secret is needed when using Trusted Publishing.
181
186
  - `pyproject.toml` (`project.version`)
182
187
  - `cavefiller/__init__.py` (`__version__`)
183
188
  2. Commit and push to `main`.
184
- 3. Create and push a matching tag:
185
-
186
- ```bash
187
- git tag v0.1.1
188
- git push origin v0.1.1
189
- ```
190
-
191
- The workflow validates that the tag matches `pyproject.toml` (for example, tag `v0.1.1` must match version `0.1.1`) before publishing.
189
+ 3. CI will then publish the pushed version to PyPI automatically, but only if the `project.version` in `pyproject.toml` changed relative to the previous commit on `main`.
192
190
 
193
191
  ## License
194
192
 
@@ -1,3 +1,3 @@
1
1
  """CaveFiller - A tool to find and fill protein cavities with water molecules."""
2
2
 
3
- __version__ = "0.2.1"
3
+ __version__ = "0.3.1"
@@ -0,0 +1,174 @@
1
+ """Cavity detection using pyKVFinder."""
2
+
3
+ from typing import List, Dict, Tuple, Any
4
+ import numpy as np
5
+
6
+ # Grid spacing for cavity detection (in Angstroms)
7
+ DEFAULT_GRID_STEP = 0.6
8
+ DEFAULT_PROBE_IN = 1.4
9
+ DEFAULT_PROBE_OUT = 4.0
10
+ DEFAULT_EXTERIOR_TRIM_DISTANCE = 2.4
11
+ DEFAULT_VOLUME_CUTOFF = 5.0
12
+
13
+
14
+ def _map_volume_keys_to_grid_labels(cavity_data: Any, volume_keys: List[str]) -> Dict[str, int]:
15
+ """
16
+ Map pyKVFinder cavity string IDs (KAA, KAB, ...) to integer labels in `cavity_data.cavities`.
17
+
18
+ pyKVFinder 0.9.0 often uses positive cavity labels starting at 2 (label 1 is reserved),
19
+ while volumes are reported by sequential string IDs. We map by ordered positive labels.
20
+ """
21
+ labels = sorted(int(v) for v in np.unique(cavity_data.cavities) if int(v) > 0)
22
+ if not labels:
23
+ return {}
24
+
25
+ # Most pyKVFinder outputs are contiguous and aligned with volume-key order.
26
+ if len(labels) >= len(volume_keys):
27
+ ordered_labels = labels[: len(volume_keys)]
28
+ else:
29
+ ordered_labels = labels + list(range(labels[-1] + 1, labels[-1] + 1 + (len(volume_keys) - len(labels))))
30
+
31
+ return {key: int(ordered_labels[idx]) for idx, key in enumerate(volume_keys)}
32
+
33
+
34
+ def find_cavities(
35
+ protein_file: str,
36
+ probe_in: float = DEFAULT_PROBE_IN,
37
+ probe_out: float = DEFAULT_PROBE_OUT,
38
+ step: float = DEFAULT_GRID_STEP,
39
+ removal_distance: float = DEFAULT_EXTERIOR_TRIM_DISTANCE,
40
+ volume_cutoff: float = DEFAULT_VOLUME_CUTOFF,
41
+ output_dir: str = "./output",
42
+ ) -> Tuple[List[Dict[str, Any]], Any]:
43
+ """
44
+ Find cavities in a protein structure using pyKVFinder.
45
+
46
+ Args:
47
+ protein_file: Path to the protein PDB file
48
+ probe_in: Probe In radius for cavity detection (Å)
49
+ probe_out: Probe Out radius for cavity detection (Å)
50
+ step: Grid spacing for cavity detection (Å)
51
+ removal_distance: Exterior trim distance for cavity detection (Å)
52
+ volume_cutoff: Minimum cavity volume to consider (Ų)
53
+ output_dir: Directory to save cavity detection results
54
+
55
+ Returns:
56
+ Tuple of (list of cavity dictionaries, cavity_data object)
57
+ """
58
+ try:
59
+ import pyKVFinder
60
+ except ImportError:
61
+ raise ImportError(
62
+ "pyKVFinder is not installed. Please install it with: pip install pykvfinder"
63
+ )
64
+
65
+ # Run KVFinder to detect cavities
66
+ cavity_data = pyKVFinder.run_workflow(
67
+ input=protein_file,
68
+ probe_in=probe_in,
69
+ probe_out=probe_out,
70
+ step=step,
71
+ removal_distance=removal_distance,
72
+ volume_cutoff=volume_cutoff,
73
+ )
74
+
75
+ # Extract cavity information
76
+ cavities = []
77
+
78
+ # Get cavity volumes and areas
79
+ if hasattr(cavity_data, 'volume') and cavity_data.volume is not None:
80
+ volumes = cavity_data.volume
81
+ areas = cavity_data.area if hasattr(cavity_data, 'area') else {}
82
+
83
+ # Map string IDs to underlying integer cavity-grid labels.
84
+ # User-facing IDs remain sequential for compatibility.
85
+ volume_keys = list(volumes.keys())
86
+ cavity_grid_id_map = _map_volume_keys_to_grid_labels(cavity_data, volume_keys)
87
+
88
+ # Process each cavity
89
+ for display_idx, (cavity_str_id, volume) in enumerate(volumes.items(), start=1):
90
+ if volume >= volume_cutoff:
91
+ cavity_info = {
92
+ "id": display_idx,
93
+ "grid_id": cavity_grid_id_map.get(cavity_str_id, display_idx),
94
+ "string_id": cavity_str_id,
95
+ "volume": volume,
96
+ "area": areas.get(cavity_str_id, 0.0) if areas else 0.0,
97
+ }
98
+ cavities.append(cavity_info)
99
+
100
+ # Sort cavities by volume (largest first)
101
+ cavities.sort(key=lambda x: x["volume"], reverse=True)
102
+
103
+ return cavities, cavity_data
104
+
105
+
106
+ def get_cavity_grid_points(cavity_data: Any, cavity_id: int) -> np.ndarray:
107
+ """
108
+ Get the grid points that belong to a specific cavity.
109
+
110
+ Args:
111
+ cavity_data: The cavity data object from pyKVFinder
112
+ cavity_id: Integer ID of the cavity (1-indexed)
113
+
114
+ Returns:
115
+ Array of (x, y, z) coordinates for the cavity grid points
116
+ """
117
+ if not hasattr(cavity_data, 'cavities') or cavity_data.cavities is None:
118
+ return np.array([])
119
+
120
+ # Get cavity grid
121
+ cavity_grid = cavity_data.cavities
122
+
123
+ # Find all points belonging to this cavity
124
+ # Note: KVFinder uses 1-indexed cavity IDs in the grid
125
+ points = np.argwhere(cavity_grid == cavity_id)
126
+
127
+ points = points.astype(float)
128
+
129
+ # Convert grid indices to real coordinates if metadata is available.
130
+ # pyKVFinder versions expose this either as public P1/P2/P3/P4 or private _vertices/_step.
131
+ step = float(getattr(cavity_data, "step", getattr(cavity_data, "_step", DEFAULT_GRID_STEP)))
132
+ vertices = None
133
+
134
+ if hasattr(cavity_data, "surface") and hasattr(cavity_data.surface, "P1"):
135
+ vertices = np.array(
136
+ [
137
+ [cavity_data.surface.P1[i] for i in range(3)],
138
+ [cavity_data.surface.P2[i] for i in range(3)],
139
+ [cavity_data.surface.P3[i] for i in range(3)],
140
+ [cavity_data.surface.P4[i] for i in range(3)],
141
+ ],
142
+ dtype=float,
143
+ )
144
+ elif hasattr(cavity_data, "P1"):
145
+ vertices = np.array(
146
+ [
147
+ [cavity_data.P1[i] for i in range(3)],
148
+ [cavity_data.P2[i] for i in range(3)],
149
+ [cavity_data.P3[i] for i in range(3)],
150
+ [cavity_data.P4[i] for i in range(3)],
151
+ ],
152
+ dtype=float,
153
+ )
154
+ elif hasattr(cavity_data, "_vertices"):
155
+ vertices = np.asarray(cavity_data._vertices, dtype=float)
156
+
157
+ if vertices is not None and vertices.shape[0] >= 4:
158
+ origin = vertices[0]
159
+ axes = [vertices[1] - origin, vertices[2] - origin, vertices[3] - origin]
160
+ unit_axes = []
161
+ for axis in axes:
162
+ norm = np.linalg.norm(axis)
163
+ unit_axes.append(axis / norm if norm > 1e-8 else np.zeros(3, dtype=float))
164
+ return (
165
+ origin
166
+ + points[:, [0]] * (step * unit_axes[0])
167
+ + points[:, [1]] * (step * unit_axes[1])
168
+ + points[:, [2]] * (step * unit_axes[2])
169
+ )
170
+ if vertices is not None and vertices.shape[0] >= 1:
171
+ return vertices[0] + points * step
172
+
173
+ # Fallback: return index-space points; downstream code will align to protein frame.
174
+ return points
@@ -2,8 +2,15 @@
2
2
 
3
3
  import typer
4
4
  from pathlib import Path
5
- from typing import Optional, List
6
- from cavefiller.cavity_finder import find_cavities
5
+ from typing import Optional
6
+ from cavefiller.cavity_finder import (
7
+ find_cavities,
8
+ DEFAULT_GRID_STEP,
9
+ DEFAULT_PROBE_IN,
10
+ DEFAULT_PROBE_OUT,
11
+ DEFAULT_EXTERIOR_TRIM_DISTANCE,
12
+ DEFAULT_VOLUME_CUTOFF,
13
+ )
7
14
  from cavefiller.cavity_selector import select_cavities
8
15
  from cavefiller.water_filler import fill_cavities_with_water
9
16
 
@@ -21,16 +28,26 @@ def run(
21
28
  Path("./output"),
22
29
  help="Directory to save output files",
23
30
  ),
31
+ grid_step: float = typer.Option(
32
+ DEFAULT_GRID_STEP,
33
+ "--grid-step",
34
+ help="Grid spacing for cavity detection (Å)",
35
+ ),
24
36
  probe_in: float = typer.Option(
25
- 1.4,
37
+ DEFAULT_PROBE_IN,
26
38
  help="Probe In radius for cavity detection (Å)",
27
39
  ),
28
40
  probe_out: float = typer.Option(
29
- 4.0,
41
+ DEFAULT_PROBE_OUT,
30
42
  help="Probe Out radius for cavity detection (Å)",
31
43
  ),
44
+ exterior_trim_distance: float = typer.Option(
45
+ DEFAULT_EXTERIOR_TRIM_DISTANCE,
46
+ "--exterior-trim-distance",
47
+ help="Exterior trim distance (KVFinder removal distance) (Å)",
48
+ ),
32
49
  volume_cutoff: float = typer.Option(
33
- 5.0,
50
+ DEFAULT_VOLUME_CUTOFF,
34
51
  help="Minimum cavity volume to consider (ų)",
35
52
  ),
36
53
  auto_select: bool = typer.Option(
@@ -54,6 +71,15 @@ def run(
54
71
  300,
55
72
  help="Maximum MMFF94 iterations when optimization is enabled.",
56
73
  ),
74
+ remove_after_optim: bool = typer.Option(
75
+ True,
76
+ "--remove-after-optim/--no-remove-after-optim",
77
+ "--remove_after_optim/--no_remove_after_optim",
78
+ help=(
79
+ "After MMFF94, remove waters that fail cavity/clash validation. "
80
+ "Disable to keep all optimized waters."
81
+ ),
82
+ ),
57
83
  ):
58
84
  """
59
85
  Find cavities in a protein and fill them with explicit water molecules.
@@ -74,8 +100,10 @@ def run(
74
100
  typer.echo("Step 1: Finding cavities with KVFinder...")
75
101
  cavities, cavity_data = find_cavities(
76
102
  str(protein_file),
103
+ step=grid_step,
77
104
  probe_in=probe_in,
78
105
  probe_out=probe_out,
106
+ removal_distance=exterior_trim_distance,
79
107
  volume_cutoff=volume_cutoff,
80
108
  output_dir=str(output_dir),
81
109
  )
@@ -135,6 +163,7 @@ def run(
135
163
  waters_per_cavity=waters_dict,
136
164
  optimize_mmff94=optimize_mmff94,
137
165
  mmff_max_iterations=mmff_max_iterations,
166
+ remove_after_optim=remove_after_optim,
138
167
  )
139
168
 
140
169
  typer.echo(f"\n✅ Success! Output saved to: {output_file}")
@@ -30,6 +30,46 @@ HOH_ANGLE_DEG = 104.52
30
30
  DEFAULT_MMFF_MAX_ITERS = 300
31
31
 
32
32
 
33
+ def _run_mmff_with_progress(ff: Any, max_iterations: int) -> None:
34
+ """Run MMFF minimization while reporting per-step progress."""
35
+ if max_iterations <= 0:
36
+ return
37
+
38
+ try:
39
+ from tqdm import tqdm # type: ignore
40
+ except Exception:
41
+ tqdm = None
42
+
43
+ converged = False
44
+ if tqdm is not None:
45
+ with tqdm(total=max_iterations, desc="MMFF94", unit="iter") as bar:
46
+ for _step in range(1, max_iterations + 1):
47
+ # RDKit returns 0 on convergence, non-zero otherwise.
48
+ status = ff.Minimize(maxIts=1)
49
+ bar.update(1)
50
+ if status == 0:
51
+ converged = True
52
+ break
53
+ if converged:
54
+ bar.set_postfix_str("converged")
55
+ else:
56
+ bar.set_postfix_str("max iters")
57
+ else:
58
+ print(f"MMFF94 optimization progress: 0/{max_iterations}")
59
+ for step in range(1, max_iterations + 1):
60
+ status = ff.Minimize(maxIts=1)
61
+ if step == 1 or step % 10 == 0 or step == max_iterations or status == 0:
62
+ print(f"MMFF94 optimization progress: {step}/{max_iterations}")
63
+ if status == 0:
64
+ converged = True
65
+ break
66
+
67
+ if converged:
68
+ print("MMFF94 optimization converged before max iterations")
69
+ else:
70
+ print("MMFF94 optimization reached max iterations")
71
+
72
+
33
73
  def fill_cavities_with_water(
34
74
  protein_file: str,
35
75
  selected_cavities: List[Dict[str, Any]],
@@ -38,6 +78,7 @@ def fill_cavities_with_water(
38
78
  waters_per_cavity: Dict[int, int] = None,
39
79
  optimize_mmff94: bool = True,
40
80
  mmff_max_iterations: int = DEFAULT_MMFF_MAX_ITERS,
81
+ remove_after_optim: bool = True,
41
82
  ) -> str:
42
83
  """Place explicit waters in selected cavities and write a combined PDB."""
43
84
  base_name = os.path.splitext(os.path.basename(protein_file))[0]
@@ -51,6 +92,7 @@ def fill_cavities_with_water(
51
92
 
52
93
  for cavity in selected_cavities:
53
94
  cavity_id = cavity["id"]
95
+ cavity_grid_id = int(cavity.get("grid_id", cavity_id))
54
96
  volume = cavity["volume"]
55
97
 
56
98
  if waters_per_cavity and cavity_id in waters_per_cavity:
@@ -63,9 +105,12 @@ def fill_cavities_with_water(
63
105
  f"(volume: {volume:.2f} A^3)..."
64
106
  )
65
107
 
66
- cavity_points = get_cavity_grid_points(cavity_data, cavity_id)
108
+ cavity_points = get_cavity_grid_points(cavity_data, cavity_grid_id)
67
109
  if len(cavity_points) == 0:
68
- print(f" Warning: no grid points found for cavity {cavity_id}")
110
+ print(
111
+ f" Warning: no grid points found for cavity {cavity_id} "
112
+ f"(grid label {cavity_grid_id})"
113
+ )
69
114
  continue
70
115
 
71
116
  cavity_points = _ensure_cavity_points_near_protein(cavity_points, protein_atoms)
@@ -105,12 +150,19 @@ def fill_cavities_with_water(
105
150
  water_origin_cavity_points=all_water_origin_cavity_points,
106
151
  protein_atoms=protein_atoms,
107
152
  max_iterations=mmff_max_iterations,
153
+ remove_after_optim=remove_after_optim,
108
154
  )
109
155
  if optimized_positions:
110
- print(
111
- f"MMFF94 (fixed protein) kept {len(optimized_positions)}/"
112
- f"{len(all_water_positions)} waters after filtering"
113
- )
156
+ if remove_after_optim:
157
+ print(
158
+ f"MMFF94 (fixed protein) kept {len(optimized_positions)}/"
159
+ f"{len(all_water_positions)} waters after filtering"
160
+ )
161
+ else:
162
+ print(
163
+ f"MMFF94 (fixed protein) kept all {len(optimized_positions)} waters "
164
+ "without post-optimization filtering"
165
+ )
114
166
  optimized_waters_mol = build_waters_mol(
115
167
  water_positions=[],
116
168
  chain_id="W",
@@ -278,6 +330,7 @@ def optimize_waters_mmff94_fixed_protein(
278
330
  water_origin_cavity_points: List[np.ndarray],
279
331
  protein_atoms: List[Tuple[str, np.ndarray]],
280
332
  max_iterations: int = DEFAULT_MMFF_MAX_ITERS,
333
+ remove_after_optim: bool = True,
281
334
  ) -> Tuple[List[np.ndarray], List[Tuple[np.ndarray, np.ndarray, np.ndarray]]]:
282
335
  """MMFF94 optimize waters with protein atoms fixed in place."""
283
336
  if not water_positions:
@@ -328,7 +381,7 @@ def optimize_waters_mmff94_fixed_protein(
328
381
  ff.AddFixedPoint(atom_idx)
329
382
 
330
383
  ff.Initialize()
331
- ff.Minimize(maxIts=max_iterations)
384
+ _run_mmff_with_progress(ff, max_iterations)
332
385
 
333
386
  conf = work_mol.GetConformer()
334
387
  optimized_geometries = []
@@ -347,6 +400,9 @@ def optimize_waters_mmff94_fixed_protein(
347
400
  )
348
401
  )
349
402
 
403
+ if not remove_after_optim:
404
+ return [geom[0] for geom in optimized_geometries], optimized_geometries
405
+
350
406
  selected_geometries = _select_valid_geometries_after_mmff(
351
407
  optimized_geometries=optimized_geometries,
352
408
  original_geometries=original_geometries,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cavefiller
3
- Version: 0.2.1
3
+ Version: 0.3.1
4
4
  Summary: A tool to find and fill protein cavities with water molecules using KVFinder and Packmol
5
5
  Author: CaveFiller Contributors
6
6
  Requires-Python: >=3.8
@@ -11,6 +11,7 @@ Requires-Dist: pykvfinder>=0.6.0
11
11
  Requires-Dist: rdkit>=2022.9.1
12
12
  Requires-Dist: numpy>=1.20.0
13
13
  Requires-Dist: biopython>=1.79
14
+ Requires-Dist: tqdm>=4.67.3
14
15
  Provides-Extra: dev
15
16
  Requires-Dist: pytest>=7.0.0; extra == "dev"
16
17
  Requires-Dist: black>=22.0.0; extra == "dev"
@@ -74,18 +75,23 @@ cavefiller [PROTEIN_FILE] [OPTIONS]
74
75
 
75
76
  **Options:**
76
77
  - `--output-dir PATH`: Directory to save output files (default: `./output`)
78
+ - `--grid-step FLOAT`: Grid spacing for cavity detection in Ångströms (default: 0.6)
77
79
  - `--probe-in FLOAT`: Probe In radius for cavity detection in Ångströms (default: 1.4)
78
80
  - `--probe-out FLOAT`: Probe Out radius for cavity detection in Ångströms (default: 4.0)
81
+ - `--exterior-trim-distance FLOAT`: Exterior trim distance (KVFinder removal distance) in Ångströms (default: 2.4)
79
82
  - `--volume-cutoff FLOAT`: Minimum cavity volume to consider in ų (default: 5.0)
80
83
  - `--auto-select`: Automatically select all cavities without user interaction
81
84
  - `--cavity-ids TEXT`: Comma-separated list of cavity IDs to fill (e.g., '1,2,3')
82
85
  - `--waters-per-cavity TEXT`: Comma-separated list of water counts (e.g., '10,15,20'), must match cavity-ids order
83
86
  - `--optimize-mmff94 / --no-optimize-mmff94`: Enable/disable MMFF94 with protein fixed (default: enabled)
84
87
  - `--mmff-max-iterations INTEGER`: Max MMFF94 iterations (default: 300)
88
+ - `--remove-after-optim / --no-remove-after-optim`: After MMFF94, remove waters that fail post-checks (default: enabled)
89
+ - Also accepted: `--remove_after_optim / --no_remove_after_optim`
85
90
 
86
91
  Recommended usage:
87
92
  - Prefer interactive/manual cavity and water-count selection over `--auto-select`. Auto-selection often overfills cavities with too many waters.
88
93
  - Keep `--optimize-mmff94` enabled (recommended) to refine water placement after Monte Carlo sampling.
94
+ - Use `--no-remove-after-optim` if you want to keep all waters after MMFF94, even if they clash or move out of cavity bounds.
89
95
 
90
96
  ### Examples
91
97
 
@@ -106,7 +112,7 @@ cavefiller protein.pdb --cavity-ids "1,3,5" --waters-per-cavity "10,15,20"
106
112
 
107
113
  **Custom cavity detection parameters:**
108
114
  ```bash
109
- cavefiller protein.pdb --probe-in 1.2 --probe-out 5.0 --volume-cutoff 10.0
115
+ cavefiller protein.pdb --grid-step 0.6 --probe-in 1.4 --probe-out 4.0 --exterior-trim-distance 2.4 --volume-cutoff 5.0
110
116
  ```
111
117
 
112
118
  ## Workflow
@@ -180,7 +186,7 @@ This repository includes GitHub Actions workflow at `.github/workflows/ci-cd.yml
180
186
  - Runs `pytest` on every push to `main`
181
187
  - Runs `pytest` on every pull request targeting `main`
182
188
  - Builds package distributions after tests pass
183
- - Publishes to PyPI only when you push a version tag like `v0.1.1`
189
+ - Publishes to PyPI only on pushes to `main` where `pyproject.toml` `project.version` changed
184
190
 
185
191
  #### One-time setup for automatic PyPI publishing
186
192
 
@@ -200,14 +206,7 @@ No PyPI API token secret is needed when using Trusted Publishing.
200
206
  - `pyproject.toml` (`project.version`)
201
207
  - `cavefiller/__init__.py` (`__version__`)
202
208
  2. Commit and push to `main`.
203
- 3. Create and push a matching tag:
204
-
205
- ```bash
206
- git tag v0.1.1
207
- git push origin v0.1.1
208
- ```
209
-
210
- The workflow validates that the tag matches `pyproject.toml` (for example, tag `v0.1.1` must match version `0.1.1`) before publishing.
209
+ 3. CI will then publish the pushed version to PyPI automatically, but only if the `project.version` in `pyproject.toml` changed relative to the previous commit on `main`.
211
210
 
212
211
  ## License
213
212
 
@@ -3,6 +3,7 @@ pykvfinder>=0.6.0
3
3
  rdkit>=2022.9.1
4
4
  numpy>=1.20.0
5
5
  biopython>=1.79
6
+ tqdm>=4.67.3
6
7
 
7
8
  [dev]
8
9
  pytest>=7.0.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "cavefiller"
7
- version = "0.2.1"
7
+ version = "0.3.1"
8
8
  description = "A tool to find and fill protein cavities with water molecules using KVFinder and Packmol"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -17,6 +17,7 @@ dependencies = [
17
17
  "rdkit>=2022.9.1",
18
18
  "numpy>=1.20.0",
19
19
  "biopython>=1.79",
20
+ "tqdm>=4.67.3",
20
21
  ]
21
22
 
22
23
  [project.optional-dependencies]
@@ -215,6 +215,7 @@ def test_fill_cavities_uses_mmff94_optimizer(monkeypatch):
215
215
  water_origin_cavity_points,
216
216
  protein_atoms,
217
217
  max_iterations,
218
+ remove_after_optim,
218
219
  ):
219
220
  called["value"] = True
220
221
  called["max_iterations"] = max_iterations
@@ -235,3 +236,46 @@ def test_fill_cavities_uses_mmff94_optimizer(monkeypatch):
235
236
  assert called["value"] is True
236
237
  assert called["max_iterations"] == 123
237
238
  assert os.path.exists(output_file)
239
+
240
+
241
+ def test_fill_cavities_passes_remove_after_optim_flag(monkeypatch):
242
+ """Test that fill_cavities_with_water forwards remove_after_optim to MMFF stage."""
243
+ from cavefiller.cavity_finder import find_cavities
244
+ from cavefiller import water_filler as wf
245
+
246
+ if not EXAMPLE_PDB.exists():
247
+ pytest.skip("Example protein file not found")
248
+
249
+ with tempfile.TemporaryDirectory() as tmpdir:
250
+ cavities, cavity_data = find_cavities(str(EXAMPLE_PDB), output_dir=tmpdir)
251
+ if len(cavities) == 0:
252
+ pytest.skip("No cavities found in test protein")
253
+
254
+ called = {"remove_after_optim": None}
255
+ real_builder = wf._build_water_geometries_from_positions
256
+
257
+ def fake_optimize(
258
+ protein_mol,
259
+ water_positions,
260
+ water_origin_cavity_points,
261
+ protein_atoms,
262
+ max_iterations,
263
+ remove_after_optim,
264
+ ):
265
+ called["remove_after_optim"] = remove_after_optim
266
+ return water_positions, real_builder(water_positions, protein_atoms)
267
+
268
+ monkeypatch.setattr(wf, "optimize_waters_mmff94_fixed_protein", fake_optimize)
269
+
270
+ output_file = wf.fill_cavities_with_water(
271
+ str(EXAMPLE_PDB),
272
+ cavities[:1],
273
+ cavity_data,
274
+ tmpdir,
275
+ waters_per_cavity={cavities[0]["id"]: 2},
276
+ optimize_mmff94=True,
277
+ remove_after_optim=False,
278
+ )
279
+
280
+ assert called["remove_after_optim"] is False
281
+ assert os.path.exists(output_file)
@@ -1,114 +0,0 @@
1
- """Cavity detection using pyKVFinder."""
2
-
3
- import os
4
- from typing import List, Dict, Tuple, Any
5
- import numpy as np
6
-
7
- # Grid spacing for cavity detection (in Angstroms)
8
- DEFAULT_GRID_STEP = 0.6
9
-
10
-
11
- def find_cavities(
12
- protein_file: str,
13
- probe_in: float = 1.4,
14
- probe_out: float = 4.0,
15
- volume_cutoff: float = 5.0,
16
- output_dir: str = "./output",
17
- ) -> Tuple[List[Dict[str, Any]], Any]:
18
- """
19
- Find cavities in a protein structure using pyKVFinder.
20
-
21
- Args:
22
- protein_file: Path to the protein PDB file
23
- probe_in: Probe In radius for cavity detection (Å)
24
- probe_out: Probe Out radius for cavity detection (Å)
25
- volume_cutoff: Minimum cavity volume to consider (Ų)
26
- output_dir: Directory to save cavity detection results
27
-
28
- Returns:
29
- Tuple of (list of cavity dictionaries, cavity_data object)
30
- """
31
- try:
32
- import pyKVFinder
33
- except ImportError:
34
- raise ImportError(
35
- "pyKVFinder is not installed. Please install it with: pip install pykvfinder"
36
- )
37
-
38
- # Run KVFinder to detect cavities
39
- cavity_data = pyKVFinder.run_workflow(
40
- input=protein_file,
41
- probe_in=probe_in,
42
- probe_out=probe_out,
43
- step=DEFAULT_GRID_STEP, # Grid step size
44
- volume_cutoff=volume_cutoff,
45
- )
46
-
47
- # Extract cavity information
48
- cavities = []
49
-
50
- # Get cavity volumes and areas
51
- if hasattr(cavity_data, 'volume') and cavity_data.volume is not None:
52
- volumes = cavity_data.volume
53
- areas = cavity_data.area if hasattr(cavity_data, 'area') else {}
54
-
55
- # Create mapping from string IDs to integer IDs
56
- # KVFinder uses string IDs like 'KAA', 'KAB', etc., but the grid uses integers
57
- cavity_id_map = {}
58
- for idx, (cavity_str_id, volume) in enumerate(volumes.items(), start=1):
59
- cavity_id_map[cavity_str_id] = idx
60
-
61
- # Process each cavity
62
- for cavity_str_id, volume in volumes.items():
63
- if volume >= volume_cutoff:
64
- cavity_info = {
65
- "id": cavity_id_map[cavity_str_id],
66
- "string_id": cavity_str_id,
67
- "volume": volume,
68
- "area": areas.get(cavity_str_id, 0.0) if areas else 0.0,
69
- }
70
- cavities.append(cavity_info)
71
-
72
- # Sort cavities by volume (largest first)
73
- cavities.sort(key=lambda x: x["volume"], reverse=True)
74
-
75
- return cavities, cavity_data
76
-
77
-
78
- def get_cavity_grid_points(cavity_data: Any, cavity_id: int) -> np.ndarray:
79
- """
80
- Get the grid points that belong to a specific cavity.
81
-
82
- Args:
83
- cavity_data: The cavity data object from pyKVFinder
84
- cavity_id: Integer ID of the cavity (1-indexed)
85
-
86
- Returns:
87
- Array of (x, y, z) coordinates for the cavity grid points
88
- """
89
- if not hasattr(cavity_data, 'cavities') or cavity_data.cavities is None:
90
- return np.array([])
91
-
92
- # Get cavity grid
93
- cavity_grid = cavity_data.cavities
94
-
95
- # Find all points belonging to this cavity
96
- # Note: KVFinder uses 1-indexed cavity IDs in the grid
97
- points = np.argwhere(cavity_grid == cavity_id)
98
-
99
- # Convert grid indices to real coordinates if origin metadata is available.
100
- # Different pyKVFinder versions expose metadata on either cavity_data or cavity_data.surface.
101
- step = getattr(cavity_data, "step", DEFAULT_GRID_STEP)
102
- origin = None
103
-
104
- if hasattr(cavity_data, "surface") and hasattr(cavity_data.surface, "P1"):
105
- origin = np.array([cavity_data.surface.P1[i] for i in range(3)], dtype=float)
106
- elif hasattr(cavity_data, "P1"):
107
- origin = np.array([cavity_data.P1[i] for i in range(3)], dtype=float)
108
-
109
- points = points.astype(float)
110
- if origin is not None:
111
- return origin + points * float(step)
112
-
113
- # Fallback: return index-space points; downstream code will align to protein frame.
114
- return points
File without changes
File without changes