cmxflow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. cmxflow/__init__.py +21 -0
  2. cmxflow/block.py +357 -0
  3. cmxflow/cmxmol.py +124 -0
  4. cmxflow/mcp/__init__.py +11 -0
  5. cmxflow/mcp/server.py +1210 -0
  6. cmxflow/mcp/state.py +314 -0
  7. cmxflow/operators/__init__.py +34 -0
  8. cmxflow/operators/align.py +485 -0
  9. cmxflow/operators/base.py +91 -0
  10. cmxflow/operators/cluster.py +119 -0
  11. cmxflow/operators/confgen.py +222 -0
  12. cmxflow/operators/dedup.py +65 -0
  13. cmxflow/operators/dock/__init__.py +106 -0
  14. cmxflow/operators/dock/dock.py +267 -0
  15. cmxflow/operators/dock/ec.py +260 -0
  16. cmxflow/operators/dock/pose.py +836 -0
  17. cmxflow/operators/dock/score.py +628 -0
  18. cmxflow/operators/filter.py +507 -0
  19. cmxflow/operators/ionize.py +184 -0
  20. cmxflow/operators/method.py +145 -0
  21. cmxflow/operators/select.py +216 -0
  22. cmxflow/operators/sim2d.py +187 -0
  23. cmxflow/operators/sim3d.py +276 -0
  24. cmxflow/operators/standardize.py +98 -0
  25. cmxflow/opt/__init__.py +5 -0
  26. cmxflow/opt/optuna.py +225 -0
  27. cmxflow/parameter.py +197 -0
  28. cmxflow/scores/__init__.py +19 -0
  29. cmxflow/scores/automatic.py +310 -0
  30. cmxflow/scores/cluster.py +84 -0
  31. cmxflow/scores/shape.py +230 -0
  32. cmxflow/sinks/__init__.py +15 -0
  33. cmxflow/sinks/molecule.py +106 -0
  34. cmxflow/sinks/table.py +107 -0
  35. cmxflow/sinks/writer.py +57 -0
  36. cmxflow/sources/__init__.py +17 -0
  37. cmxflow/sources/molecule.py +58 -0
  38. cmxflow/sources/reader.py +121 -0
  39. cmxflow/sources/table.py +142 -0
  40. cmxflow/utils/__init__.py +20 -0
  41. cmxflow/utils/parallel.py +617 -0
  42. cmxflow/utils/pymol.py +65 -0
  43. cmxflow/utils/serial.py +213 -0
  44. cmxflow/utils/text.py +244 -0
  45. cmxflow/workflow.py +263 -0
  46. cmxflow-0.1.0.dist-info/METADATA +187 -0
  47. cmxflow-0.1.0.dist-info/RECORD +50 -0
  48. cmxflow-0.1.0.dist-info/WHEEL +4 -0
  49. cmxflow-0.1.0.dist-info/entry_points.txt +3 -0
  50. cmxflow-0.1.0.dist-info/licenses/LICENSE +21 -0
cmxflow/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ """cmxflow: Automated cheminformatics workflow optimization."""
2
+
3
+ from cmxflow.block import Block, BlockBase, SinkBlock, SourceBlock
4
+ from cmxflow.cmxmol import Mol, unwrap_mol, wrap_mol
5
+ from cmxflow.utils.serial import load_workflow, save_workflow
6
+ from cmxflow.workflow import Workflow, WorkflowValidationError
7
+
8
+ __version__ = "0.1.0"
9
+ __all__ = [
10
+ "Block",
11
+ "BlockBase",
12
+ "Mol",
13
+ "SinkBlock",
14
+ "SourceBlock",
15
+ "Workflow",
16
+ "WorkflowValidationError",
17
+ "load_workflow",
18
+ "save_workflow",
19
+ "unwrap_mol",
20
+ "wrap_mol",
21
+ ]
cmxflow/block.py ADDED
@@ -0,0 +1,357 @@
1
+ """Base class for molecule operation blocks."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from collections.abc import Iterator
5
+ from pathlib import Path
6
+ from typing import Any, Callable
7
+
8
+ from cmxflow.parameter import (
9
+ Categorical,
10
+ Continuous,
11
+ Integer,
12
+ )
13
+ from cmxflow.utils import text
14
+
15
+
16
class BlockBase(ABC):
    """Base class with shared functionality for all block types.

    Attributes:
        name: Human-readable name for the block.
        params: Dictionary mapping parameter names to the parameter objects
            registered through ``mutable``.
        input_files: Dictionary mapping each required input file name to its
            path. ``Path(".")`` is the placeholder for a file not yet set.
        input_text: Dictionary mapping each required text input name to its
            value. An empty string is the placeholder for text not yet set.
    """

    def __init__(
        self,
        name: str | None = None,
        input_files: list[str] | None = None,
        input_text: list[str] | None = None,
    ) -> None:
        """Initialize the block.

        Args:
            name: Optional name for the block. Defaults to class name.
            input_files: Optional names of files that will be surfaced as
                required input at run time.
            input_text: Optional names of text values that will be surfaced
                as required input at run time.
        """
        self.name = name or self.__class__.__name__
        if input_files is None:
            input_files = []
        if input_text is None:
            input_text = []
        # Placeholders ("." / "") mark inputs that still have to be supplied.
        self.input_files: dict[str, Path] = {key: Path(".") for key in input_files}
        self.input_text: dict[str, str] = {key: "" for key in input_text}
        self.params: dict[str, Continuous | Categorical | Integer] = {}

    def set_inputs(self, **config: Any) -> None:
        """Set inputs if matching required files, text or parameters.

        Keys are matched against ``input_files`` first, then ``input_text``,
        then ``params``. Keys that match none of them are ignored.

        Args:
            **config: Keyword arguments mapping input names to values.
                For file inputs, values should be file paths.
                For text inputs, values should be strings.
                For params, values should match the parameter type.
        """
        for key, value in config.items():
            if key in self.input_files:
                path = Path(value)
                # NOTE(review): a path that is not an existing file is dropped
                # silently and the previous value is kept - confirm that
                # callers validate required inputs elsewhere.
                if path.is_file():
                    self.input_files[key] = path
            elif key in self.input_text:
                self.input_text[key] = value
            elif key in self.params:
                self.params[key].set(value)

    def get_params(self) -> dict[str, Any]:
        """Get all mutable parameters for this block.

        Returns:
            Dictionary mapping parameter names to Parameter objects. This is
            the block's own dictionary, not a copy.
        """
        return self.params

    def get_param(self, key: str) -> Any:
        """Get the current value of a mutable parameter.

        Args:
            key: Name of the parameter to retrieve.

        Returns:
            The current value of the parameter.

        Raises:
            KeyError: If the parameter name is not registered.
        """
        if key not in self.params:
            raise KeyError(f"{key} is not a valid parameter.")
        return self.params[key].get()

    def mutable(self, *parameters: Continuous | Integer | Categorical) -> None:
        """Register parameters as mutable for optimization.

        A parameter whose name is already registered replaces the old entry.

        Args:
            *parameters: Parameter objects to register as mutable.
        """
        for parameter in parameters:
            self.params[parameter.name] = parameter

    def __repr__(self) -> str:
        """Render the block, plus a "RequiredInput" frame when it has inputs."""
        block = text.generate_framed_block(self.name, self.params)
        # Build fresh display dictionaries instead of rewriting entries of a
        # dictionary while it is being iterated. Unset inputs are shown as
        # "[FILE]" / "[TEXT]"; text entries override file entries that share
        # a name.
        inputs: dict[str, str | Path] = {
            key: "[FILE]" if str(path) == "." else path
            for key, path in self.input_files.items()
        }
        inputs.update(
            (key, value or "[TEXT]") for key, value in self.input_text.items()
        )
        if inputs:
            framed_inputs = text.generate_framed_block("RequiredInput", inputs)
            block = text.left_merge_framed_block(block, framed_inputs)
        return block

    @abstractmethod
    def __call__(self, *arg: Any) -> Any: ...

    def reset_cache(self) -> None:
        """Reset any cached state for a new optimization iteration.

        Called at the start of each optimization trial to ensure blocks
        don't retain stale cached data. Override in subclasses that
        maintain internal caches. The base implementation does nothing.
        """
132
+
133
+
134
class Block(BlockBase):
    """Block that maps a transformation over the items of an iterator.

    Concrete subclasses supply the per-item work in `forward`; the optional
    `check_input` / `check_output` hooks decide which items are processed
    and which results are emitted.
    """

    @abstractmethod
    def forward(self, arg: Any) -> Any:
        """Transform one item.

        Args:
            arg: Input item to transform.

        Returns:
            Transformed item.
        """
        ...

    def __call__(self, iter: Iterator[Any]) -> Iterator[Any]:
        """Lazily run the block over an iterator of items.

        Args:
            iter: Iterator of input items to process.

        Yields:
            Results of `forward` for items accepted by `check_input`,
            restricted to results accepted by `check_output`.
        """
        for candidate in iter:
            if self.check_input(candidate):
                produced = self.forward(candidate)
                if self.check_output(produced):
                    yield produced

    def check_input(self, arg: Any) -> bool:
        """Decide whether an input item should be processed.

        The default accepts every item; override to skip invalid inputs.

        Args:
            arg: Input item to validate.

        Returns:
            True if the item should be processed, False to skip.
        """
        return True

    def check_output(self, arg: Any) -> bool:
        """Decide whether an output item should be yielded.

        The default accepts every item; override to discard invalid outputs.

        Args:
            arg: Output item to validate.

        Returns:
            True if the item should be yielded, False to discard.
        """
        return True
193
+
194
+
195
class SourceBlock(BlockBase):
    """Block that starts a workflow by reading items from a source file."""

    def __init__(self, reader: Callable[[Path], Iterator[Any]]) -> None:
        """Create a source block around a reader function.

        Args:
            reader: Callable that takes a Path and yields items.
        """
        super().__init__()
        self.reader = reader

    def forward(self, path: Path) -> Iterator[Any]:
        """Lazily produce the items the reader yields for ``path``.

        Args:
            path: Path to the source file.

        Yields:
            Items read from the source file.
        """
        source_items = self.reader(path)
        for entry in source_items:
            yield entry

    def __call__(self, path: Path) -> Iterator[Any]:
        """Run the source block; equivalent to calling `forward`.

        Args:
            path: Path to the source file.

        Returns:
            Iterator of items from the source file.
        """
        items = self.forward(path)
        return items

    def __repr__(self) -> str:
        fields = {"input": "[FILE]"}
        return text.generate_framed_block(self.name, fields)
232
+
233
+
234
class SinkBlock(BlockBase):
    """Block that ends a workflow by handing items to a writer function."""

    def __init__(self, writer: Callable[[Iterator[Any], Path], None]) -> None:
        """Create a sink block around a writer function.

        Args:
            writer: Callable that takes an iterator and Path to write items.
        """
        super().__init__()
        self.writer = writer

    def forward(self, iter: Iterator[Any], path: Path) -> None:
        """Pass the items and destination path to the writer.

        Args:
            iter: Iterator of items to write.
            path: Path to the destination file.
        """
        write = self.writer
        write(iter, path)

    def __call__(self, iter: Iterator[Any], path: Path) -> None:
        """Run the sink block; equivalent to calling `forward`.

        Args:
            iter: Iterator of items to write.
            path: Path to the destination file.
        """
        self.forward(iter, path)

    def __repr__(self) -> str:
        fields = {"output": "[FILE]"}
        return text.generate_framed_block(self.name, fields)
266
+
267
+
268
class ScoreBlock(BlockBase):
    """Block that either scores its input or passes it through.

    When called with a ``uid`` (optimization mode) the block returns the
    value of `objective`. When called without one (normal mode) it returns
    an iterator that applies `forward` to each item.

    Subclasses must implement `objective`. They may override `forward` to
    transform or filter items during normal execution.
    """

    def __init__(
        self,
        name: str | None = None,
        input_files: list[str] | None = None,
        input_text: list[str] | None = None,
    ) -> None:
        """Set up the score block.

        Args:
            name: Optional name for the block.
            input_files: Optional files required at run time.
            input_text: Optional text required at run time.
        """
        super().__init__(name=name, input_files=input_files, input_text=input_text)
        # The base class here only creates these; nothing in this class reads
        # or clears them.
        # NOTE(review): presumably subclasses key the cache by uid - confirm.
        self._cache: dict[Any, Any] = {}
        self._uid: None | tuple[str, ...] = None

    def _set_score_properties(self, *args: Any) -> None:
        # Intentionally a no-op in the base class.
        pass

    @abstractmethod
    def objective(self, iter: Iterator[Any]) -> float:
        """Compute the optimization objective score.

        Invoked by `__call__` only when a ``uid`` is supplied.

        Args:
            iter: Iterator of items to score.

        Returns:
            Score value (higher is better for maximize, lower for minimize).
        """
        ...

    def forward(self, item: Any) -> Any:
        """Handle a single item during normal (non-optimization) execution.

        The default returns the item unchanged. Override to transform items,
        or return None to drop an item.

        Args:
            item: Input item.

        Returns:
            Transformed item, or None to filter it out.
        """
        return item

    def __call__(
        self, iter: Iterator[Any], uid: tuple[str, ...] | None = None
    ) -> float | Iterator[Any]:
        """Run the score block in optimization or normal mode.

        Args:
            iter: Iterator of input items.
            uid: Unique identifier for caching (present during optimization).

        Returns:
            If uid is provided (optimization mode): float score.
            If uid is None (normal mode): Iterator of transformed items.
        """
        if uid is None:
            return self._forward_iter(iter)
        # Remember the identifier before scoring so objective() can see it.
        self._uid = uid
        return self.objective(iter)

    def _forward_iter(self, iter: Iterator[Any]) -> Iterator[Any]:
        """Lazily apply `forward` to every item, dropping None results.

        Args:
            iter: Iterator of input items.

        Yields:
            Transformed items (items where forward returns None are filtered).
        """
        for entry in iter:
            if (outcome := self.forward(entry)) is not None:
                yield outcome
cmxflow/cmxmol.py ADDED
@@ -0,0 +1,124 @@
1
+ """Molecule wrapper that preserves properties through pickling."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from rdkit import Chem
8
+
9
+
10
class Mol(Chem.Mol):
    """RDKit Mol subclass that mirrors its properties in a Python dict.

    Every property written through the ``Set*Prop`` methods below is also
    recorded in ``_prop_cache``, and `restore_properties` writes the recorded
    values back onto the molecule. Per the module docstring, the purpose is
    to keep properties available after pickling.

    Attributes:
        _prop_cache: Dictionary of property names to cached values. Seeded
            at construction from the source molecule's properties
            (private ones included).
    """

    def __init__(self, mol: Chem.Mol) -> None:
        """Copy-construct from an RDKit Mol and snapshot its properties.

        Args:
            mol: RDKit Mol object to wrap.
        """
        super().__init__(mol)
        # Call the RDKit implementation directly: the override below would
        # need _prop_cache, which does not exist yet.
        initial_props = super().GetPropsAsDict(includePrivate=True)
        self._prop_cache: dict[str, Any] = initial_props

    def SetProp(self, key: str, value: str, **kwargs: bool) -> None:
        """Record a string property in the cache, then set it on the Mol.

        Args:
            key: Property name.
            value: Property value as string.
            **kwargs: Additional arguments passed to RDKit SetProp.
        """
        self._prop_cache[key] = value
        super().SetProp(key, value, **kwargs)

    def SetDoubleProp(self, key: str, value: float, **kwargs: bool) -> None:
        """Record a float property in the cache, then set it on the Mol.

        Args:
            key: Property name.
            value: Property value as float.
            **kwargs: Additional arguments passed to RDKit SetDoubleProp.
        """
        self._prop_cache[key] = value
        super().SetDoubleProp(key, value, **kwargs)

    def SetBoolProp(self, key: str, value: bool, **kwargs: bool) -> None:
        """Record a bool property in the cache, then set it on the Mol.

        Args:
            key: Property name.
            value: Property value as bool.
            **kwargs: Additional arguments passed to RDKit SetBoolProp.
        """
        self._prop_cache[key] = value
        super().SetBoolProp(key, value, **kwargs)

    def SetIntProp(self, key: str, value: int, **kwargs: bool) -> None:
        """Record an int property in the cache, then set it on the Mol.

        Args:
            key: Property name.
            value: Property value as int.
            **kwargs: Additional arguments passed to RDKit SetIntProp.
        """
        self._prop_cache[key] = value
        super().SetIntProp(key, value, **kwargs)

    def restore_properties(self) -> None:
        """Write every cached property back onto the Mol.

        The setter is chosen from the cached value's Python type. Values
        that are not bool, int or float are stored as their ``str()`` form.
        """
        for name, cached in self._prop_cache.items():
            # bool must be tested before int because bool subclasses int.
            if isinstance(cached, bool):
                self.SetBoolProp(name, cached)
            elif isinstance(cached, int):
                self.SetIntProp(name, cached)
            elif isinstance(cached, float):
                self.SetDoubleProp(name, cached)
            else:
                self.SetProp(name, str(cached))

    def GetPropsAsDict(self, **kwargs: bool) -> dict[str, Any]:
        """Restore cached properties, then return the Mol's properties.

        Args:
            **kwargs: Additional arguments passed to RDKit GetPropsAsDict.

        Returns:
            Dictionary of molecule properties as reported by RDKit.
        """
        self.restore_properties()
        props: dict[str, Any] = super().GetPropsAsDict(**kwargs)
        return props
99
+
100
+
101
def wrap_mol(mol: Chem.Mol) -> Mol:
    """Build a property-caching ``Mol`` from an RDKit Mol.

    Args:
        mol: RDKit Mol object.

    Returns:
        New ``Mol`` constructed from ``mol``.
    """
    wrapped = Mol(mol)
    return wrapped
111
+
112
+
113
def unwrap_mol(cmx_mol: Mol | Chem.Mol) -> Chem.Mol:
    """Return a plain RDKit Mol for a ``Mol`` wrapper.

    ``Mol`` subclasses ``Chem.Mol`` and has no ``mol`` attribute, so the
    previous ``cmx_mol.mol`` lookup raised ``AttributeError`` for every
    wrapper. The wrapper is instead copy-constructed into a plain
    ``Chem.Mol`` after its cached properties have been written back, so the
    copy carries them.

    Args:
        cmx_mol: Mol wrapper or plain RDKit Mol.

    Returns:
        A plain ``Chem.Mol`` copy when given a ``Mol`` wrapper; otherwise the
        argument itself, unchanged.
    """
    if isinstance(cmx_mol, Mol):
        # Push cached properties onto the molecule before copying it.
        cmx_mol.restore_properties()
        return Chem.Mol(cmx_mol)
    return cmx_mol
cmxflow/mcp/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """MCP server for cmxflow workflow building and execution."""
2
+
3
+ from cmxflow.mcp.server import mcp
4
+
5
+
6
def run() -> None:
    """Start the MCP server by calling ``mcp.run()``."""
    start_server = mcp.run
    start_server()
9
+
10
+
11
+ __all__ = ["mcp", "run"]