rowan-mcp 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rowan-mcp might be problematic. Click here for more details.

Files changed (70) hide show
  1. rowan_mcp/__init__.py +2 -2
  2. rowan_mcp/__main__.py +3 -5
  3. rowan_mcp/functions_v2/BENCHMARK.md +86 -0
  4. rowan_mcp/functions_v2/molecule_lookup.py +232 -0
  5. rowan_mcp/functions_v2/protein_management.py +141 -0
  6. rowan_mcp/functions_v2/submit_basic_calculation_workflow.py +195 -0
  7. rowan_mcp/functions_v2/submit_conformer_search_workflow.py +158 -0
  8. rowan_mcp/functions_v2/submit_descriptors_workflow.py +52 -0
  9. rowan_mcp/functions_v2/submit_docking_workflow.py +244 -0
  10. rowan_mcp/functions_v2/submit_fukui_workflow.py +114 -0
  11. rowan_mcp/functions_v2/submit_irc_workflow.py +58 -0
  12. rowan_mcp/functions_v2/submit_macropka_workflow.py +99 -0
  13. rowan_mcp/functions_v2/submit_pka_workflow.py +72 -0
  14. rowan_mcp/functions_v2/submit_protein_cofolding_workflow.py +88 -0
  15. rowan_mcp/functions_v2/submit_redox_potential_workflow.py +55 -0
  16. rowan_mcp/functions_v2/submit_scan_workflow.py +82 -0
  17. rowan_mcp/functions_v2/submit_solubility_workflow.py +157 -0
  18. rowan_mcp/functions_v2/submit_tautomer_search_workflow.py +51 -0
  19. rowan_mcp/functions_v2/workflow_management_v2.py +382 -0
  20. rowan_mcp/server.py +109 -144
  21. rowan_mcp/tests/basic_calculation_from_json.py +0 -0
  22. rowan_mcp/tests/basic_calculation_with_constraint.py +33 -0
  23. rowan_mcp/tests/basic_calculation_with_solvent.py +0 -0
  24. rowan_mcp/tests/bde.py +37 -0
  25. rowan_mcp/tests/benchmark_queries.md +120 -0
  26. rowan_mcp/tests/cofolding_screen.py +131 -0
  27. rowan_mcp/tests/conformer_dependent_redox.py +37 -0
  28. rowan_mcp/tests/conformers.py +31 -0
  29. rowan_mcp/tests/data.json +189 -0
  30. rowan_mcp/tests/docking_screen.py +157 -0
  31. rowan_mcp/tests/irc.py +24 -0
  32. rowan_mcp/tests/macropka.py +13 -0
  33. rowan_mcp/tests/multistage_opt.py +13 -0
  34. rowan_mcp/tests/optimization.py +21 -0
  35. rowan_mcp/tests/phenol_pka.py +36 -0
  36. rowan_mcp/tests/pka.py +36 -0
  37. rowan_mcp/tests/protein_cofolding.py +17 -0
  38. rowan_mcp/tests/scan.py +28 -0
  39. {rowan_mcp-1.0.1.dist-info → rowan_mcp-2.0.0.dist-info}/METADATA +49 -33
  40. rowan_mcp-2.0.0.dist-info/RECORD +42 -0
  41. rowan_mcp/functions/admet.py +0 -94
  42. rowan_mcp/functions/bde.py +0 -113
  43. rowan_mcp/functions/calculation_retrieve.py +0 -89
  44. rowan_mcp/functions/conformers.py +0 -80
  45. rowan_mcp/functions/descriptors.py +0 -92
  46. rowan_mcp/functions/docking.py +0 -340
  47. rowan_mcp/functions/docking_enhanced.py +0 -174
  48. rowan_mcp/functions/electronic_properties.py +0 -205
  49. rowan_mcp/functions/folder_management.py +0 -137
  50. rowan_mcp/functions/fukui.py +0 -219
  51. rowan_mcp/functions/hydrogen_bond_basicity.py +0 -94
  52. rowan_mcp/functions/irc.py +0 -125
  53. rowan_mcp/functions/macropka.py +0 -120
  54. rowan_mcp/functions/molecular_converter.py +0 -423
  55. rowan_mcp/functions/molecular_dynamics.py +0 -191
  56. rowan_mcp/functions/molecule_lookup.py +0 -57
  57. rowan_mcp/functions/multistage_opt.py +0 -171
  58. rowan_mcp/functions/pdb_handler.py +0 -200
  59. rowan_mcp/functions/pka.py +0 -137
  60. rowan_mcp/functions/redox_potential.py +0 -352
  61. rowan_mcp/functions/scan.py +0 -536
  62. rowan_mcp/functions/scan_analyzer.py +0 -347
  63. rowan_mcp/functions/solubility.py +0 -277
  64. rowan_mcp/functions/spin_states.py +0 -747
  65. rowan_mcp/functions/system_management.py +0 -368
  66. rowan_mcp/functions/tautomers.py +0 -91
  67. rowan_mcp/functions/workflow_management.py +0 -422
  68. rowan_mcp-1.0.1.dist-info/RECORD +0 -34
  69. {rowan_mcp-1.0.1.dist-info → rowan_mcp-2.0.0.dist-info}/WHEEL +0 -0
  70. {rowan_mcp-1.0.1.dist-info → rowan_mcp-2.0.0.dist-info}/entry_points.txt +0 -0
@@ -1,125 +0,0 @@
1
- """
2
- Rowan IRC (Intrinsic Reaction Coordinate) function for MCP tool integration.
3
- """
4
-
5
- from typing import Any, Dict, List, Optional
6
- import rowan
7
- import logging
8
- import os
9
-
10
- # Set up logger
11
logger = logging.getLogger(__name__)

# Read the Rowan credential from the process environment. A missing (or
# empty) value only produces a warning; the client is left unconfigured.
api_key = os.environ.get("ROWAN_API_KEY")
if not api_key:
    logger.warning("ROWAN_API_KEY not found in environment")
else:
    rowan.api_key = api_key
19
-
20
def rowan_irc(
    name: str,
    molecule: str,
    mode: str = "rapid",
    solvent: Optional[str] = None,
    preopt: bool = False,
    max_irc_steps: int = 10,
    step_size: float = 0.05,
    starting_ts: Optional[str] = None,
    # Workflow parameters
    folder_uuid: Optional[str] = None,
    blocking: bool = True,
    ping_interval: int = 5
) -> str:
    """Follow intrinsic reaction coordinates from transition states.

    Validates the arguments, submits an ``"irc"`` workflow through
    ``rowan.compute`` and summarises the outcome as text. Failures are
    reported through the returned string; exceptions raised during the
    submission are logged and converted into a message.

    Args:
        name: Name for the calculation
        molecule: Molecule SMILES string (should be a transition state)
        mode: Calculation mode ("rapid", "careful", "meticulous");
            matched case-insensitively
        solvent: Solvent for the calculation (optional)
        preopt: Whether to pre-optimize the structure before IRC (default: False)
        max_irc_steps: Maximum number of IRC steps to take (default: 10)
        step_size: Step size for IRC in Angstroms (default: 0.05, range: 0.001-0.1)
        starting_ts: UUID of a previous transition state calculation (optional)
        folder_uuid: Optional folder UUID for organization
        blocking: Whether to wait for completion (default: True)
        ping_interval: Check status interval in seconds (default: 5)

    Returns:
        A message string: ``"Error: ..."`` for rejected arguments, a
        success/failure summary containing the workflow UUID when
        ``blocking`` is true, a "submitted" notice otherwise, or
        ``"IRC calculation failed: ..."`` if the submission raised.
    """
    # Parameter validation
    valid_modes = ("rapid", "careful", "meticulous")
    mode_lower = mode.lower()
    if mode_lower not in valid_modes:
        return f"Error: Invalid mode '{mode}'. Valid options: {', '.join(valid_modes)}"

    # Chained comparison on purpose: NaN compares False against everything,
    # so "x < lo or x > hi" would silently accept it.
    if not (0.001 <= step_size <= 0.1):
        return f"Error: step_size must be between 0.001 and 0.1 Å (got {step_size})"

    if max_irc_steps <= 0:
        return f"Error: max_irc_steps must be positive (got {max_irc_steps})"

    # Build the keyword arguments for rowan.compute
    compute_params = {
        "name": name,
        "molecule": molecule,
        "workflow_type": "irc",
        "mode": mode_lower,
        "preopt": preopt,
        "max_irc_steps": max_irc_steps,
        "step_size": step_size,
        "folder_uuid": folder_uuid,
        "blocking": blocking,
        "ping_interval": ping_interval,
    }

    # Optional parameters are only forwarded when they carry a value
    if solvent:
        compute_params["solvent"] = solvent
    if starting_ts:
        compute_params["starting_ts"] = starting_ts

    try:
        # Submit IRC calculation
        result = rowan.compute(**compute_params)

        # Format results
        uuid = result.get('uuid', 'N/A')
        status = result.get('status', 'unknown')

        if not blocking:
            return f"IRC calculation '{name}' submitted!\nUUID: {uuid}\nStatus: Running..."
        if status == "success":
            return f"IRC calculation '{name}' completed successfully!\nUUID: {uuid}"
        return f"IRC calculation failed\nUUID: {uuid}\nStatus: {status}"

    except Exception as e:
        # Top-level boundary of the tool: keep the traceback in the log and
        # hand a plain message back to the caller.
        logger.exception("Error in rowan_irc: %s", e)
        return f"IRC calculation failed: {str(e)}"
107
-
108
def test_rowan_irc():
    """Smoke-test rowan_irc with a non-blocking ethylene submission.

    Returns:
        True when rowan_irc reports an accepted submission, False when it
        raises or returns one of its error messages.
    """
    try:
        result = rowan_irc(
            name="test_irc",
            molecule="C=C",
            mode="rapid",
            max_irc_steps=5,
            blocking=False
        )
    except Exception as e:
        print(f"IRC test failed: {e}")
        return False

    print(f"IRC test result: {result}")
    # rowan_irc reports problems through its return string instead of
    # raising, so the text has to be inspected to detect a failure.
    return not result.startswith(("Error:", "IRC calculation failed"))


if __name__ == "__main__":
    test_rowan_irc()
@@ -1,120 +0,0 @@
1
- """MacropKa workflow function for MCP server."""
2
-
3
- import os
4
- import logging
5
- from typing import Optional, Union, List
6
-
7
- import rowan
8
-
9
- # Configure logging
10
logger = logging.getLogger(__name__)

# Read the Rowan credential from the process environment. A missing (or
# empty) value only produces a warning; the client is left unconfigured.
api_key = os.environ.get("ROWAN_API_KEY")
if not api_key:
    logger.warning("ROWAN_API_KEY not found in environment")
else:
    rowan.api_key = api_key
18
-
19
-
20
def log_rowan_api_call(func_name: str, **kwargs) -> None:
    """Log Rowan API calls for debugging.

    Args:
        func_name: Name of the API function about to be called.
        **kwargs: Keyword arguments that will be passed to it.
    """
    # Lazy %-style arguments: the kwargs dict is only rendered to text when
    # DEBUG logging is actually enabled.
    logger.debug("Calling %s with args: %s", func_name, kwargs)
23
-
24
-
25
def rowan_macropka(
    name: str,
    molecule: str,
    min_pH: float = 0.0,
    max_pH: float = 14.0,
    max_charge: int = 2,
    min_charge: int = -2,
    compute_aqueous_solubility: bool = False,
    compute_solvation_energy: bool = True,
    folder_uuid: Optional[str] = None,
    blocking: bool = True,
    ping_interval: int = 5
) -> str:
    """
    Calculate macroscopic pKa values and related properties for a molecule.

    This workflow computes pKa values, microstates, isoelectric point, and optionally
    solvation energy and aqueous solubility across different pH values.

    Args:
        name: Name for the calculation
        molecule: SMILES string of the molecule
        min_pH: Minimum pH for calculations (default: 0.0)
        max_pH: Maximum pH for calculations (default: 14.0)
        max_charge: Maximum charge to consider for microstates (default: 2)
        min_charge: Minimum charge to consider for microstates (default: -2)
        compute_aqueous_solubility: Whether to compute aqueous solubility by pH (default: False)
        compute_solvation_energy: Whether to compute solvation energy for Kpuu (default: True)
        folder_uuid: UUID of folder to save results in
        blocking: Wait for calculation to complete (default: True)
        ping_interval: How often to check status in blocking mode (default: 5 seconds)

    Returns:
        An "Error: ..." string for an invalid pH or charge range, a
        "MacropKa calculation failed: ..." string if the submission raised,
        otherwise whatever ``rowan.compute`` returned.
    """
    # Validate pH range. Written as "not (a < b)" so that NaN bounds, which
    # compare False in every direction, are rejected as well.
    if not (min_pH < max_pH):
        return "Error: min_pH must be less than max_pH"

    # Validate charge range
    if not (min_charge < max_charge):
        return "Error: min_charge must be less than max_charge"

    # One keyword set shared by the debug log and the actual submission so
    # the two cannot drift apart.
    request = {
        "workflow_type": "macropka",
        "name": name,
        "molecule": molecule,
        "min_pH": min_pH,
        "max_pH": max_pH,
        "max_charge": max_charge,
        "min_charge": min_charge,
        "compute_aqueous_solubility": compute_aqueous_solubility,
        "compute_solvation_energy": compute_solvation_energy,
        "folder_uuid": folder_uuid,
        "blocking": blocking,
        "ping_interval": ping_interval,
    }

    try:
        # Log the API call
        log_rowan_api_call("rowan.compute", **request)

        # Submit calculation. The SMILES is sent twice: as `molecule` for
        # rowan.compute() and as `initial_smiles`, which the original
        # author noted is required by the MacropKaWorkflow model.
        result = rowan.compute(initial_smiles=molecule, **request)

        # NOTE(review): returned as-is although the annotation says str —
        # confirm what rowan.compute returns before relying on the type.
        return result

    except Exception as e:
        logger.exception("Error in rowan_macropka: %s", e)
        return f"MacropKa calculation failed: {str(e)}"
109
-
110
-
111
- # Test function
112
if __name__ == "__main__":
    # Manual smoke run: blocking ethanol calculation with the optional
    # aqueous-solubility output switched on.
    smoke_args = {
        "name": "Ethanol MacropKa Test",
        "molecule": "CCO",
        "compute_aqueous_solubility": True,
        "blocking": True,
    }
    print(rowan_macropka(**smoke_args))
@@ -1,423 +0,0 @@
1
- """
2
- Dynamic molecular formula to SMILES converter for coordination complexes.
3
- Uses xyz2mol_tm for transition metal complexes and RDKit for standard molecules.
4
- """
5
-
6
- import re
7
- import logging
8
- from typing import Optional, Dict, List, Tuple
9
- from rdkit import Chem
10
- from rdkit.Chem import rdMolDescriptors
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
class MolecularConverter:
    """Converts various molecular input formats to SMILES strings.

    :meth:`convert_to_smiles` tries, in order: strings that already parse
    as SMILES, bare XYZ coordinate lines, coordination-complex formulas and
    a small table of molecular formulas. The conversion is heuristic; input
    that cannot be handled is reported through sentinel strings such as
    ``UNSUPPORTED_COMPLEX: ...`` or ``UNKNOWN_FORMULA: ...`` instead of an
    exception.
    """

    # Unicode subscript/superscript digits and signs -> plain ASCII.
    _UNICODE_TO_ASCII = str.maketrans(
        "₀₁₂₃₄₅₆₇₈₉⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻",
        "01234567890123456789+-",
    )

    # Elements whose presence next to a transition metal is taken as a hint
    # that the formula describes a coordination compound.
    _DONOR_ELEMENTS = ('Cl', 'Br', 'I', 'F', 'N', 'O', 'S', 'P')

    def __init__(self):
        """Initialize the molecular converter."""
        self.transition_metals = {
            'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
            'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd',
            'La', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg'
        }

    def convert_to_smiles(self, molecule_input: str) -> str:
        """
        Convert various molecular input formats to SMILES.

        Args:
            molecule_input: Input molecular representation

        Returns:
            SMILES string representation, a sentinel string produced by one
            of the format-specific converters, or the (normalized) input
            itself when no format was recognised.
        """
        # Clean input, then fold Unicode sub/superscripts to ASCII
        molecule_input = self._normalize_unicode_formula(molecule_input.strip())

        # Check if already valid SMILES
        if self._is_valid_smiles(molecule_input):
            return molecule_input

        # Check if XYZ coordinates
        if self._is_xyz_format(molecule_input):
            return self._convert_xyz_to_smiles(molecule_input)

        # Check if coordination complex formula
        if self._is_coordination_complex(molecule_input):
            return self._convert_coordination_complex_to_smiles(molecule_input)

        # Check if simple molecular formula
        if self._is_molecular_formula(molecule_input):
            return self._convert_molecular_formula_to_smiles(molecule_input)

        # Default: assume it's already SMILES or unsupported
        return molecule_input

    def _normalize_unicode_formula(self, formula: str) -> str:
        """Convert Unicode subscripts and superscripts to regular ASCII."""
        # Single pass over the string instead of one .replace() per character
        formula = formula.translate(self._UNICODE_TO_ASCII)
        logger.info(" Unicode normalized: '%s'", formula)
        return formula

    def _is_valid_smiles(self, smiles: str) -> bool:
        """Check if string is a valid SMILES.

        Returns False for strings flagged by
        ``_is_malformed_coordination_smiles``, for strings RDKit cannot
        parse, and when the check itself raises.
        """
        try:
            # First check for obviously malformed coordination complex patterns
            if self._is_malformed_coordination_smiles(smiles):
                return False

            return Chem.MolFromSmiles(smiles) is not None
        except Exception:
            # Narrower than the former bare "except:", which also swallowed
            # KeyboardInterrupt and SystemExit.
            return False

    def _is_malformed_coordination_smiles(self, smiles: str) -> bool:
        """Check for malformed coordination complex SMILES patterns."""
        # Pattern like [Mn+4]([Cl-])([Cl-])... - clearly malformed coordination complex
        if re.search(r'\[[A-Z][a-z]?\+\d+\]\(\[.*?\]\)', smiles):
            return True

        # More than 2 parenthetical bracket groups suggests malformed coordination
        if smiles.count('([') > 2:
            return True

        # Check for unrealistic oxidation states in brackets
        oxidation_match = re.search(r'\[([A-Z][a-z]?)\+(\d+)\]', smiles)
        if oxidation_match:
            metal = oxidation_match.group(1)
            ox_state = int(oxidation_match.group(2))
            # Flag unrealistic oxidation states
            if ox_state > 8 or (metal in ['Mn', 'Fe', 'Co', 'Ni', 'Cu'] and ox_state > 7):
                return True

        return False

    def _is_xyz_format(self, text: str) -> bool:
        """Check if input is XYZ coordinate format.

        Every line must be ``<element> <x> <y> <z>`` (extra columns are
        ignored) and there must be at least two lines.
        """
        lines = text.strip().split('\n')
        if len(lines) < 2:
            return False

        for line in lines:
            parts = line.strip().split()
            if len(parts) < 4:
                return False
            # First part should be element symbol
            element = parts[0]
            if not element.isalpha() or len(element) > 2:
                return False
            # Next 3 should be numbers
            try:
                for value in parts[1:4]:
                    float(value)
            except ValueError:
                return False
        return True

    def _is_coordination_complex(self, formula: str) -> bool:
        """Check if formula represents a coordination complex."""
        # Look for patterns like [MnCl6]4+, Mn(Cl)6, etc.
        patterns = (
            r'\[.*\]\d*[+-]',            # [MnCl6]4+
            r'\w+\([A-Z][a-z]?\)\d+',    # Mn(Cl)6
        )
        if any(re.search(pattern, formula) for pattern in patterns):
            return True

        # A transition metal symbol (not standing alone) together with a
        # typical donor element also counts as a coordination compound.
        if not any(element in formula for element in self._DONOR_ELEMENTS):
            return False
        return any(tm in formula and formula != tm for tm in self.transition_metals)

    def _is_molecular_formula(self, formula: str) -> bool:
        """Check if input is a simple molecular formula."""
        # Pattern for molecular formulas like H2O, CH4, etc.
        pattern = r'^[A-Z][a-z]?(\d+)?([A-Z][a-z]?(\d+)?)*$'
        return bool(re.match(pattern, formula))

    @staticmethod
    def _count_atoms(atoms: List[str]) -> Dict[str, int]:
        """Return element -> occurrence count in a single pass over *atoms*."""
        counts: Dict[str, int] = {}
        for atom in atoms:
            counts[atom] = counts.get(atom, 0) + 1
        return counts

    @staticmethod
    def _chloride_salt(metal: str, ox_state: int, ligand_count: int) -> str:
        """Return dot-separated SMILES: *ligand_count* chlorides plus the metal cation."""
        return f"{'[Cl-].' * ligand_count}[{metal}+{ox_state}]".rstrip('.')

    def _convert_xyz_to_smiles(self, xyz_text: str) -> str:
        """
        Convert XYZ coordinates to SMILES.

        Lines with fewer than four columns are skipped. Any parsing error is
        logged and reported as an ``UNSUPPORTED_XYZ: ...`` string.
        """
        try:
            atoms = []
            coords = []

            for line in xyz_text.strip().split('\n'):
                parts = line.strip().split()
                if len(parts) >= 4:
                    x, y, z = map(float, parts[1:4])
                    atoms.append(parts[0])
                    coords.append([x, y, z])

            # Transition-metal systems and organics use separate lookups
            if any(atom in self.transition_metals for atom in atoms):
                return self._handle_transition_metal_xyz(atoms, coords)
            return self._handle_organic_xyz(atoms, coords)

        except Exception as e:
            logger.error("Failed to convert XYZ to SMILES: %s", e)
            return f"UNSUPPORTED_XYZ: {xyz_text[:50]}..."

    def _handle_transition_metal_xyz(self, atoms: List[str], coords: List[List[float]]) -> str:
        """Handle XYZ conversion for transition metal complexes.

        Only the element counts are used; *coords* is accepted but not read.
        """
        atom_counts = self._count_atoms(atoms)

        # Hexachloro complexes with a fixed metal charge, checked in this order
        if atom_counts.get('Cl', 0) == 6:
            for metal, ox_state in (('Mn', 2), ('Fe', 3), ('Co', 3)):
                if metal in atom_counts:
                    return self._chloride_salt(metal, ox_state, 6)

        # Single metal element
        if len(atom_counts) == 1:
            metal = next(iter(atom_counts))
            if metal in self.transition_metals:
                return f"[{metal}]"

        # Generic fallback
        return f"COMPLEX_TM: {'-'.join(sorted(atom_counts.keys()))}"

    def _handle_organic_xyz(self, atoms: List[str], coords: List[List[float]]) -> str:
        """Handle XYZ conversion for organic molecules.

        Only the element counts are used; *coords* is accepted but not read.
        """
        atom_counts = self._count_atoms(atoms)

        # Simple cases, identified purely by composition
        known = (
            ({'C': 1, 'H': 4}, "C"),            # Methane
            ({'H': 2, 'O': 1}, "O"),            # Water
            ({'C': 2, 'H': 6, 'O': 1}, "CCO"),  # Ethanol
        )
        for composition, smiles in known:
            if atom_counts == composition:
                return smiles
        return f"ORGANIC: {'-'.join(sorted(atom_counts.keys()))}"

    def _convert_coordination_complex_to_smiles(self, formula: str) -> str:
        """Convert coordination complex formulas to SMILES.

        Only chloride complexes of the known transition metals are handled;
        the metal charge is chosen by simple heuristics. Anything else
        yields ``UNSUPPORTED_COMPLEX: <formula>``.
        """
        # Handle malformed SMILES like [Mn+4]([Cl-])([Cl-])([Cl-])([Cl-])([Cl-])[Cl-]
        match = re.match(r'\[([A-Z][a-z]?)\+(\d+)\]', formula)
        if match:
            metal = match.group(1)
            ox_state = int(match.group(2))

            # Count all chloride ligands in the formula
            ligand_count = formula.count('[Cl-]')

            # If we found chloride ligands, convert to proper format
            if ligand_count > 0:
                # Adjust oxidation state for realistic chemistry
                if metal == 'Mn' and ox_state == 4 and ligand_count == 6:
                    ox_state = 2  # MnCl6 4- is more realistic than Mn4+ with 6 Cl-
                return self._chloride_salt(metal, ox_state, ligand_count)

        # [MnCl6]4+ pattern
        match = re.match(r'\[([A-Z][a-z]?)([A-Z][a-z]?)(\d+)\](\d*)([+-])', formula)
        if match:
            metal, ligand, ligand_count, charge_num, charge_sign = match.groups()
            ligand_count = int(ligand_count)

            if metal in self.transition_metals and ligand == 'Cl':
                if charge_sign == '+':
                    # For positive complex charge, assume higher oxidation state
                    ox_state = 6 if charge_num == '4' else 3
                else:
                    # For negative complex charge, use standard oxidation states
                    ox_state = 2 if metal == 'Mn' else 3
                return self._chloride_salt(metal, ox_state, ligand_count)

        # Mn(Cl)6+4 pattern (with charge)
        match = re.match(r'([A-Z][a-z]?)\(([A-Z][a-z]?)\)(\d+)([+-])(\d+)', formula)
        if match:
            metal, ligand, ligand_count, charge_sign, charge_value = match.groups()
            ligand_count = int(ligand_count)
            charge_value = int(charge_value)

            if metal in self.transition_metals and ligand == 'Cl':
                if charge_sign == '+':
                    ox_state = charge_value + 2 if metal == 'Mn' else charge_value + 1
                else:
                    # Anionic complex [MCl_n]^(q-): each chloride carries -1,
                    # so the metal's formal charge is n - q. (q - n, as
                    # previously computed, is negative for typical inputs
                    # and always ended up clamped to 1.)
                    ox_state = ligand_count - charge_value

                # Cap oxidation state at reasonable values
                ox_state = max(min(ox_state, 7), 1)
                return self._chloride_salt(metal, ox_state, ligand_count)

        # Mn(Cl)6 pattern (without charge)
        match = re.match(r'([A-Z][a-z]?)\(([A-Z][a-z]?)\)(\d+)', formula)
        if match:
            metal, ligand, ligand_count = match.groups()
            ligand_count = int(ligand_count)

            if metal in self.transition_metals and ligand == 'Cl':
                ox_state = 2 if metal == 'Mn' else 3
                return self._chloride_salt(metal, ox_state, ligand_count)

        # CoCl6 3- pattern (with charge at end) - checked before the simple MnCl6 pattern
        match = re.match(r'([A-Z][a-z]?)([A-Z][a-z]?)(\d+)(\d+)([+-])', formula)
        if match:
            metal, ligand, ligand_count, charge_value, charge_sign = match.groups()
            ligand_count = int(ligand_count)
            charge_value = int(charge_value)

            if metal in self.transition_metals and ligand == 'Cl':
                # For negatively charged complexes, use standard oxidation states
                if charge_sign == '-':
                    ox_state = 3 if metal == 'Co' else 2
                else:
                    ox_state = charge_value + 2

                # Cap oxidation state at reasonable values
                ox_state = max(min(ox_state, 7), 1)
                return self._chloride_salt(metal, ox_state, ligand_count)

        # Simple MnCl6 pattern (without charge); $ anchors at end of string
        match = re.match(r'([A-Z][a-z]?)([A-Z][a-z]?)(\d+)$', formula)
        if match:
            metal, ligand, ligand_count = match.groups()
            ligand_count = int(ligand_count)

            if metal in self.transition_metals and ligand == 'Cl':
                ox_state = 2 if metal == 'Mn' else 3
                return self._chloride_salt(metal, ox_state, ligand_count)

        # Single metal
        if formula in self.transition_metals:
            return f"[{formula}]"

        return f"UNSUPPORTED_COMPLEX: {formula}"

    def _convert_molecular_formula_to_smiles(self, formula: str) -> str:
        """Convert simple molecular formulas to SMILES.

        Returns ``UNKNOWN_FORMULA: <formula>`` for anything outside the
        built-in table and the single-element cases.
        """
        # Common molecular formulas
        conversions = {
            'H2O': 'O',
            'CH4': 'C',
            'C2H6': 'CC',
            'C2H5OH': 'CCO',
            'C6H6': 'c1ccccc1',
            'NH3': 'N',
            'CO2': 'O=C=O',
            'CO': '[C-]#[O+]'
        }

        # Handle single atoms (including transition metals)
        if formula in self.transition_metals:
            return f"[{formula}]"

        # Handle other single elements
        single_elements = ('H', 'C', 'N', 'O', 'F', 'P', 'S', 'Cl', 'Br', 'I')
        if formula in single_elements:
            return formula

        return conversions.get(formula, f"UNKNOWN_FORMULA: {formula}")
371
-
372
- # Global converter instance
373
_converter = MolecularConverter()


def convert_to_smiles(molecule_input: str) -> str:
    """
    Module-level shortcut that converts a molecular input to SMILES.

    Delegates to the shared ``_converter`` instance.

    Args:
        molecule_input: Input molecular representation

    Returns:
        Whatever ``MolecularConverter.convert_to_smiles`` returns for the input
    """
    smiles = _converter.convert_to_smiles(molecule_input)
    return smiles
386
-
387
def test_molecular_converter():
    """Test the molecular converter with various inputs.

    Runs ``convert_to_smiles`` over a table of (input, expected) pairs and
    prints one PASS/FAIL line per case, plus the expected value on a
    mismatch.
    """
    test_cases = [
        # Already valid SMILES
        ("[Cl-].[Mn+2]", "[Cl-].[Mn+2]"),
        ("CCO", "CCO"),

        # Coordination complexes
        ("[MnCl6]4+", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+6]"),
        ("[MnCl6]4-", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),
        ("Mn(Cl)6", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),
        ("MnCl6", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),

        # Malformed SMILES that need fixing
        ("[Mn+4]([Cl-])([Cl-])([Cl-])([Cl-])([Cl-])[Cl-]", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),
        ("[Fe+3]([Cl-])([Cl-])([Cl-])([Cl-])([Cl-])([Cl-])", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Fe+3]"),

        # Simple formulas
        ("H2O", "O"),
        ("CH4", "C"),
        ("Mn", "[Mn]"),

        # XYZ format
        ("Mn 0.0 0.0 0.0\nCl 2.3 0.0 0.0\nCl -2.3 0.0 0.0\nCl 0.0 2.3 0.0\nCl 0.0 -2.3 0.0\nCl 0.0 0.0 2.3\nCl 0.0 0.0 -2.3",
         "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]")
    ]

    print("Testing molecular converter:")
    for input_mol, expected in test_cases:
        result = convert_to_smiles(input_mol)
        passed = result == expected
        # Distinct markers: both branches used to be the empty string, so a
        # failing case looked exactly like a passing one.
        status = "PASS" if passed else "FAIL"
        # Only add the ellipsis when the input really was cut off
        shown = input_mol if len(input_mol) <= 30 else f"{input_mol[:30]}..."
        print(f"{status} '{shown}' → '{result}'")
        if not passed:
            print(f"  Expected: '{expected}'")


if __name__ == "__main__":
    test_molecular_converter()