rowan-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rowan-mcp might be problematic. Click here for more details.

Files changed (35) hide show
  1. rowan_mcp/__init__.py +14 -0
  2. rowan_mcp/__main__.py +14 -0
  3. rowan_mcp/functions/admet.py +94 -0
  4. rowan_mcp/functions/bde.py +113 -0
  5. rowan_mcp/functions/calculation_retrieve.py +89 -0
  6. rowan_mcp/functions/conformers.py +135 -0
  7. rowan_mcp/functions/descriptors.py +92 -0
  8. rowan_mcp/functions/docking.py +340 -0
  9. rowan_mcp/functions/docking_enhanced.py +174 -0
  10. rowan_mcp/functions/electronic_properties.py +263 -0
  11. rowan_mcp/functions/folder_management.py +137 -0
  12. rowan_mcp/functions/fukui.py +355 -0
  13. rowan_mcp/functions/hydrogen_bond_basicity.py +94 -0
  14. rowan_mcp/functions/irc.py +125 -0
  15. rowan_mcp/functions/macropka.py +195 -0
  16. rowan_mcp/functions/molecular_converter.py +423 -0
  17. rowan_mcp/functions/molecular_dynamics.py +191 -0
  18. rowan_mcp/functions/molecule_cache.db +0 -0
  19. rowan_mcp/functions/molecule_lookup.py +446 -0
  20. rowan_mcp/functions/multistage_opt.py +171 -0
  21. rowan_mcp/functions/pdb_handler.py +200 -0
  22. rowan_mcp/functions/pka.py +137 -0
  23. rowan_mcp/functions/redox_potential.py +352 -0
  24. rowan_mcp/functions/scan.py +536 -0
  25. rowan_mcp/functions/scan_analyzer.py +347 -0
  26. rowan_mcp/functions/solubility.py +277 -0
  27. rowan_mcp/functions/spin_states.py +747 -0
  28. rowan_mcp/functions/system_management.py +368 -0
  29. rowan_mcp/functions/tautomers.py +91 -0
  30. rowan_mcp/functions/workflow_management.py +422 -0
  31. rowan_mcp/server.py +169 -0
  32. rowan_mcp-0.1.0.dist-info/METADATA +216 -0
  33. rowan_mcp-0.1.0.dist-info/RECORD +35 -0
  34. rowan_mcp-0.1.0.dist-info/WHEEL +4 -0
  35. rowan_mcp-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,195 @@
1
+ """MacropKa workflow function for MCP server."""
2
+
3
+ import os
4
+ import json
5
+ import logging
6
+ from typing import Optional, Union, List
7
+
8
+ import rowan
9
+
10
# Module-level logger for the MacropKa workflow wrapper.
logger = logging.getLogger(__name__)

# Read the Rowan credential from the environment once, at import time.
# When it is missing, emit a warning so the gap is visible in the logs;
# otherwise hand the key to the rowan client module.
api_key = os.environ.get("ROWAN_API_KEY")
if not api_key:
    logger.warning("ROWAN_API_KEY not found in environment")
else:
    rowan.api_key = api_key
19
+
20
+
21
def log_rowan_api_call(func_name: str, **kwargs):
    """Log a Rowan API call at DEBUG level.

    Args:
        func_name: Name of the API function about to be invoked.
        **kwargs: Keyword arguments that will be passed to that function.
    """
    # Pass the values as logging arguments so the message is only formatted
    # when a handler actually accepts DEBUG records.
    logger.debug("Calling %s with args: %s", func_name, kwargs)
24
+
25
+
26
def _format_macropka_success(workflow_uuid, status, object_data):
    """Render the payload of a successful MacropKa workflow as a text report."""
    microstates = object_data.get("microstates", [])
    pka_values = object_data.get("pKa_values", [])
    isoelectric_point = object_data.get("isoelectric_point")
    solvation_energy = object_data.get("solvation_energy")
    kpuu_probability = object_data.get("kpuu_probability")
    microstate_weights_by_pH = object_data.get("microstate_weights_by_pH", [])
    logD_by_pH = object_data.get("logD_by_pH", [])
    aqueous_solubility_by_pH = object_data.get("aqueous_solubility_by_pH", [])

    # Every part carries its own newline so the joined text matches the
    # layout of the report line for line.
    parts = [
        "✅ MacropKa calculation completed successfully!\n",
        f"🔖 Workflow UUID: {workflow_uuid}\n",
        f"📋 Status: {status}\n\n",
    ]

    if pka_values:
        parts.append("📊 pKa Values:\n")
        parts.extend(
            f" • {pka.get('initial_charge', 'N/A')} → {pka.get('final_charge', 'N/A')}: pKa = {pka.get('pKa', 'N/A')}\n"
            for pka in pka_values
        )
        parts.append("\n")

    if microstates:
        parts.append(f"🔬 Microstates ({len(microstates)} found):\n")
        # Only the first five microstates are listed; the rest are summarised.
        parts.extend(
            f" {position}. Charge: {microstate.get('charge', 'N/A')}, Energy: {microstate.get('energy', 'N/A')} kcal/mol\n"
            for position, microstate in enumerate(microstates[:5], start=1)
        )
        if len(microstates) > 5:
            parts.append(f" ... and {len(microstates) - 5} more\n")
        parts.append("\n")

    if isoelectric_point is not None:
        parts.append(f"⚡ Isoelectric Point: pH {isoelectric_point}\n")

    if solvation_energy is not None:
        parts.append(f"💧 Solvation Energy: {solvation_energy} kcal/mol\n")

    if kpuu_probability is not None:
        parts.append(f"🧠 Kpuu Probability (≥0.3): {kpuu_probability:.2%}\n")

    # pH-dependent series are only summarised by their number of points.
    if logD_by_pH:
        parts.append(f"\n📈 logD values available for {len(logD_by_pH)} pH points\n")

    if aqueous_solubility_by_pH:
        parts.append(f"💧 Aqueous solubility values available for {len(aqueous_solubility_by_pH)} pH points\n")

    if microstate_weights_by_pH:
        parts.append(f"⚖️ Microstate weights available for {len(microstate_weights_by_pH)} pH points\n")

    return "".join(parts)


def _format_macropka_submission(
    workflow_uuid,
    min_pH,
    max_pH,
    min_charge,
    max_charge,
    compute_solvation_energy,
    compute_aqueous_solubility,
):
    """Render the confirmation text returned for a non-blocking submission."""
    return "".join([
        "📋 MacropKa calculation submitted!\n",
        f"🔖 Workflow UUID: {workflow_uuid}\n",
        "⏳ Status: Running...\n",
        "💡 Use rowan_workflow_management to check status\n",
        "\nCalculation parameters:\n",
        f" • pH range: {min_pH} - {max_pH}\n",
        f" • Charge range: {min_charge} to {max_charge}\n",
        f" • Compute solvation energy: {compute_solvation_energy}\n",
        f" • Compute aqueous solubility: {compute_aqueous_solubility}\n",
    ])


def rowan_macropka(
    name: str,
    molecule: str,
    min_pH: float = 0.0,
    max_pH: float = 14.0,
    max_charge: int = 2,
    min_charge: int = -2,
    compute_aqueous_solubility: bool = False,
    compute_solvation_energy: bool = True,
    folder_uuid: Optional[str] = None,
    blocking: bool = True,
    ping_interval: int = 5
) -> str:
    """
    Calculate macroscopic pKa values and related properties for a molecule.

    Submits a "macropka" workflow through ``rowan.compute`` and, in blocking
    mode, formats the returned pKa values, microstates, isoelectric point and
    optional solvation/solubility data as a text report.

    Args:
        name: Name for the calculation
        molecule: SMILES string of the molecule
        min_pH: Minimum pH for calculations (default: 0.0)
        max_pH: Maximum pH for calculations (default: 14.0)
        max_charge: Maximum charge to consider for microstates (default: 2)
        min_charge: Minimum charge to consider for microstates (default: -2)
        compute_aqueous_solubility: Whether to compute aqueous solubility by pH (default: False)
        compute_solvation_energy: Whether to compute solvation energy for Kpuu (default: True)
        folder_uuid: UUID of folder to save results in
        blocking: Wait for calculation to complete (default: True)
        ping_interval: How often to check status in blocking mode (default: 5 seconds)

    Returns:
        A formatted text report (blocking) or submission confirmation
        (non-blocking). If validation fails or any exception is raised, a JSON
        string of the form ``{"error": "<message>"}`` is returned instead;
        exceptions are never propagated to the caller.
    """
    try:
        # Reject empty or inverted ranges before contacting the API.
        if min_pH >= max_pH:
            return json.dumps({"error": "min_pH must be less than max_pH"})

        if min_charge >= max_charge:
            return json.dumps({"error": "min_charge must be less than max_charge"})

        log_rowan_api_call(
            "rowan.compute",
            workflow_type="macropka",
            name=name,
            molecule=molecule,
            min_pH=min_pH,
            max_pH=max_pH,
            max_charge=max_charge,
            min_charge=min_charge,
            compute_aqueous_solubility=compute_aqueous_solubility,
            compute_solvation_energy=compute_solvation_energy,
            folder_uuid=folder_uuid,
            blocking=blocking,
            ping_interval=ping_interval
        )

        # Submit calculation
        result = rowan.compute(
            workflow_type="macropka",
            name=name,
            molecule=molecule,  # Required by rowan.compute() API
            folder_uuid=folder_uuid,
            blocking=blocking,
            ping_interval=ping_interval,
            # Workflow-specific parameters for MacropKaWorkflow
            initial_smiles=molecule,  # Required by MacropKaWorkflow Pydantic model
            min_pH=min_pH,
            max_pH=max_pH,
            max_charge=max_charge,
            min_charge=min_charge,
            compute_aqueous_solubility=compute_aqueous_solubility,
            compute_solvation_energy=compute_solvation_energy
        )

        if not blocking:
            # Non-blocking mode - return submission confirmation
            return _format_macropka_submission(
                result.get("uuid", "unknown"),
                min_pH,
                max_pH,
                min_charge,
                max_charge,
                compute_solvation_energy,
                compute_aqueous_solubility,
            )

        status = result.get("status", "unknown")
        workflow_uuid = result.get("uuid", "unknown")

        if status != "success":
            # Any status other than "success" is reported as a failure.
            return f"❌ MacropKa calculation failed\n🔖 UUID: {workflow_uuid}\n📋 Status: {status}\n💬 Check workflow details for more information"

        return _format_macropka_success(
            workflow_uuid, status, result.get("object_data", {})
        )

    except Exception as e:
        # Boundary handler: log with traceback, then report the error as JSON
        # so the caller always receives a string.
        logger.exception("Error in rowan_macropka: %s", e)
        return json.dumps({"error": str(e)})
184
+
185
+
186
# Manual smoke test: running this module directly submits a blocking
# MacropKa calculation for ethanol and prints whatever comes back.
if __name__ == "__main__":
    result = rowan_macropka(
        "Ethanol MacropKa Test",
        "CCO",
        compute_aqueous_solubility=True,
        blocking=True,
    )
    print(result)
@@ -0,0 +1,423 @@
1
+ """
2
+ Dynamic molecular formula to SMILES converter for coordination complexes.
3
+ Uses xyz2mol_tm for transition metal complexes and RDKit for standard molecules.
4
+ """
5
+
6
+ import re
7
+ import logging
8
+ from typing import Optional, Dict, List, Tuple
9
+ from rdkit import Chem
10
+ from rdkit.Chem import rdMolDescriptors
11
+
12
# Module-level logger used by the converter methods below.
logger = logging.getLogger(__name__)


class MolecularConverter:
    """Converts various molecular input formats to SMILES strings.

    Supported inputs are: strings RDKit already parses as SMILES, simple
    "element x y z" coordinate listings, a handful of chloride coordination
    complex notations, and a small table of common molecular formulas.
    Inputs that are recognised but cannot be converted yield sentinel strings
    (``UNSUPPORTED_COMPLEX: ...``, ``UNKNOWN_FORMULA: ...``, ``COMPLEX_TM: ...``,
    ``ORGANIC: ...``, ``UNSUPPORTED_XYZ: ...``) rather than raising.
    """

    # Unicode subscripts/superscripts and their ASCII replacements.
    _UNICODE_TO_ASCII = str.maketrans({
        '₀': '0', '₁': '1', '₂': '2', '₃': '3', '₄': '4',
        '₅': '5', '₆': '6', '₇': '7', '₈': '8', '₉': '9',
        '⁰': '0', '¹': '1', '²': '2', '³': '3', '⁴': '4',
        '⁵': '5', '⁶': '6', '⁷': '7', '⁸': '8', '⁹': '9',
        '⁺': '+', '⁻': '-',
    })

    # Element symbols whose presence next to a transition metal marks a
    # formula as a coordination complex.
    _LIGAND_ELEMENTS = frozenset({'Cl', 'Br', 'I', 'F', 'N', 'O', 'S', 'P'})

    def __init__(self):
        """Initialize the molecular converter."""
        self.transition_metals = {
            'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
            'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd',
            'La', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg'
        }

    def convert_to_smiles(self, molecule_input: str) -> str:
        """
        Convert various molecular input formats to SMILES.

        The input is stripped and Unicode-normalised, then tried in order as:
        valid SMILES, XYZ coordinates, coordination complex formula, simple
        molecular formula. The first recogniser that accepts it decides the
        result; if none does, the normalised input is returned unchanged.

        Args:
            molecule_input: Input molecular representation

        Returns:
            SMILES string representation, or one of the sentinel strings
            described on the class when conversion is not supported
        """
        molecule_input = molecule_input.strip()
        molecule_input = self._normalize_unicode_formula(molecule_input)

        if self._is_valid_smiles(molecule_input):
            return molecule_input

        if self._is_xyz_format(molecule_input):
            return self._convert_xyz_to_smiles(molecule_input)

        if self._is_coordination_complex(molecule_input):
            return self._convert_coordination_complex_to_smiles(molecule_input)

        if self._is_molecular_formula(molecule_input):
            return self._convert_molecular_formula_to_smiles(molecule_input)

        # Default: assume it's already SMILES or unsupported
        return molecule_input

    def _normalize_unicode_formula(self, formula: str) -> str:
        """Convert Unicode subscripts and superscripts to regular ASCII."""
        # One translate pass replaces the chain of per-character replace calls.
        formula = formula.translate(self._UNICODE_TO_ASCII)
        logger.info(" Unicode normalized: '%s'", formula)
        return formula

    def _is_valid_smiles(self, smiles: str) -> bool:
        """Check if string is a valid SMILES.

        Strings matching the malformed-coordination heuristics are rejected
        before RDKit is consulted. Any exception raised while parsing is
        treated as "not valid".
        """
        try:
            if self._is_malformed_coordination_smiles(smiles):
                return False

            mol = Chem.MolFromSmiles(smiles)
            return mol is not None
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            return False

    def _is_malformed_coordination_smiles(self, smiles: str) -> bool:
        """Check for malformed coordination complex SMILES patterns."""
        # Pattern like [Mn+4]([Cl-])([Cl-])... - charged metal with a bracketed
        # ligand attached as a branch.
        if re.search(r'\[[A-Z][a-z]?\+\d+\]\(\[.*?\]\)', smiles):
            return True

        # More than 2 parenthetical bracket groups suggests malformed coordination
        if smiles.count('([') > 2:
            return True

        # Only the first "[X+n]" occurrence is inspected.
        oxidation_match = re.search(r'\[([A-Z][a-z]?)\+(\d+)\]', smiles)
        if oxidation_match:
            metal, ox_text = oxidation_match.groups()
            ox_state = int(ox_text)
            # Flag unrealistic oxidation states
            if ox_state > 8 or (metal in ['Mn', 'Fe', 'Co', 'Ni', 'Cu'] and ox_state > 7):
                return True

        return False

    def _is_xyz_format(self, text: str) -> bool:
        """Check if input is XYZ coordinate format.

        Requires at least two lines, and every line must be an alphabetic
        symbol of at most two characters followed by three numbers. A leading
        atom-count or comment line therefore makes the input fail this check.
        """
        rows = text.strip().split('\n')
        if len(rows) < 2:
            return False

        for row in rows:
            fields = row.strip().split()
            if len(fields) < 4:
                return False

            symbol = fields[0]
            if not symbol.isalpha() or len(symbol) > 2:
                return False

            try:
                for value in fields[1:4]:
                    float(value)
            except ValueError:
                return False
        return True

    def _is_coordination_complex(self, formula: str) -> bool:
        """Check if formula represents a coordination complex.

        Returns True for bracketed charged formulas ("[MnCl6]4+"),
        parenthesised ligand notation ("Mn(Cl)6"), or any formula that
        contains both a transition-metal symbol and a ligand element symbol.
        """
        patterns = [
            r'\[.*\]\d*[+-]',  # [MnCl6]4+
            r'\w+\([A-Z][a-z]?\)\d+',  # Mn(Cl)6
        ]
        if any(re.search(pattern, formula) for pattern in patterns):
            return True

        # Compare whole element symbols rather than substrings: a substring
        # test sees the "F" in "Fe", the "O" in "Os" or the "I" in "Ir" as a
        # ligand and misclassifies inputs such as "Fe2" or "Os3".
        symbols = set(re.findall(r'[A-Z][a-z]?', formula))
        has_metal = bool(symbols & self.transition_metals)
        has_ligand = bool(symbols & self._LIGAND_ELEMENTS)
        return has_metal and has_ligand

    def _is_molecular_formula(self, formula: str) -> bool:
        """Check if input is a simple molecular formula."""
        # Pattern for molecular formulas like H2O, CH4, etc.
        pattern = r'^[A-Z][a-z]?(\d+)?([A-Z][a-z]?(\d+)?)*$'
        return bool(re.match(pattern, formula))

    def _convert_xyz_to_smiles(self, xyz_text: str) -> str:
        """
        Convert XYZ coordinates to SMILES.

        Only the element symbols are used: the atoms are counted and matched
        against a small set of known compositions. Lines with fewer than four
        fields are skipped. On any error an ``UNSUPPORTED_XYZ: ...`` sentinel
        is returned.
        """
        try:
            atoms = []
            coords = []

            for line in xyz_text.strip().split('\n'):
                parts = line.strip().split()
                if len(parts) >= 4:
                    x, y, z = map(float, parts[1:4])
                    atoms.append(parts[0])
                    coords.append([x, y, z])

            if any(atom in self.transition_metals for atom in atoms):
                return self._handle_transition_metal_xyz(atoms, coords)
            return self._handle_organic_xyz(atoms, coords)

        except Exception as e:
            logger.error(f"Failed to convert XYZ to SMILES: {e}")
            return f"UNSUPPORTED_XYZ: {xyz_text[:50]}..."

    @staticmethod
    def _chloride_salt(metal: str, ox_state: int, ligand_count: int) -> str:
        """Build "[Cl-].[Cl-]...[M+n]" with ``ligand_count`` chloride ions."""
        return f"{'[Cl-].' * ligand_count}[{metal}+{ox_state}]"

    @staticmethod
    def _clamp_oxidation_state(ox_state: int) -> int:
        """Restrict an oxidation state to the range 1..7."""
        return max(min(ox_state, 7), 1)

    def _is_chloro_complex(self, metal: str, ligand: str) -> bool:
        """Return True for a known transition metal paired with chloride."""
        return metal in self.transition_metals and ligand == 'Cl'

    def _handle_transition_metal_xyz(self, atoms: List[str], coords: List[List[float]]) -> str:
        """Handle XYZ conversion for transition metal complexes.

        ``coords`` is accepted for signature compatibility but not used.
        """
        atom_counts = {atom: atoms.count(atom) for atom in set(atoms)}

        # Hexachloride patterns, checked in this order: MnCl6, FeCl6, CoCl6.
        if atom_counts.get('Cl', 0) == 6:
            for metal, ox_state in (('Mn', 2), ('Fe', 3), ('Co', 3)):
                if metal in atom_counts:
                    return self._chloride_salt(metal, ox_state, 6)

        # Single metal species (atom count is not considered)
        if len(atom_counts) == 1:
            (only_symbol,) = atom_counts
            if only_symbol in self.transition_metals:
                return f"[{only_symbol}]"

        # Generic fallback
        return f"COMPLEX_TM: {'-'.join(sorted(atom_counts.keys()))}"

    def _handle_organic_xyz(self, atoms: List[str], coords: List[List[float]]) -> str:
        """Handle XYZ conversion for organic molecules.

        ``coords`` is accepted for signature compatibility but not used.
        """
        atom_counts = {atom: atoms.count(atom) for atom in set(atoms)}

        known_compositions = (
            ({'C': 1, 'H': 4}, "C"),  # Methane
            ({'H': 2, 'O': 1}, "O"),  # Water
            ({'C': 2, 'H': 6, 'O': 1}, "CCO"),  # Ethanol
        )
        for composition, smiles in known_compositions:
            if atom_counts == composition:
                return smiles
        return f"ORGANIC: {'-'.join(sorted(atom_counts.keys()))}"

    def _convert_coordination_complex_to_smiles(self, formula: str) -> str:
        """Convert coordination complex formulas to SMILES.

        Recognises, in this order: malformed SMILES such as
        "[Mn+4]([Cl-])...", "[MnCl6]4+", "Mn(Cl)6+4", "Mn(Cl)6", "CoCl63-",
        "MnCl6" and a bare transition-metal symbol. Apart from the malformed
        SMILES form, only chloride ligands on a known transition metal are
        converted; everything else returns ``UNSUPPORTED_COMPLEX: <formula>``.
        """
        # Malformed SMILES like [Mn+4]([Cl-])([Cl-])([Cl-])([Cl-])([Cl-])[Cl-]
        malformed = re.match(r'\[([A-Z][a-z]?)\+(\d+)\]', formula)
        if malformed:
            metal, ox_text = malformed.groups()
            ox_state = int(ox_text)
            ligand_count = formula.count('[Cl-]')

            if ligand_count > 0:
                # Mn(IV) with six chlorides is rewritten as Mn(II).
                if metal == 'Mn' and ox_state == 4 and ligand_count == 6:
                    ox_state = 2
                return self._chloride_salt(metal, ox_state, ligand_count)

        # [MnCl6]4+ pattern
        match = re.match(r'\[([A-Z][a-z]?)([A-Z][a-z]?)(\d+)\](\d*)([+-])', formula)
        if match:
            metal, ligand, count_text, charge_num, charge_sign = match.groups()
            if self._is_chloro_complex(metal, ligand):
                if charge_sign == '+':
                    # For positive complex charge, assume higher oxidation state
                    ox_state = 6 if charge_num == '4' else 3
                else:
                    # For negative complex charge, use standard oxidation states
                    ox_state = 2 if metal == 'Mn' else 3
                return self._chloride_salt(metal, ox_state, int(count_text))

        # Mn(Cl)6+4 pattern (with charge)
        match = re.match(r'([A-Z][a-z]?)\(([A-Z][a-z]?)\)(\d+)([+-])(\d+)', formula)
        if match:
            metal, ligand, count_text, charge_sign, charge_text = match.groups()
            ligand_count = int(count_text)
            charge_value = int(charge_text)

            if self._is_chloro_complex(metal, ligand):
                if charge_sign == '+':
                    ox_state = charge_value + 2 if metal == 'Mn' else charge_value + 1
                else:
                    # With n chloride (-1) ligands and overall charge -q the
                    # metal carries n - q, e.g. Mn(Cl)6-4 -> Mn(+2). The
                    # previous q - n was negative and always clamped to 1.
                    ox_state = ligand_count - charge_value

                ox_state = self._clamp_oxidation_state(ox_state)
                return self._chloride_salt(metal, ox_state, ligand_count)

        # Mn(Cl)6 pattern (without charge)
        match = re.match(r'([A-Z][a-z]?)\(([A-Z][a-z]?)\)(\d+)', formula)
        if match:
            metal, ligand, count_text = match.groups()
            if self._is_chloro_complex(metal, ligand):
                ox_state = 2 if metal == 'Mn' else 3
                return self._chloride_salt(metal, ox_state, int(count_text))

        # CoCl6³⁻ pattern (with charge at end) - MUST come before simple MnCl6 pattern
        match = re.match(r'([A-Z][a-z]?)([A-Z][a-z]?)(\d+)(\d+)([+-])', formula)
        if match:
            metal, ligand, count_text, charge_text, charge_sign = match.groups()
            if self._is_chloro_complex(metal, ligand):
                if charge_sign == '-':
                    # For negatively charged complexes, use standard oxidation states
                    ox_state = 3 if metal == 'Co' else 2
                else:
                    ox_state = int(charge_text) + 2

                ox_state = self._clamp_oxidation_state(ox_state)
                return self._chloride_salt(metal, ox_state, int(count_text))

        # Simple MnCl6 pattern (without charge); "$" anchors at end of string
        match = re.match(r'([A-Z][a-z]?)([A-Z][a-z]?)(\d+)$', formula)
        if match:
            metal, ligand, count_text = match.groups()
            if self._is_chloro_complex(metal, ligand):
                ox_state = 2 if metal == 'Mn' else 3
                return self._chloride_salt(metal, ox_state, int(count_text))

        # Single metal
        if formula in self.transition_metals:
            return f"[{formula}]"

        return f"UNSUPPORTED_COMPLEX: {formula}"

    def _convert_molecular_formula_to_smiles(self, formula: str) -> str:
        """Convert simple molecular formulas to SMILES.

        Transition-metal symbols are bracketed, a fixed list of single
        elements is returned as-is, and other formulas are looked up in a
        small table; misses return ``UNKNOWN_FORMULA: <formula>``.
        """
        conversions = {
            'H2O': 'O',
            'CH4': 'C',
            'C2H6': 'CC',
            'C2H5OH': 'CCO',
            'C6H6': 'c1ccccc1',
            'NH3': 'N',
            'CO2': 'O=C=O',
            'CO': '[C-]#[O+]'
        }

        # Handle single atoms (including transition metals)
        if formula in self.transition_metals:
            return f"[{formula}]"

        # Handle other single elements
        single_elements = ['H', 'C', 'N', 'O', 'F', 'P', 'S', 'Cl', 'Br', 'I']
        if formula in single_elements:
            return formula

        return conversions.get(formula, f"UNKNOWN_FORMULA: {formula}")
371
+
372
# Shared converter instance, created once at import time and reused by the
# module-level helper below.
_converter = MolecularConverter()


def convert_to_smiles(molecule_input: str) -> str:
    """
    Convert various molecular input formats to SMILES.

    Thin module-level wrapper that delegates to the shared
    ``MolecularConverter`` instance.

    Args:
        molecule_input: Input molecular representation

    Returns:
        SMILES string representation
    """
    smiles = _converter.convert_to_smiles(molecule_input)
    return smiles
386
+
387
def test_molecular_converter():
    """Run the converter over a fixed set of inputs and print the outcome.

    Each case is printed with a PASS/FAIL marker; for failures the expected
    value is printed on the following line. Nothing is asserted or returned,
    so this is a manual smoke check rather than an automated test.
    """
    test_cases = [
        # Already valid SMILES
        ("[Cl-].[Mn+2]", "[Cl-].[Mn+2]"),
        ("CCO", "CCO"),

        # Coordination complexes
        ("[MnCl6]4+", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+6]"),
        ("[MnCl6]4-", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),
        ("Mn(Cl)6", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),
        ("MnCl6", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),

        # Malformed SMILES that need fixing
        ("[Mn+4]([Cl-])([Cl-])([Cl-])([Cl-])([Cl-])[Cl-]", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),
        ("[Fe+3]([Cl-])([Cl-])([Cl-])([Cl-])([Cl-])([Cl-])", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Fe+3]"),

        # Simple formulas
        ("H2O", "O"),
        ("CH4", "C"),
        ("Mn", "[Mn]"),

        # XYZ format
        ("Mn 0.0 0.0 0.0\nCl 2.3 0.0 0.0\nCl -2.3 0.0 0.0\nCl 0.0 2.3 0.0\nCl 0.0 -2.3 0.0\nCl 0.0 0.0 2.3\nCl 0.0 0.0 -2.3",
         "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]")
    ]

    print("Testing molecular converter:")
    for input_mol, expected in test_cases:
        result = convert_to_smiles(input_mol)
        passed = result == expected
        # The marker used to be an empty string in both branches, so passing
        # and failing cases were indistinguishable in the output.
        status = "PASS" if passed else "FAIL"
        # Only add an ellipsis when the input was actually shortened.
        preview = input_mol if len(input_mol) <= 30 else f"{input_mol[:30]}..."
        print(f"{status} '{preview}' → '{result}'")
        if not passed:
            print(f" Expected: '{expected}'")


if __name__ == "__main__":
    test_molecular_converter()