rowan-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rowan-mcp might be problematic. Click here for more details.
- rowan_mcp/__init__.py +14 -0
- rowan_mcp/__main__.py +14 -0
- rowan_mcp/functions/admet.py +94 -0
- rowan_mcp/functions/bde.py +113 -0
- rowan_mcp/functions/calculation_retrieve.py +89 -0
- rowan_mcp/functions/conformers.py +135 -0
- rowan_mcp/functions/descriptors.py +92 -0
- rowan_mcp/functions/docking.py +340 -0
- rowan_mcp/functions/docking_enhanced.py +174 -0
- rowan_mcp/functions/electronic_properties.py +263 -0
- rowan_mcp/functions/folder_management.py +137 -0
- rowan_mcp/functions/fukui.py +355 -0
- rowan_mcp/functions/hydrogen_bond_basicity.py +94 -0
- rowan_mcp/functions/irc.py +125 -0
- rowan_mcp/functions/macropka.py +195 -0
- rowan_mcp/functions/molecular_converter.py +423 -0
- rowan_mcp/functions/molecular_dynamics.py +191 -0
- rowan_mcp/functions/molecule_cache.db +0 -0
- rowan_mcp/functions/molecule_lookup.py +446 -0
- rowan_mcp/functions/multistage_opt.py +171 -0
- rowan_mcp/functions/pdb_handler.py +200 -0
- rowan_mcp/functions/pka.py +137 -0
- rowan_mcp/functions/redox_potential.py +352 -0
- rowan_mcp/functions/scan.py +536 -0
- rowan_mcp/functions/scan_analyzer.py +347 -0
- rowan_mcp/functions/solubility.py +277 -0
- rowan_mcp/functions/spin_states.py +747 -0
- rowan_mcp/functions/system_management.py +368 -0
- rowan_mcp/functions/tautomers.py +91 -0
- rowan_mcp/functions/workflow_management.py +422 -0
- rowan_mcp/server.py +169 -0
- rowan_mcp-0.1.0.dist-info/METADATA +216 -0
- rowan_mcp-0.1.0.dist-info/RECORD +35 -0
- rowan_mcp-0.1.0.dist-info/WHEEL +4 -0
- rowan_mcp-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""MacropKa workflow function for MCP server."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Optional, Union, List
|
|
7
|
+
|
|
8
|
+
import rowan
|
|
9
|
+
|
|
10
|
+
# Configure logging
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
# Hand the Rowan API key from the environment to the rowan client; if the
# variable is unset or empty, only a warning is logged.
api_key = os.environ.get("ROWAN_API_KEY")
if not api_key:
    logger.warning("ROWAN_API_KEY not found in environment")
else:
    rowan.api_key = api_key
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def log_rowan_api_call(func_name: str, **kwargs):
    """Log a Rowan API call at DEBUG level for debugging.

    Args:
        func_name: Name of the API function about to be called.
        **kwargs: Keyword arguments that accompany the call; logged as a dict.
    """
    # Lazy %-style arguments: kwargs is only rendered to text when DEBUG
    # logging is actually enabled. The emitted message text is unchanged.
    logger.debug("Calling %s with args: %s", func_name, kwargs)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def rowan_macropka(
    name: str,
    molecule: str,
    min_pH: float = 0.0,
    max_pH: float = 14.0,
    max_charge: int = 2,
    min_charge: int = -2,
    compute_aqueous_solubility: bool = False,
    compute_solvation_energy: bool = True,
    folder_uuid: Optional[str] = None,
    blocking: bool = True,
    ping_interval: int = 5
) -> str:
    """
    Calculate macroscopic pKa values and related properties for a molecule.

    Submits a "macropka" workflow through ``rowan.compute`` and renders the
    outcome as text.

    Args:
        name: Name for the calculation
        molecule: SMILES string of the molecule
        min_pH: Minimum pH for calculations (default: 0.0)
        max_pH: Maximum pH for calculations (default: 14.0)
        max_charge: Maximum charge to consider for microstates (default: 2)
        min_charge: Minimum charge to consider for microstates (default: -2)
        compute_aqueous_solubility: Whether to compute aqueous solubility by pH (default: False)
        compute_solvation_energy: Whether to compute solvation energy for Kpuu (default: True)
        folder_uuid: UUID of folder to save results in
        blocking: Wait for calculation to complete (default: True)
        ping_interval: How often to check status in blocking mode (default: 5 seconds)

    Returns:
        One of:
        - a JSON object string ``{"error": "..."}`` when the pH or charge
          range is empty/inverted, or when any exception is raised;
        - in blocking mode, a formatted result summary when the returned
          status is "success", otherwise a failure notice;
        - in non-blocking mode, a submission confirmation echoing the
          calculation parameters.
    """
    try:
        # Validate pH range
        if min_pH >= max_pH:
            return json.dumps({"error": "min_pH must be less than max_pH"})

        # Validate charge range
        if min_charge >= max_charge:
            return json.dumps({"error": "min_charge must be less than max_charge"})

        # Workflow-specific options, shared by the debug log and the API call
        # so the two cannot drift apart.
        workflow_options = {
            "min_pH": min_pH,
            "max_pH": max_pH,
            "max_charge": max_charge,
            "min_charge": min_charge,
            "compute_aqueous_solubility": compute_aqueous_solubility,
            "compute_solvation_energy": compute_solvation_energy,
        }

        log_rowan_api_call(
            "rowan.compute",
            workflow_type="macropka",
            name=name,
            molecule=molecule,
            **workflow_options,
            folder_uuid=folder_uuid,
            blocking=blocking,
            ping_interval=ping_interval,
        )

        # Submit calculation. The molecule is passed twice on purpose: as
        # ``molecule`` for rowan.compute() and as ``initial_smiles`` for the
        # MacropKa workflow parameters.
        result = rowan.compute(
            workflow_type="macropka",
            name=name,
            molecule=molecule,
            folder_uuid=folder_uuid,
            blocking=blocking,
            ping_interval=ping_interval,
            initial_smiles=molecule,
            **workflow_options,
        )

        uuid = result.get("uuid", "unknown")

        if not blocking:
            return _format_macropka_submission(
                uuid,
                min_pH,
                max_pH,
                min_charge,
                max_charge,
                compute_solvation_energy,
                compute_aqueous_solubility,
            )

        status = result.get("status", "unknown")
        if status != "success":
            return f"❌ MacropKa calculation failed\n🔖 UUID: {uuid}\n📋 Status: {status}\n💬 Check workflow details for more information"

        # ``or {}``: an explicit null object_data is treated like a missing
        # one instead of raising and discarding the successful result.
        return _format_macropka_results(uuid, status, result.get("object_data") or {})

    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Error in rowan_macropka: %s", e)
        return json.dumps({"error": str(e)})


def _format_macropka_results(uuid, status, object_data) -> str:
    """Render the object_data of a successful MacropKa workflow as text."""
    parts = [
        "✅ MacropKa calculation completed successfully!\n",
        f"🔖 Workflow UUID: {uuid}\n",
        f"📋 Status: {status}\n\n",
    ]

    pka_values = object_data.get("pKa_values", [])
    if pka_values:
        parts.append("📊 pKa Values:\n")
        parts.extend(
            f" • {pka.get('initial_charge', 'N/A')} → {pka.get('final_charge', 'N/A')}: pKa = {pka.get('pKa', 'N/A')}\n"
            for pka in pka_values
        )
        parts.append("\n")

    microstates = object_data.get("microstates", [])
    if microstates:
        parts.append(f"🔬 Microstates ({len(microstates)} found):\n")
        # Only the first five microstates are listed; the rest are counted.
        for position, microstate in enumerate(microstates[:5], start=1):
            parts.append(
                f" {position}. Charge: {microstate.get('charge', 'N/A')}, Energy: {microstate.get('energy', 'N/A')} kcal/mol\n"
            )
        if len(microstates) > 5:
            parts.append(f" ... and {len(microstates) - 5} more\n")
        parts.append("\n")

    isoelectric_point = object_data.get("isoelectric_point")
    if isoelectric_point is not None:
        parts.append(f"⚡ Isoelectric Point: pH {isoelectric_point}\n")

    solvation_energy = object_data.get("solvation_energy")
    if solvation_energy is not None:
        parts.append(f"💧 Solvation Energy: {solvation_energy} kcal/mol\n")

    kpuu_probability = object_data.get("kpuu_probability")
    if kpuu_probability is not None:
        try:
            kpuu_text = f"{kpuu_probability:.2%}"
        except (TypeError, ValueError):
            # Value is not a number: show it as-is rather than losing the
            # whole report to a formatting error.
            kpuu_text = str(kpuu_probability)
        parts.append(f"🧠 Kpuu Probability (≥0.3): {kpuu_text}\n")

    # pH-dependent series are only summarised by their number of points.
    logD_by_pH = object_data.get("logD_by_pH", [])
    if logD_by_pH:
        parts.append(f"\n📈 logD values available for {len(logD_by_pH)} pH points\n")

    aqueous_solubility_by_pH = object_data.get("aqueous_solubility_by_pH", [])
    if aqueous_solubility_by_pH:
        parts.append(f"💧 Aqueous solubility values available for {len(aqueous_solubility_by_pH)} pH points\n")

    microstate_weights_by_pH = object_data.get("microstate_weights_by_pH", [])
    if microstate_weights_by_pH:
        parts.append(f"⚖️ Microstate weights available for {len(microstate_weights_by_pH)} pH points\n")

    return "".join(parts)


def _format_macropka_submission(
    uuid,
    min_pH,
    max_pH,
    min_charge,
    max_charge,
    compute_solvation_energy,
    compute_aqueous_solubility,
) -> str:
    """Render the confirmation text for a non-blocking MacropKa submission."""
    return "".join([
        "📋 MacropKa calculation submitted!\n",
        f"🔖 Workflow UUID: {uuid}\n",
        "⏳ Status: Running...\n",
        "💡 Use rowan_workflow_management to check status\n",
        "\nCalculation parameters:\n",
        f" • pH range: {min_pH} - {max_pH}\n",
        f" • Charge range: {min_charge} to {max_charge}\n",
        f" • Compute solvation energy: {compute_solvation_energy}\n",
        f" • Compute aqueous solubility: {compute_aqueous_solubility}\n",
    ])
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
# Manual smoke test, run only when this module is executed as a script.
if __name__ == "__main__":
    # Ethanol, with the aqueous-solubility option switched on; waits for the
    # workflow to finish and prints whatever text comes back.
    result = rowan_macropka(
        "Ethanol MacropKa Test",
        "CCO",
        compute_aqueous_solubility=True,
        blocking=True,
    )
    print(result)
|
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dynamic molecular formula to SMILES converter for coordination complexes.
|
|
3
|
+
Uses xyz2mol_tm for transition metal complexes and RDKit for standard molecules.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import re
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Optional, Dict, List, Tuple
|
|
9
|
+
from rdkit import Chem
|
|
10
|
+
from rdkit.Chem import rdMolDescriptors
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
class MolecularConverter:
    """Converts various molecular input formats to SMILES strings.

    Only a small, hard-coded set of inputs is understood: chloride complexes
    of the listed transition metals, a few common molecular formulas and a
    few XYZ compositions. Inputs that are recognised as a category but not
    convertible come back as a sentinel string (``UNSUPPORTED_XYZ: ...``,
    ``UNSUPPORTED_COMPLEX: ...``, ``UNKNOWN_FORMULA: ...``, ``COMPLEX_TM: ...``
    or ``ORGANIC: ...``); anything else is returned unchanged.
    """

    # Single-pass translation of Unicode subscripts/superscripts to ASCII.
    _UNICODE_TO_ASCII = str.maketrans(
        '₀₁₂₃₄₅₆₇₈₉⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻',
        '01234567890123456789+-',
    )

    # Elements whose presence next to a transition metal suggests a complex.
    _LIGAND_ELEMENTS = frozenset({'Cl', 'Br', 'I', 'F', 'N', 'O', 'S', 'P'})

    # Metal oxidation state assumed for an XYZ structure with six chlorines.
    # Order matters: it is the precedence when several metals are present.
    _HEXACHLORIDE_OX_STATES = {'Mn': 2, 'Fe': 3, 'Co': 3}

    def __init__(self):
        """Initialize the molecular converter."""
        self.transition_metals = {
            'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
            'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd',
            'La', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg'
        }

    def convert_to_smiles(self, molecule_input: str) -> str:
        """
        Convert various molecular input formats to SMILES.

        The input is stripped and Unicode-normalized, then classified in
        this order: XYZ coordinates, valid SMILES, coordination complex
        formula, simple molecular formula. The first match decides.

        Args:
            molecule_input: Input molecular representation

        Returns:
            SMILES string representation, a sentinel string for recognised
            but unsupported input, or the cleaned input unchanged when
            nothing matches.
        """
        molecule_input = self._normalize_unicode_formula(molecule_input.strip())

        # XYZ is tested before SMILES validity: multi-line coordinate text is
        # never a SMILES, and checking it first guarantees it cannot be
        # passed through untouched if the SMILES parser happens to accept a
        # leading token such as "C" (NOTE(review): depends on RDKit's
        # handling of text after the first whitespace - confirm).
        if self._is_xyz_format(molecule_input):
            return self._convert_xyz_to_smiles(molecule_input)

        if self._is_valid_smiles(molecule_input):
            return molecule_input

        if self._is_coordination_complex(molecule_input):
            return self._convert_coordination_complex_to_smiles(molecule_input)

        if self._is_molecular_formula(molecule_input):
            return self._convert_molecular_formula_to_smiles(molecule_input)

        # Default: assume it's already SMILES or unsupported
        return molecule_input

    def _normalize_unicode_formula(self, formula: str) -> str:
        """Convert Unicode subscripts and superscripts to regular ASCII."""
        formula = formula.translate(self._UNICODE_TO_ASCII)
        logger.info(" Unicode normalized: '%s'", formula)
        return formula

    def _is_valid_smiles(self, smiles: str) -> bool:
        """Check if string is a valid SMILES.

        Strings flagged by ``_is_malformed_coordination_smiles`` are rejected
        before RDKit is consulted; otherwise the string is valid when
        ``Chem.MolFromSmiles`` returns a molecule.
        """
        try:
            if self._is_malformed_coordination_smiles(smiles):
                return False
            return Chem.MolFromSmiles(smiles) is not None
        except Exception:
            # Any parser failure means "not valid". Unlike a bare ``except:``
            # this lets KeyboardInterrupt/SystemExit propagate.
            return False

    def _is_malformed_coordination_smiles(self, smiles: str) -> bool:
        """Check for malformed coordination complex SMILES patterns."""
        # Pattern like [Mn+4]([Cl-])([Cl-])... - charged metal directly
        # followed by a parenthesised bracket atom.
        if re.search(r'\[[A-Z][a-z]?\+\d+\]\(\[.*?\]\)', smiles):
            return True

        # More than 2 parenthetical bracket groups suggests a malformed
        # coordination complex.
        if smiles.count('([') > 2:
            return True

        # Unrealistic oxidation state on the first positively charged
        # bracket atom.
        oxidation_match = re.search(r'\[([A-Z][a-z]?)\+(\d+)\]', smiles)
        if oxidation_match:
            metal = oxidation_match.group(1)
            ox_state = int(oxidation_match.group(2))
            if ox_state > 8 or (metal in ['Mn', 'Fe', 'Co', 'Ni', 'Cu'] and ox_state > 7):
                return True

        return False

    def _is_xyz_format(self, text: str) -> bool:
        """Check if input is XYZ coordinate format.

        Requires at least two lines, each made of an alphabetic element
        token of at most two characters followed by three numbers. Header
        lines (atom count / comment) are not accepted.
        """
        lines = text.strip().split('\n')
        if len(lines) < 2:
            return False

        for line in lines:
            parts = line.strip().split()
            if len(parts) < 4:
                return False
            element = parts[0]
            if not element.isalpha() or len(element) > 2:
                return False
            try:
                [float(x) for x in parts[1:4]]
            except ValueError:
                return False
        return True

    def _is_coordination_complex(self, formula: str) -> bool:
        """Check if formula represents a coordination complex."""
        # Look for patterns like [MnCl6]4+, Mn(Cl)6, etc.
        patterns = (
            r'\[.*\]\d*[+-]',            # [MnCl6]4+
            r'\w+\([A-Z][a-z]?\)\d+',    # Mn(Cl)6
        )
        if any(re.search(pattern, formula) for pattern in patterns):
            return True

        # Compare whole element symbols rather than substrings, so that
        # e.g. "Fe" is not mistaken for fluorine or "Yb" for yttrium.
        elements = set(re.findall(r'[A-Z][a-z]?', formula))
        has_metal = not elements.isdisjoint(self.transition_metals)
        has_ligand = not elements.isdisjoint(self._LIGAND_ELEMENTS)
        return has_metal and has_ligand

    def _is_molecular_formula(self, formula: str) -> bool:
        """Check if input is a simple molecular formula."""
        # Pattern for molecular formulas like H2O, CH4, etc.
        pattern = r'^[A-Z][a-z]?(\d+)?([A-Z][a-z]?(\d+)?)*$'
        return bool(re.match(pattern, formula))

    def _convert_xyz_to_smiles(self, xyz_text: str) -> str:
        """
        Convert XYZ coordinates to SMILES.

        Only the element composition is used (see the ``_handle_*`` helpers);
        coordinates are parsed but do not influence the result. Lines with
        fewer than four tokens are skipped. On any error an
        ``UNSUPPORTED_XYZ: ...`` sentinel is returned.
        """
        try:
            atoms = []
            coords = []

            for line in xyz_text.strip().split('\n'):
                parts = line.strip().split()
                if len(parts) >= 4:
                    x, y, z = map(float, parts[1:4])
                    atoms.append(parts[0])
                    coords.append([x, y, z])

            if any(atom in self.transition_metals for atom in atoms):
                return self._handle_transition_metal_xyz(atoms, coords)
            # For organic molecules, try basic conversion
            return self._handle_organic_xyz(atoms, coords)

        except Exception as e:
            logger.error("Failed to convert XYZ to SMILES: %s", e)
            return f"UNSUPPORTED_XYZ: {xyz_text[:50]}..."

    @staticmethod
    def _count_atoms(atoms: List[str]) -> dict:
        """Return a mapping of element symbol to number of occurrences."""
        counts = {}
        for atom in atoms:
            counts[atom] = counts.get(atom, 0) + 1
        return counts

    @staticmethod
    def _chloride_smiles(metal: str, ox_state: int, ligand_count: int) -> str:
        """Build dot-separated SMILES: ligand_count chlorides plus the metal cation."""
        return '.'.join(['[Cl-]'] * ligand_count + [f"[{metal}+{ox_state}]"])

    @staticmethod
    def _anion_ox_state(ligand_count: int, charge_magnitude: int) -> int:
        """Metal oxidation state of a chloride complex with a negative charge.

        Uses charge balance with every chloride counted as -1:
        oxidation state = ligand_count - |charge|, capped to the 1..7 range.
        """
        return max(1, min(ligand_count - charge_magnitude, 7))

    def _handle_transition_metal_xyz(self, atoms: List[str], coords: List[List[float]]) -> str:
        """Handle XYZ conversion for transition metal complexes.

        ``coords`` is accepted for symmetry with the caller but not used.
        """
        atom_counts = self._count_atoms(atoms)

        # MnCl6 / FeCl6 / CoCl6 patterns: metal present with exactly six Cl.
        if atom_counts.get('Cl', 0) == 6:
            for metal, ox_state in self._HEXACHLORIDE_OX_STATES.items():
                if metal in atom_counts:
                    return self._chloride_smiles(metal, ox_state, 6)

        # Single metal element only
        if len(atom_counts) == 1:
            metal = next(iter(atom_counts))
            if metal in self.transition_metals:
                return f"[{metal}]"

        # Generic fallback
        return f"COMPLEX_TM: {'-'.join(sorted(atom_counts))}"

    def _handle_organic_xyz(self, atoms: List[str], coords: List[List[float]]) -> str:
        """Handle XYZ conversion for organic molecules.

        Recognises methane, water and an ethanol-like C2H6O composition by
        atom counts alone; ``coords`` is not used.
        """
        atom_counts = self._count_atoms(atoms)

        known_compositions = (
            ({'C': 1, 'H': 4}, "C"),            # Methane
            ({'H': 2, 'O': 1}, "O"),            # Water
            ({'C': 2, 'H': 6, 'O': 1}, "CCO"),  # Ethanol
        )
        for composition, smiles in known_compositions:
            if atom_counts == composition:
                return smiles

        return f"ORGANIC: {'-'.join(sorted(atom_counts))}"

    def _convert_coordination_complex_to_smiles(self, formula: str) -> str:
        """Convert coordination complex formulas to SMILES.

        Only chloride ligands are supported. Patterns are tried in order:
        malformed SMILES, ``[MnCl6]4+``, ``Mn(Cl)6+4``, ``Mn(Cl)6``,
        ``CoCl63-`` (trailing charge), ``MnCl6``, bare metal. For negatively
        charged complexes the metal oxidation state follows from charge
        balance; positive charges keep the original heuristics. Returns
        ``UNSUPPORTED_COMPLEX: ...`` when nothing applies.
        """
        # Handle malformed SMILES like [Mn+4]([Cl-])([Cl-])([Cl-])([Cl-])([Cl-])[Cl-]
        match = re.match(r'\[([A-Z][a-z]?)\+(\d+)\]', formula)
        if match:
            metal = match.group(1)
            ox_state = int(match.group(2))
            ligand_count = formula.count('[Cl-]')

            if ligand_count > 0:
                # Adjust oxidation state for realistic chemistry
                if metal == 'Mn' and ox_state == 4 and ligand_count == 6:
                    ox_state = 2  # MnCl6 4- is more realistic than Mn4+ with 6 Cl-
                return self._chloride_smiles(metal, ox_state, ligand_count)

        # [MnCl6]4+ pattern
        match = re.match(r'\[([A-Z][a-z]?)([A-Z][a-z]?)(\d+)\](\d*)([+-])', formula)
        if match:
            metal, ligand, ligand_count, charge_num, charge_sign = match.groups()
            ligand_count = int(ligand_count)

            if metal in self.transition_metals and ligand == 'Cl':
                if charge_sign == '+':
                    # For positive complex charge, assume higher oxidation state
                    ox_state = 6 if charge_num == '4' else 3
                else:
                    # A sign with no digits means a single charge.
                    ox_state = self._anion_ox_state(ligand_count, int(charge_num or 1))
                return self._chloride_smiles(metal, ox_state, ligand_count)

        # Mn(Cl)6+4 pattern (with charge)
        match = re.match(r'([A-Z][a-z]?)\(([A-Z][a-z]?)\)(\d+)([+-])(\d+)', formula)
        if match:
            metal, ligand, ligand_count, charge_sign, charge_value = match.groups()
            ligand_count = int(ligand_count)
            charge_value = int(charge_value)

            if metal in self.transition_metals and ligand == 'Cl':
                if charge_sign == '+':
                    ox_state = charge_value + 2 if metal == 'Mn' else charge_value + 1
                    # Cap oxidation state at reasonable values
                    ox_state = max(1, min(ox_state, 7))
                else:
                    # ligands minus charge (not the reverse): Mn(Cl)6-4 is Mn(II)
                    ox_state = self._anion_ox_state(ligand_count, charge_value)
                return self._chloride_smiles(metal, ox_state, ligand_count)

        # Mn(Cl)6 pattern (without charge)
        match = re.match(r'([A-Z][a-z]?)\(([A-Z][a-z]?)\)(\d+)', formula)
        if match:
            metal, ligand, ligand_count = match.groups()
            if metal in self.transition_metals and ligand == 'Cl':
                ox_state = 2 if metal == 'Mn' else 3
                return self._chloride_smiles(metal, ox_state, int(ligand_count))

        # CoCl63- pattern (charge at end) - MUST come before simple MnCl6 pattern
        match = re.match(r'([A-Z][a-z]?)([A-Z][a-z]?)(\d+)(\d+)([+-])', formula)
        if match:
            metal, ligand, ligand_count, charge_value, charge_sign = match.groups()
            ligand_count = int(ligand_count)
            charge_value = int(charge_value)

            if metal in self.transition_metals and ligand == 'Cl':
                if charge_sign == '-':
                    ox_state = self._anion_ox_state(ligand_count, charge_value)
                else:
                    # Cap oxidation state at reasonable values
                    ox_state = max(1, min(charge_value + 2, 7))
                return self._chloride_smiles(metal, ox_state, ligand_count)

        # Simple MnCl6 pattern (without charge); $ ensures nothing trails
        match = re.match(r'([A-Z][a-z]?)([A-Z][a-z]?)(\d+)$', formula)
        if match:
            metal, ligand, ligand_count = match.groups()
            if metal in self.transition_metals and ligand == 'Cl':
                ox_state = 2 if metal == 'Mn' else 3
                return self._chloride_smiles(metal, ox_state, int(ligand_count))

        # Single metal
        if formula in self.transition_metals:
            return f"[{formula}]"

        return f"UNSUPPORTED_COMPLEX: {formula}"

    def _convert_molecular_formula_to_smiles(self, formula: str) -> str:
        """Convert simple molecular formulas to SMILES.

        Handles bare transition metals, a fixed list of single elements and
        a small lookup table; anything else yields ``UNKNOWN_FORMULA: ...``.
        """
        # Common molecular formulas
        conversions = {
            'H2O': 'O',
            'CH4': 'C',
            'C2H6': 'CC',
            'C2H5OH': 'CCO',
            'C6H6': 'c1ccccc1',
            'NH3': 'N',
            'CO2': 'O=C=O',
            'CO': '[C-]#[O+]'
        }

        # Handle single atoms (including transition metals)
        if formula in self.transition_metals:
            return f"[{formula}]"

        # Handle other single elements
        single_elements = ['H', 'C', 'N', 'O', 'F', 'P', 'S', 'Cl', 'Br', 'I']
        if formula in single_elements:
            return formula

        return conversions.get(formula, f"UNKNOWN_FORMULA: {formula}")
|
|
371
|
+
|
|
372
|
+
# Shared converter instance backing the module-level helper below.
_converter = MolecularConverter()


def convert_to_smiles(molecule_input: str) -> str:
    """
    Convert a molecular input to SMILES using the shared converter.

    Thin wrapper around ``MolecularConverter.convert_to_smiles`` on the
    module-level ``_converter`` instance.

    Args:
        molecule_input: Input molecular representation

    Returns:
        Whatever the converter returns for this input
    """
    smiles = _converter.convert_to_smiles(molecule_input)
    return smiles
|
|
386
|
+
|
|
387
|
+
def test_molecular_converter():
    """Test the molecular converter with various inputs.

    Runs ``convert_to_smiles`` on each (input, expected) pair and prints one
    line per case with a pass/fail marker; mismatches also print the expected
    value. Nothing is asserted or returned.
    """
    test_cases = [
        # Already valid SMILES
        ("[Cl-].[Mn+2]", "[Cl-].[Mn+2]"),
        ("CCO", "CCO"),

        # Coordination complexes
        ("[MnCl6]4+", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+6]"),
        ("[MnCl6]4-", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),
        ("Mn(Cl)6", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),
        ("MnCl6", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),

        # Malformed SMILES that need fixing
        ("[Mn+4]([Cl-])([Cl-])([Cl-])([Cl-])([Cl-])[Cl-]", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),
        ("[Fe+3]([Cl-])([Cl-])([Cl-])([Cl-])([Cl-])([Cl-])", "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Fe+3]"),

        # Simple formulas
        ("H2O", "O"),
        ("CH4", "C"),
        ("Mn", "[Mn]"),

        # XYZ format
        ("Mn 0.0 0.0 0.0\nCl 2.3 0.0 0.0\nCl -2.3 0.0 0.0\nCl 0.0 2.3 0.0\nCl 0.0 -2.3 0.0\nCl 0.0 0.0 2.3\nCl 0.0 0.0 -2.3",
         "[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Cl-].[Mn+2]"),
    ]

    print("Testing molecular converter:")
    for input_mol, expected in test_cases:
        result = convert_to_smiles(input_mol)
        # Distinct markers: both branches used to be the empty string, so a
        # failing case looked exactly like a passing one.
        status = "✅" if result == expected else "❌"
        print(f"{status} '{input_mol[:30]}...' → '{result}'")
        if result != expected:
            print(f" Expected: '{expected}'")


if __name__ == "__main__":
    test_molecular_converter()
|