rowan-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rowan-mcp might be problematic. Click here for more details.
- rowan_mcp/__init__.py +14 -0
- rowan_mcp/__main__.py +14 -0
- rowan_mcp/functions/admet.py +94 -0
- rowan_mcp/functions/bde.py +113 -0
- rowan_mcp/functions/calculation_retrieve.py +89 -0
- rowan_mcp/functions/conformers.py +135 -0
- rowan_mcp/functions/descriptors.py +92 -0
- rowan_mcp/functions/docking.py +340 -0
- rowan_mcp/functions/docking_enhanced.py +174 -0
- rowan_mcp/functions/electronic_properties.py +263 -0
- rowan_mcp/functions/folder_management.py +137 -0
- rowan_mcp/functions/fukui.py +355 -0
- rowan_mcp/functions/hydrogen_bond_basicity.py +94 -0
- rowan_mcp/functions/irc.py +125 -0
- rowan_mcp/functions/macropka.py +195 -0
- rowan_mcp/functions/molecular_converter.py +423 -0
- rowan_mcp/functions/molecular_dynamics.py +191 -0
- rowan_mcp/functions/molecule_cache.db +0 -0
- rowan_mcp/functions/molecule_lookup.py +446 -0
- rowan_mcp/functions/multistage_opt.py +171 -0
- rowan_mcp/functions/pdb_handler.py +200 -0
- rowan_mcp/functions/pka.py +137 -0
- rowan_mcp/functions/redox_potential.py +352 -0
- rowan_mcp/functions/scan.py +536 -0
- rowan_mcp/functions/scan_analyzer.py +347 -0
- rowan_mcp/functions/solubility.py +277 -0
- rowan_mcp/functions/spin_states.py +747 -0
- rowan_mcp/functions/system_management.py +368 -0
- rowan_mcp/functions/tautomers.py +91 -0
- rowan_mcp/functions/workflow_management.py +422 -0
- rowan_mcp/server.py +169 -0
- rowan_mcp-0.1.0.dist-info/METADATA +216 -0
- rowan_mcp-0.1.0.dist-info/RECORD +35 -0
- rowan_mcp-0.1.0.dist-info/WHEEL +4 -0
- rowan_mcp-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Rowan molecular dynamics function for MCP tool integration.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
import rowan
|
|
7
|
+
|
|
8
|
+
def rowan_molecular_dynamics(
    name: str,
    molecule: str,
    ensemble: str = "nvt",
    initialization: str = "random",
    timestep: float = 1.0,
    num_steps: int = 500,
    save_interval: int = 10,
    temperature: float = 300.0,
    pressure: Optional[float] = None,
    langevin_thermostat_timescale: float = 100.0,
    berendsen_barostat_timescale: float = 1000.0,
    constraints: Optional[List[Dict[str, Any]]] = None,
    confining_constraint: Optional[Dict[str, Any]] = None,
    # Calculation settings parameters
    method: Optional[str] = None,
    basis_set: Optional[str] = None,
    engine: Optional[str] = None,
    charge: int = 0,
    multiplicity: int = 1,
    # Workflow control parameters
    folder_uuid: Optional[str] = None,
    blocking: bool = True,
    ping_interval: int = 5
) -> str:
    """Submit a molecular dynamics workflow to Rowan.

    Inputs are validated locally first; on the first invalid value an error
    string is returned and nothing is submitted. The molecule identifier is
    then resolved to SMILES through the package's molecule lookup (falling
    back to the raw input if the lookup is unavailable or finds nothing),
    defaults for engine/method/basis set are filled in, and the workflow is
    submitted with ``rowan.compute``.

    Args:
        name: Name for the calculation.
        molecule: Molecule SMILES string or common name.
        ensemble: Thermodynamic ensemble ("nvt", "npt", "nve"); matched
            case-insensitively.
        initialization: Initial velocities ("random", "quasiclassical",
            "read"); matched case-insensitively.
        timestep: Integration timestep in femtoseconds (must be > 0).
        num_steps: Number of MD steps to run (must be > 0).
        save_interval: Save trajectory every N steps (must be > 0).
        temperature: Temperature in Kelvin (must be > 0).
        pressure: Pressure in atm (required for NPT; must be > 0 if given).
        langevin_thermostat_timescale: Thermostat coupling timescale in fs
            (must be > 0).
        berendsen_barostat_timescale: Barostat coupling timescale in fs
            (must be > 0).
        constraints: List of pairwise harmonic constraints; only sent when
            non-empty.
        confining_constraint: Spherical harmonic constraint; only sent when
            non-empty.
        method: QM method for force calculation. Defaults to "gfn2-xtb" for
            the xTB engine and "b3lyp" for any other engine.
        basis_set: Basis set for force calculation. Defaults to "def2-svp"
            for non-xTB engines; never sent for xTB.
        engine: Computational engine for force calculation (default "xtb").
        charge: Molecular charge.
        multiplicity: Spin multiplicity (must be >= 1).
        folder_uuid: Optional folder UUID for organization.
        blocking: Whether to wait for completion.
        ping_interval: Check status interval in seconds (must be > 0 when
            ``blocking`` is true).

    Example:
        result = rowan_molecular_dynamics(
            name="ethanol_md_simulation",
            molecule="ethanol",
            ensemble="NVT",
            temperature=298,
            num_steps=1000,
            blocking=False
        )

    Returns:
        ``str(result)`` of the ``rowan.compute`` call on success, a
        validation error message for invalid input, or the string form of an
        error dictionary if the submission raises.
    """
    # Parameter validation
    valid_ensembles = ["nvt", "npt", "nve"]
    valid_initializations = ["random", "quasiclassical", "read"]

    # Validate ensemble
    ensemble_lower = ensemble.lower()
    if ensemble_lower not in valid_ensembles:
        return f" Error: Invalid ensemble '{ensemble}'. Valid options: {', '.join(valid_ensembles)}"

    # Validate initialization
    initialization_lower = initialization.lower()
    if initialization_lower not in valid_initializations:
        return f" Error: Invalid initialization '{initialization}'. Valid options: {', '.join(valid_initializations)}"

    # Validate numeric parameters (checked in this order so the first
    # offending parameter is the one reported)
    must_be_positive = (
        ("timestep", timestep),
        ("num_steps", num_steps),
        ("save_interval", save_interval),
        ("temperature", temperature),
    )
    for label, value in must_be_positive:
        if value <= 0:
            return f" Error: {label} must be positive (got {value})"

    # Validate NPT ensemble requirements
    if ensemble_lower == "npt" and pressure is None:
        return " Error: NPT ensemble requires pressure to be specified"
    if pressure is not None and pressure <= 0:
        return f" Error: pressure must be positive (got {pressure})"

    # Coupling timescales are always forwarded in the MD settings, so reject
    # non-physical values here instead of letting the remote call fail.
    coupling_timescales = (
        ("langevin_thermostat_timescale", langevin_thermostat_timescale),
        ("berendsen_barostat_timescale", berendsen_barostat_timescale),
    )
    for label, value in coupling_timescales:
        if value <= 0:
            return f" Error: {label} must be positive (got {value})"

    # Spin multiplicity is 2S + 1, so anything below 1 cannot be valid.
    if multiplicity < 1:
        return f" Error: multiplicity must be at least 1 (got {multiplicity})"

    # The polling interval only matters when we wait for the workflow.
    if blocking and ping_interval <= 0:
        return f" Error: ping_interval must be positive when blocking is enabled (got {ping_interval})"

    # Convert molecule name to SMILES using lookup system
    try:
        from .molecule_lookup import get_lookup_instance
        lookup = get_lookup_instance()
        smiles, _source, _metadata = lookup.get_smiles(molecule)
        # Fallback to original identifier when the lookup finds nothing
        resolved_smiles = smiles if smiles else molecule
    except Exception:
        # Deliberate best-effort: any lookup failure (missing dependency,
        # cache or network error) falls back to the raw input.
        resolved_smiles = molecule

    # Apply smart defaults for MD calculations
    if engine is None:
        engine = "xtb"  # Default to xTB for fast MD forces
    engine_name = engine.lower()
    uses_xtb = engine_name == "xtb"

    if method is None:
        # Default xTB method, or a default DFT method for other engines
        method = "gfn2-xtb" if uses_xtb else "b3lyp"
    if basis_set is None and not uses_xtb:
        basis_set = "def2-svp"  # Default basis set for non-xTB engines

    # Build MD settings
    md_settings = {
        "ensemble": ensemble_lower,
        "initialization": initialization_lower,
        "timestep": timestep,
        "num_steps": num_steps,
        "save_interval": save_interval,
        "temperature": temperature,
        "langevin_thermostat_timescale": langevin_thermostat_timescale,
        "berendsen_barostat_timescale": berendsen_barostat_timescale,
    }

    # Add optional fields if provided
    if pressure is not None:
        md_settings["pressure"] = pressure
    if constraints:
        md_settings["constraints"] = constraints
    if confining_constraint:
        md_settings["confining_constraint"] = confining_constraint

    # Build calc_settings
    calc_settings = {
        "charge": charge,
        "multiplicity": multiplicity,
        "engine": engine_name,
    }

    # Add method if specified
    if method:
        calc_settings["method"] = method.lower()

    # Add basis_set if specified (not needed for xTB)
    if basis_set and not uses_xtb:
        calc_settings["basis_set"] = basis_set.lower()

    # Build parameters for Rowan API
    workflow_params = {
        "name": name,
        "molecule": resolved_smiles,
        "workflow_type": "molecular_dynamics",
        "settings": md_settings,
        "calc_settings": calc_settings,
        "folder_uuid": folder_uuid,
        "blocking": blocking,
        "ping_interval": ping_interval,
    }

    # Add calc_engine at top level (skipped only for an empty engine string)
    if engine_name:
        workflow_params["calc_engine"] = engine_name

    try:
        # Submit molecular dynamics calculation to Rowan
        result = rowan.compute(**workflow_params)
        return str(result)
    except Exception as e:
        error_response = {
            "success": False,
            "error": f"Molecular dynamics calculation failed: {str(e)}",
            "name": name,
            "molecule": molecule,
            "resolved_smiles": resolved_smiles,
        }
        return str(error_response)
|
|
Binary file
|
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Advanced molecule lookup using PubChemPy + SQLite Cache + RDKit validation.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import sqlite3
|
|
6
|
+
import logging
|
|
7
|
+
from datetime import datetime, timedelta
|
|
8
|
+
from typing import Optional, Tuple
|
|
9
|
+
import os
|
|
10
|
+
|
|
11
|
+
# Set up logging
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
# Import dependencies with fallbacks
|
|
15
|
+
try:
|
|
16
|
+
import pubchempy as pcp
|
|
17
|
+
PUBCHEMPY_AVAILABLE = True
|
|
18
|
+
except ImportError:
|
|
19
|
+
logger.warning("pubchempy not available - install with: pip install pubchempy")
|
|
20
|
+
PUBCHEMPY_AVAILABLE = False
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
from rdkit import Chem
|
|
24
|
+
from rdkit.Chem import Descriptors
|
|
25
|
+
RDKIT_AVAILABLE = True
|
|
26
|
+
except ImportError:
|
|
27
|
+
logger.warning("rdkit not available - install with: pip install rdkit")
|
|
28
|
+
RDKIT_AVAILABLE = False
|
|
29
|
+
|
|
30
|
+
class MoleculeLookup:
    """Molecule lookup with PubChem API, SQLite caching, and RDKit validation.

    Identifiers (names or SMILES) are resolved to canonical SMILES. Results
    are cached in a SQLite database stored next to this module, and daily
    usage counters are kept in a ``lookup_stats`` table.
    """

    # Counter columns of ``lookup_stats`` that ``_update_stats`` may touch.
    # The column name is interpolated into SQL (identifiers cannot be bound
    # as parameters), so it is restricted to this fixed set.
    _STAT_COLUMNS = frozenset({'cache_hits', 'api_calls', 'failed_lookups'})

    def __init__(self, cache_db: str = 'molecule_cache.db', cache_expiry_days: int = 30):
        """Open (or create) the cache database and ensure its tables exist.

        Args:
            cache_db: File name of the SQLite cache, resolved relative to the
                directory containing this module.
            cache_expiry_days: Age in days after which a cached entry is
                treated as stale.
        """
        self.cache_expiry_days = cache_expiry_days

        # Create cache database
        cache_path = os.path.join(os.path.dirname(__file__), cache_db)
        self.conn = sqlite3.connect(cache_path, check_same_thread=False)

        # Create tables if they don't exist
        self.conn.execute('''
            CREATE TABLE IF NOT EXISTS molecules (
                identifier TEXT PRIMARY KEY,
                smiles TEXT,
                canonical_smiles TEXT,
                name TEXT,
                iupac_name TEXT,
                formula TEXT,
                molecular_weight REAL,
                cid INTEGER,
                retrieved_at TIMESTAMP,
                source TEXT
            )
        ''')

        self.conn.execute('''
            CREATE TABLE IF NOT EXISTS lookup_stats (
                date TEXT PRIMARY KEY,
                cache_hits INTEGER DEFAULT 0,
                api_calls INTEGER DEFAULT 0,
                failed_lookups INTEGER DEFAULT 0
            )
        ''')

        self.conn.commit()
        logger.info("Molecule lookup cache initialized")

    def validate_smiles(self, smiles: str) -> Optional[str]:
        """Validate and canonicalize SMILES using RDKit.

        Returns the canonical SMILES, or ``None`` if RDKit cannot parse the
        input. When RDKit is not installed the input is returned unchanged.
        """
        if not RDKIT_AVAILABLE:
            logger.warning("RDKit not available - returning SMILES as-is")
            return smiles

        try:
            mol = Chem.MolFromSmiles(smiles)
            if mol is not None:
                canonical = Chem.MolToSmiles(mol, canonical=True)
                logger.debug(f"SMILES validated: {smiles} -> {canonical}")
                return canonical
        except Exception as e:
            logger.warning(f"SMILES validation failed for {smiles}: {e}")

        return None

    def get_molecular_properties(self, smiles: str) -> dict:
        """Calculate molecular properties using RDKit.

        Returns a dict with ``molecular_weight``, ``logp``, ``hbd``, ``hba``,
        ``rotatable_bonds`` and ``aromatic_rings``; an empty dict if RDKit is
        unavailable, the SMILES cannot be parsed, or calculation fails.
        """
        if not RDKIT_AVAILABLE:
            return {}

        try:
            mol = Chem.MolFromSmiles(smiles)
            if mol is not None:
                return {
                    'molecular_weight': round(Descriptors.MolWt(mol), 2),
                    'logp': round(Descriptors.MolLogP(mol), 2),
                    'hbd': Descriptors.NumHDonors(mol),
                    'hba': Descriptors.NumHAcceptors(mol),
                    'rotatable_bonds': Descriptors.NumRotatableBonds(mol),
                    'aromatic_rings': Descriptors.NumAromaticRings(mol)
                }
        except Exception as e:
            logger.warning(f"Property calculation failed for {smiles}: {e}")

        return {}

    def _is_cache_valid(self, retrieved_at: str) -> bool:
        """Check if cache entry is still valid.

        ``retrieved_at`` is the ISO-format timestamp stored with the entry.
        A missing or unparseable timestamp is treated as expired.
        """
        try:
            cache_time = datetime.fromisoformat(retrieved_at)
        except (TypeError, ValueError):
            # None / non-string (TypeError) or malformed text (ValueError)
            return False
        expiry_time = datetime.now() - timedelta(days=self.cache_expiry_days)
        return cache_time > expiry_time

    def _update_stats(self, stat_type: str):
        """Increment today's counter for ``stat_type`` by exactly one.

        Args:
            stat_type: One of ``cache_hits``, ``api_calls``,
                ``failed_lookups``.

        Raises:
            ValueError: If ``stat_type`` is not a known counter column.
        """
        if stat_type not in self._STAT_COLUMNS:
            raise ValueError(f"Unknown lookup statistic: {stat_type!r}")

        today = datetime.now().date().isoformat()

        # Make sure today's row exists with every counter at its default of
        # 0. Seeding the counter with 1 here and then running the UPDATE
        # below would count the first event of each day twice.
        self.conn.execute(
            'INSERT OR IGNORE INTO lookup_stats (date) VALUES (?)',
            (today,)
        )

        self.conn.execute(
            f'UPDATE lookup_stats SET {stat_type} = {stat_type} + 1 WHERE date = ?',
            (today,)
        )

        self.conn.commit()

    def get_smiles(self, identifier: str) -> Tuple[Optional[str], str, dict]:
        """Get canonical SMILES for a molecule identifier.

        Resolution order: fresh cache entry, then RDKit canonicalization of
        the input (only when that changes the string), then PubChem lookup
        by name and, failing that, by SMILES.

        Returns:
            ``(smiles, source, metadata)``. ``smiles`` is ``None`` when the
            lookup fails, in which case ``source`` is ``'error'`` or
            ``'not_found'`` and ``metadata`` carries an ``'error'`` message.
        """
        identifier = identifier.strip()
        identifier_lower = identifier.lower()

        # 1. Check cache first
        cursor = self.conn.execute('''
            SELECT smiles, canonical_smiles, name, iupac_name, formula,
                   molecular_weight, cid, retrieved_at, source
            FROM molecules WHERE identifier = ?
        ''', (identifier_lower,))

        result = cursor.fetchone()
        if result:
            retrieved_at = result[7]
            if self._is_cache_valid(retrieved_at):
                self._update_stats('cache_hits')
                logger.info(f"Cache hit for: {identifier}")

                metadata = {
                    'name': result[2],
                    'iupac_name': result[3],
                    'formula': result[4],
                    'molecular_weight': result[5],
                    'cid': result[6],
                    'source': result[8],
                    'cached': True
                }

                return result[1], result[8], metadata  # Return canonical_smiles

        # 2. Check if input is already a valid SMILES
        # NOTE(review): input that is already canonical (validated ==
        # identifier) is not short-circuited here and continues to the
        # PubChem lookup; the inequality also keeps plain names from being
        # accepted as SMILES when RDKit is unavailable and the input is
        # echoed back unchanged.
        validated_smiles = self.validate_smiles(identifier)
        if validated_smiles and validated_smiles != identifier:
            logger.info(f"Input was valid SMILES, canonicalized: {identifier} -> {validated_smiles}")

            # Cache the result
            metadata = {'source': 'input_smiles', 'cached': False}
            properties = self.get_molecular_properties(validated_smiles)
            metadata.update(properties)

            self._cache_result(identifier_lower, identifier, validated_smiles,
                               "User Input SMILES", "", "",
                               properties.get('molecular_weight'), None, 'input_smiles')

            return validated_smiles, 'input_smiles', metadata

        # 3. Fetch from PubChem using PubChemPy
        if not PUBCHEMPY_AVAILABLE:
            logger.error("PubChemPy not available for API lookup")
            self._update_stats('failed_lookups')
            return None, 'error', {'error': 'PubChemPy not available'}

        try:
            self._update_stats('api_calls')
            logger.info(f"PubChem API lookup for: {identifier}")

            # Try name lookup first
            compounds = pcp.get_compounds(identifier, 'name')

            # If name lookup fails, try as SMILES/InChI
            if not compounds:
                compounds = pcp.get_compounds(identifier, 'smiles')

            if compounds:
                compound = compounds[0]

                # Validate the SMILES from PubChem
                pubchem_smiles = compound.canonical_smiles
                validated_smiles = self.validate_smiles(pubchem_smiles)

                if validated_smiles:
                    # Get additional properties
                    properties = self.get_molecular_properties(validated_smiles)

                    # Cache the successful result
                    self._cache_result(
                        identifier_lower,
                        pubchem_smiles,
                        validated_smiles,
                        getattr(compound, 'iupac_name', '') or identifier,
                        getattr(compound, 'iupac_name', ''),
                        getattr(compound, 'molecular_formula', ''),
                        properties.get('molecular_weight') or getattr(compound, 'molecular_weight', None),
                        getattr(compound, 'cid', None),
                        'pubchem'
                    )

                    metadata = {
                        'name': identifier,
                        'iupac_name': getattr(compound, 'iupac_name', ''),
                        'formula': getattr(compound, 'molecular_formula', ''),
                        'molecular_weight': properties.get('molecular_weight') or getattr(compound, 'molecular_weight', None),
                        'cid': getattr(compound, 'cid', None),
                        'source': 'pubchem',
                        'cached': False
                    }
                    metadata.update(properties)

                    logger.info(f"PubChem lookup successful: {identifier} -> {validated_smiles}")
                    return validated_smiles, 'pubchem', metadata

        except Exception as e:
            logger.error(f"PubChem lookup failed for {identifier}: {e}")
            self._update_stats('failed_lookups')
            return None, 'error', {'error': str(e)}

        # 4. No results found
        logger.warning(f"No results found for: {identifier}")
        self._update_stats('failed_lookups')
        return None, 'not_found', {'error': 'No results found'}

    def _cache_result(self, identifier: str, original_smiles: str, canonical_smiles: str,
                      name: str, iupac_name: str, formula: str,
                      molecular_weight: Optional[float], cid: Optional[int], source: str):
        """Cache a successful lookup result.

        Best-effort: a failure to write the cache is logged and otherwise
        ignored so the lookup itself still succeeds.
        """
        try:
            self.conn.execute('''
                INSERT OR REPLACE INTO molecules
                (identifier, smiles, canonical_smiles, name, iupac_name, formula,
                 molecular_weight, cid, retrieved_at, source)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (identifier, original_smiles, canonical_smiles, name, iupac_name,
                  formula, molecular_weight, cid, datetime.now().isoformat(), source))

            self.conn.commit()
            logger.debug(f"Cached result for: {identifier}")
        except Exception as e:
            logger.error(f"Failed to cache result: {e}")

    def get_cache_stats(self) -> dict:
        """Get cache usage statistics.

        Returns a dict with the number of cached molecules (total, from
        PubChem, from input SMILES) and the summed daily counters for cache
        hits, API calls and failed lookups. Missing values are reported as 0.
        """
        cursor = self.conn.execute('''
            SELECT COUNT(*) as total_entries,
                   COUNT(CASE WHEN source = 'pubchem' THEN 1 END) as pubchem_entries,
                   COUNT(CASE WHEN source = 'input_smiles' THEN 1 END) as smiles_entries
            FROM molecules
        ''')

        cache_stats = cursor.fetchone()

        cursor = self.conn.execute('''
            SELECT SUM(cache_hits) as total_hits,
                   SUM(api_calls) as total_calls,
                   SUM(failed_lookups) as total_failures
            FROM lookup_stats
        ''')

        usage_stats = cursor.fetchone()

        # SUM() over an empty table yields NULL, hence the ``or 0`` fallbacks.
        return {
            'total_cached_molecules': cache_stats[0] or 0,
            'pubchem_entries': cache_stats[1] or 0,
            'smiles_entries': cache_stats[2] or 0,
            'total_cache_hits': usage_stats[0] or 0,
            'total_api_calls': usage_stats[1] or 0,
            'total_failures': usage_stats[2] or 0
        }
|
|
289
|
+
|
|
290
|
+
# Global instance
|
|
291
|
+
_lookup_instance = None
|
|
292
|
+
|
|
293
|
+
def get_lookup_instance():
    """Return the shared module-level MoleculeLookup, creating it on first use."""
    global _lookup_instance
    if _lookup_instance is not None:
        return _lookup_instance
    # First call: build the instance and remember it for later callers.
    _lookup_instance = MoleculeLookup()
    return _lookup_instance
|
|
299
|
+
|
|
300
|
+
def rowan_molecule_lookup(molecule_name: str, show_properties: bool = False) -> str:
    """Advanced molecule lookup with PubChem API, SQLite caching, and RDKit validation.

    Features:
    - PubChemPy integration for reliable API access
    - SQLite caching for faster repeated lookups
    - RDKit validation and canonicalization
    - Comprehensive molecular properties
    - Usage statistics and cache management

    Args:
        molecule_name: Name of the molecule (e.g., "aspirin", "taxol", "remdesivir").
            An empty or whitespace-only name returns a usage overview with
            cache statistics and dependency status instead of a lookup.
        show_properties: Include the molecular properties section in the
            output even when no property keys are present in the metadata.

    Returns:
        Markdown-formatted text: the lookup result with canonical SMILES, or
        an error / not-found message with troubleshooting suggestions.
    """
    # Output is collected as fragments and joined once at the end.
    parts = []

    if not molecule_name.strip():
        stats = get_lookup_instance().get_cache_stats()

        parts += [
            "**Advanced Molecule SMILES Lookup**\n\n",
            "**Features:**\n",
            "• PubChemPy integration - Official PubChem API access\n",
            "• SQLite caching - Faster repeated lookups\n",
            "• RDKit validation - Canonical SMILES standardization\n",
            "• Molecular properties - MW, LogP, H-bond donors/acceptors\n\n",
            "**Usage Examples:**\n",
            "• rowan_molecule_lookup('aspirin') - Look up pharmaceuticals\n",
            "• rowan_molecule_lookup('taxol') - Complex natural products\n",
            "• rowan_molecule_lookup('remdesivir') - Modern drugs\n",
            "• rowan_molecule_lookup('SMILES_STRING') - Validate existing SMILES\n\n",
            "**Cache Statistics:**\n",
            f"• Cached molecules: {stats['total_cached_molecules']}\n",
            f"• Cache hits: {stats['total_cache_hits']}\n",
            f"• API calls made: {stats['total_api_calls']}\n",
            f"• Failed lookups: {stats['total_failures']}\n\n",
            "**Dependencies Status:**\n",
            f"• PubChemPy: {'✓ Available' if PUBCHEMPY_AVAILABLE else '✗ Missing (pip install pubchempy)'}\n",
            f"• RDKit: {'✓ Available' if RDKIT_AVAILABLE else '✗ Missing (pip install rdkit)'}\n",
        ]
        return "".join(parts)

    lookup = get_lookup_instance()
    smiles, source, metadata = lookup.get_smiles(molecule_name)

    if source == 'error':
        parts += [
            f"**Lookup Error for '{molecule_name}'**\n\n",
            f"**Error:** {metadata.get('error', 'Unknown error')}\n\n",
            "**Troubleshooting:**\n",
            "• Check internet connection for PubChem access\n",
            "• Verify molecule name spelling\n",
            "• Try alternative names or systematic names\n",
        ]
        return "".join(parts)

    if source == 'not_found':
        parts += [
            f"**No results found for '{molecule_name}'**\n\n",
            "**Searched in:**\n",
            "• PubChem database (via PubChemPy)\n",
            "• Local SQLite cache\n\n",
            "**Suggestions:**\n",
            "• Check spelling of molecule name\n",
            "• Try alternative names (e.g., 'acetaminophen' vs 'paracetamol')\n",
            "• Try systematic IUPAC name\n",
            "• Try CAS registry number\n",
            "• If you have a SMILES string, it will be validated automatically\n",
        ]
        return "".join(parts)

    # Successful lookup
    source_names = {
        'pubchem': 'PubChem Database (via PubChemPy)',
        'input_smiles': 'Input SMILES Validation (RDKit)',
        'cache': 'Local Cache'
    }

    parts += [
        f"**SMILES lookup successful!** {'(Cached)' if metadata.get('cached') else ''}\n\n",
        f"**Molecule:** {molecule_name}\n",
        f"**Canonical SMILES:** {smiles}\n",
        f"**Source:** {source_names.get(source, source)}\n\n",
    ]

    # Add molecular information if available
    if metadata.get('name') and metadata['name'] != molecule_name:
        parts.append(f"**Common Name:** {metadata['name']}\n")

    if metadata.get('iupac_name'):
        parts.append(f"**IUPAC Name:** {metadata['iupac_name']}\n")

    if metadata.get('formula'):
        parts.append(f"**Formula:** {metadata['formula']}\n")

    if metadata.get('cid'):
        parts.append(f"**PubChem CID:** {metadata['cid']}\n")

    # Add molecular properties if requested or available
    if show_properties or any(key in metadata for key in ['molecular_weight', 'logp', 'hbd', 'hba']):
        parts.append("\n**Molecular Properties:**\n")

        weight = metadata.get('molecular_weight')
        if weight:
            # The weight may not be a float (e.g. a numeric string taken
            # from the PubChem record when RDKit is unavailable); applying
            # ':.2f' directly to a str raises, so coerce first and fall back
            # to the raw value if it is not numeric at all.
            try:
                parts.append(f"• Molecular Weight: {float(weight):.2f} g/mol\n")
            except (TypeError, ValueError):
                parts.append(f"• Molecular Weight: {weight} g/mol\n")

        if metadata.get('logp') is not None:
            parts.append(f"• LogP: {metadata['logp']:.2f}\n")

        if metadata.get('hbd') is not None:
            parts.append(f"• H-bond Donors: {metadata['hbd']}\n")

        if metadata.get('hba') is not None:
            parts.append(f"• H-bond Acceptors: {metadata['hba']}\n")

        if metadata.get('rotatable_bonds') is not None:
            parts.append(f"• Rotatable Bonds: {metadata['rotatable_bonds']}\n")

        if metadata.get('aromatic_rings') is not None:
            parts.append(f"• Aromatic Rings: {metadata['aromatic_rings']}\n")

    parts.append(f"\n**Usage:** Use '{smiles}' in Rowan calculations for consistent results\n")

    return "".join(parts)
|
|
422
|
+
|
|
423
|
+
def test_rowan_molecule_lookup():
    """Smoke-test the advanced molecule lookup function.

    Runs one real lookup ("phenol") and one statistics request (empty name)
    and checks that each report contains the section expected on success.
    ``rowan_molecule_lookup`` reports failures as text rather than raising,
    so the returned reports must be inspected; merely completing the call
    does not mean the lookup worked.

    Returns:
        True if both checks pass, False otherwise (including on exception).
    """
    try:
        print("Testing advanced molecule lookup...")

        # Test common molecule
        print("1. Testing phenol...")
        phenol_report = rowan_molecule_lookup("phenol")
        # Only the success report carries a canonical SMILES line.
        if "**Canonical SMILES:**" not in phenol_report:
            print(f"Advanced molecule lookup test failed: phenol lookup did not return a SMILES\n{phenol_report}")
            return False
        print("✓ Phenol lookup successful")

        # Test cache stats
        print("2. Testing cache statistics...")
        stats_report = rowan_molecule_lookup("")
        if "**Cache Statistics:**" not in stats_report:
            print(f"Advanced molecule lookup test failed: statistics report is missing\n{stats_report}")
            return False
        print("✓ Cache statistics successful")

        print("Advanced molecule lookup test successful!")
        return True
    except Exception as e:
        print(f"Advanced molecule lookup test failed: {e}")
        return False
|
|
443
|
+
|
|
444
|
+
if __name__ == "__main__":
|
|
445
|
+
test_rowan_molecule_lookup()
|
|
446
|
+
|