rowan-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rowan-mcp might be problematic. Click here for more details.

Files changed (35) hide show
  1. rowan_mcp/__init__.py +14 -0
  2. rowan_mcp/__main__.py +14 -0
  3. rowan_mcp/functions/admet.py +94 -0
  4. rowan_mcp/functions/bde.py +113 -0
  5. rowan_mcp/functions/calculation_retrieve.py +89 -0
  6. rowan_mcp/functions/conformers.py +135 -0
  7. rowan_mcp/functions/descriptors.py +92 -0
  8. rowan_mcp/functions/docking.py +340 -0
  9. rowan_mcp/functions/docking_enhanced.py +174 -0
  10. rowan_mcp/functions/electronic_properties.py +263 -0
  11. rowan_mcp/functions/folder_management.py +137 -0
  12. rowan_mcp/functions/fukui.py +355 -0
  13. rowan_mcp/functions/hydrogen_bond_basicity.py +94 -0
  14. rowan_mcp/functions/irc.py +125 -0
  15. rowan_mcp/functions/macropka.py +195 -0
  16. rowan_mcp/functions/molecular_converter.py +423 -0
  17. rowan_mcp/functions/molecular_dynamics.py +191 -0
  18. rowan_mcp/functions/molecule_cache.db +0 -0
  19. rowan_mcp/functions/molecule_lookup.py +446 -0
  20. rowan_mcp/functions/multistage_opt.py +171 -0
  21. rowan_mcp/functions/pdb_handler.py +200 -0
  22. rowan_mcp/functions/pka.py +137 -0
  23. rowan_mcp/functions/redox_potential.py +352 -0
  24. rowan_mcp/functions/scan.py +536 -0
  25. rowan_mcp/functions/scan_analyzer.py +347 -0
  26. rowan_mcp/functions/solubility.py +277 -0
  27. rowan_mcp/functions/spin_states.py +747 -0
  28. rowan_mcp/functions/system_management.py +368 -0
  29. rowan_mcp/functions/tautomers.py +91 -0
  30. rowan_mcp/functions/workflow_management.py +422 -0
  31. rowan_mcp/server.py +169 -0
  32. rowan_mcp-0.1.0.dist-info/METADATA +216 -0
  33. rowan_mcp-0.1.0.dist-info/RECORD +35 -0
  34. rowan_mcp-0.1.0.dist-info/WHEEL +4 -0
  35. rowan_mcp-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,191 @@
1
+ """
2
+ Rowan molecular dynamics function for MCP tool integration.
3
+ """
4
+
5
+ from typing import Any, Dict, List, Optional
6
+ import rowan
7
+
8
def rowan_molecular_dynamics(
    name: str,
    molecule: str,
    ensemble: str = "nvt",
    initialization: str = "random",
    timestep: float = 1.0,
    num_steps: int = 500,
    save_interval: int = 10,
    temperature: float = 300.0,
    pressure: Optional[float] = None,
    langevin_thermostat_timescale: float = 100.0,
    berendsen_barostat_timescale: float = 1000.0,
    constraints: Optional[List[Dict[str, Any]]] = None,
    confining_constraint: Optional[Dict[str, Any]] = None,
    # Calculation settings parameters
    method: Optional[str] = None,
    basis_set: Optional[str] = None,
    engine: Optional[str] = None,
    charge: int = 0,
    multiplicity: int = 1,
    # Workflow control parameters
    folder_uuid: Optional[str] = None,
    blocking: bool = True,
    ping_interval: int = 5
) -> str:
    """Run molecular dynamics simulations following Rowan's MolecularDynamicsWorkflow.

    Performs MD simulations to study molecular dynamics, conformational sampling,
    and thermal properties using various thermodynamic ensembles.

    Args:
        name: Name for the calculation
        molecule: Molecule SMILES string or common name
        ensemble: Thermodynamic ensemble ("nvt", "npt", "nve"), case-insensitive
        initialization: Initial velocities ("random", "quasiclassical", "read"),
            case-insensitive
        timestep: Integration timestep in femtoseconds (must be > 0)
        num_steps: Number of MD steps to run (must be > 0)
        save_interval: Save trajectory every N steps (must be > 0)
        temperature: Temperature in Kelvin (must be > 0)
        pressure: Pressure in atm (required for NPT; must be > 0 when given)
        langevin_thermostat_timescale: Thermostat coupling timescale in fs
        berendsen_barostat_timescale: Barostat coupling timescale in fs
        constraints: List of pairwise harmonic constraints
        confining_constraint: Spherical harmonic constraint
        method: QM method for force calculation (defaults to "gfn2-xtb" for
            the xTB engine and "b3lyp" otherwise)
        basis_set: Basis set for force calculation (defaults to "def2-svp"
            for non-xTB engines; never sent for xTB)
        engine: Computational engine for force calculation (defaults to "xtb")
        charge: Molecular charge
        multiplicity: Spin multiplicity (must be >= 1)
        folder_uuid: Optional folder UUID for organization
        blocking: Whether to wait for completion
        ping_interval: Check status interval in seconds

    Example:
        result = rowan_molecular_dynamics(
            name="ethanol_md_simulation",
            molecule="ethanol",
            ensemble="NVT",
            temperature=298,
            num_steps=1000,
            blocking=False
        )

    Returns:
        On invalid input, a message starting with " Error:". Otherwise the
        string form of whatever ``rowan.compute`` returns, or the string form
        of an error dictionary if ``rowan.compute`` raises.
    """
    validation_error = _validate_md_inputs(
        ensemble=ensemble,
        initialization=initialization,
        timestep=timestep,
        num_steps=num_steps,
        save_interval=save_interval,
        temperature=temperature,
        pressure=pressure,
        multiplicity=multiplicity,
    )
    if validation_error is not None:
        return validation_error

    resolved_smiles = _resolve_molecule_smiles(molecule)

    # Apply smart defaults for MD calculations
    if engine is None:
        engine = "xtb"  # Default to xTB for fast MD forces
    engine_lower = engine.lower()
    uses_xtb = engine_lower == "xtb"
    if method is None:
        method = "gfn2-xtb" if uses_xtb else "b3lyp"
    if basis_set is None and not uses_xtb:
        basis_set = "def2-svp"

    # Build MD settings
    md_settings: Dict[str, Any] = {
        "ensemble": ensemble.lower(),
        "initialization": initialization.lower(),
        "timestep": timestep,
        "num_steps": num_steps,
        "save_interval": save_interval,
        "temperature": temperature,
        "langevin_thermostat_timescale": langevin_thermostat_timescale,
        "berendsen_barostat_timescale": berendsen_barostat_timescale,
    }

    # Optional fields are only sent when the caller supplied them
    if pressure is not None:
        md_settings["pressure"] = pressure
    if constraints:
        md_settings["constraints"] = constraints
    if confining_constraint:
        md_settings["confining_constraint"] = confining_constraint

    # Build calc_settings
    calc_settings: Dict[str, Any] = {
        "charge": charge,
        "multiplicity": multiplicity,
        "engine": engine_lower,
    }
    if method:
        calc_settings["method"] = method.lower()
    # A basis set is not needed for xTB, so it is dropped even if supplied
    if basis_set and not uses_xtb:
        calc_settings["basis_set"] = basis_set.lower()

    # Build parameters for Rowan API
    workflow_params: Dict[str, Any] = {
        "name": name,
        "molecule": resolved_smiles,
        "workflow_type": "molecular_dynamics",
        "settings": md_settings,
        "calc_settings": calc_settings,
        "folder_uuid": folder_uuid,
        "blocking": blocking,
        "ping_interval": ping_interval,
    }

    # Add calc_engine at top level (skipped only for an empty engine string)
    if engine:
        workflow_params["calc_engine"] = engine_lower

    try:
        # Submit molecular dynamics calculation to Rowan
        result = rowan.compute(**workflow_params)
        return str(result)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising
        error_response = {
            "success": False,
            "error": f"Molecular dynamics calculation failed: {str(e)}",
            "name": name,
            "molecule": molecule,
            "resolved_smiles": resolved_smiles,
        }
        return str(error_response)


def _validate_md_inputs(
    ensemble: str,
    initialization: str,
    timestep: float,
    num_steps: int,
    save_interval: int,
    temperature: float,
    pressure: Optional[float],
    multiplicity: int,
) -> Optional[str]:
    """Return the first validation error message, or None if all inputs are valid."""
    valid_ensembles = ["nvt", "npt", "nve"]
    valid_initializations = ["random", "quasiclassical", "read"]

    ensemble_lower = ensemble.lower()
    if ensemble_lower not in valid_ensembles:
        return f" Error: Invalid ensemble '{ensemble}'. Valid options: {', '.join(valid_ensembles)}"

    if initialization.lower() not in valid_initializations:
        return f" Error: Invalid initialization '{initialization}'. Valid options: {', '.join(valid_initializations)}"

    # All of these quantities must be strictly positive
    positive_checks = (
        ("timestep", timestep),
        ("num_steps", num_steps),
        ("save_interval", save_interval),
        ("temperature", temperature),
    )
    for label, value in positive_checks:
        if value <= 0:
            return f" Error: {label} must be positive (got {value})"

    # NPT cannot run without a target pressure
    if ensemble_lower == "npt" and pressure is None:
        return " Error: NPT ensemble requires pressure to be specified"
    if pressure is not None and pressure <= 0:
        return f" Error: pressure must be positive (got {pressure})"

    # Spin multiplicity is 2S + 1, so it can never be below 1
    if multiplicity < 1:
        return f" Error: multiplicity must be a positive integer (got {multiplicity})"

    return None


def _resolve_molecule_smiles(molecule: str) -> str:
    """Best-effort conversion of a name/SMILES to SMILES; falls back to the input."""
    try:
        from .molecule_lookup import get_lookup_instance
        smiles, _source, _metadata = get_lookup_instance().get_smiles(molecule)
    except Exception:
        # Lookup is optional: any failure means the raw input is submitted
        return molecule
    return smiles or molecule
Binary file
@@ -0,0 +1,446 @@
1
+ """
2
+ Advanced molecule lookup using PubChemPy + SQLite Cache + RDKit validation.
3
+ """
4
+
5
+ import sqlite3
6
+ import logging
7
+ from datetime import datetime, timedelta
8
+ from typing import Optional, Tuple
9
+ import os
10
+
11
+ # Set up logging
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Import dependencies with fallbacks
15
+ try:
16
+ import pubchempy as pcp
17
+ PUBCHEMPY_AVAILABLE = True
18
+ except ImportError:
19
+ logger.warning("pubchempy not available - install with: pip install pubchempy")
20
+ PUBCHEMPY_AVAILABLE = False
21
+
22
+ try:
23
+ from rdkit import Chem
24
+ from rdkit.Chem import Descriptors
25
+ RDKIT_AVAILABLE = True
26
+ except ImportError:
27
+ logger.warning("rdkit not available - install with: pip install rdkit")
28
+ RDKIT_AVAILABLE = False
29
+
30
class MoleculeLookup:
    """Molecule lookup with PubChem API, SQLite caching, and RDKit validation."""

    # Columns of ``lookup_stats`` that may be incremented. Column names cannot
    # be bound as SQL parameters, so they are checked against this list before
    # being interpolated into a statement.
    _STAT_COLUMNS = ("cache_hits", "api_calls", "failed_lookups")

    def __init__(self, cache_db: str = 'molecule_cache.db', cache_expiry_days: int = 30):
        """Open (or create) the SQLite cache stored next to this module.

        Args:
            cache_db: File name of the cache database, joined onto this
                module's directory.
            cache_expiry_days: Age in days after which a cached row is ignored.
        """
        self.cache_expiry_days = cache_expiry_days

        # Create cache database
        cache_path = os.path.join(os.path.dirname(__file__), cache_db)
        self.conn = sqlite3.connect(cache_path, check_same_thread=False)

        # Create tables if they don't exist
        self.conn.execute('''
            CREATE TABLE IF NOT EXISTS molecules (
                identifier TEXT PRIMARY KEY,
                smiles TEXT,
                canonical_smiles TEXT,
                name TEXT,
                iupac_name TEXT,
                formula TEXT,
                molecular_weight REAL,
                cid INTEGER,
                retrieved_at TIMESTAMP,
                source TEXT
            )
        ''')

        self.conn.execute('''
            CREATE TABLE IF NOT EXISTS lookup_stats (
                date TEXT PRIMARY KEY,
                cache_hits INTEGER DEFAULT 0,
                api_calls INTEGER DEFAULT 0,
                failed_lookups INTEGER DEFAULT 0
            )
        ''')

        self.conn.commit()
        logger.info("Molecule lookup cache initialized")

    def validate_smiles(self, smiles: str) -> Optional[str]:
        """Validate and canonicalize SMILES using RDKit.

        Returns the canonical SMILES, or None if RDKit rejects the input.
        When RDKit is not installed the input is returned unchanged.
        """
        if not RDKIT_AVAILABLE:
            logger.warning("RDKit not available - returning SMILES as-is")
            return smiles

        try:
            mol = Chem.MolFromSmiles(smiles)
            if mol is not None:
                canonical = Chem.MolToSmiles(mol, canonical=True)
                logger.debug(f"SMILES validated: {smiles} -> {canonical}")
                return canonical
        except Exception as e:
            logger.warning(f"SMILES validation failed for {smiles}: {e}")

        return None

    def get_molecular_properties(self, smiles: str) -> dict:
        """Calculate molecular properties using RDKit.

        Returns an empty dict when RDKit is unavailable, the SMILES cannot be
        parsed, or a descriptor calculation fails.
        """
        if not RDKIT_AVAILABLE:
            return {}

        try:
            mol = Chem.MolFromSmiles(smiles)
            if mol is not None:
                return {
                    'molecular_weight': round(Descriptors.MolWt(mol), 2),
                    'logp': round(Descriptors.MolLogP(mol), 2),
                    'hbd': Descriptors.NumHDonors(mol),
                    'hba': Descriptors.NumHAcceptors(mol),
                    'rotatable_bonds': Descriptors.NumRotatableBonds(mol),
                    'aromatic_rings': Descriptors.NumAromaticRings(mol)
                }
        except Exception as e:
            logger.warning(f"Property calculation failed for {smiles}: {e}")

        return {}

    def _is_cache_valid(self, retrieved_at: str) -> bool:
        """Check if cache entry is still valid.

        A missing or unparsable timestamp counts as expired.
        """
        try:
            cache_time = datetime.fromisoformat(retrieved_at)
        except (TypeError, ValueError):
            # TypeError: NULL/non-string value; ValueError: not ISO formatted
            return False
        expiry_time = datetime.now() - timedelta(days=self.cache_expiry_days)
        return cache_time > expiry_time

    def _update_stats(self, stat_type: str):
        """Increment today's counter for ``stat_type`` by exactly one.

        Raises:
            ValueError: if ``stat_type`` is not a known statistics column.
        """
        if stat_type not in self._STAT_COLUMNS:
            raise ValueError(f"Unknown statistic column: {stat_type!r}")

        today = datetime.now().date().isoformat()

        # Make sure today's row exists with every counter at its DEFAULT 0.
        # Seeding the counter with 1 here and then incrementing it below
        # would count the first event of each day twice.
        self.conn.execute(
            'INSERT OR IGNORE INTO lookup_stats (date) VALUES (?)', (today,)
        )
        self.conn.execute(
            f'UPDATE lookup_stats SET {stat_type} = {stat_type} + 1 WHERE date = ?',
            (today,),
        )

        self.conn.commit()

    def get_smiles(self, identifier: str) -> Tuple[Optional[str], str, dict]:
        """Get canonical SMILES for a molecule identifier.

        Resolution order: local cache, direct SMILES canonicalization, then
        PubChem (by name, then by SMILES).

        Returns:
            ``(smiles, source, metadata)``. ``smiles`` is None when nothing
            was resolved. ``source`` is the cached row's source,
            ``'input_smiles'``, ``'pubchem'``, ``'error'`` or ``'not_found'``.
            On failure ``metadata`` carries an ``'error'`` message.
        """
        identifier = identifier.strip()

        # Parse up front only when RDKit can actually judge the input.
        parsed_smiles = self.validate_smiles(identifier) if RDKIT_AVAILABLE else None

        # Names are cached case-insensitively, but SMILES are case-sensitive
        # ("C1CCCCC1" is cyclohexane, "c1ccccc1" is benzene), so an input that
        # RDKit accepts as SMILES keeps its exact spelling as the cache key.
        cache_key = identifier if parsed_smiles else identifier.lower()

        # 1. Check cache first
        cursor = self.conn.execute('''
            SELECT smiles, canonical_smiles, name, iupac_name, formula,
                   molecular_weight, cid, retrieved_at, source
            FROM molecules WHERE identifier = ?
        ''', (cache_key,))

        row = cursor.fetchone()
        if row:
            (_stored_smiles, cached_canonical, cached_name, cached_iupac,
             cached_formula, cached_weight, cached_cid, retrieved_at,
             cached_source) = row
            if self._is_cache_valid(retrieved_at):
                self._update_stats('cache_hits')
                logger.info(f"Cache hit for: {identifier}")

                metadata = {
                    'name': cached_name,
                    'iupac_name': cached_iupac,
                    'formula': cached_formula,
                    'molecular_weight': cached_weight,
                    'cid': cached_cid,
                    'source': cached_source,
                    'cached': True
                }

                return cached_canonical, cached_source, metadata

        # 2. Check if input is already a valid SMILES.
        # Only an input that canonicalization *changes* is accepted here; one
        # returned unchanged (which is every input when RDKit is missing)
        # continues to the PubChem lookup below.
        if RDKIT_AVAILABLE:
            validated_smiles = parsed_smiles
        else:
            validated_smiles = self.validate_smiles(identifier)
        if validated_smiles and validated_smiles != identifier:
            logger.info(f"Input was valid SMILES, canonicalized: {identifier} -> {validated_smiles}")

            # Cache the result
            metadata = {'source': 'input_smiles', 'cached': False}
            properties = self.get_molecular_properties(validated_smiles)
            metadata.update(properties)

            self._cache_result(cache_key, identifier, validated_smiles,
                               "User Input SMILES", "", "",
                               properties.get('molecular_weight'), None, 'input_smiles')

            return validated_smiles, 'input_smiles', metadata

        # 3. Fetch from PubChem using PubChemPy
        if not PUBCHEMPY_AVAILABLE:
            logger.error("PubChemPy not available for API lookup")
            self._update_stats('failed_lookups')
            return None, 'error', {'error': 'PubChemPy not available'}

        try:
            self._update_stats('api_calls')
            logger.info(f"PubChem API lookup for: {identifier}")

            # Try name lookup first
            compounds = pcp.get_compounds(identifier, 'name')

            # If name lookup fails, try as SMILES/InChI
            if not compounds:
                compounds = pcp.get_compounds(identifier, 'smiles')

            if compounds:
                compound = compounds[0]

                # Validate the SMILES from PubChem
                pubchem_smiles = compound.canonical_smiles
                validated_smiles = self.validate_smiles(pubchem_smiles)

                if validated_smiles:
                    # Get additional properties; RDKit's weight wins over PubChem's
                    properties = self.get_molecular_properties(validated_smiles)
                    iupac_name = getattr(compound, 'iupac_name', '')
                    formula = getattr(compound, 'molecular_formula', '')
                    weight = (properties.get('molecular_weight')
                              or getattr(compound, 'molecular_weight', None))
                    cid = getattr(compound, 'cid', None)

                    # Cache the successful result
                    self._cache_result(
                        cache_key,
                        pubchem_smiles,
                        validated_smiles,
                        iupac_name or identifier,
                        iupac_name,
                        formula,
                        weight,
                        cid,
                        'pubchem'
                    )

                    metadata = {
                        'name': identifier,
                        'iupac_name': iupac_name,
                        'formula': formula,
                        'molecular_weight': weight,
                        'cid': cid,
                        'source': 'pubchem',
                        'cached': False
                    }
                    metadata.update(properties)

                    logger.info(f"PubChem lookup successful: {identifier} -> {validated_smiles}")
                    return validated_smiles, 'pubchem', metadata

        except Exception as e:
            logger.error(f"PubChem lookup failed for {identifier}: {e}")
            self._update_stats('failed_lookups')
            return None, 'error', {'error': str(e)}

        # 4. No results found (or PubChem's SMILES failed validation)
        logger.warning(f"No results found for: {identifier}")
        self._update_stats('failed_lookups')
        return None, 'not_found', {'error': 'No results found'}

    def _cache_result(self, identifier: str, original_smiles: str, canonical_smiles: str,
                      name: str, iupac_name: str, formula: str,
                      molecular_weight: Optional[float], cid: Optional[int], source: str):
        """Cache a successful lookup result.

        Replaces any existing row for ``identifier`` and stamps it with the
        current local time. Failures are logged, never raised.
        """
        try:
            self.conn.execute('''
                INSERT OR REPLACE INTO molecules
                (identifier, smiles, canonical_smiles, name, iupac_name, formula,
                 molecular_weight, cid, retrieved_at, source)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (identifier, original_smiles, canonical_smiles, name, iupac_name,
                  formula, molecular_weight, cid, datetime.now().isoformat(), source))

            self.conn.commit()
            logger.debug(f"Cached result for: {identifier}")
        except Exception as e:
            logger.error(f"Failed to cache result: {e}")

    def get_cache_stats(self) -> dict:
        """Get cache usage statistics.

        Returns a dict of integer counters; empty tables yield zeros.
        """
        cursor = self.conn.execute('''
            SELECT COUNT(*) as total_entries,
                   COUNT(CASE WHEN source = 'pubchem' THEN 1 END) as pubchem_entries,
                   COUNT(CASE WHEN source = 'input_smiles' THEN 1 END) as smiles_entries
            FROM molecules
        ''')

        cache_stats = cursor.fetchone()

        cursor = self.conn.execute('''
            SELECT SUM(cache_hits) as total_hits,
                   SUM(api_calls) as total_calls,
                   SUM(failed_lookups) as total_failures
            FROM lookup_stats
        ''')

        usage_stats = cursor.fetchone()

        # SUM() over an empty table is NULL, hence the "or 0" fallbacks
        return {
            'total_cached_molecules': cache_stats[0] or 0,
            'pubchem_entries': cache_stats[1] or 0,
            'smiles_entries': cache_stats[2] or 0,
            'total_cache_hits': usage_stats[0] or 0,
            'total_api_calls': usage_stats[1] or 0,
            'total_failures': usage_stats[2] or 0
        }
289
+
290
# Process-wide lookup object, created on first use by get_lookup_instance()
_lookup_instance = None


def get_lookup_instance():
    """Return the shared MoleculeLookup, constructing it on the first call."""
    global _lookup_instance
    instance = _lookup_instance
    if instance is None:
        # First caller pays for opening the cache; later callers reuse it
        instance = _lookup_instance = MoleculeLookup()
    return instance
299
+
300
def rowan_molecule_lookup(molecule_name: str, show_properties: bool = False) -> str:
    """Advanced molecule lookup with PubChem API, SQLite caching, and RDKit validation.

    Features:
    - PubChemPy integration for reliable API access
    - SQLite caching for faster repeated lookups
    - RDKit validation and canonicalization
    - Comprehensive molecular properties
    - Usage statistics and cache management

    Args:
        molecule_name: Name of the molecule (e.g., "aspirin", "taxol", "remdesivir").
            A blank value returns a help page with cache statistics instead.
        show_properties: Always include the molecular-properties section, even
            when no property is available. Available properties are listed
            regardless of this flag.

    Returns:
        Markdown-style text: the lookup result with canonical SMILES, or an
        error / not-found message with suggestions.
    """
    if not molecule_name.strip():
        stats = get_lookup_instance().get_cache_stats()
        pubchempy_status = '✓ Available' if PUBCHEMPY_AVAILABLE else '✗ Missing (pip install pubchempy)'
        rdkit_status = '✓ Available' if RDKIT_AVAILABLE else '✗ Missing (pip install rdkit)'

        return "".join([
            "**Advanced Molecule SMILES Lookup**\n\n",
            "**Features:**\n",
            "• PubChemPy integration - Official PubChem API access\n",
            "• SQLite caching - Faster repeated lookups\n",
            "• RDKit validation - Canonical SMILES standardization\n",
            "• Molecular properties - MW, LogP, H-bond donors/acceptors\n\n",
            "**Usage Examples:**\n",
            "• rowan_molecule_lookup('aspirin') - Look up pharmaceuticals\n",
            "• rowan_molecule_lookup('taxol') - Complex natural products\n",
            "• rowan_molecule_lookup('remdesivir') - Modern drugs\n",
            "• rowan_molecule_lookup('SMILES_STRING') - Validate existing SMILES\n\n",
            "**Cache Statistics:**\n",
            f"• Cached molecules: {stats['total_cached_molecules']}\n",
            f"• Cache hits: {stats['total_cache_hits']}\n",
            f"• API calls made: {stats['total_api_calls']}\n",
            f"• Failed lookups: {stats['total_failures']}\n\n",
            "**Dependencies Status:**\n",
            f"• PubChemPy: {pubchempy_status}\n",
            f"• RDKit: {rdkit_status}\n",
        ])

    smiles, source, metadata = get_lookup_instance().get_smiles(molecule_name)

    if source == 'error':
        return "".join([
            f"**Lookup Error for '{molecule_name}'**\n\n",
            f"**Error:** {metadata.get('error', 'Unknown error')}\n\n",
            "**Troubleshooting:**\n",
            "• Check internet connection for PubChem access\n",
            "• Verify molecule name spelling\n",
            "• Try alternative names or systematic names\n",
        ])

    if source == 'not_found':
        return "".join([
            f"**No results found for '{molecule_name}'**\n\n",
            "**Searched in:**\n",
            "• PubChem database (via PubChemPy)\n",
            "• Local SQLite cache\n\n",
            "**Suggestions:**\n",
            "• Check spelling of molecule name\n",
            "• Try alternative names (e.g., 'acetaminophen' vs 'paracetamol')\n",
            "• Try systematic IUPAC name\n",
            "• Try CAS registry number\n",
            "• If you have a SMILES string, it will be validated automatically\n",
        ])

    source_names = {
        'pubchem': 'PubChem Database (via PubChemPy)',
        'input_smiles': 'Input SMILES Validation (RDKit)',
        'cache': 'Local Cache'
    }

    parts = [
        f"**SMILES lookup successful!** {'(Cached)' if metadata.get('cached') else ''}\n\n",
        f"**Molecule:** {molecule_name}\n",
        f"**Canonical SMILES:** {smiles}\n",
        f"**Source:** {source_names.get(source, source)}\n\n",
    ]

    # Add molecular information if available
    if metadata.get('name') and metadata['name'] != molecule_name:
        parts.append(f"**Common Name:** {metadata['name']}\n")
    if metadata.get('iupac_name'):
        parts.append(f"**IUPAC Name:** {metadata['iupac_name']}\n")
    if metadata.get('formula'):
        parts.append(f"**Formula:** {metadata['formula']}\n")
    if metadata.get('cid'):
        parts.append(f"**PubChem CID:** {metadata['cid']}\n")

    # Collect the property lines first so the section header is never
    # printed with nothing underneath it.
    property_lines = []

    weight = metadata.get('molecular_weight')
    if weight:
        # NOTE(review): the weight may arrive as a string when it comes
        # straight from PubChem rather than RDKit - confirm against the
        # installed PubChemPy. Coerce it so ":.2f" cannot raise.
        try:
            weight_text = f"{float(weight):.2f}"
        except (TypeError, ValueError):
            weight_text = str(weight)
        property_lines.append(f"• Molecular Weight: {weight_text} g/mol\n")

    logp = metadata.get('logp')
    if logp is not None:
        property_lines.append(f"• LogP: {logp:.2f}\n")

    count_properties = (
        ('hbd', 'H-bond Donors'),
        ('hba', 'H-bond Acceptors'),
        ('rotatable_bonds', 'Rotatable Bonds'),
        ('aromatic_rings', 'Aromatic Rings'),
    )
    for key, label in count_properties:
        value = metadata.get(key)
        if value is not None:
            property_lines.append(f"• {label}: {value}\n")

    if property_lines or show_properties:
        parts.append("\n**Molecular Properties:**\n")
        parts.extend(property_lines or ["• No molecular properties available\n"])

    parts.append(f"\n**Usage:** Use '{smiles}' in Rowan calculations for consistent results\n")

    return "".join(parts)
422
+
423
def test_rowan_molecule_lookup():
    """Smoke-test rowan_molecule_lookup with a molecule name and a blank query.

    Only checks that neither call raises; the returned text is not inspected.
    Returns True when both calls complete, False if any exception occurs.
    """
    # (message before the call, query, message after the call)
    steps = (
        ("1. Testing phenol...", "phenol", "✓ Phenol lookup successful"),
        ("2. Testing cache statistics...", "", "✓ Cache statistics successful"),
    )

    try:
        print("Testing advanced molecule lookup...")
        for announcement, query, confirmation in steps:
            print(announcement)
            rowan_molecule_lookup(query)
            print(confirmation)
        print("Advanced molecule lookup test successful!")
    except Exception as exc:
        print(f"Advanced molecule lookup test failed: {exc}")
        return False
    return True


if __name__ == "__main__":
    # Allow running this module directly as a quick manual check
    test_rowan_molecule_lookup()
446
+