rowan-mcp 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rowan-mcp might be problematic. Click here for more details.

@@ -1,446 +1,57 @@
1
- """
2
- Advanced molecule lookup using PubChemPy + SQLite Cache + RDKit validation.
3
- """
1
+ from urllib.request import urlopen
2
+ from urllib.parse import quote
4
3
 
5
- import sqlite3
6
- import logging
7
- from datetime import datetime, timedelta
8
- from typing import Optional, Tuple
9
- import os
10
4
 
11
- # Set up logging
12
- logger = logging.getLogger(__name__)
13
-
14
- # Import dependencies with fallbacks
15
- try:
16
- import pubchempy as pcp
17
- PUBCHEMPY_AVAILABLE = True
18
- except ImportError:
19
- logger.warning("pubchempy not available - install with: pip install pubchempy")
20
- PUBCHEMPY_AVAILABLE = False
21
-
22
- try:
23
- from rdkit import Chem
24
- from rdkit.Chem import Descriptors
25
- RDKIT_AVAILABLE = True
26
- except ImportError:
27
- logger.warning("rdkit not available - install with: pip install rdkit")
28
- RDKIT_AVAILABLE = False
29
-
30
- class MoleculeLookup:
31
- """Molecule lookup with PubChem API, SQLite caching, and RDKit validation."""
32
-
33
- def __init__(self, cache_db: str = 'molecule_cache.db', cache_expiry_days: int = 30):
34
- """Initialize the molecule lookup system."""
35
- self.cache_expiry_days = cache_expiry_days
36
-
37
- # Create cache database
38
- cache_path = os.path.join(os.path.dirname(__file__), cache_db)
39
- self.conn = sqlite3.connect(cache_path, check_same_thread=False)
40
-
41
- # Create tables if they don't exist
42
- self.conn.execute('''
43
- CREATE TABLE IF NOT EXISTS molecules (
44
- identifier TEXT PRIMARY KEY,
45
- smiles TEXT,
46
- canonical_smiles TEXT,
47
- name TEXT,
48
- iupac_name TEXT,
49
- formula TEXT,
50
- molecular_weight REAL,
51
- cid INTEGER,
52
- retrieved_at TIMESTAMP,
53
- source TEXT
54
- )
55
- ''')
56
-
57
- self.conn.execute('''
58
- CREATE TABLE IF NOT EXISTS lookup_stats (
59
- date TEXT PRIMARY KEY,
60
- cache_hits INTEGER DEFAULT 0,
61
- api_calls INTEGER DEFAULT 0,
62
- failed_lookups INTEGER DEFAULT 0
63
- )
64
- ''')
65
-
66
- self.conn.commit()
67
- logger.info("Molecule lookup cache initialized")
68
-
69
- def validate_smiles(self, smiles: str) -> Optional[str]:
70
- """Validate and canonicalize SMILES using RDKit."""
71
- if not RDKIT_AVAILABLE:
72
- logger.warning("RDKit not available - returning SMILES as-is")
73
- return smiles
74
-
75
- try:
76
- mol = Chem.MolFromSmiles(smiles)
77
- if mol is not None:
78
- canonical = Chem.MolToSmiles(mol, canonical=True)
79
- logger.debug(f"SMILES validated: {smiles} -> {canonical}")
80
- return canonical
81
- except Exception as e:
82
- logger.warning(f"SMILES validation failed for {smiles}: {e}")
83
-
84
- return None
85
-
86
- def get_molecular_properties(self, smiles: str) -> dict:
87
- """Calculate molecular properties using RDKit."""
88
- if not RDKIT_AVAILABLE:
89
- return {}
90
-
91
- try:
92
- mol = Chem.MolFromSmiles(smiles)
93
- if mol is not None:
94
- return {
95
- 'molecular_weight': round(Descriptors.MolWt(mol), 2),
96
- 'logp': round(Descriptors.MolLogP(mol), 2),
97
- 'hbd': Descriptors.NumHDonors(mol),
98
- 'hba': Descriptors.NumHAcceptors(mol),
99
- 'rotatable_bonds': Descriptors.NumRotatableBonds(mol),
100
- 'aromatic_rings': Descriptors.NumAromaticRings(mol)
101
- }
102
- except Exception as e:
103
- logger.warning(f"Property calculation failed for {smiles}: {e}")
104
-
105
- return {}
106
-
107
- def _is_cache_valid(self, retrieved_at: str) -> bool:
108
- """Check if cache entry is still valid."""
109
- try:
110
- cache_time = datetime.fromisoformat(retrieved_at)
111
- expiry_time = datetime.now() - timedelta(days=self.cache_expiry_days)
112
- return cache_time > expiry_time
113
- except:
114
- return False
115
-
116
- def _update_stats(self, stat_type: str):
117
- """Update lookup statistics."""
118
- today = datetime.now().date().isoformat()
119
-
120
- # Insert or update today's stats
121
- self.conn.execute(f'''
122
- INSERT OR IGNORE INTO lookup_stats (date, {stat_type}) VALUES (?, 1)
123
- ''', (today,))
124
-
125
- self.conn.execute(f'''
126
- UPDATE lookup_stats SET {stat_type} = {stat_type} + 1 WHERE date = ?
127
- ''', (today,))
128
-
129
- self.conn.commit()
130
-
131
- def get_smiles(self, identifier: str) -> Tuple[Optional[str], str, dict]:
132
- """Get canonical SMILES for a molecule identifier."""
133
- identifier = identifier.strip()
134
- identifier_lower = identifier.lower()
135
-
136
- # 1. Check cache first
137
- cursor = self.conn.execute('''
138
- SELECT smiles, canonical_smiles, name, iupac_name, formula,
139
- molecular_weight, cid, retrieved_at, source
140
- FROM molecules WHERE identifier = ?
141
- ''', (identifier_lower,))
142
-
143
- result = cursor.fetchone()
144
- if result:
145
- retrieved_at = result[7]
146
- if self._is_cache_valid(retrieved_at):
147
- self._update_stats('cache_hits')
148
- logger.info(f"Cache hit for: {identifier}")
149
-
150
- metadata = {
151
- 'name': result[2],
152
- 'iupac_name': result[3],
153
- 'formula': result[4],
154
- 'molecular_weight': result[5],
155
- 'cid': result[6],
156
- 'source': result[8],
157
- 'cached': True
158
- }
159
-
160
- return result[1], result[8], metadata # Return canonical_smiles
161
-
162
- # 2. Check if input is already a valid SMILES
163
- validated_smiles = self.validate_smiles(identifier)
164
- if validated_smiles and validated_smiles != identifier:
165
- logger.info(f"Input was valid SMILES, canonicalized: {identifier} -> {validated_smiles}")
166
-
167
- # Cache the result
168
- metadata = {'source': 'input_smiles', 'cached': False}
169
- properties = self.get_molecular_properties(validated_smiles)
170
- metadata.update(properties)
171
-
172
- self._cache_result(identifier_lower, identifier, validated_smiles,
173
- "User Input SMILES", "", "",
174
- properties.get('molecular_weight'), None, 'input_smiles')
175
-
176
- return validated_smiles, 'input_smiles', metadata
177
-
178
- # 3. Fetch from PubChem using PubChemPy
179
- if not PUBCHEMPY_AVAILABLE:
180
- logger.error("PubChemPy not available for API lookup")
181
- self._update_stats('failed_lookups')
182
- return None, 'error', {'error': 'PubChemPy not available'}
183
-
184
- try:
185
- self._update_stats('api_calls')
186
- logger.info(f"PubChem API lookup for: {identifier}")
187
-
188
- # Try name lookup first
189
- compounds = pcp.get_compounds(identifier, 'name')
190
-
191
- # If name lookup fails, try as SMILES/InChI
192
- if not compounds:
193
- compounds = pcp.get_compounds(identifier, 'smiles')
194
-
195
- if compounds:
196
- compound = compounds[0]
197
-
198
- # Validate the SMILES from PubChem
199
- pubchem_smiles = compound.canonical_smiles
200
- validated_smiles = self.validate_smiles(pubchem_smiles)
201
-
202
- if validated_smiles:
203
- # Get additional properties
204
- properties = self.get_molecular_properties(validated_smiles)
205
-
206
- # Cache the successful result
207
- self._cache_result(
208
- identifier_lower,
209
- pubchem_smiles,
210
- validated_smiles,
211
- getattr(compound, 'iupac_name', '') or identifier,
212
- getattr(compound, 'iupac_name', ''),
213
- getattr(compound, 'molecular_formula', ''),
214
- properties.get('molecular_weight') or getattr(compound, 'molecular_weight', None),
215
- getattr(compound, 'cid', None),
216
- 'pubchem'
217
- )
218
-
219
- metadata = {
220
- 'name': identifier,
221
- 'iupac_name': getattr(compound, 'iupac_name', ''),
222
- 'formula': getattr(compound, 'molecular_formula', ''),
223
- 'molecular_weight': properties.get('molecular_weight') or getattr(compound, 'molecular_weight', None),
224
- 'cid': getattr(compound, 'cid', None),
225
- 'source': 'pubchem',
226
- 'cached': False
227
- }
228
- metadata.update(properties)
229
-
230
- logger.info(f"PubChem lookup successful: {identifier} -> {validated_smiles}")
231
- return validated_smiles, 'pubchem', metadata
232
-
233
- except Exception as e:
234
- logger.error(f"PubChem lookup failed for {identifier}: {e}")
235
- self._update_stats('failed_lookups')
236
- return None, 'error', {'error': str(e)}
237
-
238
- # 4. No results found
239
- logger.warning(f"No results found for: {identifier}")
240
- self._update_stats('failed_lookups')
241
- return None, 'not_found', {'error': 'No results found'}
5
+ def CIRconvert(ids):
6
+ """
7
+ Convert molecule name/identifier to SMILES using Chemical Identifier Resolver.
242
8
 
243
- def _cache_result(self, identifier: str, original_smiles: str, canonical_smiles: str,
244
- name: str, iupac_name: str, formula: str,
245
- molecular_weight: Optional[float], cid: Optional[int], source: str):
246
- """Cache a successful lookup result."""
247
- try:
248
- self.conn.execute('''
249
- INSERT OR REPLACE INTO molecules
250
- (identifier, smiles, canonical_smiles, name, iupac_name, formula,
251
- molecular_weight, cid, retrieved_at, source)
252
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
253
- ''', (identifier, original_smiles, canonical_smiles, name, iupac_name,
254
- formula, molecular_weight, cid, datetime.now().isoformat(), source))
255
-
256
- self.conn.commit()
257
- logger.debug(f"Cached result for: {identifier}")
258
- except Exception as e:
259
- logger.error(f"Failed to cache result: {e}")
9
+ Args:
10
+ ids (str): Molecule name or identifier (e.g., 'Aspirin', '3-Methylheptane', CAS numbers)
260
11
 
261
- def get_cache_stats(self) -> dict:
262
- """Get cache usage statistics."""
263
- cursor = self.conn.execute('''
264
- SELECT COUNT(*) as total_entries,
265
- COUNT(CASE WHEN source = 'pubchem' THEN 1 END) as pubchem_entries,
266
- COUNT(CASE WHEN source = 'input_smiles' THEN 1 END) as smiles_entries
267
- FROM molecules
268
- ''')
269
-
270
- cache_stats = cursor.fetchone()
271
-
272
- cursor = self.conn.execute('''
273
- SELECT SUM(cache_hits) as total_hits,
274
- SUM(api_calls) as total_calls,
275
- SUM(failed_lookups) as total_failures
276
- FROM lookup_stats
277
- ''')
278
-
279
- usage_stats = cursor.fetchone()
280
-
281
- return {
282
- 'total_cached_molecules': cache_stats[0] or 0,
283
- 'pubchem_entries': cache_stats[1] or 0,
284
- 'smiles_entries': cache_stats[2] or 0,
285
- 'total_cache_hits': usage_stats[0] or 0,
286
- 'total_api_calls': usage_stats[1] or 0,
287
- 'total_failures': usage_stats[2] or 0
288
- }
289
-
290
- # Global instance
291
- _lookup_instance = None
12
+ Returns:
13
+ str: SMILES string if found, 'Did not work' if failed
14
+ """
15
+ try:
16
+ url = 'http://cactus.nci.nih.gov/chemical/structure/' + quote(ids) + '/smiles'
17
+ ans = urlopen(url).read().decode('utf8')
18
+ return ans
19
+ except:
20
+ return 'Did not work'
292
21
 
293
- def get_lookup_instance():
294
- """Get or create the global MoleculeLookup instance."""
295
- global _lookup_instance
296
- if _lookup_instance is None:
297
- _lookup_instance = MoleculeLookup()
298
- return _lookup_instance
299
22
 
300
- def rowan_molecule_lookup(molecule_name: str, show_properties: bool = False) -> str:
301
- """Advanced molecule lookup with PubChem API, SQLite caching, and RDKit validation.
302
-
303
- Features:
304
- - PubChemPy integration for reliable API access
305
- - SQLite caching for faster repeated lookups
306
- - RDKit validation and canonicalization
307
- - Comprehensive molecular properties
308
- - Usage statistics and cache management
23
+ def rowan_molecule_lookup(molecule_name: str) -> str:
24
+ """
25
+ Convert a molecule name to SMILES using Chemical Identifier Resolver.
309
26
 
310
27
  Args:
311
- molecule_name: Name of the molecule (e.g., "aspirin", "taxol", "remdesivir")
312
- show_properties: Include molecular properties in output
28
+ molecule_name (str): Name of the molecule (e.g., 'aspirin', 'benzene')
313
29
 
314
30
  Returns:
315
- Comprehensive molecule information with canonical SMILES
31
+ str: SMILES notation, or error message if not found
316
32
  """
33
+ smiles = CIRconvert(molecule_name)
317
34
 
318
- if not molecule_name.strip():
319
- lookup = get_lookup_instance()
320
- stats = lookup.get_cache_stats()
321
-
322
- formatted = "**Advanced Molecule SMILES Lookup**\n\n"
323
- formatted += "**Features:**\n"
324
- formatted += "• PubChemPy integration - Official PubChem API access\n"
325
- formatted += "• SQLite caching - Faster repeated lookups\n"
326
- formatted += "• RDKit validation - Canonical SMILES standardization\n"
327
- formatted += "• Molecular properties - MW, LogP, H-bond donors/acceptors\n\n"
328
-
329
- formatted += "**Usage Examples:**\n"
330
- formatted += "• rowan_molecule_lookup('aspirin') - Look up pharmaceuticals\n"
331
- formatted += "• rowan_molecule_lookup('taxol') - Complex natural products\n"
332
- formatted += "• rowan_molecule_lookup('remdesivir') - Modern drugs\n"
333
- formatted += "• rowan_molecule_lookup('SMILES_STRING') - Validate existing SMILES\n\n"
334
-
335
- formatted += "**Cache Statistics:**\n"
336
- formatted += f"• Cached molecules: {stats['total_cached_molecules']}\n"
337
- formatted += f"• Cache hits: {stats['total_cache_hits']}\n"
338
- formatted += f"• API calls made: {stats['total_api_calls']}\n"
339
- formatted += f"• Failed lookups: {stats['total_failures']}\n\n"
340
-
341
- formatted += "**Dependencies Status:**\n"
342
- formatted += f"• PubChemPy: {'✓ Available' if PUBCHEMPY_AVAILABLE else '✗ Missing (pip install pubchempy)'}\n"
343
- formatted += f"• RDKit: {'✓ Available' if RDKIT_AVAILABLE else '✗ Missing (pip install rdkit)'}\n"
344
-
345
- return formatted
35
+ if smiles == 'Did not work':
36
+ return f"{molecule_name}: Not found"
37
+ else:
38
+ return smiles.strip() # Remove any trailing newlines
39
+
40
+
41
+ def batch_convert(identifiers):
42
+ """
43
+ Convert multiple molecule identifiers to SMILES.
346
44
 
347
- lookup = get_lookup_instance()
348
- smiles, source, metadata = lookup.get_smiles(molecule_name)
45
+ Args:
46
+ identifiers (list): List of molecule names/identifiers
349
47
 
350
- if source == 'error':
351
- formatted = f"**Lookup Error for '{molecule_name}'**\n\n"
352
- formatted += f"**Error:** {metadata.get('error', 'Unknown error')}\n\n"
353
- formatted += "**Troubleshooting:**\n"
354
- formatted += "• Check internet connection for PubChem access\n"
355
- formatted += "• Verify molecule name spelling\n"
356
- formatted += "• Try alternative names or systematic names\n"
357
- return formatted
48
+ Returns:
49
+ dict: Dictionary mapping identifiers to SMILES
50
+ """
51
+ results = {}
358
52
 
359
- elif source == 'not_found':
360
- formatted = f"**No results found for '{molecule_name}'**\n\n"
361
- formatted += "**Searched in:**\n"
362
- formatted += "• PubChem database (via PubChemPy)\n"
363
- formatted += "• Local SQLite cache\n\n"
364
- formatted += "**Suggestions:**\n"
365
- formatted += "• Check spelling of molecule name\n"
366
- formatted += "• Try alternative names (e.g., 'acetaminophen' vs 'paracetamol')\n"
367
- formatted += "• Try systematic IUPAC name\n"
368
- formatted += "• Try CAS registry number\n"
369
- formatted += "• If you have a SMILES string, it will be validated automatically\n"
370
- return formatted
53
+ for ids in identifiers:
54
+ results[ids] = CIRconvert(ids)
371
55
 
372
- else:
373
- source_names = {
374
- 'pubchem': 'PubChem Database (via PubChemPy)',
375
- 'input_smiles': 'Input SMILES Validation (RDKit)',
376
- 'cache': 'Local Cache'
377
- }
378
-
379
- formatted = f"**SMILES lookup successful!** {'(Cached)' if metadata.get('cached') else ''}\n\n"
380
- formatted += f"**Molecule:** {molecule_name}\n"
381
- formatted += f"**Canonical SMILES:** {smiles}\n"
382
- formatted += f"**Source:** {source_names.get(source, source)}\n\n"
383
-
384
- # Add molecular information if available
385
- if metadata.get('name') and metadata['name'] != molecule_name:
386
- formatted += f"**Common Name:** {metadata['name']}\n"
387
-
388
- if metadata.get('iupac_name'):
389
- formatted += f"**IUPAC Name:** {metadata['iupac_name']}\n"
390
-
391
- if metadata.get('formula'):
392
- formatted += f"**Formula:** {metadata['formula']}\n"
393
-
394
- if metadata.get('cid'):
395
- formatted += f"**PubChem CID:** {metadata['cid']}\n"
396
-
397
- # Add molecular properties if requested or available
398
- if show_properties or any(key in metadata for key in ['molecular_weight', 'logp', 'hbd', 'hba']):
399
- formatted += "\n**Molecular Properties:**\n"
400
-
401
- if metadata.get('molecular_weight'):
402
- formatted += f"• Molecular Weight: {metadata['molecular_weight']:.2f} g/mol\n"
403
-
404
- if metadata.get('logp') is not None:
405
- formatted += f"• LogP: {metadata['logp']:.2f}\n"
406
-
407
- if metadata.get('hbd') is not None:
408
- formatted += f"• H-bond Donors: {metadata['hbd']}\n"
409
-
410
- if metadata.get('hba') is not None:
411
- formatted += f"• H-bond Acceptors: {metadata['hba']}\n"
412
-
413
- if metadata.get('rotatable_bonds') is not None:
414
- formatted += f"• Rotatable Bonds: {metadata['rotatable_bonds']}\n"
415
-
416
- if metadata.get('aromatic_rings') is not None:
417
- formatted += f"• Aromatic Rings: {metadata['aromatic_rings']}\n"
418
-
419
- formatted += f"\n**Usage:** Use '{smiles}' in Rowan calculations for consistent results\n"
420
-
421
- return formatted
422
-
423
- def test_rowan_molecule_lookup():
424
- """Test the advanced molecule lookup function."""
425
- try:
426
- print("Testing advanced molecule lookup...")
427
-
428
- # Test common molecule
429
- print("1. Testing phenol...")
430
- result1 = rowan_molecule_lookup("phenol")
431
- print("✓ Phenol lookup successful")
432
-
433
- # Test cache stats
434
- print("2. Testing cache statistics...")
435
- result2 = rowan_molecule_lookup("")
436
- print("✓ Cache statistics successful")
437
-
438
- print("Advanced molecule lookup test successful!")
439
- return True
440
- except Exception as e:
441
- print(f"Advanced molecule lookup test failed: {e}")
442
- return False
443
-
444
- if __name__ == "__main__":
445
- test_rowan_molecule_lookup()
56
+ return results
446
57
 
@@ -15,7 +15,7 @@ if not hasattr(rowan, 'api_key') or not rowan.api_key:
15
15
  api_key = os.getenv("ROWAN_API_KEY")
16
16
  if api_key:
17
17
  rowan.api_key = api_key
18
- logger.info("🔑 Rowan API key configured")
18
+ logger.info("Rowan API key configured")
19
19
  else:
20
20
  logger.error("No ROWAN_API_KEY found in environment")
21
21
 
@@ -39,7 +39,7 @@ def rowan_pka(
39
39
 
40
40
  Args:
41
41
  name: Name for the calculation
42
- molecule: Molecule SMILES string or common name
42
+ molecule: Molecule SMILES string
43
43
  folder_uuid: UUID of folder to organize calculation in
44
44
  blocking: Whether to wait for completion (default: True)
45
45
  ping_interval: How often to check status in seconds (default: 5)
@@ -57,56 +57,7 @@ def rowan_pka(
57
57
  ping_interval=ping_interval
58
58
  )
59
59
 
60
- # Format results based on whether we waited or not
61
- if blocking:
62
- # We waited for completion - format actual results
63
- status = result.get('status', result.get('object_status', 'Unknown'))
64
-
65
- if status == 2: # Completed successfully
66
- formatted = f" pKa calculation for '{name}' completed successfully!\n\n"
67
- elif status == 3: # Failed
68
- formatted = f" pKa calculation for '{name}' failed!\n\n"
69
- else:
70
- formatted = f" pKa calculation for '{name}' finished with status {status}\n\n"
71
-
72
- formatted += f" Molecule: {molecule}\n"
73
- formatted += f" Job UUID: {result.get('uuid', 'N/A')}\n"
74
- formatted += f" Status: {status}\n"
75
-
76
- # Try to extract pKa results
77
- if isinstance(result, dict) and 'object_data' in result and result['object_data']:
78
- data = result['object_data']
79
-
80
- # Extract pKa values
81
- if 'strongest_acid' in data:
82
- if data['strongest_acid'] is not None:
83
- formatted += f" Strongest Acid pKa: {data['strongest_acid']:.2f}\n"
84
- else:
85
- formatted += f" Strongest Acid pKa: N/A (no acidic sites found)\n"
86
-
87
- if 'strongest_base' in data:
88
- if data['strongest_base'] is not None:
89
- formatted += f" Strongest Base pKa: {data['strongest_base']:.2f}\n"
90
- else:
91
- formatted += f" Strongest Base pKa: N/A (no basic sites found)\n"
92
- if 'pka_values' in data and isinstance(data['pka_values'], list):
93
- formatted += f" All pKa values: {', '.join([f'{val:.2f}' for val in data['pka_values']])}\n"
94
-
95
- # Additional properties if available
96
- if 'ionizable_sites' in data:
97
- formatted += f" Ionizable sites found: {data['ionizable_sites']}\n"
98
-
99
- # Basic guidance
100
- if status == 2:
101
- formatted += f"\n Use rowan_workflow_management(action='retrieve', workflow_uuid='{result.get('uuid')}') for detailed data\n"
102
- else:
103
- # Non-blocking mode - just submission confirmation
104
- formatted = f" pKa calculation for '{name}' submitted!\n\n"
105
- formatted += f" Molecule: {molecule}\n"
106
- formatted += f" Job UUID: {result.get('uuid', 'N/A')}\n"
107
- formatted += f" Status: {result.get('status', 'Submitted')}\n"
108
-
109
- return formatted
60
+ return str(result)
110
61
 
111
62
  except Exception as e:
112
63
  error_response = {
@@ -125,7 +76,7 @@ def test_rowan_pka():
125
76
  name="test_pka_water",
126
77
  molecule="O"
127
78
  )
128
- print("pKa test successful!")
79
+ print("pKa test successful")
129
80
  print(f"Result: {result}")
130
81
  return True
131
82
  except Exception as e: