rowan-mcp 1.0.2__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rowan-mcp might be problematic. Click here for more details.

Files changed (57)
  1. rowan_mcp/__init__.py +1 -1
  2. rowan_mcp/__main__.py +3 -5
  3. rowan_mcp/functions/admet.py +0 -5
  4. rowan_mcp/functions/bde.py +1 -8
  5. rowan_mcp/functions/conformers.py +1 -4
  6. rowan_mcp/functions/descriptors.py +1 -4
  7. rowan_mcp/functions/docking.py +6 -56
  8. rowan_mcp/functions/electronic_properties.py +1 -4
  9. rowan_mcp/functions/folder_management.py +1 -8
  10. rowan_mcp/functions/fukui.py +1 -4
  11. rowan_mcp/functions/hydrogen_bond_basicity.py +1 -8
  12. rowan_mcp/functions/multistage_opt.py +1 -4
  13. rowan_mcp/functions/pka.py +1 -8
  14. rowan_mcp/functions/redox_potential.py +2 -5
  15. rowan_mcp/functions/system_management.py +1 -8
  16. rowan_mcp/functions/tautomers.py +1 -4
  17. rowan_mcp/functions_v2/BENCHMARK.md +86 -0
  18. rowan_mcp/functions_v2/molecule_lookup.py +232 -0
  19. rowan_mcp/functions_v2/protein_management.py +141 -0
  20. rowan_mcp/functions_v2/submit_basic_calculation_workflow.py +195 -0
  21. rowan_mcp/functions_v2/submit_conformer_search_workflow.py +158 -0
  22. rowan_mcp/functions_v2/submit_descriptors_workflow.py +52 -0
  23. rowan_mcp/functions_v2/submit_docking_workflow.py +244 -0
  24. rowan_mcp/functions_v2/submit_fukui_workflow.py +114 -0
  25. rowan_mcp/functions_v2/submit_irc_workflow.py +58 -0
  26. rowan_mcp/functions_v2/submit_macropka_workflow.py +99 -0
  27. rowan_mcp/functions_v2/submit_pka_workflow.py +72 -0
  28. rowan_mcp/functions_v2/submit_protein_cofolding_workflow.py +88 -0
  29. rowan_mcp/functions_v2/submit_redox_potential_workflow.py +55 -0
  30. rowan_mcp/functions_v2/submit_scan_workflow.py +82 -0
  31. rowan_mcp/functions_v2/submit_solubility_workflow.py +157 -0
  32. rowan_mcp/functions_v2/submit_tautomer_search_workflow.py +51 -0
  33. rowan_mcp/functions_v2/workflow_management_v2.py +382 -0
  34. rowan_mcp/server.py +109 -144
  35. rowan_mcp/tests/basic_calculation_from_json.py +0 -0
  36. rowan_mcp/tests/basic_calculation_with_constraint.py +33 -0
  37. rowan_mcp/tests/basic_calculation_with_solvent.py +0 -0
  38. rowan_mcp/tests/bde.py +37 -0
  39. rowan_mcp/tests/benchmark_queries.md +120 -0
  40. rowan_mcp/tests/cofolding_screen.py +131 -0
  41. rowan_mcp/tests/conformer_dependent_redox.py +37 -0
  42. rowan_mcp/tests/conformers.py +31 -0
  43. rowan_mcp/tests/data.json +189 -0
  44. rowan_mcp/tests/docking_screen.py +157 -0
  45. rowan_mcp/tests/irc.py +24 -0
  46. rowan_mcp/tests/macropka.py +13 -0
  47. rowan_mcp/tests/multistage_opt.py +13 -0
  48. rowan_mcp/tests/optimization.py +21 -0
  49. rowan_mcp/tests/phenol_pka.py +36 -0
  50. rowan_mcp/tests/pka.py +36 -0
  51. rowan_mcp/tests/protein_cofolding.py +17 -0
  52. rowan_mcp/tests/scan.py +28 -0
  53. {rowan_mcp-1.0.2.dist-info → rowan_mcp-2.0.1.dist-info}/METADATA +38 -45
  54. rowan_mcp-2.0.1.dist-info/RECORD +69 -0
  55. rowan_mcp-1.0.2.dist-info/RECORD +0 -34
  56. {rowan_mcp-1.0.2.dist-info → rowan_mcp-2.0.1.dist-info}/WHEEL +0 -0
  57. {rowan_mcp-1.0.2.dist-info → rowan_mcp-2.0.1.dist-info}/entry_points.txt +0 -0
rowan_mcp/__init__.py CHANGED
@@ -5,7 +5,7 @@ This package provides MCP (Model Context Protocol) server functionality
5
5
  for integrating with Rowan's computational chemistry platform.
6
6
  """
7
7
 
8
- __version__ = "1.0.0"
8
+ __version__ = "1.0.2"
9
9
  __author__ = "Kat Yenko"
10
10
  __description__ = "MCP server for Rowan computational chemistry platform"
11
11
 
rowan_mcp/__main__.py CHANGED
@@ -2,13 +2,11 @@
2
2
  Main entry point for Rowan MCP Server when run as a module.
3
3
 
4
4
  Usage:
5
- python -m src # STDIO mode (default)
6
- python -m src --stdio # STDIO mode
7
- python -m src --http # HTTP mode
8
- python -m src --help # Show help
5
+ python -m rowan_mcp # HTTP/SSE mode
6
+ python -m rowan_mcp --help # Show help
9
7
  """
10
8
 
11
9
  if __name__ == "__main__":
12
- # All modes now handled by the unified server
10
+ # HTTP transport only
13
11
  from .server import main
14
12
  main()
rowan_mcp/functions/admet.py CHANGED
@@ -16,11 +16,6 @@ except ImportError:
16
16
  # Setup logging
17
17
  logger = logging.getLogger(__name__)
18
18
 
19
- # Setup API key
20
- api_key = os.getenv("ROWAN_API_KEY")
21
- if rowan and api_key:
22
- rowan.api_key = api_key
23
-
24
19
 
25
20
  def log_rowan_api_call(workflow_type: str, **kwargs):
26
21
  """Log Rowan API calls with detailed parameters."""
rowan_mcp/functions/bde.py CHANGED
@@ -10,14 +10,7 @@ from typing import Optional, List, Union
10
10
  import logging
11
11
  logger = logging.getLogger(__name__)
12
12
 
13
- # Configure rowan API key
14
- if not hasattr(rowan, 'api_key') or not rowan.api_key:
15
- api_key = os.getenv("ROWAN_API_KEY")
16
- if api_key:
17
- rowan.api_key = api_key
18
- logger.info("🔑 Rowan API key configured")
19
- else:
20
- logger.error("No ROWAN_API_KEY found in environment")
13
+
21
14
 
22
15
  def log_rowan_api_call(workflow_type: str, **kwargs):
23
16
  """Log Rowan API calls and let Rowan handle its own errors."""
rowan_mcp/functions/conformers.py CHANGED
@@ -15,10 +15,7 @@ except ImportError:
15
15
  # Setup logging
16
16
  logger = logging.getLogger(__name__)
17
17
 
18
- # Setup API key
19
- api_key = os.getenv("ROWAN_API_KEY")
20
- if rowan and api_key:
21
- rowan.api_key = api_key
18
+
22
19
 
23
20
  def log_rowan_api_call(workflow_type: str, **kwargs):
24
21
  """Log Rowan API calls and let Rowan handle its own errors."""
rowan_mcp/functions/descriptors.py CHANGED
@@ -15,10 +15,7 @@ except ImportError:
15
15
  # Setup logging
16
16
  logger = logging.getLogger(__name__)
17
17
 
18
- # Setup API key
19
- api_key = os.getenv("ROWAN_API_KEY")
20
- if rowan and api_key:
21
- rowan.api_key = api_key
18
+
22
19
 
23
20
  def log_rowan_api_call(workflow_type: str, **kwargs):
24
21
  """Log Rowan API calls with detailed parameters."""
rowan_mcp/functions/docking.py CHANGED
@@ -192,63 +192,13 @@ def rowan_docking(
192
192
  if conformers is not None:
193
193
  compute_params["conformers"] = conformers
194
194
 
195
- # Submit docking calculation
195
+ # Submit docking calculation and return raw result
196
196
  result = rowan.compute(**compute_params)
197
-
198
- # Format results
199
- uuid = result.get('uuid', 'N/A')
200
- status = result.get('status', 'unknown')
201
-
202
- if blocking:
203
- # Blocking mode - check if successful
204
- if status == "success":
205
- formatted = f"✅ Docking calculation '{name}' completed successfully!\n"
206
- formatted += f"🔖 Workflow UUID: {uuid}\n"
207
- formatted += f"📊 Status: {status}\n\n"
208
-
209
- # Extract docking results if available
210
- object_data = result.get("object_data", {})
211
- scores = object_data.get("scores", [])
212
-
213
- if scores:
214
- formatted += f"🎯 Docking Results: {len(scores)} poses generated\n"
215
- formatted += f"📈 Best docking score: {scores[0] if scores else 'N/A'}\n"
216
-
217
- # Show top poses
218
- formatted += "\nTop poses:\n"
219
- for i, score in enumerate(scores[:5]):
220
- formatted += f" {i+1}. Score: {score}\n"
221
-
222
- if len(scores) > 5:
223
- formatted += f" ... and {len(scores) - 5} more poses\n"
224
- else:
225
- formatted += "📈 Results: Check workflow details for docking data\n"
226
-
227
- return formatted
228
- else:
229
- # Failed calculation
230
- return f"❌ Docking calculation failed\n🔖 UUID: {uuid}\n📋 Status: {status}\n💬 Check workflow details for more information"
231
- else:
232
- # Non-blocking mode
233
- formatted = f"📋 Docking calculation '{name}' submitted!\n"
234
- formatted += f"🔖 Workflow UUID: {uuid}\n"
235
- formatted += f"⏳ Status: Running...\n"
236
- formatted += f"💡 Use rowan_workflow_management to check status\n\n"
237
-
238
- formatted += f"Docking Details:\n"
239
- formatted += f"🧬 Ligand: {initial_molecule}\n"
240
- formatted += f"🎯 Target: {target_uuid or target[:50] + '...' if target and len(target) > 50 else target}\n"
241
- formatted += f"📍 Pocket: center={pocket[0]}, size={pocket[1]}\n"
242
- formatted += f"⚙️ Settings: csearch={do_csearch}, optimize={do_optimization}, refine={do_pose_refinement}\n"
243
-
244
- if conformers:
245
- formatted += f"🔬 Pre-optimized conformers: {len(conformers)}\n"
246
-
247
- return formatted
197
+ return result
248
198
 
249
199
  except Exception as e:
250
200
  logger.error(f"Error in rowan_docking: {str(e)}")
251
- return f"Docking calculation failed: {str(e)}"
201
+ return f"Docking calculation failed: {str(e)}"
252
202
 
253
203
  def rowan_docking_pdb_id(
254
204
  name: str,
@@ -297,7 +247,7 @@ def rowan_docking_pdb_id(
297
247
  else:
298
248
  ligand_param = f" smiles={smiles},\n"
299
249
 
300
- return (f"PDB {pdb_id} found in RCSB database!\n\n"
250
+ return (f"PDB {pdb_id} found in RCSB database!\n\n"
301
251
  f"To perform docking with this protein:\n\n"
302
252
  f"1. Go to https://labs.rowansci.com\n"
303
253
  f"2. Upload the PDB file for {pdb_id}\n"
@@ -329,11 +279,11 @@ def test_rowan_docking():
329
279
  pocket=((0.0, 0.0, 0.0), (20.0, 20.0, 20.0)),
330
280
  blocking=False
331
281
  )
332
- print("Docking test result:")
282
+ print("Docking test result:")
333
283
  print(result)
334
284
  return True
335
285
  except Exception as e:
336
- print(f"Docking test failed: {e}")
286
+ print(f"Docking test failed: {e}")
337
287
  return False
338
288
 
339
289
  if __name__ == "__main__":
rowan_mcp/functions/electronic_properties.py CHANGED
@@ -22,10 +22,7 @@ except ImportError:
22
22
  logging.basicConfig(level=logging.INFO)
23
23
  logger = logging.getLogger(__name__)
24
24
 
25
- # Setup API key
26
- api_key = os.getenv("ROWAN_API_KEY")
27
- if api_key and rowan:
28
- rowan.api_key = api_key
25
+
29
26
 
30
27
  def log_rowan_api_call(workflow_type: str, **kwargs):
31
28
  """Log Rowan API calls with detailed parameters."""
rowan_mcp/functions/folder_management.py CHANGED
@@ -10,14 +10,7 @@ from typing import Optional
10
10
  import logging
11
11
  logger = logging.getLogger(__name__)
12
12
 
13
- # Configure rowan API key
14
- if not hasattr(rowan, 'api_key') or not rowan.api_key:
15
- api_key = os.getenv("ROWAN_API_KEY")
16
- if api_key:
17
- rowan.api_key = api_key
18
- logger.info("Rowan API key configured")
19
- else:
20
- logger.error("No ROWAN_API_KEY found in environment")
13
+
21
14
 
22
15
  def rowan_folder_management(
23
16
  action: str,
rowan_mcp/functions/fukui.py CHANGED
@@ -22,10 +22,7 @@ except ImportError:
22
22
  logging.basicConfig(level=logging.INFO)
23
23
  logger = logging.getLogger(__name__)
24
24
 
25
- # Setup API key
26
- api_key = os.getenv("ROWAN_API_KEY")
27
- if api_key and rowan:
28
- rowan.api_key = api_key
25
+
29
26
 
30
27
  def log_rowan_api_call(workflow_type: str, **kwargs):
31
28
  """Log Rowan API calls with detailed parameters."""
rowan_mcp/functions/hydrogen_bond_basicity.py CHANGED
@@ -10,14 +10,7 @@ from typing import Optional
10
10
  import logging
11
11
  logger = logging.getLogger(__name__)
12
12
 
13
- # Configure rowan API key
14
- if not hasattr(rowan, 'api_key') or not rowan.api_key:
15
- api_key = os.getenv("ROWAN_API_KEY")
16
- if api_key:
17
- rowan.api_key = api_key
18
- logger.info("🔑 Rowan API key configured")
19
- else:
20
- logger.error("No ROWAN_API_KEY found in environment")
13
+
21
14
 
22
15
  def log_rowan_api_call(workflow_type: str, **kwargs):
23
16
  """Log Rowan API calls and let Rowan handle its own errors."""
rowan_mcp/functions/multistage_opt.py CHANGED
@@ -15,10 +15,7 @@ except ImportError:
15
15
  # Setup logging
16
16
  logger = logging.getLogger(__name__)
17
17
 
18
- # Setup API key
19
- api_key = os.getenv("ROWAN_API_KEY")
20
- if rowan and api_key:
21
- rowan.api_key = api_key
18
+
22
19
 
23
20
  def log_rowan_api_call(workflow_type: str, **kwargs):
24
21
  """Log Rowan API calls with detailed parameters."""
rowan_mcp/functions/pka.py CHANGED
@@ -10,14 +10,7 @@ from typing import Optional
10
10
  import logging
11
11
  logger = logging.getLogger(__name__)
12
12
 
13
- # Configure rowan API key
14
- if not hasattr(rowan, 'api_key') or not rowan.api_key:
15
- api_key = os.getenv("ROWAN_API_KEY")
16
- if api_key:
17
- rowan.api_key = api_key
18
- logger.info("Rowan API key configured")
19
- else:
20
- logger.error("No ROWAN_API_KEY found in environment")
13
+
21
14
 
22
15
  def log_rowan_api_call(workflow_type: str, **kwargs):
23
16
  """Log Rowan API calls and let Rowan handle its own errors."""
rowan_mcp/functions/redox_potential.py CHANGED
@@ -15,10 +15,7 @@ except ImportError:
15
15
  # Setup logging
16
16
  logger = logging.getLogger(__name__)
17
17
 
18
- # Setup API key
19
- api_key = os.getenv("ROWAN_API_KEY")
20
- if rowan and api_key:
21
- rowan.api_key = api_key
18
+
22
19
 
23
20
  def lookup_molecule_smiles(molecule_name: str) -> str:
24
21
  """Look up canonical SMILES for common molecule names."""
@@ -287,7 +284,7 @@ def rowan_redox_potential(
287
284
  formatted += f" Job UUID: {result.get('uuid', 'N/A')}\n"
288
285
  formatted += f" Status: {status}\n"
289
286
  formatted += f"⚙ Mode: {mode_lower.title()}\n"
290
- formatted += f"💧 Solvent: Acetonitrile\n"
287
+ formatted += f"Solvent: Acetonitrile\n"
291
288
 
292
289
  # Show which potentials were calculated
293
290
  calc_types = []
rowan_mcp/functions/system_management.py CHANGED
@@ -11,14 +11,7 @@ from typing import Optional
11
11
  # Set up logging
12
12
  logger = logging.getLogger(__name__)
13
13
 
14
- # Configure rowan API key
15
- if not hasattr(rowan, 'api_key') or not rowan.api_key:
16
- api_key = os.getenv("ROWAN_API_KEY")
17
- if api_key:
18
- rowan.api_key = api_key
19
- logger.info("Rowan API key configured")
20
- else:
21
- logger.error("No ROWAN_API_KEY found in environment")
14
+
22
15
 
23
16
  def rowan_system_management(
24
17
  action: str,
rowan_mcp/functions/tautomers.py CHANGED
@@ -15,10 +15,7 @@ except ImportError:
15
15
  # Setup logging
16
16
  logger = logging.getLogger(__name__)
17
17
 
18
- # Setup API key
19
- api_key = os.getenv("ROWAN_API_KEY")
20
- if rowan and api_key:
21
- rowan.api_key = api_key
18
+
22
19
 
23
20
  def log_rowan_api_call(workflow_type: str, **kwargs):
24
21
  """Log Rowan API calls with detailed parameters."""
rowan_mcp/functions_v2/BENCHMARK.md ADDED
@@ -0,0 +1,86 @@
1
+ # Rowan MCP Benchmark Suite
2
+
3
+ ## Overview
4
+ Systematic evaluation of the Rowan MCP server's ability to handle chemistry workflows through natural language queries.
5
+
6
+ ## Evaluation Tiers
7
+
8
+ ### Tier 1: Single Tool Calls
9
+ **Tests**: Basic tool invocation and parameter passing
10
+ **Characteristics**:
11
+ - Single workflow submission
12
+ - Explicit parameters
13
+ - No dependencies
14
+ - Direct SMILES or common molecule names
15
+
16
+ **Example Queries**:
17
+ - "Calculate the pKa of phenol"
18
+ - "Optimize water geometry with GFN2-xTB"
19
+ - "Find conformers of ethanol"
20
+
21
+ ### Tier 2: Parameter Interpretation
22
+ **Tests**: Natural language to parameter mapping, molecule name resolution
23
+ **Characteristics**:
24
+ - Requires interpreting descriptive terms into API parameters
25
+ - Mode selection (rapid/careful/meticulous)
26
+ - Element specification by name vs atomic number
27
+ - Common name to SMILES conversion
28
+
29
+ **Example Queries**:
30
+ - "Calculate the oxidation potential of caffeine using careful mode"
31
+ - "Find the pKa of aspirin, only considering oxygen atoms"
32
+ - "Dock ibuprofen to CDK2 without optimization"
33
+
34
+ ### Tier 3: Batch Operations
35
+ **Tests**: Multiple independent calculations, result organization
36
+ **Characteristics**:
37
+ - Multiple molecules or methods
38
+ - Parallel workflow submission
39
+ - Result comparison/aggregation
40
+ - Folder organization
41
+
42
+ **Example Queries**:
43
+ - "Calculate pKa for phenol, p-nitrophenol, and p-chlorophenol"
44
+ - "Optimize butane with GFN2-xTB, UMA, and R2SCAN-3c methods"
45
+ - "Screen 5 molecules for docking against CDK2"
46
+
47
+ ### Tier 4: Workflow Chaining
48
+ **Tests**: Sequential dependent calculations, data extraction from results
49
+ **Characteristics**:
50
+ - Output from one workflow feeds into next
51
+ - Requires waiting for completion
52
+ - UUID and result extraction
53
+ - Proper async handling
54
+
55
+ **Example Queries**:
56
+ - "Find conformers of benzophenone, then calculate redox potential for top 3"
57
+ - "Optimize this transition state, then run IRC from the result"
58
+ - "Calculate pKa, then run conformer search at the predicted pKa value"
59
+
60
+ ### Tier 5: Conditional Logic
61
+ **Tests**: Decision-making based on results, complex multi-step analysis
62
+ **Characteristics**:
63
+ - Conditional branching based on results
64
+ - Threshold-based decisions
65
+ - Error handling and retries
66
+ - Statistical analysis of results
67
+
68
+ **Example Queries**:
69
+ - "Screen molecules for docking, only run detailed analysis if score < -8.0"
70
+ - "Calculate conformer energies, identify outliers (>2 kcal/mol from lowest), recalculate outliers with meticulous mode"
71
+ - "Find pKa sites, if any are between 6-8, run pH-dependent calculations at those values"
72
+
73
+ ## Scoring Criteria
74
+
75
+ ### Per Query
76
+ - **Success**: Workflow submitted correctly (1 point)
77
+ - **Parameters**: All parameters correctly mapped (1 point)
78
+ - **Completion**: Workflow completes without error (1 point)
79
+ - **Chaining**: Dependencies handled correctly (1 point, Tier 4-5 only)
80
+ - **Logic**: Conditional logic executed correctly (1 point, Tier 5 only)
81
+
82
+ ### Overall Metrics
83
+ - Success rate per tier
84
+ - Average time to completion
85
+ - Error recovery rate
86
+ - Parameter accuracy rate
rowan_mcp/functions_v2/molecule_lookup.py ADDED
@@ -0,0 +1,232 @@
1
+ """
2
+ Molecule name to SMILES converter using Chemical Identifier Resolver (CIR).
3
+ Enables natural language molecule input for Rowan workflows.
4
+ """
5
+
6
+ from typing import List, Dict, Annotated
7
+ from urllib.request import urlopen
8
+ from urllib.parse import quote
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ def molecule_lookup(
15
+ molecule_name: Annotated[str, "Common name, IUPAC name, or CAS number of molecule (e.g., 'aspirin', 'caffeine', '50-78-2')"],
16
+ fallback_to_input: Annotated[bool, "If lookup fails, return the input string assuming it might be SMILES"] = False
17
+ ) -> str:
18
+ """Convert molecule names to SMILES using Chemical Identifier Resolver (CIR).
19
+
20
+ Args:
21
+ molecule_name: Common name, IUPAC name, or CAS number of molecule (e.g., 'aspirin', 'caffeine', '50-78-2')
22
+ fallback_to_input: If lookup fails, return the input string assuming it might be SMILES
23
+
24
+ This tool enables natural language input for molecules by converting common names,
25
+ IUPAC names, CAS numbers, and other identifiers to SMILES strings that can be
26
+ used with Rowan workflows.
27
+
28
+ Supported Input Types:
29
+ - Common names: 'aspirin', 'caffeine', 'benzene', 'glucose'
30
+ - IUPAC names: '2-acetoxybenzoic acid', '1,3,7-trimethylpurine-2,6-dione'
31
+ - CAS numbers: '50-78-2' (aspirin), '58-08-2' (caffeine)
32
+ - InChI strings
33
+ - Already valid SMILES (will be validated)
34
+
35
+ Returns:
36
+ SMILES string if successful, error message if not found
37
+
38
+ Examples:
39
+ # Common drug name
40
+ result = molecule_lookup("aspirin")
41
+ # Returns: "CC(=O)Oc1ccccc1C(=O)O"
42
+
43
+ # IUPAC name
44
+ result = molecule_lookup("2-acetoxybenzoic acid")
45
+ # Returns: "CC(=O)Oc1ccccc1C(=O)O"
46
+
47
+ # CAS number
48
+ result = molecule_lookup("50-78-2")
49
+ # Returns: "CC(=O)Oc1ccccc1C(=O)O"
50
+
51
+ # Complex molecule
52
+ result = molecule_lookup("paracetamol")
53
+ # Returns: "CC(=O)Nc1ccc(O)cc1"
54
+ """
55
+ try:
56
+ # Clean input
57
+ molecule_name = molecule_name.strip()
58
+
59
+ # Check if already SMILES-like (contains typical SMILES characters)
60
+ smiles_chars = {'=', '#', '(', ')', '[', ']', '@', '+', '-'}
61
+ if any(char in molecule_name for char in smiles_chars):
62
+ logger.info(f"Input '{molecule_name}' appears to be SMILES, returning as-is")
63
+ return molecule_name
64
+
65
+ # Query CIR service
66
+ logger.info(f"Looking up molecule: {molecule_name}")
67
+ url = f'http://cactus.nci.nih.gov/chemical/structure/{quote(molecule_name)}/smiles'
68
+
69
+ response = urlopen(url, timeout=10)
70
+ smiles = response.read().decode('utf8').strip()
71
+
72
+ # CIR may return multiple SMILES for some queries, take the first one
73
+ if '\n' in smiles:
74
+ smiles = smiles.split('\n')[0]
75
+
76
+ logger.info(f"Successfully converted '{molecule_name}' to SMILES: {smiles}")
77
+ return smiles
78
+
79
+ except Exception as e:
80
+ logger.warning(f"Failed to lookup '{molecule_name}': {e}")
81
+
82
+ if fallback_to_input:
83
+ logger.info(f"Returning original input as fallback: {molecule_name}")
84
+ return molecule_name
85
+ else:
86
+ return f"Could not find SMILES for '{molecule_name}'. Please check the name or provide a valid SMILES string."
87
+
88
+
89
+ def batch_molecule_lookup(
90
+ molecule_names: Annotated[List[str], "List of molecule names to convert to SMILES"],
91
+ skip_failures: Annotated[bool, "Skip molecules that fail lookup instead of stopping"] = True
92
+ ) -> Dict[str, str]:
93
+ """Convert multiple molecule names to SMILES in batch.
94
+
95
+ Args:
96
+ molecule_names: List of molecule names to convert to SMILES
97
+ skip_failures: Skip molecules that fail lookup instead of stopping
98
+
99
+ Useful for preparing multiple molecules for workflows or screening.
100
+
101
+ Returns:
102
+ Dictionary mapping input names to SMILES strings (or error messages)
103
+
104
+ Examples:
105
+ # Drug screening set
106
+ result = batch_molecule_lookup([
107
+ "aspirin",
108
+ "ibuprofen",
109
+ "paracetamol",
110
+ "caffeine"
111
+ ])
112
+ # Returns: {
113
+ # "aspirin": "CC(=O)Oc1ccccc1C(=O)O",
114
+ # "ibuprofen": "CC(C)Cc1ccc(C(C)C(=O)O)cc1",
115
+ # "paracetamol": "CC(=O)Nc1ccc(O)cc1",
116
+ # "caffeine": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
117
+ # }
118
+
119
+ # Mixed input types
120
+ result = batch_molecule_lookup([
121
+ "benzene", # Common name
122
+ "50-78-2", # CAS number
123
+ "ethanoic acid" # IUPAC name
124
+ ])
125
+ """
126
+ results = {}
127
+
128
+ for name in molecule_names:
129
+ try:
130
+ smiles = molecule_lookup(name, fallback_to_input=False)
131
+ results[name] = smiles
132
+ except Exception as e:
133
+ error_msg = f"Lookup failed: {str(e)}"
134
+ if skip_failures:
135
+ logger.warning(f"Skipping {name}: {error_msg}")
136
+ results[name] = error_msg
137
+ else:
138
+ raise ValueError(f"Failed to lookup '{name}': {error_msg}")
139
+
140
+ return results
141
+
142
+
143
+ def validate_smiles(
144
+ smiles: Annotated[str, "SMILES string to validate"]
145
+ ) -> Dict[str, any]:
146
+ """Validate a SMILES string and return basic molecular properties.
147
+
148
+ Args:
149
+ smiles: SMILES string to validate
150
+
151
+ Uses RDKit to validate SMILES and extract basic properties.
152
+
153
+ Returns:
154
+ Dictionary with validation status and properties if valid
155
+
156
+ Examples:
157
+ result = validate_smiles("CC(=O)O")
158
+ # Returns: {
159
+ # "valid": True,
160
+ # "canonical_smiles": "CC(=O)O",
161
+ # "molecular_formula": "C2H4O2",
162
+ # "molecular_weight": 60.05
163
+ # }
164
+ """
165
+ try:
166
+ from rdkit import Chem
167
+ from rdkit.Chem import Descriptors
168
+
169
+ mol = Chem.MolFromSmiles(smiles)
170
+
171
+ if mol is None:
172
+ return {
173
+ "valid": False,
174
+ "error": "Invalid SMILES string"
175
+ }
176
+
177
+ return {
178
+ "valid": True,
179
+ "canonical_smiles": Chem.MolToSmiles(mol),
180
+ "molecular_formula": Chem.rdMolDescriptors.CalcMolFormula(mol),
181
+ "molecular_weight": round(Descriptors.MolWt(mol), 2),
182
+ "num_atoms": mol.GetNumAtoms(),
183
+ "num_bonds": mol.GetNumBonds()
184
+ }
185
+
186
+ except ImportError:
187
+ return {
188
+ "valid": "unknown",
189
+ "error": "RDKit not available for validation"
190
+ }
191
+ except Exception as e:
192
+ return {
193
+ "valid": False,
194
+ "error": str(e)
195
+ }
196
+
197
+
198
+ # Common molecules reference (for documentation)
199
+ COMMON_MOLECULES = {
200
+ # Drugs
201
+ "aspirin": "CC(=O)Oc1ccccc1C(=O)O",
202
+ "paracetamol": "CC(=O)Nc1ccc(O)cc1",
203
+ "acetaminophen": "CC(=O)Nc1ccc(O)cc1", # Same as paracetamol
204
+ "ibuprofen": "CC(C)Cc1ccc(C(C)C(=O)O)cc1",
205
+ "caffeine": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
206
+ "penicillin": "CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O",
207
+
208
+ # Solvents
209
+ "water": "O",
210
+ "ethanol": "CCO",
211
+ "methanol": "CO",
212
+ "acetone": "CC(=O)C",
213
+ "dmso": "CS(=O)C",
214
+ "chloroform": "C(Cl)(Cl)Cl",
215
+ "benzene": "c1ccccc1",
216
+ "toluene": "Cc1ccccc1",
217
+
218
+ # Organic compounds
219
+ "glucose": "C(C1C(C(C(C(O1)O)O)O)O)O",
220
+ "acetic acid": "CC(=O)O",
221
+ "ethanoic acid": "CC(=O)O", # IUPAC for acetic acid
222
+ "phenol": "Oc1ccccc1",
223
+ "aniline": "Nc1ccccc1",
224
+ "naphthalene": "c1ccc2c(c1)cccc2",
225
+
226
+ # Amino acids
227
+ "glycine": "C(C(=O)O)N",
228
+ "alanine": "CC(C(=O)O)N",
229
+ "valine": "CC(C)C(C(=O)O)N",
230
+ "leucine": "CC(C)CC(C(=O)O)N",
231
+ "lysine": "C(CCN)CC(C(=O)O)N",
232
+ }