rowan-mcp 1.0.2__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rowan-mcp might be problematic. Click here for more details.
- rowan_mcp/__init__.py +1 -1
- rowan_mcp/__main__.py +3 -5
- rowan_mcp/functions/admet.py +0 -5
- rowan_mcp/functions/bde.py +1 -8
- rowan_mcp/functions/conformers.py +1 -4
- rowan_mcp/functions/descriptors.py +1 -4
- rowan_mcp/functions/docking.py +6 -56
- rowan_mcp/functions/electronic_properties.py +1 -4
- rowan_mcp/functions/folder_management.py +1 -8
- rowan_mcp/functions/fukui.py +1 -4
- rowan_mcp/functions/hydrogen_bond_basicity.py +1 -8
- rowan_mcp/functions/multistage_opt.py +1 -4
- rowan_mcp/functions/pka.py +1 -8
- rowan_mcp/functions/redox_potential.py +2 -5
- rowan_mcp/functions/system_management.py +1 -8
- rowan_mcp/functions/tautomers.py +1 -4
- rowan_mcp/functions_v2/BENCHMARK.md +86 -0
- rowan_mcp/functions_v2/molecule_lookup.py +232 -0
- rowan_mcp/functions_v2/protein_management.py +141 -0
- rowan_mcp/functions_v2/submit_basic_calculation_workflow.py +195 -0
- rowan_mcp/functions_v2/submit_conformer_search_workflow.py +158 -0
- rowan_mcp/functions_v2/submit_descriptors_workflow.py +52 -0
- rowan_mcp/functions_v2/submit_docking_workflow.py +244 -0
- rowan_mcp/functions_v2/submit_fukui_workflow.py +114 -0
- rowan_mcp/functions_v2/submit_irc_workflow.py +58 -0
- rowan_mcp/functions_v2/submit_macropka_workflow.py +99 -0
- rowan_mcp/functions_v2/submit_pka_workflow.py +72 -0
- rowan_mcp/functions_v2/submit_protein_cofolding_workflow.py +88 -0
- rowan_mcp/functions_v2/submit_redox_potential_workflow.py +55 -0
- rowan_mcp/functions_v2/submit_scan_workflow.py +82 -0
- rowan_mcp/functions_v2/submit_solubility_workflow.py +157 -0
- rowan_mcp/functions_v2/submit_tautomer_search_workflow.py +51 -0
- rowan_mcp/functions_v2/workflow_management_v2.py +382 -0
- rowan_mcp/server.py +109 -144
- rowan_mcp/tests/basic_calculation_from_json.py +0 -0
- rowan_mcp/tests/basic_calculation_with_constraint.py +33 -0
- rowan_mcp/tests/basic_calculation_with_solvent.py +0 -0
- rowan_mcp/tests/bde.py +37 -0
- rowan_mcp/tests/benchmark_queries.md +120 -0
- rowan_mcp/tests/cofolding_screen.py +131 -0
- rowan_mcp/tests/conformer_dependent_redox.py +37 -0
- rowan_mcp/tests/conformers.py +31 -0
- rowan_mcp/tests/data.json +189 -0
- rowan_mcp/tests/docking_screen.py +157 -0
- rowan_mcp/tests/irc.py +24 -0
- rowan_mcp/tests/macropka.py +13 -0
- rowan_mcp/tests/multistage_opt.py +13 -0
- rowan_mcp/tests/optimization.py +21 -0
- rowan_mcp/tests/phenol_pka.py +36 -0
- rowan_mcp/tests/pka.py +36 -0
- rowan_mcp/tests/protein_cofolding.py +17 -0
- rowan_mcp/tests/scan.py +28 -0
- {rowan_mcp-1.0.2.dist-info → rowan_mcp-2.0.1.dist-info}/METADATA +38 -45
- rowan_mcp-2.0.1.dist-info/RECORD +69 -0
- rowan_mcp-1.0.2.dist-info/RECORD +0 -34
- {rowan_mcp-1.0.2.dist-info → rowan_mcp-2.0.1.dist-info}/WHEEL +0 -0
- {rowan_mcp-1.0.2.dist-info → rowan_mcp-2.0.1.dist-info}/entry_points.txt +0 -0
rowan_mcp/__init__.py
CHANGED
|
@@ -5,7 +5,7 @@ This package provides MCP (Model Context Protocol) server functionality
|
|
|
5
5
|
for integrating with Rowan's computational chemistry platform.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "1.0.
|
|
8
|
+
__version__ = "1.0.2"
|
|
9
9
|
__author__ = "Kat Yenko"
|
|
10
10
|
__description__ = "MCP server for Rowan computational chemistry platform"
|
|
11
11
|
|
rowan_mcp/__main__.py
CHANGED
|
@@ -2,13 +2,11 @@
|
|
|
2
2
|
Main entry point for Rowan MCP Server when run as a module.
|
|
3
3
|
|
|
4
4
|
Usage:
|
|
5
|
-
python -m
|
|
6
|
-
python -m
|
|
7
|
-
python -m src --http # HTTP mode
|
|
8
|
-
python -m src --help # Show help
|
|
5
|
+
python -m rowan_mcp # HTTP/SSE mode
|
|
6
|
+
python -m rowan_mcp --help # Show help
|
|
9
7
|
"""
|
|
10
8
|
|
|
11
9
|
if __name__ == "__main__":
|
|
12
|
-
#
|
|
10
|
+
# HTTP transport only
|
|
13
11
|
from .server import main
|
|
14
12
|
main()
|
rowan_mcp/functions/admet.py
CHANGED
|
@@ -16,11 +16,6 @@ except ImportError:
|
|
|
16
16
|
# Setup logging
|
|
17
17
|
logger = logging.getLogger(__name__)
|
|
18
18
|
|
|
19
|
-
# Setup API key
|
|
20
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
21
|
-
if rowan and api_key:
|
|
22
|
-
rowan.api_key = api_key
|
|
23
|
-
|
|
24
19
|
|
|
25
20
|
def log_rowan_api_call(workflow_type: str, **kwargs):
|
|
26
21
|
"""Log Rowan API calls with detailed parameters."""
|
rowan_mcp/functions/bde.py
CHANGED
|
@@ -10,14 +10,7 @@ from typing import Optional, List, Union
|
|
|
10
10
|
import logging
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
if not hasattr(rowan, 'api_key') or not rowan.api_key:
|
|
15
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
16
|
-
if api_key:
|
|
17
|
-
rowan.api_key = api_key
|
|
18
|
-
logger.info("🔑 Rowan API key configured")
|
|
19
|
-
else:
|
|
20
|
-
logger.error("No ROWAN_API_KEY found in environment")
|
|
13
|
+
|
|
21
14
|
|
|
22
15
|
def log_rowan_api_call(workflow_type: str, **kwargs):
|
|
23
16
|
"""Log Rowan API calls and let Rowan handle its own errors."""
|
|
@@ -15,10 +15,7 @@ except ImportError:
|
|
|
15
15
|
# Setup logging
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
20
|
-
if rowan and api_key:
|
|
21
|
-
rowan.api_key = api_key
|
|
18
|
+
|
|
22
19
|
|
|
23
20
|
def log_rowan_api_call(workflow_type: str, **kwargs):
|
|
24
21
|
"""Log Rowan API calls and let Rowan handle its own errors."""
|
|
@@ -15,10 +15,7 @@ except ImportError:
|
|
|
15
15
|
# Setup logging
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
20
|
-
if rowan and api_key:
|
|
21
|
-
rowan.api_key = api_key
|
|
18
|
+
|
|
22
19
|
|
|
23
20
|
def log_rowan_api_call(workflow_type: str, **kwargs):
|
|
24
21
|
"""Log Rowan API calls with detailed parameters."""
|
rowan_mcp/functions/docking.py
CHANGED
|
@@ -192,63 +192,13 @@ def rowan_docking(
|
|
|
192
192
|
if conformers is not None:
|
|
193
193
|
compute_params["conformers"] = conformers
|
|
194
194
|
|
|
195
|
-
# Submit docking calculation
|
|
195
|
+
# Submit docking calculation and return raw result
|
|
196
196
|
result = rowan.compute(**compute_params)
|
|
197
|
-
|
|
198
|
-
# Format results
|
|
199
|
-
uuid = result.get('uuid', 'N/A')
|
|
200
|
-
status = result.get('status', 'unknown')
|
|
201
|
-
|
|
202
|
-
if blocking:
|
|
203
|
-
# Blocking mode - check if successful
|
|
204
|
-
if status == "success":
|
|
205
|
-
formatted = f"✅ Docking calculation '{name}' completed successfully!\n"
|
|
206
|
-
formatted += f"🔖 Workflow UUID: {uuid}\n"
|
|
207
|
-
formatted += f"📊 Status: {status}\n\n"
|
|
208
|
-
|
|
209
|
-
# Extract docking results if available
|
|
210
|
-
object_data = result.get("object_data", {})
|
|
211
|
-
scores = object_data.get("scores", [])
|
|
212
|
-
|
|
213
|
-
if scores:
|
|
214
|
-
formatted += f"🎯 Docking Results: {len(scores)} poses generated\n"
|
|
215
|
-
formatted += f"📈 Best docking score: {scores[0] if scores else 'N/A'}\n"
|
|
216
|
-
|
|
217
|
-
# Show top poses
|
|
218
|
-
formatted += "\nTop poses:\n"
|
|
219
|
-
for i, score in enumerate(scores[:5]):
|
|
220
|
-
formatted += f" {i+1}. Score: {score}\n"
|
|
221
|
-
|
|
222
|
-
if len(scores) > 5:
|
|
223
|
-
formatted += f" ... and {len(scores) - 5} more poses\n"
|
|
224
|
-
else:
|
|
225
|
-
formatted += "📈 Results: Check workflow details for docking data\n"
|
|
226
|
-
|
|
227
|
-
return formatted
|
|
228
|
-
else:
|
|
229
|
-
# Failed calculation
|
|
230
|
-
return f"❌ Docking calculation failed\n🔖 UUID: {uuid}\n📋 Status: {status}\n💬 Check workflow details for more information"
|
|
231
|
-
else:
|
|
232
|
-
# Non-blocking mode
|
|
233
|
-
formatted = f"📋 Docking calculation '{name}' submitted!\n"
|
|
234
|
-
formatted += f"🔖 Workflow UUID: {uuid}\n"
|
|
235
|
-
formatted += f"⏳ Status: Running...\n"
|
|
236
|
-
formatted += f"💡 Use rowan_workflow_management to check status\n\n"
|
|
237
|
-
|
|
238
|
-
formatted += f"Docking Details:\n"
|
|
239
|
-
formatted += f"🧬 Ligand: {initial_molecule}\n"
|
|
240
|
-
formatted += f"🎯 Target: {target_uuid or target[:50] + '...' if target and len(target) > 50 else target}\n"
|
|
241
|
-
formatted += f"📍 Pocket: center={pocket[0]}, size={pocket[1]}\n"
|
|
242
|
-
formatted += f"⚙️ Settings: csearch={do_csearch}, optimize={do_optimization}, refine={do_pose_refinement}\n"
|
|
243
|
-
|
|
244
|
-
if conformers:
|
|
245
|
-
formatted += f"🔬 Pre-optimized conformers: {len(conformers)}\n"
|
|
246
|
-
|
|
247
|
-
return formatted
|
|
197
|
+
return result
|
|
248
198
|
|
|
249
199
|
except Exception as e:
|
|
250
200
|
logger.error(f"Error in rowan_docking: {str(e)}")
|
|
251
|
-
return f"
|
|
201
|
+
return f"Docking calculation failed: {str(e)}"
|
|
252
202
|
|
|
253
203
|
def rowan_docking_pdb_id(
|
|
254
204
|
name: str,
|
|
@@ -297,7 +247,7 @@ def rowan_docking_pdb_id(
|
|
|
297
247
|
else:
|
|
298
248
|
ligand_param = f" smiles={smiles},\n"
|
|
299
249
|
|
|
300
|
-
return (f"
|
|
250
|
+
return (f"PDB {pdb_id} found in RCSB database!\n\n"
|
|
301
251
|
f"To perform docking with this protein:\n\n"
|
|
302
252
|
f"1. Go to https://labs.rowansci.com\n"
|
|
303
253
|
f"2. Upload the PDB file for {pdb_id}\n"
|
|
@@ -329,11 +279,11 @@ def test_rowan_docking():
|
|
|
329
279
|
pocket=((0.0, 0.0, 0.0), (20.0, 20.0, 20.0)),
|
|
330
280
|
blocking=False
|
|
331
281
|
)
|
|
332
|
-
print("
|
|
282
|
+
print("Docking test result:")
|
|
333
283
|
print(result)
|
|
334
284
|
return True
|
|
335
285
|
except Exception as e:
|
|
336
|
-
print(f"
|
|
286
|
+
print(f"Docking test failed: {e}")
|
|
337
287
|
return False
|
|
338
288
|
|
|
339
289
|
if __name__ == "__main__":
|
|
@@ -22,10 +22,7 @@ except ImportError:
|
|
|
22
22
|
logging.basicConfig(level=logging.INFO)
|
|
23
23
|
logger = logging.getLogger(__name__)
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
27
|
-
if api_key and rowan:
|
|
28
|
-
rowan.api_key = api_key
|
|
25
|
+
|
|
29
26
|
|
|
30
27
|
def log_rowan_api_call(workflow_type: str, **kwargs):
|
|
31
28
|
"""Log Rowan API calls with detailed parameters."""
|
|
@@ -10,14 +10,7 @@ from typing import Optional
|
|
|
10
10
|
import logging
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
if not hasattr(rowan, 'api_key') or not rowan.api_key:
|
|
15
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
16
|
-
if api_key:
|
|
17
|
-
rowan.api_key = api_key
|
|
18
|
-
logger.info("Rowan API key configured")
|
|
19
|
-
else:
|
|
20
|
-
logger.error("No ROWAN_API_KEY found in environment")
|
|
13
|
+
|
|
21
14
|
|
|
22
15
|
def rowan_folder_management(
|
|
23
16
|
action: str,
|
rowan_mcp/functions/fukui.py
CHANGED
|
@@ -22,10 +22,7 @@ except ImportError:
|
|
|
22
22
|
logging.basicConfig(level=logging.INFO)
|
|
23
23
|
logger = logging.getLogger(__name__)
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
27
|
-
if api_key and rowan:
|
|
28
|
-
rowan.api_key = api_key
|
|
25
|
+
|
|
29
26
|
|
|
30
27
|
def log_rowan_api_call(workflow_type: str, **kwargs):
|
|
31
28
|
"""Log Rowan API calls with detailed parameters."""
|
|
@@ -10,14 +10,7 @@ from typing import Optional
|
|
|
10
10
|
import logging
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
if not hasattr(rowan, 'api_key') or not rowan.api_key:
|
|
15
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
16
|
-
if api_key:
|
|
17
|
-
rowan.api_key = api_key
|
|
18
|
-
logger.info("🔑 Rowan API key configured")
|
|
19
|
-
else:
|
|
20
|
-
logger.error("No ROWAN_API_KEY found in environment")
|
|
13
|
+
|
|
21
14
|
|
|
22
15
|
def log_rowan_api_call(workflow_type: str, **kwargs):
|
|
23
16
|
"""Log Rowan API calls and let Rowan handle its own errors."""
|
|
@@ -15,10 +15,7 @@ except ImportError:
|
|
|
15
15
|
# Setup logging
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
20
|
-
if rowan and api_key:
|
|
21
|
-
rowan.api_key = api_key
|
|
18
|
+
|
|
22
19
|
|
|
23
20
|
def log_rowan_api_call(workflow_type: str, **kwargs):
|
|
24
21
|
"""Log Rowan API calls with detailed parameters."""
|
rowan_mcp/functions/pka.py
CHANGED
|
@@ -10,14 +10,7 @@ from typing import Optional
|
|
|
10
10
|
import logging
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
if not hasattr(rowan, 'api_key') or not rowan.api_key:
|
|
15
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
16
|
-
if api_key:
|
|
17
|
-
rowan.api_key = api_key
|
|
18
|
-
logger.info("Rowan API key configured")
|
|
19
|
-
else:
|
|
20
|
-
logger.error("No ROWAN_API_KEY found in environment")
|
|
13
|
+
|
|
21
14
|
|
|
22
15
|
def log_rowan_api_call(workflow_type: str, **kwargs):
|
|
23
16
|
"""Log Rowan API calls and let Rowan handle its own errors."""
|
|
@@ -15,10 +15,7 @@ except ImportError:
|
|
|
15
15
|
# Setup logging
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
20
|
-
if rowan and api_key:
|
|
21
|
-
rowan.api_key = api_key
|
|
18
|
+
|
|
22
19
|
|
|
23
20
|
def lookup_molecule_smiles(molecule_name: str) -> str:
|
|
24
21
|
"""Look up canonical SMILES for common molecule names."""
|
|
@@ -287,7 +284,7 @@ def rowan_redox_potential(
|
|
|
287
284
|
formatted += f" Job UUID: {result.get('uuid', 'N/A')}\n"
|
|
288
285
|
formatted += f" Status: {status}\n"
|
|
289
286
|
formatted += f"⚙ Mode: {mode_lower.title()}\n"
|
|
290
|
-
formatted += f"
|
|
287
|
+
formatted += f"Solvent: Acetonitrile\n"
|
|
291
288
|
|
|
292
289
|
# Show which potentials were calculated
|
|
293
290
|
calc_types = []
|
|
@@ -11,14 +11,7 @@ from typing import Optional
|
|
|
11
11
|
# Set up logging
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
if not hasattr(rowan, 'api_key') or not rowan.api_key:
|
|
16
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
17
|
-
if api_key:
|
|
18
|
-
rowan.api_key = api_key
|
|
19
|
-
logger.info("Rowan API key configured")
|
|
20
|
-
else:
|
|
21
|
-
logger.error("No ROWAN_API_KEY found in environment")
|
|
14
|
+
|
|
22
15
|
|
|
23
16
|
def rowan_system_management(
|
|
24
17
|
action: str,
|
rowan_mcp/functions/tautomers.py
CHANGED
|
@@ -15,10 +15,7 @@ except ImportError:
|
|
|
15
15
|
# Setup logging
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
api_key = os.getenv("ROWAN_API_KEY")
|
|
20
|
-
if rowan and api_key:
|
|
21
|
-
rowan.api_key = api_key
|
|
18
|
+
|
|
22
19
|
|
|
23
20
|
def log_rowan_api_call(workflow_type: str, **kwargs):
|
|
24
21
|
"""Log Rowan API calls with detailed parameters."""
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# Rowan MCP Benchmark Suite
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
Systematic evaluation of the Rowan MCP server's ability to handle chemistry workflows through natural language queries.
|
|
5
|
+
|
|
6
|
+
## Evaluation Tiers
|
|
7
|
+
|
|
8
|
+
### Tier 1: Single Tool Calls
|
|
9
|
+
**Tests**: Basic tool invocation and parameter passing
|
|
10
|
+
**Characteristics**:
|
|
11
|
+
- Single workflow submission
|
|
12
|
+
- Explicit parameters
|
|
13
|
+
- No dependencies
|
|
14
|
+
- Direct SMILES or common molecule names
|
|
15
|
+
|
|
16
|
+
**Example Queries**:
|
|
17
|
+
- "Calculate the pKa of phenol"
|
|
18
|
+
- "Optimize water geometry with GFN2-xTB"
|
|
19
|
+
- "Find conformers of ethanol"
|
|
20
|
+
|
|
21
|
+
### Tier 2: Parameter Interpretation
|
|
22
|
+
**Tests**: Natural language to parameter mapping, molecule name resolution
|
|
23
|
+
**Characteristics**:
|
|
24
|
+
- Requires interpreting descriptive terms into API parameters
|
|
25
|
+
- Mode selection (rapid/careful/meticulous)
|
|
26
|
+
- Element specification by name vs atomic number
|
|
27
|
+
- Common name to SMILES conversion
|
|
28
|
+
|
|
29
|
+
**Example Queries**:
|
|
30
|
+
- "Calculate the oxidation potential of caffeine using careful mode"
|
|
31
|
+
- "Find the pKa of aspirin, only considering oxygen atoms"
|
|
32
|
+
- "Dock ibuprofen to CDK2 without optimization"
|
|
33
|
+
|
|
34
|
+
### Tier 3: Batch Operations
|
|
35
|
+
**Tests**: Multiple independent calculations, result organization
|
|
36
|
+
**Characteristics**:
|
|
37
|
+
- Multiple molecules or methods
|
|
38
|
+
- Parallel workflow submission
|
|
39
|
+
- Result comparison/aggregation
|
|
40
|
+
- Folder organization
|
|
41
|
+
|
|
42
|
+
**Example Queries**:
|
|
43
|
+
- "Calculate pKa for phenol, p-nitrophenol, and p-chlorophenol"
|
|
44
|
+
- "Optimize butane with GFN2-xTB, UMA, and R2SCAN-3c methods"
|
|
45
|
+
- "Screen 5 molecules for docking against CDK2"
|
|
46
|
+
|
|
47
|
+
### Tier 4: Workflow Chaining
|
|
48
|
+
**Tests**: Sequential dependent calculations, data extraction from results
|
|
49
|
+
**Characteristics**:
|
|
50
|
+
- Output from one workflow feeds into next
|
|
51
|
+
- Requires waiting for completion
|
|
52
|
+
- UUID and result extraction
|
|
53
|
+
- Proper async handling
|
|
54
|
+
|
|
55
|
+
**Example Queries**:
|
|
56
|
+
- "Find conformers of benzophenone, then calculate redox potential for top 3"
|
|
57
|
+
- "Optimize this transition state, then run IRC from the result"
|
|
58
|
+
- "Calculate pKa, then run conformer search at the predicted pKa value"
|
|
59
|
+
|
|
60
|
+
### Tier 5: Conditional Logic
|
|
61
|
+
**Tests**: Decision-making based on results, complex multi-step analysis
|
|
62
|
+
**Characteristics**:
|
|
63
|
+
- Conditional branching based on results
|
|
64
|
+
- Threshold-based decisions
|
|
65
|
+
- Error handling and retries
|
|
66
|
+
- Statistical analysis of results
|
|
67
|
+
|
|
68
|
+
**Example Queries**:
|
|
69
|
+
- "Screen molecules for docking, only run detailed analysis if score < -8.0"
|
|
70
|
+
- "Calculate conformer energies, identify outliers (>2 kcal/mol from lowest), recalculate outliers with meticulous mode"
|
|
71
|
+
- "Find pKa sites, if any are between 6-8, run pH-dependent calculations at those values"
|
|
72
|
+
|
|
73
|
+
## Scoring Criteria
|
|
74
|
+
|
|
75
|
+
### Per Query
|
|
76
|
+
- **Success**: Workflow submitted correctly (1 point)
|
|
77
|
+
- **Parameters**: All parameters correctly mapped (1 point)
|
|
78
|
+
- **Completion**: Workflow completes without error (1 point)
|
|
79
|
+
- **Chaining**: Dependencies handled correctly (1 point, Tier 4-5 only)
|
|
80
|
+
- **Logic**: Conditional logic executed correctly (1 point, Tier 5 only)
|
|
81
|
+
|
|
82
|
+
### Overall Metrics
|
|
83
|
+
- Success rate per tier
|
|
84
|
+
- Average time to completion
|
|
85
|
+
- Error recovery rate
|
|
86
|
+
- Parameter accuracy rate
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Molecule name to SMILES converter using Chemical Identifier Resolver (CIR).
|
|
3
|
+
Enables natural language molecule input for Rowan workflows.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import List, Dict, Annotated
|
|
7
|
+
from urllib.request import urlopen
|
|
8
|
+
from urllib.parse import quote
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def molecule_lookup(
    molecule_name: Annotated[str, "Common name, IUPAC name, or CAS number of molecule (e.g., 'aspirin', 'caffeine', '50-78-2')"],
    fallback_to_input: Annotated[bool, "If lookup fails, return the input string assuming it might be SMILES"] = False
) -> str:
    """Convert molecule names to SMILES using Chemical Identifier Resolver (CIR).

    Args:
        molecule_name: Common name, IUPAC name, or CAS number of molecule (e.g., 'aspirin', 'caffeine', '50-78-2')
        fallback_to_input: If lookup fails, return the input string assuming it might be SMILES

    This tool enables natural language input for molecules by converting common names,
    IUPAC names, CAS numbers, and other identifiers to SMILES strings that can be
    used with Rowan workflows.

    Supported Input Types:
    - Common names: 'aspirin', 'caffeine', 'benzene', 'glucose'
    - IUPAC names: '2-acetoxybenzoic acid', '1,3,7-trimethylpurine-2,6-dione'
    - CAS numbers: '50-78-2' (aspirin), '58-08-2' (caffeine)
    - InChI strings
    - SMILES containing bond/branch/bracket characters (returned unchanged,
      without contacting CIR; the string is only checked lexically, not
      chemically validated)

    Returns:
        SMILES string if successful. On failure, the stripped input when
        ``fallback_to_input`` is true, otherwise a human-readable error message
        beginning with "Could not find SMILES for".

    Examples:
        # Common drug name
        result = molecule_lookup("aspirin")
        # Returns: "CC(=O)Oc1ccccc1C(=O)O"

        # IUPAC name
        result = molecule_lookup("2-acetoxybenzoic acid")
        # Returns: "CC(=O)Oc1ccccc1C(=O)O"

        # CAS number
        result = molecule_lookup("50-78-2")
        # Returns: "CC(=O)Oc1ccccc1C(=O)O"

        # Complex molecule
        result = molecule_lookup("paracetamol")
        # Returns: "CC(=O)Nc1ccc(O)cc1"
    """
    import re  # local import: keeps this function self-contained

    # Characters that hint at SMILES. They also occur in CAS numbers
    # ('50-78-2'), IUPAC names ('2-acetoxybenzoic acid') and InChI strings,
    # so their presence alone is not enough to skip the lookup.
    smiles_chars = {'=', '#', '(', ')', '[', ']', '@', '+', '-'}

    # Lexical shape of a SMILES string: it starts with an atom (bracket atom
    # or organic-subset symbol) and continues with atoms, bonds, branches,
    # dots and ring-closure digits. Names, CAS numbers and InChI strings fail
    # this test because they start with a digit or contain other letters,
    # commas or spaces.
    atom = r"(?:\[[^\[\]]+\]|Br|Cl|[BCNOPSFI*]|[bcnops])"
    other = r"(?:[-=#$:/\\.()]|%\d\d|\d)"
    smiles_pattern = atom + r"(?:" + atom + r"|" + other + r")*"

    try:
        # Clean input
        molecule_name = molecule_name.strip()
        if not molecule_name:
            # Nothing to resolve; handled by the common failure path below.
            raise ValueError("empty molecule name")

        # Return the input untouched only when it both contains a SMILES hint
        # character and is lexically well-formed SMILES.
        has_smiles_char = any(char in molecule_name for char in smiles_chars)
        if has_smiles_char and re.fullmatch(smiles_pattern, molecule_name):
            logger.info(f"Input '{molecule_name}' appears to be SMILES, returning as-is")
            return molecule_name

        # Query CIR service
        logger.info(f"Looking up molecule: {molecule_name}")
        url = f'http://cactus.nci.nih.gov/chemical/structure/{quote(molecule_name)}/smiles'

        # Context manager guarantees the HTTP connection is released.
        with urlopen(url, timeout=10) as response:
            smiles = response.read().decode('utf8').strip()

        # CIR may return multiple SMILES for some queries, take the first one
        if '\n' in smiles:
            smiles = smiles.split('\n')[0]

        if not smiles:
            # An empty body is not a usable SMILES; report it as a failure.
            raise ValueError("empty response from CIR")

        logger.info(f"Successfully converted '{molecule_name}' to SMILES: {smiles}")
        return smiles

    except Exception as e:
        # Deliberate best-effort: any failure (network, HTTP 404, bad input)
        # is reported through the return value rather than raised.
        logger.warning(f"Failed to lookup '{molecule_name}': {e}")

        if fallback_to_input:
            logger.info(f"Returning original input as fallback: {molecule_name}")
            return molecule_name
        else:
            return f"Could not find SMILES for '{molecule_name}'. Please check the name or provide a valid SMILES string."
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def batch_molecule_lookup(
    molecule_names: Annotated[List[str], "List of molecule names to convert to SMILES"],
    skip_failures: Annotated[bool, "Skip molecules that fail lookup instead of stopping"] = True
) -> Dict[str, str]:
    """Convert multiple molecule names to SMILES in batch.

    Args:
        molecule_names: List of molecule names to convert to SMILES
        skip_failures: Skip molecules that fail lookup instead of stopping

    Useful for preparing multiple molecules for workflows or screening.

    Returns:
        Dictionary mapping input names to SMILES strings (or error messages)

    Raises:
        ValueError: If a lookup fails and ``skip_failures`` is false.

    Examples:
        # Drug screening set
        result = batch_molecule_lookup([
            "aspirin",
            "ibuprofen",
            "paracetamol",
            "caffeine"
        ])
        # Returns: {
        #     "aspirin": "CC(=O)Oc1ccccc1C(=O)O",
        #     "ibuprofen": "CC(C)Cc1ccc(C(C)C(=O)O)cc1",
        #     "paracetamol": "CC(=O)Nc1ccc(O)cc1",
        #     "caffeine": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
        # }

        # Mixed input types
        result = batch_molecule_lookup([
            "benzene",           # Common name
            "50-78-2",          # CAS number
            "ethanoic acid"     # IUPAC name
        ])
    """
    # molecule_lookup reports failures through its return value (a message
    # with this prefix) instead of raising, so failures must be detected here;
    # otherwise skip_failures=False could never stop the batch.
    failure_prefix = "Could not find SMILES for "

    results = {}

    for name in molecule_names:
        error_msg = None
        try:
            smiles = molecule_lookup(name, fallback_to_input=False)
        except Exception as e:
            error_msg = f"Lookup failed: {str(e)}"
        else:
            if isinstance(smiles, str) and smiles.startswith(failure_prefix):
                error_msg = smiles

        if error_msg is None:
            results[name] = smiles
        elif skip_failures:
            # Keep the error text under the name so callers can see what failed.
            logger.warning(f"Skipping {name}: {error_msg}")
            results[name] = error_msg
        else:
            raise ValueError(f"Failed to lookup '{name}': {error_msg}")

    return results
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def validate_smiles(
    smiles: Annotated[str, "SMILES string to validate"]
) -> Dict[str, object]:
    """Validate a SMILES string and return basic molecular properties.

    Args:
        smiles: SMILES string to validate

    Uses RDKit to validate SMILES and extract basic properties.

    Returns:
        Dictionary with validation status and properties if valid.
        ``"valid"`` is ``True``, ``False``, or the string ``"unknown"`` when
        RDKit is not installed; failures carry an ``"error"`` message.

    Examples:
        result = validate_smiles("CC(=O)O")
        # Returns: {
        #     "valid": True,
        #     "canonical_smiles": "CC(=O)O",
        #     "molecular_formula": "C2H4O2",
        #     "molecular_weight": 60.05,
        #     "num_atoms": 4,
        #     "num_bonds": 3
        # }
    """
    # RDKit parses an empty string to an empty molecule rather than None, so
    # without this guard "" would be reported as a valid structure.
    if isinstance(smiles, str) and not smiles.strip():
        return {
            "valid": False,
            "error": "Empty SMILES string"
        }

    try:
        from rdkit import Chem
        # Import the submodule explicitly instead of relying on it being
        # reachable as an attribute of rdkit.Chem.
        from rdkit.Chem import Descriptors, rdMolDescriptors

        mol = Chem.MolFromSmiles(smiles)

        if mol is None:
            return {
                "valid": False,
                "error": "Invalid SMILES string"
            }

        return {
            "valid": True,
            "canonical_smiles": Chem.MolToSmiles(mol),
            "molecular_formula": rdMolDescriptors.CalcMolFormula(mol),
            "molecular_weight": round(Descriptors.MolWt(mol), 2),
            "num_atoms": mol.GetNumAtoms(),
            "num_bonds": mol.GetNumBonds()
        }

    except ImportError:
        # RDKit is optional; report that validation could not be performed.
        return {
            "valid": "unknown",
            "error": "RDKit not available for validation"
        }
    except Exception as e:
        return {
            "valid": False,
            "error": str(e)
        }
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# Reference table of well-known molecules and their SMILES strings.
# Nothing in the code shown here reads this table; it documents the kind of
# name -> SMILES mapping the lookup functions produce.
COMMON_MOLECULES = {
    # --- Pharmaceuticals ---
    "aspirin": "CC(=O)Oc1ccccc1C(=O)O",
    "paracetamol": "CC(=O)Nc1ccc(O)cc1",
    "acetaminophen": "CC(=O)Nc1ccc(O)cc1",  # alternate name for paracetamol
    "ibuprofen": "CC(C)Cc1ccc(C(C)C(=O)O)cc1",
    "caffeine": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
    "penicillin": "CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O",

    # --- Solvents ---
    "water": "O",
    "ethanol": "CCO",
    "methanol": "CO",
    "acetone": "CC(=O)C",
    "dmso": "CS(=O)C",
    "chloroform": "C(Cl)(Cl)Cl",
    "benzene": "c1ccccc1",
    "toluene": "Cc1ccccc1",

    # --- General organic compounds ---
    "glucose": "C(C1C(C(C(C(O1)O)O)O)O)O",
    "acetic acid": "CC(=O)O",
    "ethanoic acid": "CC(=O)O",  # systematic name for acetic acid
    "phenol": "Oc1ccccc1",
    "aniline": "Nc1ccccc1",
    "naphthalene": "c1ccc2c(c1)cccc2",

    # --- Amino acids ---
    "glycine": "C(C(=O)O)N",
    "alanine": "CC(C(=O)O)N",
    "valine": "CC(C)C(C(=O)O)N",
    "leucine": "CC(C)CC(C(=O)O)N",
    "lysine": "C(CCN)CC(C(=O)O)N",
}
|