cdxml-toolkit 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cdxml_toolkit/__init__.py +18 -0
- cdxml_toolkit/_jre/__init__.py +2 -0
- cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
- cdxml_toolkit/analysis/__init__.py +35 -0
- cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
- cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
- cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
- cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
- cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
- cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
- cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
- cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
- cdxml_toolkit/analysis/extract_nmr.py +47 -0
- cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
- cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
- cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
- cdxml_toolkit/cdxml_builder.py +920 -0
- cdxml_toolkit/cdxml_utils.py +342 -0
- cdxml_toolkit/chemdraw/__init__.py +5 -0
- cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
- cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
- cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
- cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
- cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
- cdxml_toolkit/constants.py +304 -0
- cdxml_toolkit/coord_normalizer.py +438 -0
- cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
- cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
- cdxml_toolkit/image/__init__.py +15 -0
- cdxml_toolkit/image/reaction_from_image.py +2103 -0
- cdxml_toolkit/image/structure_from_image.py +1711 -0
- cdxml_toolkit/layout/__init__.py +5 -0
- cdxml_toolkit/layout/alignment.py +1642 -0
- cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
- cdxml_toolkit/layout/scheme_merger.py +2260 -0
- cdxml_toolkit/mcp_server/__init__.py +0 -0
- cdxml_toolkit/mcp_server/__main__.py +5 -0
- cdxml_toolkit/mcp_server/server.py +1567 -0
- cdxml_toolkit/naming/__init__.py +6 -0
- cdxml_toolkit/naming/aligned_namer.py +2342 -0
- cdxml_toolkit/naming/mol_builder.py +3722 -0
- cdxml_toolkit/naming/name_decomposer.py +2843 -0
- cdxml_toolkit/naming/reactions_datamol.json +2414 -0
- cdxml_toolkit/office/__init__.py +5 -0
- cdxml_toolkit/office/doc_from_template.py +722 -0
- cdxml_toolkit/office/ole_embedder.py +808 -0
- cdxml_toolkit/office/ole_extractor.py +272 -0
- cdxml_toolkit/perception/__init__.py +10 -0
- cdxml_toolkit/perception/compound_search.py +229 -0
- cdxml_toolkit/perception/eln_csv_parser.py +240 -0
- cdxml_toolkit/perception/rdf_parser.py +664 -0
- cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
- cdxml_toolkit/perception/reaction_parser.py +2150 -0
- cdxml_toolkit/perception/scheme_reader.py +2948 -0
- cdxml_toolkit/perception/scheme_refine.py +1404 -0
- cdxml_toolkit/perception/scheme_segmenter.py +619 -0
- cdxml_toolkit/perception/spatial_assignment.py +1013 -0
- cdxml_toolkit/rdkit_utils.py +605 -0
- cdxml_toolkit/render/__init__.py +17 -0
- cdxml_toolkit/render/auto_layout.py +229 -0
- cdxml_toolkit/render/compact_parser.py +632 -0
- cdxml_toolkit/render/parser.py +706 -0
- cdxml_toolkit/render/render_scheme.py +267 -0
- cdxml_toolkit/render/renderer.py +2387 -0
- cdxml_toolkit/render/schema.py +90 -0
- cdxml_toolkit/render/scheme_maker.py +1043 -0
- cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
- cdxml_toolkit/resolve/__init__.py +13 -0
- cdxml_toolkit/resolve/cas_resolver.py +430 -0
- cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
- cdxml_toolkit/resolve/condensed_formula.py +493 -0
- cdxml_toolkit/resolve/jre_manager.py +195 -0
- cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
- cdxml_toolkit/resolve/reagent_db.py +285 -0
- cdxml_toolkit/resolve/superatom_data.json +2856 -0
- cdxml_toolkit/resolve/superatom_table.py +146 -0
- cdxml_toolkit/text_formatting.py +298 -0
- cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
- cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
- cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
- cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
- cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,901 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
ChemScript Bridge — Python wrapper around PerkinElmer's ChemScript .NET library.
|
|
4
|
+
|
|
5
|
+
Provides native access to ChemDraw's chemical intelligence: format conversion,
|
|
6
|
+
name↔structure, structure cleanup, substructure search, reaction handling, and
|
|
7
|
+
more — all from Python.
|
|
8
|
+
|
|
9
|
+
Architecture:
|
|
10
|
+
ChemScript's .NET DLL is 32-bit, so we run a thin JSON-RPC server
|
|
11
|
+
(_chemscript_server.py) under a 32-bit Python environment (chemscript32)
|
|
12
|
+
and communicate via subprocess stdin/stdout.
|
|
13
|
+
|
|
14
|
+
Usage (CLI):
|
|
15
|
+
python chemscript_bridge.py convert input.cdx output.cdxml
|
|
16
|
+
python chemscript_bridge.py name2struct "morpholine" -o output.cdxml
|
|
17
|
+
python chemscript_bridge.py smiles2struct "C1COCCN1" -o morpholine.cdxml
|
|
18
|
+
python chemscript_bridge.py cleanup messy.cdxml -o clean.cdxml
|
|
19
|
+
python chemscript_bridge.py info structure.cdx
|
|
20
|
+
python chemscript_bridge.py search --target target.cdx --query query.cdx
|
|
21
|
+
python chemscript_bridge.py reaction input.cdx --list
|
|
22
|
+
python chemscript_bridge.py lcs mol1.cdx mol2.cdx
|
|
23
|
+
|
|
24
|
+
Python API:
|
|
25
|
+
from cdxml_toolkit.chemdraw.chemscript_bridge import ChemScriptBridge
|
|
26
|
+
cs = ChemScriptBridge()
|
|
27
|
+
cdxml = cs.name_to_cdxml("morpholine")
|
|
28
|
+
cs.convert_file("input.cdx", "output.cdxml")
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
import argparse
|
|
32
|
+
import json
|
|
33
|
+
import os
|
|
34
|
+
import re
|
|
35
|
+
import subprocess
|
|
36
|
+
import sys
|
|
37
|
+
import textwrap
|
|
38
|
+
from pathlib import Path
|
|
39
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
40
|
+
|
|
41
|
+
from ..constants import ACS_STYLE
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# Configuration
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
# JSON file in the user's home directory where the bridge persists its
# settings (the code in this module reads the "python32", "dll_dir" and
# "assembly" keys from it).
CONFIG_PATH: Path = Path.home().joinpath(".chemscript_config.json")

# Default 32-bit Python path (chemscript32 conda env).
# None means "no fixed default" — the interpreter is auto-detected.
DEFAULT_PYTHON32: Optional[str] = None

# ACS Document 1996 style attributes to inject into ChemScript CDXML output.
# Imported from constants.py — kept as module-level alias for backward compat.
ACS_STYLE_ATTRS = ACS_STYLE
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _find_python32() -> Optional[str]:
    """Locate the 32-bit Python interpreter for the chemscript32 conda env.

    Candidates are probed in this order:

    1. ``~/miniconda3/envs/chemscript32/python.exe``
    2. ``$CONDA_PREFIX/../chemscript32/python.exe`` (the active environment
       is a sibling of ``chemscript32``)
    3. ``$CONDA_PREFIX/envs/chemscript32/python.exe`` (the active environment
       is the base install)
    4. ``%USERPROFILE%/miniconda3/envs/chemscript32/python.exe``
    5. ``%USERPROFILE%/Anaconda3/envs/chemscript32/python.exe``

    Returns:
        The resolved path of the first candidate that is an existing file,
        or ``None`` when no candidate matches.
    """
    env_tail = Path("chemscript32") / "python.exe"
    candidates = [Path.home() / "miniconda3" / "envs" / env_tail]

    # An unset or empty variable would otherwise become ``Path("")``, i.e. a
    # path relative to the current working directory, and an unrelated
    # ``../chemscript32/python.exe`` could be picked up by accident.
    conda_prefix = os.environ.get("CONDA_PREFIX")
    if conda_prefix:
        candidates.append(Path(conda_prefix) / ".." / env_tail)
        candidates.append(Path(conda_prefix) / "envs" / env_tail)

    user_profile = os.environ.get("USERPROFILE")
    if user_profile:
        candidates.extend(
            Path(user_profile) / distro / "envs" / env_tail
            for distro in ("miniconda3", "Anaconda3")
        )

    for candidate in candidates:
        try:
            resolved = candidate.resolve()
        except (OSError, RuntimeError):
            # Unresolvable candidate (e.g. a symlink loop): try the next one.
            continue
        # is_file() rather than exists(): a directory is not an interpreter.
        if resolved.is_file():
            return str(resolved)
    return None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _load_config() -> dict:
    """Load saved config (python32 path, DLL path, etc.).

    Returns:
        The JSON object stored at ``CONFIG_PATH``.  An empty dict is returned
        when the file is missing, cannot be read or decoded, is not valid
        JSON, or holds something other than a JSON object.
    """
    try:
        # EAFP: a missing file raises FileNotFoundError (an OSError), so no
        # separate exists() check — and no window between check and read.
        loaded = json.loads(CONFIG_PATH.read_text())
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # Callers use ``cfg.get(...)`` and ``cfg[...] = ...``; a list, string or
    # number at the top level would break them, so treat it as "no config".
    return loaded if isinstance(loaded, dict) else {}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _save_config(cfg: dict):
    """Write *cfg* to ``CONFIG_PATH`` as indented JSON.

    Saving is best effort: an ``OSError`` while writing is reported as a
    warning on stderr and not raised to the caller.
    """
    try:
        serialized = json.dumps(cfg, indent=2)
        CONFIG_PATH.write_text(serialized)
    except OSError as exc:
        print(f"Warning: could not save config: {exc}", file=sys.stderr)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# ---------------------------------------------------------------------------
|
|
91
|
+
# CDXML post-processing — inject ACS 1996 style
|
|
92
|
+
# ---------------------------------------------------------------------------
|
|
93
|
+
|
|
94
|
+
def _inject_acs_style(cdxml: str) -> str:
    """
    Post-process CDXML from ChemScript to inject ACS Document 1996 style.

    ChemScript defaults to BondLength=30; we rescale to the ACS bond length
    and set the other ACS style attributes.

    Every ``name="value"`` occurrence of a style attribute found anywhere in
    the document is overwritten with the ACS value.  An attribute that does
    not occur at all is added to the root ``<CDXML>`` opening tag.

    Args:
        cdxml: CDXML document text.  Empty input, or text without a
            ``<CDXML`` tag, is returned unchanged.

    Returns:
        The restyled document.  ``p`` and ``BoundingBox`` coordinates are
        rescaled when the original bond length differs from the ACS one.
    """
    if not cdxml or "<CDXML" not in cdxml:
        return cdxml

    target_bond_length = float(ACS_STYLE_ATTRS["BondLength"])

    # Parse the current BondLength from ChemScript output.  Fall back to
    # ChemScript's default of 30 when the attribute is absent or unusable.
    cs_bond_length = 30.0
    bl_match = re.search(r'BondLength="([^"]+)"', cdxml)
    if bl_match:
        try:
            cs_bond_length = float(bl_match.group(1))
        except ValueError:
            cs_bond_length = 30.0
    # Rejects zero, negatives, NaN and infinity (each would give a
    # meaningless or zero scale factor).
    if not 0 < cs_bond_length < float("inf"):
        cs_bond_length = 30.0
    scale = target_bond_length / cs_bond_length

    # Inject/replace style attributes.
    for attr, val in ACS_STYLE_ATTRS.items():
        assignment = f'{attr}="{val}"'
        pat = re.compile(rf'\b{re.escape(attr)}="[^"]*"')
        if pat.search(cdxml):
            # A callable replacement inserts the text literally; a string
            # template would interpret backslashes and group references.
            cdxml = pat.sub(lambda _m, text=assignment: text, cdxml)
        else:
            # Insert before the closing ">" (or "/>") of <CDXML ...>, so a
            # self-closing root tag stays well-formed.
            cdxml = re.sub(
                r"(<CDXML\b[^>]*?)(/?>)",
                lambda m, text=assignment: f"{m.group(1)} {text}{m.group(2)}",
                cdxml,
                count=1,
            )

    # Rescale all coordinate values if scale != 1.0
    if abs(scale - 1.0) > 0.001:
        cdxml = _rescale_cdxml_coords(cdxml, scale)

    return cdxml
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _rescale_cdxml_coords(cdxml: str, scale: float) -> str:
    """Rescale point coordinates (p=, BoundingBox=) in CDXML by a factor.

    Only attributes named exactly ``p`` or ``BoundingBox`` are touched.
    Their whitespace-separated numbers are multiplied by *scale* and written
    back with two decimals.  An attribute whose value is not purely numeric
    is left as it was.

    Args:
        cdxml: CDXML document text.
        scale: Multiplicative factor applied to every coordinate.

    Returns:
        The document text with rescaled coordinates.
    """

    def _scale_point(match: re.Match) -> str:
        try:
            scaled = " ".join(
                f"{float(number) * scale:.2f}"
                for number in match.group(2).split()
            )
        except ValueError:
            # Not a coordinate list after all: keep the attribute verbatim.
            return match.group(0)
        return f'{match.group(1)}="{scaled}"'

    # The look-behind rejects any character that may appear in an XML name,
    # so attributes that merely END in "p" or "BoundingBox" do not match.
    return re.sub(
        r'(?<![\w:.-])(p|BoundingBox)="([^"]+)"',
        _scale_point,
        cdxml,
    )
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# ---------------------------------------------------------------------------
|
|
153
|
+
# ChemScriptBridge class — Python API
|
|
154
|
+
# ---------------------------------------------------------------------------
|
|
155
|
+
|
|
156
|
+
class ChemScriptBridge:
    """
    High-level Python interface to ChemScript via a 32-bit subprocess server.

    The server script (``_chemscript_server.py``, located next to this module)
    is started lazily under the 32-bit interpreter on the first request.
    Requests and responses are exchanged as one JSON document per line over
    the child's stdin/stdout.

    Usage:
        cs = ChemScriptBridge()
        cdxml = cs.name_to_cdxml("morpholine")
        cs.convert_file("in.cdx", "out.cdxml")

    The instance is also a context manager; leaving the ``with`` block calls
    :meth:`close`.
    """

    def __init__(self, python32_path: Optional[str] = None):
        """
        Resolve the 32-bit interpreter and remember it in the config file.

        Args:
            python32_path: Explicit interpreter path.  When omitted, the saved
                config value is used, then auto-detection.

        Raises:
            RuntimeError: If no 32-bit interpreter could be determined.
        """
        # Assigned first so close()/__del__ stay safe when the lookup below
        # raises before construction has finished.
        self._proc: Optional[subprocess.Popen] = None

        cfg = _load_config()
        self._python32 = python32_path or cfg.get("python32") or _find_python32()
        if self._python32 is None:
            raise RuntimeError(
                "Could not find 32-bit Python (chemscript32 conda env).\n"
                "Create it with: CONDA_SUBDIR=win-32 conda create -n chemscript32 python=3.10\n"
                "Then install pythonnet: chemscript32/python.exe -m pip install pythonnet\n"
                "Or specify the path: ChemScriptBridge(python32_path=r'C:\\...\\python.exe')"
            )
        # Save for next time
        if cfg.get("python32") != self._python32:
            cfg["python32"] = self._python32
            _save_config(cfg)

        self._server_script = str(
            Path(__file__).resolve().parent / "_chemscript_server.py"
        )

    # -----------------------------------------------------------------------
    # Server lifecycle
    # -----------------------------------------------------------------------

    @staticmethod
    def _shutdown_proc(proc, read_stderr: bool = False, no_stderr: str = "") -> str:
        """
        Forcefully retire *proc*: kill it if still alive, close its pipes and
        reap it.

        Args:
            proc: The ``Popen``-like process object.
            read_stderr: When True, return what the process wrote to stderr.
            no_stderr: Value returned when stderr is unavailable or not read.

        Returns:
            The captured stderr text, or *no_stderr*.
        """
        if proc.poll() is None:
            try:
                proc.kill()
            except OSError:
                pass  # the process went away between poll() and kill()
        captured = no_stderr
        if read_stderr and proc.stderr:
            try:
                # The process has been killed or has exited, so this read
                # is expected to reach end-of-file rather than block.
                captured = proc.stderr.read()
            except (OSError, ValueError):
                captured = no_stderr
        for stream in (proc.stdin, proc.stdout, proc.stderr):
            if stream is None:
                continue
            try:
                stream.close()
            except OSError:
                pass  # e.g. BrokenPipeError while flushing a dead child's stdin
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            pass  # already killed; nothing more can be done from here
        return captured

    def _ensure_server(self):
        """
        Start the server subprocess if not already running.

        The process is stored on the instance only after it has sent its
        ready line.  A server that fails the handshake is terminated instead
        of being left running.

        Raises:
            RuntimeError: If the server exits without output or its first
                line does not signal readiness.
            json.JSONDecodeError: If the first line is not valid JSON.
        """
        if self._proc is not None and self._proc.poll() is None:
            return
        cmd = [self._python32, self._server_script]
        # Pass DLL config from ~/.chemscript_config.json so the server
        # can locate the correct ChemScript DLL (ChemDraw 15 vs 16).
        cfg = _load_config()
        if cfg.get("dll_dir"):
            cmd += ["--dll-dir", cfg["dll_dir"]]
        if cfg.get("assembly"):
            cmd += ["--assembly", cfg["assembly"]]
        # NOTE(review): stderr is a pipe that is only read on failure; a
        # server that logs heavily to stderr could fill it and stall —
        # confirm the server keeps stderr output small.
        proc = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            encoding="utf-8",
        )
        # Wait for ready signal
        ready_line = proc.stdout.readline()
        if not ready_line:
            err = self._shutdown_proc(proc, read_stderr=True)
            raise RuntimeError(f"ChemScript server failed to start: {err}")
        try:
            ready = json.loads(ready_line)
        except ValueError:
            # Same exception as before, but without leaking the child.
            self._shutdown_proc(proc)
            raise
        if not isinstance(ready, dict) or not ready.get("ready"):
            self._shutdown_proc(proc)
            raise RuntimeError(f"ChemScript server unexpected: {ready}")
        self._proc = proc

    def _call(self, cmd: str, **args) -> dict:
        """
        Send a command to the server and return the response.

        Args:
            cmd: Server command name.
            **args: JSON-serialisable command arguments.

        Returns:
            The decoded JSON response.

        Raises:
            RuntimeError: If the server has died.  The dead process is
                discarded, so the next call starts a fresh server.
        """
        self._ensure_server()
        proc = self._proc
        request = json.dumps({"cmd": cmd, "args": args})
        try:
            proc.stdin.write(request + "\n")
            proc.stdin.flush()
            resp_line = proc.stdout.readline()
        except OSError:
            # e.g. BrokenPipeError: the server exited after the liveness check.
            resp_line = ""
        if not resp_line:
            self._proc = None
            err = self._shutdown_proc(proc, read_stderr=True, no_stderr="no output")
            raise RuntimeError(f"ChemScript server died: {err}")
        return json.loads(resp_line)

    def close(self):
        """
        Shut down the server.

        Asks the server to quit and waits up to five seconds; if that fails
        the process is killed.  Safe to call repeatedly, and on an instance
        whose ``__init__`` did not complete.
        """
        proc = getattr(self, "_proc", None)
        self._proc = None
        if proc is None:
            return
        if proc.poll() is None:
            try:
                proc.stdin.write(json.dumps({"cmd": "quit"}) + "\n")
                proc.stdin.flush()
                proc.wait(timeout=5)
            except Exception:
                # Deliberately broad, as before: close() also runs from
                # __del__, and any failure of the polite shutdown must end
                # in a forced one.
                proc.kill()
        self._shutdown_proc(proc)

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    # -----------------------------------------------------------------------
    # Request helpers
    # -----------------------------------------------------------------------

    def _request(self, cmd: str, failure: str, **args) -> dict:
        """Run *cmd* and raise ``RuntimeError`` unless the response is ok.

        The server's own ``error`` text is used when present, otherwise
        *failure*.
        """
        result = self._call(cmd, **args)
        if not result.get("ok"):
            raise RuntimeError(result.get("error", failure))
        return result

    @staticmethod
    def _abspath_if_file(value: str) -> str:
        """Return the absolute path if *value* is an existing file, else *value*."""
        return os.path.abspath(value) if os.path.isfile(value) else value

    @staticmethod
    def _file_or_smiles_args(**sources: str) -> Dict[str, str]:
        """Build request arguments for inputs that are file paths or SMILES.

        Existing files are passed as absolute paths.  Anything else is passed
        verbatim together with ``<key>_format="smiles"``.
        """
        args: Dict[str, str] = {}
        for key, value in sources.items():
            if os.path.isfile(value):
                args[key] = os.path.abspath(value)
            else:
                args[key] = value
                args[f"{key}_format"] = "smiles"
        return args

    @staticmethod
    def _is_cdxml_path(path) -> bool:
        """True when *path* (str or path-like) has a ``.cdxml`` suffix."""
        return os.fspath(path).lower().endswith(".cdxml")

    @staticmethod
    def _styled_cdxml(cdxml: str, output: Optional[str]) -> str:
        """Apply ACS style to *cdxml*, write it to *output* if given, return it."""
        styled = _inject_acs_style(cdxml)
        if output:
            Path(output).write_text(styled, encoding="utf-8")
        return styled

    # -----------------------------------------------------------------------
    # Public API
    # -----------------------------------------------------------------------

    def convert_file(self, input_path: str, output_path: str) -> dict:
        """
        Convert a chemistry file between formats.

        Supported: CDX, CDXML, MOL, SDF, RXN, SMILES.
        Format determined by file extension.

        Returns:
            The server response dict.

        Raises:
            RuntimeError: If the server reports a failure.
        """
        result = self._request(
            "convert", "Conversion failed",
            input=os.path.abspath(input_path),
            output=os.path.abspath(output_path),
        )
        # Post-process CDXML output for ACS style
        if self._is_cdxml_path(output_path):
            self._postprocess_cdxml(output_path)
        return result

    def name_to_cdxml(self, name: str, output: Optional[str] = None) -> str:
        """
        Convert a chemical name to CDXML string.

        Args:
            name: Chemical name (e.g. "morpholine", "benzene").
            output: Optional file path to write CDXML.

        Returns:
            CDXML string with ACS 1996 style.

        Raises:
            RuntimeError: If the server reports a failure.
        """
        result = self._request(
            "name_to_cdxml", f"Name resolution failed: {name}",
            name=name,
            output=os.path.abspath(output) if output else None,
        )
        return self._styled_cdxml(result["cdxml"], output)

    def smiles_to_cdxml(self, smiles: str, output: Optional[str] = None) -> str:
        """
        Convert a SMILES string to CDXML.

        Args:
            smiles: SMILES string (e.g. "C1COCCN1").
            output: Optional file path to write CDXML.

        Returns:
            CDXML string with ACS 1996 style.

        Raises:
            RuntimeError: If the server reports a failure.
        """
        result = self._request(
            "smiles_to_cdxml", f"SMILES parse failed: {smiles}",
            smiles=smiles,
            output=os.path.abspath(output) if output else None,
        )
        return self._styled_cdxml(result["cdxml"], output)

    def cleanup(self, input_path: str, output: Optional[str] = None) -> str:
        """
        Clean up a structure file — normalize coordinates, bond lengths, etc.

        Args:
            input_path: Path to structure file.
            output: Output path (defaults to overwriting input).

        Returns:
            Output file path.

        Raises:
            RuntimeError: If the server reports a failure.
        """
        out = output or input_path
        self._request(
            "cleanup", "Cleanup failed",
            input=os.path.abspath(input_path),
            output=os.path.abspath(out),
        )
        if self._is_cdxml_path(out):
            self._postprocess_cdxml(out)
        return out

    def get_name(self, source: str) -> str:
        """Get IUPAC name for a structure file or SMILES string."""
        result = self._request(
            "get_name", "Name lookup failed",
            source=self._abspath_if_file(source),
        )
        return result["name"]

    def get_formula(self, source: str) -> str:
        """Get molecular formula for a structure file or SMILES string."""
        result = self._request(
            "get_formula", "Formula lookup failed",
            source=self._abspath_if_file(source),
        )
        return result["formula"]

    def get_info(self, source: str) -> dict:
        """
        Get full chemical info: name, formula, SMILES, InChI, atom/bond count.

        Works with both structure files and reaction files.
        """
        return self._request(
            "get_info", "Info lookup failed",
            source=self._abspath_if_file(source),
        )

    def contains_substructure(self, target: str, query: str) -> bool:
        """
        Check if target contains query as a substructure.

        Args can be file paths or SMILES strings.
        """
        result = self._request(
            "contains_substructure", "Substructure search failed",
            **self._file_or_smiles_args(target=target, query=query),
        )
        return result["contains"]

    def substructure_search(self, target: str, query: str) -> dict:
        """
        Perform atom-by-atom substructure search.

        Returns dict with 'contains' bool and 'maps' list of atom mappings.
        """
        return self._request(
            "substructure_search", "Substructure search failed",
            **self._file_or_smiles_args(target=target, query=query),
        )

    def load_reaction(self, source: str, include_cdxml: bool = False) -> dict:
        """
        Load a reaction file and return component information.

        Args:
            source: Path to reaction file (CDX, RXN) or reaction SMILES.
            include_cdxml: If True, include CDXML for each component.

        Returns:
            Dict with 'formula', 'reactants', 'products'.
        """
        args: Dict[str, Any] = {"source": source, "include_cdxml": include_cdxml}
        if os.path.isfile(source):
            args["source"] = os.path.abspath(source)
        else:
            args["format"] = "smiles"
        return self._request("load_reaction", "Reaction load failed", **args)

    def largest_common_substructure(self, mol1: str, mol2: str) -> dict:
        """
        Find the largest common substructure between two molecules.

        Args can be file paths or SMILES strings.

        Returns:
            Dict with 'atom_map' and 'common_atom_count'.
        """
        return self._request(
            "largest_common_substructure", "LCS failed",
            **self._file_or_smiles_args(mol1=mol1, mol2=mol2),
        )

    def overlay(self, source: str, target: str,
                source_format: Optional[str] = None,
                target_format: Optional[str] = None) -> Tuple[str, bool]:
        """
        Overlay (2D-align) a molecule onto a reference molecule.

        Args:
            source: File path or CDXML string of the molecule to align.
            target: File path or CDXML string of the reference molecule.
            source_format: Format hint for source (optional).
            target_format: Format hint for target (optional).

        Returns:
            Tuple of (aligned_cdxml_string, success_bool).

        Raises:
            RuntimeError: If the server reports a failure.
        """
        args = {
            "source": self._abspath_if_file(source),
            "target": self._abspath_if_file(target),
        }
        if source_format:
            args["source_format"] = source_format
        if target_format:
            args["target_format"] = target_format
        result = self._request("overlay", "Overlay failed", **args)
        cdxml = _inject_acs_style(result["aligned_cdxml"])
        return cdxml, result.get("success", False)

    @staticmethod
    def _mol_from_block(Chem, mol_block: str, kekulize: bool):
        """Parse a MOL block with RDKit; return None if it cannot be used.

        The block is parsed unsanitized and then sanitized, optionally
        skipping the kekulization step.  Sanitization failures yield None.
        """
        mol = Chem.MolFromMolBlock(mol_block, sanitize=False)
        if mol is None:
            return None
        try:
            if kekulize:
                Chem.SanitizeMol(mol)
            else:
                Chem.SanitizeMol(
                    mol,
                    Chem.SanitizeFlags.SANITIZE_ALL
                    ^ Chem.SanitizeFlags.SANITIZE_KEKULIZE,
                )
        except ValueError:
            # RDKit's sanitization exceptions derive from ValueError.
            return None
        return mol

    @staticmethod
    def _node_position(node) -> Optional[Tuple[float, float]]:
        """Return the (x, y) of a CDXML ``<n>`` element, or None if unusable."""
        try:
            px, py = node.get("p", "").split()[:2]
            return (float(px), float(py))
        except (ValueError, IndexError):
            # Missing, too short or non-numeric "p" attribute.
            return None

    def substructure_align(self, query: str, target: str,
                           query_format: Optional[str] = None,
                           target_format: Optional[str] = None) -> Optional[List]:
        """
        Align a small molecule (query) to its substructure match in a
        larger molecule (target).

        Uses ChemScript to confirm substructure match and get SMILES,
        then RDKit for atom-index mapping (avoiding ChemScript naming bugs).

        Returns a list of (x, y) positions for each query atom (in the
        query's CDXML atom iteration order), taken from the matched
        target atoms.  An entry is None when the matched target atom has no
        usable position.  Returns None if no substructure match was found,
        if RDKit is unavailable, or if the server data cannot be parsed.
        """
        from xml.etree import ElementTree as ET

        args = {
            "query": self._abspath_if_file(query),
            "target": self._abspath_if_file(target),
        }
        if query_format:
            args["query_format"] = query_format
        if target_format:
            args["target_format"] = target_format
        result = self._call("substructure_align", **args)
        if not result.get("ok") or not result.get("contains"):
            return None

        target_cdxml = result.get("target_cdxml", "")
        target_mol_block = result.get("target_mol", "")
        query_mol_block = result.get("query_mol", "")

        if not target_cdxml or not target_mol_block or not query_mol_block:
            return None

        # --- Use RDKit for substructure matching via MOL blocks ---
        # MOL block atom order is guaranteed to match ChemScript's iteration
        # order (both come from the same StructureData), so RDKit atom indices
        # from the MOL block = ChemScript atom indices = CDXML <n> order.
        try:
            from rdkit import Chem
        except ImportError:
            return None

        # ChemScript MOL blocks may have aromatic bonds that RDKit can't
        # kekulize, so the target is sanitized without kekulization.  The
        # query gets full sanitization first (as before) and falls back to
        # the kekulization-free variant instead of raising.
        target_mol = self._mol_from_block(Chem, target_mol_block, kekulize=False)
        query_mol = self._mol_from_block(Chem, query_mol_block, kekulize=True)
        if query_mol is None:
            query_mol = self._mol_from_block(Chem, query_mol_block, kekulize=False)
        if target_mol is None or query_mol is None:
            return None

        match = target_mol.GetSubstructMatch(query_mol)
        if not match:
            return None
        # match[i] = target atom index that corresponds to query atom i

        # --- Parse target CDXML to extract atom positions ---
        # Drop the DOCTYPE and control characters that are not legal in XML.
        clean = re.sub(r'<!DOCTYPE[^>]*>', '', target_cdxml)
        clean = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', clean)
        try:
            troot = ET.fromstring(clean)
        except ET.ParseError:
            return None

        # NOTE(review): every <n> element in document order is counted,
        # including any nested ones — confirm this matches the MOL block
        # atom order for structures with nested fragments.
        target_positions = [self._node_position(n) for n in troot.iter("n")]

        # Build positions for each query atom
        return [
            target_positions[ti] if ti < len(target_positions) else None
            for ti in match
        ]

    def write_data(self, source: str, target_format: str,
                   source_format: Optional[str] = None) -> str:
        """
        Convert a structure to a specific format string.

        Args:
            source: File path or data string.
            target_format: Output format (smiles, inchi, mol, cdxml, name, etc.).
            source_format: Input format hint (optional).

        Returns:
            Data string in target format.

        Raises:
            RuntimeError: If the server reports a failure.
        """
        args = {
            "target_format": target_format,
            "source": self._abspath_if_file(source),
        }
        if source_format:
            args["source_format"] = source_format
        result = self._request("write_data", "WriteData failed", **args)
        return result["data"]

    def mimetypes(self) -> List[str]:
        """List all supported mimetypes.

        Returns an empty list when the response carries no ``mimetypes`` key.
        """
        result = self._call("mimetypes")
        return result.get("mimetypes", [])

    # -----------------------------------------------------------------------
    # Internal helpers
    # -----------------------------------------------------------------------

    def _postprocess_cdxml(self, path: str):
        """Read a CDXML file written by ChemScript and inject ACS style.

        Best effort: any failure is reported as a warning on stderr and the
        file is left as ChemScript wrote it.
        """
        try:
            text = Path(path).read_text(encoding="utf-8")
            text = _inject_acs_style(text)
            Path(path).write_text(text, encoding="utf-8")
        except Exception as e:
            # Deliberately broad: styling is cosmetic and must not turn a
            # successful conversion into an error.
            print(f"Warning: CDXML post-processing failed: {e}", file=sys.stderr)
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
# ---------------------------------------------------------------------------
|
|
608
|
+
# CLI interface
|
|
609
|
+
# ---------------------------------------------------------------------------
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
def _cli_configure(args) -> int:
    """Detect the 32-bit Python and ChemScript DLL locations and save them.

    Starts from the existing configuration (``_load_config``), fills in
    ``python32``, ``dll_dir`` and ``assembly`` when they can be detected,
    prints what was found (or warnings when nothing was), and always writes
    the configuration back with ``_save_config``.

    DLL search order:
      1. Local ``chemscript_dlls/`` directory next to this module
         (portable deployment with bundled DLLs).
      2. ``PerkinElmerInformatics`` install paths (ChemOffice2016, then 2015).
      3. ``CambridgeSoft`` install paths (older naming convention).

    Args:
        args: Parsed CLI namespace; not consulted by this command.

    Returns:
        Always 0, including when detection produced only warnings.
    """
    config = _load_config()

    # A previously saved interpreter path wins over a fresh probe.
    python32 = config.get("python32") or _find_python32()
    if not python32:
        print(" WARNING: 32-bit Python (chemscript32 env) not found.")
        print(" Create it with: set CONDA_SUBDIR=win-32 && conda create -n chemscript32 python=3.10")
    else:
        config["python32"] = python32
        print(f" 32-bit Python: {python32}")

    # (install directory name, .NET assembly name), newest release first.
    releases = (
        ("ChemOffice2016", "CambridgeSoft.ChemScript16"),
        ("ChemOffice2015", "CambridgeSoft.ChemScript15"),
    )
    module_dir = os.path.dirname(os.path.abspath(__file__))
    local_dll_dir = os.path.join(module_dir, "chemscript_dlls")
    detected = None

    # 1. Bundled DLLs shipped alongside this module.
    for _release, assembly_name in releases:
        candidate = os.path.join(local_dll_dir, f"{assembly_name}.dll")
        if not os.path.isfile(candidate):
            continue
        config["dll_dir"] = local_dll_dir
        config["assembly"] = assembly_name
        detected = f"local ({assembly_name})"
        print(f" ChemScript DLL: {candidate} (bundled)")
        break

    # 2./3. Standard install locations under Program Files (x86).
    if not detected:
        program_files = os.environ.get("PROGRAMFILES(X86)", r"C:\Program Files (x86)")
        for vendor in ("PerkinElmerInformatics", "CambridgeSoft"):
            vendor_root = os.path.join(program_files, vendor)
            for release, assembly_name in releases:
                net_dir = os.path.join(vendor_root, release, "ChemScript", "Lib", "Net")
                candidate = os.path.join(net_dir, f"{assembly_name}.dll")
                if os.path.isfile(candidate):
                    config["dll_dir"] = net_dir
                    config["assembly"] = assembly_name
                    detected = release
                    print(f" ChemScript DLL: {candidate}")
                    break
            if detected:
                break

    if not detected:
        print(" WARNING: ChemScript DLL not found.")
        print(f" Searched: {local_dll_dir}")
        print(" Searched: Program Files (x86)\\PerkinElmerInformatics\\ChemOffice20XX")
        print(" Searched: Program Files (x86)\\CambridgeSoft\\ChemOffice20XX")
        print(" Either install ChemDraw with ChemScript, or copy the DLLs to:")
        print(f" {local_dll_dir}")
        print(" Required: CambridgeSoft.ChemScript16.dll + ChemScript160.dll")

    _save_config(config)
    print(f"\n Config saved to: {CONFIG_PATH}")
    return 0
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
def _cli_ping(args, cs: ChemScriptBridge) -> int:
    """Send a ``ping`` through the bridge and report the outcome.

    Args:
        args: Parsed CLI namespace; not consulted by this command.
        cs: Bridge whose ``_call`` method is used to send the ping.

    Returns:
        0 when the reply has a truthy ``"ok"`` entry (success message on
        stdout), otherwise 1 (failure message with the reply's ``"error"``
        entry on stderr).
    """
    reply = cs._call("ping")
    if not reply.get("ok"):
        print(f"ChemScript bridge FAILED: {reply.get('error', 'unknown')}", file=sys.stderr)
        return 1
    print("ChemScript bridge OK: server is responding")
    return 0
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
def _cli_convert(args, cs: ChemScriptBridge) -> int:
    """Convert ``args.input`` to ``args.output`` and summarise on stderr.

    Args:
        args: Parsed CLI namespace providing ``input`` and ``output``.
        cs: Bridge whose ``convert_file`` performs the conversion.

    Returns:
        Always 0; failures surface as exceptions from ``convert_file``.
    """
    summary = cs.convert_file(args.input, args.output)
    print(
        f"Converted ({summary.get('type', 'unknown')}): {summary.get('formula', '?')}",
        file=sys.stderr,
    )
    print(f"Written to {args.output}", file=sys.stderr)
    return 0
|
|
699
|
+
|
|
700
|
+
|
|
701
|
+
def _cli_name2struct(args, cs: ChemScriptBridge) -> int:
    """Turn the chemical name in ``args.name`` into CDXML.

    When ``args.output`` is ``"-"`` the CDXML returned by
    ``cs.name_to_cdxml`` is printed to stdout.  Otherwise the output path
    is handed to ``cs.name_to_cdxml`` and a confirmation goes to stderr.

    Returns:
        Always 0; failures surface as exceptions from the bridge.
    """
    destination = args.output
    if destination != "-":
        cs.name_to_cdxml(args.name, output=destination)
        print(f"Written to {destination}", file=sys.stderr)
        return 0
    print(cs.name_to_cdxml(args.name))
    return 0
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def _cli_smiles2struct(args, cs: ChemScriptBridge) -> int:
    """Turn the SMILES string in ``args.smiles`` into CDXML.

    When ``args.output`` is ``"-"`` the CDXML returned by
    ``cs.smiles_to_cdxml`` is printed to stdout.  Otherwise the output path
    is handed to ``cs.smiles_to_cdxml`` and a confirmation goes to stderr.

    Returns:
        Always 0; failures surface as exceptions from the bridge.
    """
    destination = args.output
    if destination != "-":
        cs.smiles_to_cdxml(args.smiles, output=destination)
        print(f"Written to {destination}", file=sys.stderr)
        return 0
    print(cs.smiles_to_cdxml(args.smiles))
    return 0
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
def _cli_cleanup(args, cs: ChemScriptBridge) -> int:
    """Run ``cs.cleanup`` on ``args.input``.

    The result goes to ``args.output`` when it is set; otherwise the input
    path is reused as the output path.  A confirmation naming the output
    path is printed to stderr.

    Returns:
        Always 0; failures surface as exceptions from ``cs.cleanup``.
    """
    destination = args.output if args.output else args.input
    cs.cleanup(args.input, output=destination)
    print(f"Cleaned up → {destination}", file=sys.stderr)
    return 0
|
|
728
|
+
|
|
729
|
+
|
|
730
|
+
def _cli_info(args, cs: ChemScriptBridge) -> int:
    """Print chemical information for ``args.source``.

    The dictionary returned by ``cs.get_info`` is rendered according to its
    ``"type"`` entry:

    * ``"structure"`` - name, formula, SMILES, optional InChI, atom and
      bond counts.
    * ``"reaction"`` - overall formula followed by numbered reactant and
      product listings.
    * anything else - the raw dictionary as indented JSON.

    Missing entries fall back to ``'?'`` (or ``'(unknown)'`` for names).
    This also applies to the ``formula`` and ``smiles`` of individual
    reaction components, so an incomplete component is still listed rather
    than aborting the command with a ``KeyError``.

    Args:
        args: Parsed CLI namespace providing ``source``.
        cs: Bridge whose ``get_info`` supplies the data.

    Returns:
        Always 0; failures surface as exceptions from ``get_info``.
    """
    info = cs.get_info(args.source)
    kind = info.get("type")

    if kind == "structure":
        print("Type: structure")
        print(f"Name: {info.get('name', '(unknown)')}")
        print(f"Formula: {info.get('formula', '?')}")
        print(f"SMILES: {info.get('smiles', '?')}")
        if info.get("inchi"):
            print(f"InChI: {info['inchi']}")
        print(f"Atoms: {info.get('atom_count', '?')}")
        print(f"Bonds: {info.get('bond_count', '?')}")
    elif kind == "reaction":
        print("Type: reaction")
        print(f"Formula: {info.get('formula', '?')}")
        # Reactants and products share one layout, so render both from a table.
        for heading, key in (("Reactants", "reactants"), ("Products", "products")):
            components = info.get(key, [])
            print(f"{heading} ({len(components)}):")
            for position, component in enumerate(components, 1):
                label = component.get("name") or "(unknown)"
                formula = component.get("formula", "?")
                smiles = component.get("smiles", "?")
                print(f" {position}. {formula} - {label} [{smiles}]")
    else:
        print(json.dumps(info, indent=2))
    return 0
|
|
755
|
+
|
|
756
|
+
|
|
757
|
+
def _cli_search(args, cs: ChemScriptBridge) -> int:
    """Report whether ``args.query`` is a substructure of ``args.target``.

    Without ``args.atom_map`` only the result of
    ``cs.contains_substructure`` is printed.  With it,
    ``cs.substructure_search`` is used instead and every mapping in the
    result's ``"maps"`` entry is printed pair by pair.

    Returns:
        Always 0, whether or not a match was found.
    """
    if not args.atom_map:
        matched = cs.contains_substructure(args.target, args.query)
        print(f"Contains substructure: {matched}")
        return 0

    outcome = cs.substructure_search(args.target, args.query)
    print(f"Contains substructure: {outcome['contains']}")
    mappings = outcome["maps"]
    if mappings:
        for number, mapping in enumerate(mappings, start=1):
            print(f" Map {number}:")
            for left, right in mapping.items():
                print(f" {left} -> {right}")
    return 0
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
def _cli_reaction(args, cs: ChemScriptBridge) -> int:
    """Print the components of the reaction in ``args.input``.

    The reaction is loaded with ``cs.load_reaction`` (passing
    ``args.cdxml`` as ``include_cdxml``).  Its formula and numbered
    reactant/product listings are printed, followed by the full result as
    indented JSON when ``args.json`` is set.

    NOTE(review): the ``list_components`` attribute is not consulted here;
    the listing is always printed.

    Returns:
        Always 0; failures surface as exceptions.
    """
    info = cs.load_reaction(args.input, include_cdxml=args.cdxml)
    print(f"Reaction: {info['formula']}")

    # Reactants and products share one layout, so render both from a table.
    for heading, key in (("Reactants", "reactants"), ("Products", "products")):
        components = info[key]
        print(f"{heading} ({len(components)}):")
        for position, component in enumerate(components, start=1):
            label = component.get("name") or "(unknown)"
            print(f" {position}. {component['formula']} - {label} [{component['smiles']}]")

    if args.json:
        print(json.dumps(info, indent=2))
    return 0
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
def _cli_lcs(args, cs: ChemScriptBridge) -> int:
    """Print the largest common substructure of ``args.mol1`` and ``args.mol2``.

    The count comes from the ``"common_atom_count"`` entry of the result of
    ``cs.largest_common_substructure`` (0 when absent).  Each entry of its
    ``"atom_map"`` list is printed on its own line.

    Returns:
        Always 0; failures surface as exceptions.
    """
    outcome = cs.largest_common_substructure(args.mol1, args.mol2)
    atom_total = outcome.get("common_atom_count", 0)
    print(f"Common atoms: {atom_total}")
    for pairing in outcome.get("atom_map", []):
        print(f" {pairing['common']}: mol1={pairing['mol1']}, mol2={pairing['mol2']}")
    return 0
|
|
794
|
+
|
|
795
|
+
|
|
796
|
+
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point for the ChemScript bridge.

    Parses *argv* (``sys.argv[1:]`` when ``None``), then runs the selected
    sub-command.  ``configure`` is handled without creating a bridge; every
    other command gets a ``ChemScriptBridge`` that is closed afterwards.

    Args:
        argv: Argument list to parse, without the program name.

    Returns:
        The handler's exit code, or 1 when the bridge cannot be created or
        the handler raises (the error is printed to stderr).
    """
    usage_examples = textwrap.dedent("""\
        Examples:
        %(prog)s convert input.cdx output.cdxml
        %(prog)s name2struct "morpholine" -o morpholine.cdxml
        %(prog)s smiles2struct "C1COCCN1" -o morpholine.cdxml
        %(prog)s cleanup messy.cdxml -o clean.cdxml
        %(prog)s info structure.cdx
        %(prog)s search --target target.cdx --query query.cdx
        %(prog)s reaction input.cdx --list
        %(prog)s lcs "C1(C)CCCC1CCO" "C1CCCC1C"
        """)
    parser = argparse.ArgumentParser(
        description="ChemScript Bridge — access ChemDraw's chemical intelligence from Python.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    commands = parser.add_subparsers(dest="command", required=True)

    # configure — no ChemScript needed
    commands.add_parser("configure", help="Auto-detect ChemDraw version and save config")

    # ping — test bridge connectivity
    commands.add_parser("ping", help="Test ChemScript bridge connectivity")

    # convert
    convert_cmd = commands.add_parser("convert", help="Convert between chemical file formats")
    convert_cmd.add_argument("input", help="Input file (CDX, CDXML, MOL, RXN, etc.)")
    convert_cmd.add_argument("output", help="Output file (format from extension)")

    # name2struct
    name_cmd = commands.add_parser("name2struct", help="Chemical name → CDXML structure")
    name_cmd.add_argument("name", help="Chemical name (e.g. 'morpholine')")
    name_cmd.add_argument("-o", "--output", default="-", help="Output CDXML file (default: stdout)")

    # smiles2struct
    smiles_cmd = commands.add_parser("smiles2struct", help="SMILES → CDXML structure")
    smiles_cmd.add_argument("smiles", help="SMILES string")
    smiles_cmd.add_argument("-o", "--output", default="-", help="Output CDXML file (default: stdout)")

    # cleanup
    cleanup_cmd = commands.add_parser("cleanup", help="Clean up structure coordinates")
    cleanup_cmd.add_argument("input", help="Input structure file")
    cleanup_cmd.add_argument("-o", "--output", default=None, help="Output file (default: overwrite input)")

    # info
    info_cmd = commands.add_parser("info", help="Get chemical info (name, formula, SMILES, etc.)")
    info_cmd.add_argument("source", help="Structure/reaction file or SMILES string")

    # search
    search_cmd = commands.add_parser("search", help="Substructure search")
    search_cmd.add_argument("--target", required=True, help="Target structure (file or SMILES)")
    search_cmd.add_argument("--query", required=True, help="Query substructure (file or SMILES)")
    search_cmd.add_argument("--atom-map", action="store_true", help="Show atom-by-atom mapping")

    # reaction
    reaction_cmd = commands.add_parser("reaction", help="Extract reaction components")
    reaction_cmd.add_argument("input", help="Reaction file (CDX, RXN) or reaction SMILES")
    reaction_cmd.add_argument("--list", action="store_true", dest="list_components",
                              help="List reactants and products")
    reaction_cmd.add_argument("--cdxml", action="store_true", help="Include CDXML for each component")
    reaction_cmd.add_argument("--json", action="store_true", help="Output full JSON")

    # lcs
    lcs_cmd = commands.add_parser("lcs", help="Largest common substructure")
    lcs_cmd.add_argument("mol1", help="First molecule (file or SMILES)")
    lcs_cmd.add_argument("mol2", help="Second molecule (file or SMILES)")

    options = parser.parse_args(argv)

    # 'configure' doesn't need a running ChemScript server
    if options.command == "configure":
        return _cli_configure(options)

    try:
        bridge = ChemScriptBridge()
    except RuntimeError as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1

    try:
        handlers = {
            "ping": _cli_ping,
            "convert": _cli_convert,
            "name2struct": _cli_name2struct,
            "smiles2struct": _cli_smiles2struct,
            "cleanup": _cli_cleanup,
            "info": _cli_info,
            "search": _cli_search,
            "reaction": _cli_reaction,
            "lcs": _cli_lcs,
        }
        return handlers[options.command](options, bridge)
    except Exception as exc:
        # RuntimeError is an Exception subclass, so one handler reports both.
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1
    finally:
        bridge.close()
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())
|