cdxml-toolkit 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cdxml_toolkit/__init__.py +18 -0
- cdxml_toolkit/_jre/__init__.py +2 -0
- cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
- cdxml_toolkit/analysis/__init__.py +35 -0
- cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
- cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
- cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
- cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
- cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
- cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
- cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
- cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
- cdxml_toolkit/analysis/extract_nmr.py +47 -0
- cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
- cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
- cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
- cdxml_toolkit/cdxml_builder.py +920 -0
- cdxml_toolkit/cdxml_utils.py +342 -0
- cdxml_toolkit/chemdraw/__init__.py +5 -0
- cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
- cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
- cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
- cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
- cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
- cdxml_toolkit/constants.py +304 -0
- cdxml_toolkit/coord_normalizer.py +438 -0
- cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
- cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
- cdxml_toolkit/image/__init__.py +15 -0
- cdxml_toolkit/image/reaction_from_image.py +2103 -0
- cdxml_toolkit/image/structure_from_image.py +1711 -0
- cdxml_toolkit/layout/__init__.py +5 -0
- cdxml_toolkit/layout/alignment.py +1642 -0
- cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
- cdxml_toolkit/layout/scheme_merger.py +2260 -0
- cdxml_toolkit/mcp_server/__init__.py +0 -0
- cdxml_toolkit/mcp_server/__main__.py +5 -0
- cdxml_toolkit/mcp_server/server.py +1567 -0
- cdxml_toolkit/naming/__init__.py +6 -0
- cdxml_toolkit/naming/aligned_namer.py +2342 -0
- cdxml_toolkit/naming/mol_builder.py +3722 -0
- cdxml_toolkit/naming/name_decomposer.py +2843 -0
- cdxml_toolkit/naming/reactions_datamol.json +2414 -0
- cdxml_toolkit/office/__init__.py +5 -0
- cdxml_toolkit/office/doc_from_template.py +722 -0
- cdxml_toolkit/office/ole_embedder.py +808 -0
- cdxml_toolkit/office/ole_extractor.py +272 -0
- cdxml_toolkit/perception/__init__.py +10 -0
- cdxml_toolkit/perception/compound_search.py +229 -0
- cdxml_toolkit/perception/eln_csv_parser.py +240 -0
- cdxml_toolkit/perception/rdf_parser.py +664 -0
- cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
- cdxml_toolkit/perception/reaction_parser.py +2150 -0
- cdxml_toolkit/perception/scheme_reader.py +2948 -0
- cdxml_toolkit/perception/scheme_refine.py +1404 -0
- cdxml_toolkit/perception/scheme_segmenter.py +619 -0
- cdxml_toolkit/perception/spatial_assignment.py +1013 -0
- cdxml_toolkit/rdkit_utils.py +605 -0
- cdxml_toolkit/render/__init__.py +17 -0
- cdxml_toolkit/render/auto_layout.py +229 -0
- cdxml_toolkit/render/compact_parser.py +632 -0
- cdxml_toolkit/render/parser.py +706 -0
- cdxml_toolkit/render/render_scheme.py +267 -0
- cdxml_toolkit/render/renderer.py +2387 -0
- cdxml_toolkit/render/schema.py +90 -0
- cdxml_toolkit/render/scheme_maker.py +1043 -0
- cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
- cdxml_toolkit/resolve/__init__.py +13 -0
- cdxml_toolkit/resolve/cas_resolver.py +430 -0
- cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
- cdxml_toolkit/resolve/condensed_formula.py +493 -0
- cdxml_toolkit/resolve/jre_manager.py +195 -0
- cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
- cdxml_toolkit/resolve/reagent_db.py +285 -0
- cdxml_toolkit/resolve/superatom_data.json +2856 -0
- cdxml_toolkit/resolve/superatom_table.py +146 -0
- cdxml_toolkit/text_formatting.py +298 -0
- cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
- cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
- cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
- cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
- cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
cdxml_to_image.py — Render a CDXML file to PNG or SVG via ChemDraw COM.
|
|
4
|
+
|
|
5
|
+
Requires ChemDraw to be installed (ChemDraw Professional 16+).
|
|
6
|
+
ChemDraw does NOT need to be open — it is launched as a hidden background
|
|
7
|
+
process and closed automatically after export.
|
|
8
|
+
|
|
9
|
+
Usage
|
|
10
|
+
-----
|
|
11
|
+
python cdxml_to_image.py input.cdxml # PNG alongside input file
|
|
12
|
+
python cdxml_to_image.py input.cdxml -o out.png # explicit output path
|
|
13
|
+
python cdxml_to_image.py input.cdxml -o out.svg # SVG output
|
|
14
|
+
python cdxml_to_image.py input.cdxml --dpi 150 # lower resolution PNG
|
|
15
|
+
python cdxml_to_image.py --batch f1.cdxml f2.cdxml # batch render (one COM session)
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import json
|
|
20
|
+
import sys
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Optional
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
# ChemDraw COM helpers
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
def _get_chemdraw():
    """Obtain a ChemDraw COM application object.

    Attaching to an already-registered ChemDraw session is tried first; if
    that raises for any reason, a new object is created with ``Dispatch``.

    Returns a ``(app, launched)`` tuple. ``launched`` is False when an
    existing session was attached and True when this call created the
    instance.
    """
    import win32com.client as com_client

    prog_id = "ChemDraw.Application"
    try:
        return com_client.GetActiveObject(prog_id), False
    except Exception:
        # Nothing to attach to (or attaching failed): create an instance.
        return com_client.Dispatch(prog_id), True
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
# ChemDraw COM backend
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
def cdxml_to_image(
    cdxml_path: str,
    output_path: Optional[str] = None,
    png_dpi: int = 300,
) -> str:
    """
    Render a CDXML file to PNG or SVG using ChemDraw via COM automation.

    ChemDraw infers the output format from the file extension (.png, .svg,
    .emf, .cdxml, …). TransparentPNGs is forced off for the export so the
    background is solid white rather than a transparent checkerboard.

    The application is hidden while the export runs. Afterwards the
    document is closed, the two PNG preferences changed here are put back
    to the values they had before (best effort), and the application is
    either quit (if this call started it) or made visible/hidden as it was.
    Failures during that cleanup are ignored so they never hide the error
    (or the successful result) of the export itself.

    Parameters
    ----------
    cdxml_path  : path to the source .cdxml file
    output_path : destination file; if None, derived from cdxml_path as .png
    png_dpi     : resolution for PNG export (default 300 dpi)

    Returns
    -------
    Absolute path to the written output file.

    Raises
    ------
    FileNotFoundError if cdxml_path does not exist; otherwise whatever the
    COM layer raises while opening or saving the document.
    """
    src = Path(cdxml_path)
    if not src.exists():
        raise FileNotFoundError(f"CDXML file not found: {cdxml_path}")

    if output_path is None:
        output_path = str(src.with_suffix(".png"))

    cdxml_abs = str(src.resolve())
    out_abs = str(Path(output_path).resolve())

    app, launched = _get_chemdraw()
    unset = object()          # marks "visibility was never captured"
    was_visible = unset
    prefs = None
    saved_prefs = {}
    doc = None
    try:
        # Everything that talks to ChemDraw lives inside the try so that a
        # failure at any step still reaches the cleanup below.
        was_visible = app.Visible
        app.Visible = False

        prefs = app.Preferences
        for pref_name in ("TransparentPNGs", "PNGResolution"):
            try:
                saved_prefs[pref_name] = getattr(prefs, pref_name)
            except Exception:
                pass  # unreadable preference: nothing to restore later
        prefs.TransparentPNGs = False   # solid white background
        prefs.PNGResolution = png_dpi

        doc = app.Documents.Open(cdxml_abs)
        doc.SaveAs(out_abs)

        return out_abs

    finally:
        if doc is not None:
            try:
                doc.Close(False)
            except Exception:
                pass
        # Put the caller's export preferences back.
        for pref_name, old_value in saved_prefs.items():
            try:
                setattr(prefs, pref_name, old_value)
            except Exception:
                pass
        if launched:
            try:
                app.Quit()
            except Exception:
                pass
        elif was_visible is not unset:
            try:
                app.Visible = was_visible
            except Exception:
                pass
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def batch_render(
    cdxml_paths: list,
    png_dpi: int = 300,
) -> dict:
    """Render multiple CDXML files to PNG in a single COM session.

    Each output is written next to its input, with the suffix replaced by
    ``.png``. An empty input list returns an empty dict without contacting
    ChemDraw.

    Returns dict mapping input_path -> {"output": path, "error": None} on
    success, or {"output": None, "error": message} on failure. The error
    message is never empty, so callers can test it for truthiness.

    Every opened document is closed again even when saving fails, and a
    failure while closing does not turn a written file into an error. The
    PNG preferences changed here and the application's visibility are
    restored afterwards on a best-effort basis; an instance started by this
    call is quit.
    """
    results = {}
    if not cdxml_paths:
        return results

    app, launched = _get_chemdraw()
    unset = object()          # marks "visibility was never captured"
    was_visible = unset
    prefs = None
    saved_prefs = {}
    try:
        was_visible = app.Visible
        app.Visible = False

        prefs = app.Preferences
        for pref_name in ("TransparentPNGs", "PNGResolution"):
            try:
                saved_prefs[pref_name] = getattr(prefs, pref_name)
            except Exception:
                pass  # unreadable preference: nothing to restore later
        prefs.TransparentPNGs = False
        prefs.PNGResolution = png_dpi

        for cdxml_path in cdxml_paths:
            src = Path(cdxml_path)
            cdxml_abs = str(src.resolve())
            out_abs = str(src.with_suffix(".png").resolve())
            doc = None
            try:
                doc = app.Documents.Open(cdxml_abs)
                doc.SaveAs(out_abs)
                results[cdxml_path] = {"output": out_abs, "error": None}
            except Exception as e:
                # Fall back to repr() so an exception without a message
                # is still reported as a failure.
                results[cdxml_path] = {"output": None, "error": str(e) or repr(e)}
            finally:
                # Close on every path so a failed export does not leave the
                # document open in the shared session.
                if doc is not None:
                    try:
                        doc.Close(False)
                    except Exception:
                        pass
    finally:
        for pref_name, old_value in saved_prefs.items():
            try:
                setattr(prefs, pref_name, old_value)
            except Exception:
                pass
        if launched:
            try:
                app.Quit()
            except Exception:
                pass
        elif was_visible is not unset:
            try:
                app.Visible = was_visible
            except Exception:
                pass

    return results
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# ---------------------------------------------------------------------------
|
|
155
|
+
# CLI
|
|
156
|
+
# ---------------------------------------------------------------------------
|
|
157
|
+
|
|
158
|
+
def _build_arg_parser() -> argparse.ArgumentParser:
|
|
159
|
+
p = argparse.ArgumentParser(
|
|
160
|
+
description="Render a CDXML file to PNG or SVG using ChemDraw.",
|
|
161
|
+
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
162
|
+
epilog=__doc__,
|
|
163
|
+
)
|
|
164
|
+
p.add_argument(
|
|
165
|
+
"input", nargs="?",
|
|
166
|
+
help="Input CDXML file",
|
|
167
|
+
)
|
|
168
|
+
p.add_argument(
|
|
169
|
+
"--output", "-o",
|
|
170
|
+
default=None,
|
|
171
|
+
help="Output file path (default: <input>.png). "
|
|
172
|
+
"Extension determines format: .png or .svg",
|
|
173
|
+
)
|
|
174
|
+
p.add_argument(
|
|
175
|
+
"--dpi",
|
|
176
|
+
type=int,
|
|
177
|
+
default=300,
|
|
178
|
+
help="PNG resolution in DPI (default: 300)",
|
|
179
|
+
)
|
|
180
|
+
p.add_argument(
|
|
181
|
+
"--batch",
|
|
182
|
+
nargs="+",
|
|
183
|
+
metavar="FILE",
|
|
184
|
+
help="Batch-render multiple CDXML files in one COM session",
|
|
185
|
+
)
|
|
186
|
+
p.add_argument(
|
|
187
|
+
"--json",
|
|
188
|
+
action="store_true",
|
|
189
|
+
help="Output result as JSON to stdout",
|
|
190
|
+
)
|
|
191
|
+
return p
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _run_batch(args: argparse.Namespace) -> int:
    """Render every file given via ``--batch`` and print a per-file report."""
    missing = [f for f in args.batch if not Path(f).exists()]
    if missing:
        for f in missing:
            print(f"Error: file not found: {f}", file=sys.stderr)
        return 1

    try:
        results = batch_render(args.batch, png_dpi=args.dpi)
    except Exception as exc:
        # Report session-level failures (for example the COM layer cannot
        # be imported or started) the same way single-file mode does,
        # instead of letting a traceback escape.
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1

    if args.json:
        json_results = []
        for inp, info in results.items():
            entry = {"input": str(Path(inp).resolve())}
            if info["error"]:
                entry["error"] = info["error"]
            else:
                entry["output"] = info["output"]
            json_results.append(entry)
        print(json.dumps(json_results, indent=2))
    else:
        ok = sum(1 for v in results.values() if v["error"] is None)
        fail = len(results) - ok
        for inp, info in results.items():
            if info["error"]:
                print(f" FAIL: {inp} — {info['error']}")
            else:
                print(f" OK: {inp} -> {info['output']}")
        print(f"Batch: {ok} rendered, {fail} failed")
    return 1 if any(v["error"] for v in results.values()) else 0


def _run_single(args: argparse.Namespace) -> int:
    """Render the single positional input and print its path or JSON record."""
    try:
        out = cdxml_to_image(
            args.input,
            output_path=args.output,
            png_dpi=args.dpi,
        )
        if args.json:
            out_path = Path(out)
            fmt = out_path.suffix.lstrip(".").lower()
            try:
                # Pillow is optional here; the size is reported as null
                # when the image cannot be opened with it.
                from PIL import Image
                with Image.open(out) as img:
                    width, height = img.size
            except Exception:
                width, height = None, None
            result = {
                "input": str(Path(args.input).resolve()),
                "output": out,
                "format": fmt,
                "width": width,
                "height": height,
            }
            print(json.dumps(result, indent=2))
        else:
            print(out)
        return 0
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1


def main(argv: Optional[list] = None) -> int:
    """Command-line entry point.

    Parameters
    ----------
    argv : argument list to parse; None makes argparse read sys.argv.

    Returns
    -------
    Process exit status: 0 on success, 1 if any render failed or an input
    file is missing. A missing input argument (without ``--batch``) is
    reported through ``parser.error``, which exits the process.
    """
    parser = _build_arg_parser()
    args = parser.parse_args(argv)

    # --batch mode
    if args.batch:
        return _run_batch(args)

    if not args.input:
        parser.error("the following arguments are required: input (or --batch)")

    return _run_single(args)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
# Script entry point: run the CLI and exit with the status it returns.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
cdxml_to_image_rdkit.py — BACKUP renderer for CDXML → PNG/SVG using RDKit.
|
|
4
|
+
|
|
5
|
+
⚠️ USE cdxml_to_image.py (ChemDraw COM) INSTEAD WHENEVER POSSIBLE. ⚠️
|
|
6
|
+
|
|
7
|
+
This script exists only as a fallback for environments where ChemDraw is not
|
|
8
|
+
installed (e.g. a remote server, CI, or a colleague's machine).
|
|
9
|
+
|
|
10
|
+
Known limitations vs ChemDraw COM
|
|
11
|
+
----------------------------------
|
|
12
|
+
- Single molecules only — reaction schemes with arrows are NOT supported.
|
|
13
|
+
- Bond geometry is recomputed by RDKit from scratch; the original ChemDraw
|
|
14
|
+
layout is discarded.
|
|
15
|
+
- Aromatic systems are re-perceived by RDKit, which may differ from the
|
|
16
|
+
Kekulé form stored in the CDXML.
|
|
17
|
+
- Superatom / nickname nodes (R-groups, OTs, Boc, etc.) are not rendered;
|
|
18
|
+
the script will abort with an error if any are present.
|
|
19
|
+
- Stereo wedges are not transferred from the CDXML.
|
|
20
|
+
- No reaction conditions text, no yield labels, no compound numbering.
|
|
21
|
+
- RDKit in this environment lacks Cairo support, so PNG output requires
|
|
22
|
+
cairosvg to be installed; otherwise only SVG is produced.
|
|
23
|
+
|
|
24
|
+
Usage
|
|
25
|
+
-----
|
|
26
|
+
python cdxml_to_image_rdkit.py input.cdxml
|
|
27
|
+
python cdxml_to_image_rdkit.py input.cdxml -o out.svg
|
|
28
|
+
python cdxml_to_image_rdkit.py input.cdxml -o out.png
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
import argparse
|
|
32
|
+
import re
|
|
33
|
+
import sys
|
|
34
|
+
import xml.etree.ElementTree as ET
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
from typing import Dict, List, Optional, Tuple
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
# CDXML → atom/bond parser (minimal — just what RDKit needs)
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
|
|
43
|
+
ELEMENT_SYMBOLS: Dict[int, str] = {
|
|
44
|
+
1: "H", 5: "B", 6: "C", 7: "N", 8: "O",
|
|
45
|
+
9: "F", 14: "Si", 15: "P", 16: "S", 17: "Cl",
|
|
46
|
+
34: "Se", 35: "Br", 53: "I",
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
_BOND_ORDER_MAP = {"1": 1, "2": 2, "3": 3, "1.5": 4, "": 1}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _parse_cdxml(path: str):
|
|
53
|
+
"""
|
|
54
|
+
Parse a CDXML file and return (atoms, bonds).
|
|
55
|
+
|
|
56
|
+
atoms : dict id → {"symbol": str, "x": float, "y": float}
|
|
57
|
+
bonds : list of {"b": int, "e": int, "order": int}
|
|
58
|
+
|
|
59
|
+
Raises ValueError if reaction arrows or unsupported node types are found.
|
|
60
|
+
"""
|
|
61
|
+
with open(path, "rb") as fh:
|
|
62
|
+
raw_bytes = fh.read()
|
|
63
|
+
|
|
64
|
+
raw_bytes = re.sub(rb'<objecttag\b[^/]*/>', b'', raw_bytes)
|
|
65
|
+
raw_bytes = re.sub(rb'<objecttag\b.*?</objecttag>', b'', raw_bytes,
|
|
66
|
+
flags=re.DOTALL)
|
|
67
|
+
raw = raw_bytes.decode("latin-1", errors="replace")
|
|
68
|
+
raw = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f\ufffe\uffff]', '', raw)
|
|
69
|
+
raw = re.sub(r'<!DOCTYPE[^>]*>', '', raw)
|
|
70
|
+
|
|
71
|
+
root = ET.fromstring(raw)
|
|
72
|
+
|
|
73
|
+
atoms: Dict[int, dict] = {}
|
|
74
|
+
bonds: List[dict] = []
|
|
75
|
+
has_arrows = False
|
|
76
|
+
|
|
77
|
+
def walk(elem):
|
|
78
|
+
nonlocal has_arrows
|
|
79
|
+
tag = elem.tag
|
|
80
|
+
|
|
81
|
+
if tag == "arrow":
|
|
82
|
+
has_arrows = True
|
|
83
|
+
|
|
84
|
+
elif tag == "n":
|
|
85
|
+
aid = int(elem.get("id", "0"))
|
|
86
|
+
px, py = elem.get("p", "0 0").split()[:2]
|
|
87
|
+
|
|
88
|
+
el_num = elem.get("Element")
|
|
89
|
+
symbol = ELEMENT_SYMBOLS.get(int(el_num), "?") if el_num else "C"
|
|
90
|
+
|
|
91
|
+
node_type = elem.get("NodeType", "")
|
|
92
|
+
if node_type in ("Fragment", "Nickname", "Unspecified"):
|
|
93
|
+
raise ValueError(
|
|
94
|
+
f"Superatom / nickname node (id={aid}) found — "
|
|
95
|
+
"RDKit cannot render abbreviated groups. "
|
|
96
|
+
"Use cdxml_to_image.py (ChemDraw COM) instead."
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
atoms[aid] = {"symbol": symbol, "x": float(px), "y": float(py)}
|
|
100
|
+
|
|
101
|
+
elif tag == "b":
|
|
102
|
+
order_str = elem.get("Order", "1")
|
|
103
|
+
bonds.append({
|
|
104
|
+
"b": int(elem.get("B", "0")),
|
|
105
|
+
"e": int(elem.get("E", "0")),
|
|
106
|
+
"order": _BOND_ORDER_MAP.get(order_str, 1),
|
|
107
|
+
})
|
|
108
|
+
|
|
109
|
+
for child in elem:
|
|
110
|
+
if tag == "n" and child.tag in ("n", "t"):
|
|
111
|
+
continue
|
|
112
|
+
walk(child)
|
|
113
|
+
|
|
114
|
+
walk(root)
|
|
115
|
+
|
|
116
|
+
if has_arrows:
|
|
117
|
+
raise ValueError(
|
|
118
|
+
"Reaction scheme detected (arrow elements present). "
|
|
119
|
+
"RDKit cannot render multi-fragment reaction schemes. "
|
|
120
|
+
"Use cdxml_to_image.py (ChemDraw COM) instead."
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
return atoms, bonds
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# ---------------------------------------------------------------------------
|
|
127
|
+
# RDKit renderer
|
|
128
|
+
# ---------------------------------------------------------------------------
|
|
129
|
+
|
|
130
|
+
def _render(atoms: dict, bonds: list, output_path: str,
|
|
131
|
+
width: int, height: int) -> str:
|
|
132
|
+
try:
|
|
133
|
+
from rdkit import Chem
|
|
134
|
+
from rdkit.Chem.Draw import rdMolDraw2D
|
|
135
|
+
except ImportError:
|
|
136
|
+
raise RuntimeError("RDKit is not installed in this environment.")
|
|
137
|
+
|
|
138
|
+
rw = Chem.RWMol()
|
|
139
|
+
atom_idx: Dict[int, int] = {}
|
|
140
|
+
|
|
141
|
+
bond_type_map = {
|
|
142
|
+
1: Chem.BondType.SINGLE,
|
|
143
|
+
2: Chem.BondType.DOUBLE,
|
|
144
|
+
3: Chem.BondType.TRIPLE,
|
|
145
|
+
4: Chem.BondType.AROMATIC,
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
for aid, atom in atoms.items():
|
|
149
|
+
sym = atom["symbol"]
|
|
150
|
+
try:
|
|
151
|
+
rd_atom = Chem.Atom(sym)
|
|
152
|
+
except Exception:
|
|
153
|
+
raise ValueError(f"RDKit does not recognise element '{sym}'.")
|
|
154
|
+
atom_idx[aid] = rw.AddAtom(rd_atom)
|
|
155
|
+
|
|
156
|
+
for bond in bonds:
|
|
157
|
+
b = atom_idx.get(bond["b"])
|
|
158
|
+
e = atom_idx.get(bond["e"])
|
|
159
|
+
if b is None or e is None:
|
|
160
|
+
raise ValueError(f"Bond references unknown atom id: {bond}")
|
|
161
|
+
rw.AddBond(b, e, bond_type_map.get(bond["order"], Chem.BondType.SINGLE))
|
|
162
|
+
|
|
163
|
+
mol = rw.GetMol()
|
|
164
|
+
try:
|
|
165
|
+
Chem.SanitizeMol(mol)
|
|
166
|
+
except Exception as exc:
|
|
167
|
+
raise ValueError(f"RDKit sanitization failed: {exc}")
|
|
168
|
+
|
|
169
|
+
# Attach original 2D coordinates from CDXML
|
|
170
|
+
conf = Chem.Conformer(mol.GetNumAtoms())
|
|
171
|
+
for aid, rd_idx in atom_idx.items():
|
|
172
|
+
conf.SetAtomPosition(rd_idx, (atoms[aid]["x"], atoms[aid]["y"], 0.0))
|
|
173
|
+
mol.AddConformer(conf, assignId=True)
|
|
174
|
+
|
|
175
|
+
out = Path(output_path)
|
|
176
|
+
ext = out.suffix.lower()
|
|
177
|
+
|
|
178
|
+
if ext == ".svg":
|
|
179
|
+
drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
|
|
180
|
+
drawer.drawOptions().addStereoAnnotation = False
|
|
181
|
+
drawer.DrawMolecule(mol)
|
|
182
|
+
drawer.FinishDrawing()
|
|
183
|
+
out.write_text(drawer.GetDrawingText(), encoding="utf-8")
|
|
184
|
+
return str(out)
|
|
185
|
+
|
|
186
|
+
# PNG — try Cairo, then MolToImage, then save SVG as fallback
|
|
187
|
+
if hasattr(rdMolDraw2D, "MolDraw2DCairo"):
|
|
188
|
+
drawer = rdMolDraw2D.MolDraw2DCairo(width, height)
|
|
189
|
+
drawer.drawOptions().addStereoAnnotation = False
|
|
190
|
+
drawer.DrawMolecule(mol)
|
|
191
|
+
drawer.FinishDrawing()
|
|
192
|
+
out.write_bytes(drawer.GetDrawingText())
|
|
193
|
+
return str(out)
|
|
194
|
+
|
|
195
|
+
# Try cairosvg via intermediate SVG
|
|
196
|
+
svg_drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
|
|
197
|
+
svg_drawer.drawOptions().addStereoAnnotation = False
|
|
198
|
+
svg_drawer.DrawMolecule(mol)
|
|
199
|
+
svg_drawer.FinishDrawing()
|
|
200
|
+
svg_text = svg_drawer.GetDrawingText()
|
|
201
|
+
|
|
202
|
+
try:
|
|
203
|
+
import cairosvg
|
|
204
|
+
cairosvg.svg2png(bytestring=svg_text.encode(), write_to=str(out))
|
|
205
|
+
return str(out)
|
|
206
|
+
except ImportError:
|
|
207
|
+
pass
|
|
208
|
+
|
|
209
|
+
# Final fallback: write SVG and warn
|
|
210
|
+
svg_out = out.with_suffix(".svg")
|
|
211
|
+
svg_out.write_text(svg_text, encoding="utf-8")
|
|
212
|
+
print(
|
|
213
|
+
f"[warning] No PNG renderer available (RDKit Cairo / cairosvg not installed).\n"
|
|
214
|
+
f" Saved as SVG instead: {svg_out}\n"
|
|
215
|
+
f" Use cdxml_to_image.py (ChemDraw COM) for proper PNG output.",
|
|
216
|
+
file=sys.stderr,
|
|
217
|
+
)
|
|
218
|
+
return str(svg_out)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# ---------------------------------------------------------------------------
|
|
222
|
+
# Public entry point
|
|
223
|
+
# ---------------------------------------------------------------------------
|
|
224
|
+
|
|
225
|
+
def cdxml_to_image_rdkit(
    cdxml_path: str,
    output_path: Optional[str] = None,
    width: int = 600,
    height: int = 400,
) -> str:
    """
    Draw a single-molecule CDXML file as PNG or SVG with RDKit.

    This is the fallback path; cdxml_to_image.py (ChemDraw COM) should be
    preferred. The module docstring lists the limitations.

    When output_path is None the image is written next to the input with a
    ``.png`` suffix. Returns the path reported by the renderer.

    Raises FileNotFoundError if cdxml_path does not exist and ValueError if
    the file yields no atoms (or the parser/renderer rejects it).
    """
    source = Path(cdxml_path)
    if not source.exists():
        raise FileNotFoundError(f"CDXML file not found: {cdxml_path}")

    target = str(source.with_suffix(".png")) if output_path is None else output_path

    atoms, bonds = _parse_cdxml(str(source))
    if atoms:
        return _render(atoms, bonds, target, width, height)

    raise ValueError("No atoms found in CDXML.")
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
# ---------------------------------------------------------------------------
|
|
253
|
+
# CLI
|
|
254
|
+
# ---------------------------------------------------------------------------
|
|
255
|
+
|
|
256
|
+
def main(argv: Optional[list] = None) -> int:
    """Command-line entry point for the RDKit fallback renderer.

    Parses ``argv`` (None makes argparse read sys.argv), prints a notice on
    stderr that this is the backup renderer, then renders the input. On
    success the written path is printed to stdout and 0 is returned; any
    exception is reported on stderr as ``ERROR: ...`` and 1 is returned.
    """
    parser = argparse.ArgumentParser(
        description=(
            "⚠ BACKUP ONLY — render CDXML to PNG/SVG using RDKit.\n"
            " Use cdxml_to_image.py (ChemDraw COM) whenever possible."
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument("input", help="Input CDXML file (single molecule only)")
    parser.add_argument(
        "--output", "-o",
        default=None,
        help="Output file (default: <input>.png). Extension sets format.",
    )
    # The two canvas-size options differ only in name, default and help text.
    for flag, default, help_text in (
        ("--width", 600, "Canvas width px (default 600)"),
        ("--height", 400, "Canvas height px (default 400)"),
    ):
        parser.add_argument(flag, type=int, default=default, help=help_text)
    options = parser.parse_args(argv)

    notice = (
        "⚠ cdxml_to_image_rdkit.py: backup renderer — output quality is limited.\n"
        " Use cdxml_to_image.py (ChemDraw COM) for production use."
    )
    print(notice, file=sys.stderr)

    status = 0
    try:
        written = cdxml_to_image_rdkit(
            options.input,
            output_path=options.output,
            width=options.width,
            height=options.height,
        )
        print(written)
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        status = 1
    return status
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
# Script entry point: run the CLI and exit with the status it returns.
if __name__ == "__main__":
    raise SystemExit(main())
|