cdxml-toolkit 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cdxml_toolkit/__init__.py +18 -0
- cdxml_toolkit/_jre/__init__.py +2 -0
- cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
- cdxml_toolkit/analysis/__init__.py +35 -0
- cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
- cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
- cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
- cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
- cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
- cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
- cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
- cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
- cdxml_toolkit/analysis/extract_nmr.py +47 -0
- cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
- cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
- cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
- cdxml_toolkit/cdxml_builder.py +920 -0
- cdxml_toolkit/cdxml_utils.py +342 -0
- cdxml_toolkit/chemdraw/__init__.py +5 -0
- cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
- cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
- cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
- cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
- cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
- cdxml_toolkit/constants.py +304 -0
- cdxml_toolkit/coord_normalizer.py +438 -0
- cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
- cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
- cdxml_toolkit/image/__init__.py +15 -0
- cdxml_toolkit/image/reaction_from_image.py +2103 -0
- cdxml_toolkit/image/structure_from_image.py +1711 -0
- cdxml_toolkit/layout/__init__.py +5 -0
- cdxml_toolkit/layout/alignment.py +1642 -0
- cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
- cdxml_toolkit/layout/scheme_merger.py +2260 -0
- cdxml_toolkit/mcp_server/__init__.py +0 -0
- cdxml_toolkit/mcp_server/__main__.py +5 -0
- cdxml_toolkit/mcp_server/server.py +1567 -0
- cdxml_toolkit/naming/__init__.py +6 -0
- cdxml_toolkit/naming/aligned_namer.py +2342 -0
- cdxml_toolkit/naming/mol_builder.py +3722 -0
- cdxml_toolkit/naming/name_decomposer.py +2843 -0
- cdxml_toolkit/naming/reactions_datamol.json +2414 -0
- cdxml_toolkit/office/__init__.py +5 -0
- cdxml_toolkit/office/doc_from_template.py +722 -0
- cdxml_toolkit/office/ole_embedder.py +808 -0
- cdxml_toolkit/office/ole_extractor.py +272 -0
- cdxml_toolkit/perception/__init__.py +10 -0
- cdxml_toolkit/perception/compound_search.py +229 -0
- cdxml_toolkit/perception/eln_csv_parser.py +240 -0
- cdxml_toolkit/perception/rdf_parser.py +664 -0
- cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
- cdxml_toolkit/perception/reaction_parser.py +2150 -0
- cdxml_toolkit/perception/scheme_reader.py +2948 -0
- cdxml_toolkit/perception/scheme_refine.py +1404 -0
- cdxml_toolkit/perception/scheme_segmenter.py +619 -0
- cdxml_toolkit/perception/spatial_assignment.py +1013 -0
- cdxml_toolkit/rdkit_utils.py +605 -0
- cdxml_toolkit/render/__init__.py +17 -0
- cdxml_toolkit/render/auto_layout.py +229 -0
- cdxml_toolkit/render/compact_parser.py +632 -0
- cdxml_toolkit/render/parser.py +706 -0
- cdxml_toolkit/render/render_scheme.py +267 -0
- cdxml_toolkit/render/renderer.py +2387 -0
- cdxml_toolkit/render/schema.py +90 -0
- cdxml_toolkit/render/scheme_maker.py +1043 -0
- cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
- cdxml_toolkit/resolve/__init__.py +13 -0
- cdxml_toolkit/resolve/cas_resolver.py +430 -0
- cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
- cdxml_toolkit/resolve/condensed_formula.py +493 -0
- cdxml_toolkit/resolve/jre_manager.py +195 -0
- cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
- cdxml_toolkit/resolve/reagent_db.py +285 -0
- cdxml_toolkit/resolve/superatom_data.json +2856 -0
- cdxml_toolkit/resolve/superatom_table.py +146 -0
- cdxml_toolkit/text_formatting.py +298 -0
- cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
- cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
- cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
- cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
- cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,527 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
CDX ↔ CDXML Converter
|
|
4
|
+
Converts between ChemDraw CDX (binary) and CDXML (XML) formats.
|
|
5
|
+
|
|
6
|
+
Backends (tried in order):
|
|
7
|
+
1. ChemDraw COM automation (most reliable, requires ChemDraw installed)
|
|
8
|
+
2. pycdxml library (good open-source fallback)
|
|
9
|
+
3. Open Babel CLI (last resort, patchy for complex structures)
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
python cdx_converter.py input.cdx [-o output.cdxml] [--method com|pycdxml|obabel]
|
|
13
|
+
python cdx_converter.py input.cdxml [-o output.cdx] [--method com|pycdxml|obabel]
|
|
14
|
+
|
|
15
|
+
Python API:
|
|
16
|
+
from cdxml_toolkit.chemdraw.cdx_converter import convert_cdx_to_cdxml, convert_file
|
|
17
|
+
cdxml_str = convert_cdx_to_cdxml(cdx_bytes)
|
|
18
|
+
convert_file("input.cdx", "output.cdxml")
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import sys
|
|
25
|
+
import subprocess
|
|
26
|
+
import tempfile
|
|
27
|
+
from typing import Optional
|
|
28
|
+
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
# Backend availability detection
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
# Availability flags for the optional backends.  Each starts False and is
# flipped to True only when its probe below succeeds.
HAS_COM = False
HAS_PYCDXML = False
HAS_OBABEL = False

# COM backend: this only establishes that the pywin32 COM client can be
# imported; it does not check that ChemDraw itself is installed.
try:
    import win32com.client
    HAS_COM = True
except ImportError:
    pass

# pycdxml backend: the converter module is bound to the private name
# _pycdxml for use by the _pycdxml_* functions.
try:
    from pycdxml import cdxml_converter as _pycdxml
    HAS_PYCDXML = True
except ImportError:
    pass

# Open Babel backend: run "obabel -V" and require a zero exit status.
# OSError (which includes FileNotFoundError and PermissionError) is caught so
# that an executable that is missing or cannot be launched merely disables
# this backend instead of making the module fail to import.
try:
    result = subprocess.run(
        ["obabel", "-V"], capture_output=True, timeout=5
    )
    if result.returncode == 0:
        HAS_OBABEL = True
except (OSError, subprocess.TimeoutExpired):
    pass

# Priority order used when the caller asks for the "auto" backend.
BACKEND_ORDER = ["com", "pycdxml", "obabel"]
|
|
59
|
+
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
# CDXML sanitiser
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
import re as _re
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Byte values XML 1.0 forbids: every value below 0x20 except TAB (0x09),
# LF (0x0A) and CR (0x0D).
_XML_ILLEGAL_CONTROL_BYTES = bytes(
    value for value in range(0x20) if value not in (0x09, 0x0A, 0x0D)
)


def sanitise_cdxml(cdxml: str) -> str:
    """Remove content that makes ChemDraw's strict XML parser reject the file.

    Findmolecule embeds internal GUIDs as raw binary bytes inside
    <objecttag Name="Molecule ID" Value="..."/> attributes.  These bytes
    include XML-illegal control characters (< 0x09, 0x0B-0x0C, 0x0E-0x1F)
    that cause ChemDraw to report "not well-formed (invalid token)".

    The Molecule ID tags carry no chemistry information — they are ELN
    bookkeeping handles that ChemDraw doesn't need to render the structure.
    We strip the entire element.  Any remaining stray control characters are
    also removed so the file is clean XML 1.0.

    Args:
        cdxml: CDXML document text.

    Returns:
        The document text with self-closing "Molecule ID" objecttag elements
        and illegal control characters removed.
    """
    # Work on UTF-8 bytes so the element pattern and the control-character
    # filter both operate on the raw byte values.
    cdxml_bytes = cdxml.encode("utf-8", errors="replace")

    # 1. Strip all <objecttag ... Name="Molecule ID" .../> elements
    #    (self-closing).  The pattern allows other attributes on either side
    #    of Name because attribute order can vary.
    cdxml_bytes = _re.sub(
        rb'<objecttag\s[^>]*Name="Molecule ID"[^>]*/\s*>',
        b"",
        cdxml_bytes,
    )

    # 2. Strip XML 1.0 illegal control characters anywhere in the file.
    #    Legal: 0x09 (tab), 0x0A (LF), 0x0D (CR), 0x20+ (printable + high
    #    bytes).  bytes.translate deletes the listed values in a single pass.
    cdxml_bytes = cdxml_bytes.translate(None, _XML_ILLEGAL_CONTROL_BYTES)

    return cdxml_bytes.decode("utf-8", errors="replace")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def sanitise_cdxml_file(path: str) -> None:
    """Rewrite the CDXML file at *path* with its sanitised contents.

    The file is read as UTF-8 (undecodable bytes are replaced), passed through
    ``sanitise_cdxml`` and written back to the same path as UTF-8.
    """
    with open(path, "r", encoding="utf-8", errors="replace") as source:
        raw_text = source.read()
    # Sanitise before reopening for writing so a failure here cannot leave
    # the file truncated.
    clean_text = sanitise_cdxml(raw_text)
    with open(path, "w", encoding="utf-8") as target:
        target.write(clean_text)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# ---------------------------------------------------------------------------
|
|
109
|
+
# COM backend
|
|
110
|
+
# ---------------------------------------------------------------------------
|
|
111
|
+
|
|
112
|
+
def _get_chemdraw():
    """Return a ChemDraw COM application object, preferring a running one.

    ``GetActiveObject`` is tried first; if it raises, the object is obtained
    with ``Dispatch`` instead.

    Returns:
        A tuple ``(app, launched)``.  ``launched`` is False when the object
        came from ``GetActiveObject`` and True when it came from ``Dispatch``.

    Note:
        This function does not touch the application's ``Visible`` property;
        callers that want a hidden window must set it themselves.
    """
    try:
        return win32com.client.GetActiveObject("ChemDraw.Application"), False
    except Exception:
        # No attachable session: fall back to Dispatch.
        return win32com.client.Dispatch("ChemDraw.Application"), True
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _com_convert_file(input_path: str, output_path: str) -> None:
    """Convert a file using ChemDraw COM automation.

    The input is opened in ChemDraw and saved under ``output_path``.  The
    application window is hidden for the duration of the conversion.
    Afterwards the application is quit if ``_get_chemdraw`` reported it as
    launched, otherwise its previous visibility is restored.  When the output
    path ends in ``.cdxml`` the written file is sanitised in place.

    Args:
        input_path: Path of the document to open.
        output_path: Path to save the converted document to.
    """
    app, launched = _get_chemdraw()
    was_visible = app.Visible
    app.Visible = False
    try:
        doc = app.Documents.Open(os.path.abspath(input_path))
        try:
            doc.SaveAs(os.path.abspath(output_path))
        finally:
            # Close even when SaveAs fails so the document is not left open
            # in a ChemDraw session that outlives this call.
            doc.Close()
    finally:
        if launched:
            app.Quit()
        else:
            app.Visible = was_visible
    if output_path.lower().endswith(".cdxml"):
        sanitise_cdxml_file(output_path)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _com_cdx_to_cdxml(cdx_data: bytes) -> str:
    """Convert CDX bytes → CDXML string via COM (uses temp files).

    The bytes are written to a temporary ``.cdx`` file, converted on disk by
    ``_com_convert_file`` into a ``.cdxml`` file next to it, and the result is
    read back.  Both temporary files are deleted afterwards, whether or not
    the conversion succeeded.

    Args:
        cdx_data: Raw contents of a CDX document.

    Returns:
        The converted document as CDXML text.
    """
    with tempfile.NamedTemporaryFile(suffix=".cdx", delete=False) as tmp_in:
        tmp_in.write(cdx_data)
        tmp_in_path = tmp_in.name
    # Swap only the final extension: str.replace would also rewrite a ".cdx"
    # that happens to occur in a directory component of the temp path.
    tmp_out_path = os.path.splitext(tmp_in_path)[0] + ".cdxml"
    try:
        _com_convert_file(tmp_in_path, tmp_out_path)
        with open(tmp_out_path, "r", encoding="utf-8") as f:
            return f.read()  # sanitise_cdxml_file already ran inside _com_convert_file
    finally:
        for p in (tmp_in_path, tmp_out_path):
            if os.path.exists(p):
                os.unlink(p)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _com_cdxml_to_cdx(cdxml_data: str) -> bytes:
    """Convert CDXML string → CDX bytes via COM (uses temp files).

    The text is written as UTF-8 to a temporary ``.cdxml`` file, converted on
    disk by ``_com_convert_file`` into a ``.cdx`` file next to it, and the
    result is read back.  Both temporary files are deleted afterwards, whether
    or not the conversion succeeded.

    Args:
        cdxml_data: CDXML document text.

    Returns:
        The converted document as raw CDX bytes.
    """
    with tempfile.NamedTemporaryFile(
        suffix=".cdxml", delete=False, mode="w", encoding="utf-8"
    ) as tmp_in:
        tmp_in.write(cdxml_data)
        tmp_in_path = tmp_in.name
    # Swap only the final extension: str.replace would also rewrite a
    # ".cdxml" that happens to occur in a directory component of the path.
    tmp_out_path = os.path.splitext(tmp_in_path)[0] + ".cdx"
    try:
        _com_convert_file(tmp_in_path, tmp_out_path)
        with open(tmp_out_path, "rb") as f:
            return f.read()
    finally:
        for p in (tmp_in_path, tmp_out_path):
            if os.path.exists(p):
                os.unlink(p)
|
|
177
|
+
|
|
178
|
+
# ---------------------------------------------------------------------------
|
|
179
|
+
# pycdxml backend
|
|
180
|
+
# ---------------------------------------------------------------------------
|
|
181
|
+
|
|
182
|
+
def _pycdxml_convert_file(input_path: str, output_path: str) -> None:
|
|
183
|
+
"""Convert using pycdxml library."""
|
|
184
|
+
in_ext = os.path.splitext(input_path)[1].lower()
|
|
185
|
+
if in_ext == ".cdx":
|
|
186
|
+
doc = _pycdxml.read_cdx(input_path)
|
|
187
|
+
cdxml_str = doc.to_cdxml()
|
|
188
|
+
with open(output_path, "w", encoding="utf-8") as f:
|
|
189
|
+
f.write(cdxml_str)
|
|
190
|
+
elif in_ext == ".cdxml":
|
|
191
|
+
doc = _pycdxml.read_cdxml(input_path)
|
|
192
|
+
_pycdxml.write_cdx_file(doc, output_path)
|
|
193
|
+
else:
|
|
194
|
+
raise ValueError(f"Unsupported input extension: {in_ext}")
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _pycdxml_cdx_to_cdxml(cdx_data: bytes) -> str:
    """Convert CDX bytes → CDXML string via pycdxml (uses a temp file).

    The bytes are written to a temporary ``.cdx`` file, which pycdxml reads
    back; the file is deleted afterwards whether or not reading succeeded.

    Args:
        cdx_data: Raw contents of a CDX document.

    Returns:
        The document serialised as CDXML text.
    """
    with tempfile.NamedTemporaryFile(suffix=".cdx", delete=False) as handle:
        handle.write(cdx_data)
        scratch_path = handle.name
    try:
        document = _pycdxml.read_cdx(scratch_path)
        return document.to_cdxml()
    finally:
        if os.path.exists(scratch_path):
            os.unlink(scratch_path)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _pycdxml_cdxml_to_cdx(cdxml_data: str) -> bytes:
    """Convert CDXML string → CDX bytes via pycdxml (uses temp files).

    The text is written as UTF-8 to a temporary ``.cdxml`` file, read by
    pycdxml, written out as a ``.cdx`` file next to it, and read back.  Both
    temporary files are deleted afterwards, whether or not the conversion
    succeeded.

    Args:
        cdxml_data: CDXML document text.

    Returns:
        The converted document as raw CDX bytes.
    """
    with tempfile.NamedTemporaryFile(
        suffix=".cdxml", delete=False, mode="w", encoding="utf-8"
    ) as tmp_in:
        tmp_in.write(cdxml_data)
        tmp_in_path = tmp_in.name
    # Swap only the final extension: str.replace would also rewrite a
    # ".cdxml" that happens to occur in a directory component of the path.
    tmp_out_path = os.path.splitext(tmp_in_path)[0] + ".cdx"
    try:
        doc = _pycdxml.read_cdxml(tmp_in_path)
        _pycdxml.write_cdx_file(doc, tmp_out_path)
        with open(tmp_out_path, "rb") as f:
            return f.read()
    finally:
        for p in (tmp_in_path, tmp_out_path):
            if os.path.exists(p):
                os.unlink(p)
|
|
227
|
+
|
|
228
|
+
# ---------------------------------------------------------------------------
|
|
229
|
+
# Open Babel backend
|
|
230
|
+
# ---------------------------------------------------------------------------
|
|
231
|
+
|
|
232
|
+
def _obabel_convert_file(input_path: str, output_path: str) -> None:
    """Convert a file using the Open Babel CLI.

    Runs ``obabel <input> -O <output>`` with absolute paths and a 30 second
    timeout.

    Args:
        input_path: Path of the file to convert.
        output_path: Path the converted file should be written to.

    Raises:
        RuntimeError: If obabel exits with a non-zero status, or exits with
            status zero but no file exists at ``output_path`` afterwards.
        subprocess.TimeoutExpired: If obabel runs longer than 30 seconds.
    """
    abs_output = os.path.abspath(output_path)
    result = subprocess.run(
        ["obabel", os.path.abspath(input_path), "-O", abs_output],
        capture_output=True, text=True, timeout=30
    )
    if result.returncode != 0:
        raise RuntimeError(f"obabel failed: {result.stderr}")
    # A zero exit status alone does not prove that anything was written;
    # fail here with obabel's own diagnostics rather than letting the caller
    # hit a bare FileNotFoundError later.
    if not os.path.isfile(abs_output):
        raise RuntimeError(f"obabel failed: {result.stderr}")
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _obabel_cdx_to_cdxml(cdx_data: bytes) -> str:
    """Convert CDX bytes → CDXML string via obabel (uses temp files).

    The bytes are written to a temporary ``.cdx`` file, converted on disk by
    ``_obabel_convert_file`` into a ``.cdxml`` file next to it, and the result
    is read back as UTF-8.  Both temporary files are deleted afterwards,
    whether or not the conversion succeeded.

    Args:
        cdx_data: Raw contents of a CDX document.

    Returns:
        The converted document as CDXML text.
    """
    with tempfile.NamedTemporaryFile(suffix=".cdx", delete=False) as tmp_in:
        tmp_in.write(cdx_data)
        tmp_in_path = tmp_in.name
    # Swap only the final extension: str.replace would also rewrite a ".cdx"
    # that happens to occur in a directory component of the temp path.
    tmp_out_path = os.path.splitext(tmp_in_path)[0] + ".cdxml"
    try:
        _obabel_convert_file(tmp_in_path, tmp_out_path)
        with open(tmp_out_path, "r", encoding="utf-8") as f:
            return f.read()
    finally:
        for p in (tmp_in_path, tmp_out_path):
            if os.path.exists(p):
                os.unlink(p)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _obabel_cdxml_to_cdx(cdxml_data: str) -> bytes:
    """Convert CDXML string → CDX bytes via obabel (uses temp files).

    The text is written as UTF-8 to a temporary ``.cdxml`` file, converted on
    disk by ``_obabel_convert_file`` into a ``.cdx`` file next to it, and the
    result is read back.  Both temporary files are deleted afterwards, whether
    or not the conversion succeeded.

    Args:
        cdxml_data: CDXML document text.

    Returns:
        The converted document as raw CDX bytes.
    """
    with tempfile.NamedTemporaryFile(
        suffix=".cdxml", delete=False, mode="w", encoding="utf-8"
    ) as tmp_in:
        tmp_in.write(cdxml_data)
        tmp_in_path = tmp_in.name
    # Swap only the final extension: str.replace would also rewrite a
    # ".cdxml" that happens to occur in a directory component of the path.
    tmp_out_path = os.path.splitext(tmp_in_path)[0] + ".cdx"
    try:
        _obabel_convert_file(tmp_in_path, tmp_out_path)
        with open(tmp_out_path, "rb") as f:
            return f.read()
    finally:
        for p in (tmp_in_path, tmp_out_path):
            if os.path.exists(p):
                os.unlink(p)
|
|
274
|
+
|
|
275
|
+
# ---------------------------------------------------------------------------
|
|
276
|
+
# Backend dispatch
|
|
277
|
+
# ---------------------------------------------------------------------------
|
|
278
|
+
|
|
279
|
+
# Dispatch tables keyed by backend name.  A backend whose availability flag
# is False is stored as None; the tables are built once, at import time.

# File-to-file converters, called as fn(input_path, output_path).
_FILE_CONVERTERS = {
    "com": _com_convert_file if HAS_COM else None,
    "pycdxml": _pycdxml_convert_file if HAS_PYCDXML else None,
    "obabel": _obabel_convert_file if HAS_OBABEL else None,
}

# In-memory converters from CDX bytes to a CDXML string.
_CDX_TO_CDXML = {
    "com": _com_cdx_to_cdxml if HAS_COM else None,
    "pycdxml": _pycdxml_cdx_to_cdxml if HAS_PYCDXML else None,
    "obabel": _obabel_cdx_to_cdxml if HAS_OBABEL else None,
}

# In-memory converters from a CDXML string to CDX bytes.
_CDXML_TO_CDX = {
    "com": _com_cdxml_to_cdx if HAS_COM else None,
    "pycdxml": _pycdxml_cdxml_to_cdx if HAS_PYCDXML else None,
    "obabel": _obabel_cdxml_to_cdx if HAS_OBABEL else None,
}
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _pick_backend(method: str, dispatch_table: dict):
|
|
299
|
+
"""Select a backend function. 'auto' tries in priority order."""
|
|
300
|
+
if method == "auto":
|
|
301
|
+
for name in BACKEND_ORDER:
|
|
302
|
+
fn = dispatch_table.get(name)
|
|
303
|
+
if fn is not None:
|
|
304
|
+
return name, fn
|
|
305
|
+
raise RuntimeError(
|
|
306
|
+
"No conversion backend available. "
|
|
307
|
+
"Install ChemDraw (COM), pycdxml, or Open Babel."
|
|
308
|
+
)
|
|
309
|
+
fn = dispatch_table.get(method)
|
|
310
|
+
if fn is None:
|
|
311
|
+
available = {k: v for k, v in dispatch_table.items() if v}
|
|
312
|
+
raise RuntimeError(
|
|
313
|
+
f"Backend '{method}' not available. "
|
|
314
|
+
f"Available: {list(available.keys()) or 'none'}"
|
|
315
|
+
)
|
|
316
|
+
return method, fn
|
|
317
|
+
|
|
318
|
+
# ---------------------------------------------------------------------------
|
|
319
|
+
# Public API
|
|
320
|
+
# ---------------------------------------------------------------------------
|
|
321
|
+
|
|
322
|
+
def convert_cdx_to_cdxml(cdx_data: bytes, method: str = "auto") -> str:
    """Convert raw CDX bytes to a CDXML string.

    Args:
        cdx_data: Raw contents of a CDX document.
        method: Backend name, or ``"auto"`` to let ``_pick_backend`` choose.

    Returns:
        The converted document as CDXML text.
    """
    _backend_name, converter = _pick_backend(method, _CDX_TO_CDXML)
    return converter(cdx_data)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def convert_cdxml_to_cdx(cdxml_data: str, method: str = "auto") -> bytes:
    """Convert a CDXML string to raw CDX bytes.

    Args:
        cdxml_data: CDXML document text.
        method: Backend name, or ``"auto"`` to let ``_pick_backend`` choose.

    Returns:
        The converted document as raw CDX bytes.
    """
    _backend_name, converter = _pick_backend(method, _CDXML_TO_CDX)
    return converter(cdxml_data)
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def batch_convert_files(
    input_paths: list, method: str = "auto"
) -> dict:
    """Convert multiple CDX/CDXML files in a single COM session.

    Returns dict mapping input_path -> {"output": path, "error": None} on
    success, or {"output": None, "error": message} on failure.

    For COM backend: one GetActiveObject/Dispatch, loop through all files,
    one conditional Quit.  For non-COM backends: falls back to per-file
    convert_file().

    Each output is written next to its input with the extension swapped
    (``.cdx`` <-> ``.cdxml``).  A failure on one file is recorded in its
    result entry and does not stop the remaining files.

    Args:
        input_paths: Paths of the files to convert.
        method: Backend name, or ``"auto"`` to let ``_pick_backend`` choose.

    Raises:
        RuntimeError: If no usable backend can be selected.
    """
    results = {}
    if not input_paths:
        return results

    name, _ = _pick_backend(method, _FILE_CONVERTERS)

    if name == "com":
        app, launched = _get_chemdraw()
        was_visible = app.Visible
        app.Visible = False
        try:
            for inp in input_paths:
                in_ext = os.path.splitext(inp)[1].lower()
                if in_ext == ".cdx":
                    out_ext = ".cdxml"
                elif in_ext == ".cdxml":
                    out_ext = ".cdx"
                else:
                    results[inp] = {
                        "output": None,
                        "error": f"Unsupported extension: {in_ext}",
                    }
                    continue
                out = os.path.splitext(inp)[0] + out_ext
                try:
                    doc = app.Documents.Open(os.path.abspath(inp))
                    try:
                        doc.SaveAs(os.path.abspath(out))
                    finally:
                        # Close even when SaveAs fails: the session is shared
                        # by every file in the batch (and possibly the user),
                        # so a failed document must not stay open in it.
                        doc.Close()
                    if out.lower().endswith(".cdxml"):
                        sanitise_cdxml_file(out)
                    results[inp] = {"output": out, "error": None}
                except Exception as e:
                    # Deliberate per-file boundary: record and carry on.
                    results[inp] = {"output": None, "error": str(e)}
        finally:
            if launched:
                app.Quit()
            else:
                app.Visible = was_visible
    else:
        # Non-COM: fall back to per-file conversion
        for inp in input_paths:
            try:
                out = convert_file(inp, method=method)
                results[inp] = {"output": out, "error": None}
            except Exception as e:
                results[inp] = {"output": None, "error": str(e)}

    return results
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def convert_file(
    input_path: str, output_path: Optional[str] = None, method: str = "auto"
) -> str:
    """Convert a file between CDX and CDXML and return the output path.

    Args:
        input_path: Path of a ``.cdx`` or ``.cdxml`` file.
        output_path: Destination path.  When None, the input path with its
            extension swapped (``.cdx`` <-> ``.cdxml``) is used.
        method: Backend name, or ``"auto"`` to let ``_pick_backend`` choose.

    Returns:
        The path the converted file was written to.

    Raises:
        ValueError: If the input extension is neither ``.cdx`` nor ``.cdxml``.
    """
    stem, suffix = os.path.splitext(input_path)
    suffix = suffix.lower()
    swapped_suffix = {".cdx": ".cdxml", ".cdxml": ".cdx"}.get(suffix)
    if swapped_suffix is None:
        raise ValueError(f"Unsupported file extension: {suffix}. Use .cdx or .cdxml.")

    if output_path is None:
        output_path = stem + swapped_suffix

    _backend_name, converter = _pick_backend(method, _FILE_CONVERTERS)
    converter(input_path, output_path)
    return output_path
|
|
414
|
+
|
|
415
|
+
# ---------------------------------------------------------------------------
|
|
416
|
+
# CLI
|
|
417
|
+
# ---------------------------------------------------------------------------
|
|
418
|
+
|
|
419
|
+
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the converter command line."""
    parser = argparse.ArgumentParser(
        description="Convert between ChemDraw CDX (binary) and CDXML (XML) formats."
    )
    parser.add_argument(
        "input", nargs="?", help="Input file (.cdx or .cdxml)"
    )
    parser.add_argument(
        "-o", "--output",
        help="Output file (default: same name with swapped extension)"
    )
    parser.add_argument(
        "--method",
        choices=["auto", "com", "pycdxml", "obabel"],
        default="auto",
        help="Conversion backend (default: auto — tries com, pycdxml, obabel)"
    )
    parser.add_argument(
        "--batch",
        nargs="+",
        metavar="FILE",
        help="Batch-convert multiple files in one COM session"
    )
    parser.add_argument(
        "--list-backends",
        action="store_true",
        help="Show available backends and exit"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output result as JSON to stdout"
    )
    return parser


def _run_batch_cli(args) -> int:
    """Handle ``--batch``: convert every listed file and report each result.

    Returns 1 if any listed file is missing, any conversion failed, or an
    exception was raised; otherwise 0.
    """
    missing = [f for f in args.batch if not os.path.isfile(f)]
    if missing:
        for f in missing:
            print(f"Error: file not found: {f}", file=sys.stderr)
        return 1
    try:
        results = batch_convert_files(args.batch, args.method)
        # Resolved again here only to report which backend was used.
        backend_name, _ = _pick_backend(args.method, _FILE_CONVERTERS)
        if args.json:
            json_results = []
            for inp, info in results.items():
                entry = {"input": os.path.abspath(inp), "method": backend_name}
                if info["error"]:
                    entry["error"] = info["error"]
                else:
                    entry["output"] = os.path.abspath(info["output"])
                json_results.append(entry)
            print(json.dumps(json_results, indent=2))
        else:
            ok = sum(1 for v in results.values() if v["error"] is None)
            fail = len(results) - ok
            for inp, info in results.items():
                if info["error"]:
                    print(f" FAIL: {inp} — {info['error']}")
                else:
                    size = os.path.getsize(info["output"])
                    print(f" OK: {inp} -> {info['output']} ({size:,} bytes)")
            print(f"Batch: {ok} converted, {fail} failed [backend: {backend_name}]")
        return 1 if any(v["error"] for v in results.values()) else 0
    except Exception as e:
        # Top-level CLI boundary: report the failure and signal it via the
        # exit status instead of a traceback.
        print(f"Error: {e}", file=sys.stderr)
        return 1


def _run_single_cli(args) -> int:
    """Handle the single-file mode: convert ``args.input`` and report.

    Returns 1 if the input file is missing or the conversion raised;
    otherwise 0.
    """
    if not os.path.isfile(args.input):
        print(f"Error: file not found: {args.input}", file=sys.stderr)
        return 1

    try:
        out = convert_file(args.input, args.output, args.method)
        # Resolved again here only to report which backend was used.
        backend_name, _ = _pick_backend(args.method, _FILE_CONVERTERS)
        if args.json:
            in_ext = os.path.splitext(args.input)[1].lower().lstrip(".")
            out_ext = os.path.splitext(out)[1].lower().lstrip(".")
            result = {
                "input": os.path.abspath(args.input),
                "output": os.path.abspath(out),
                "input_format": in_ext,
                "output_format": out_ext,
                "method": backend_name,
            }
            print(json.dumps(result, indent=2))
        else:
            size = os.path.getsize(out)
            print(f"Converted: {args.input} -> {out} ({size:,} bytes) [backend: {backend_name}]")
        return 0
    except Exception as e:
        # Top-level CLI boundary: report the failure and signal it via the
        # exit status instead of a traceback.
        print(f"Error: {e}", file=sys.stderr)
        return 1


def main(argv=None) -> int:
    """Command-line entry point.

    Modes are checked in this order: ``--list-backends``, ``--batch``, then
    single-file conversion of the positional input.

    Args:
        argv: Argument list to parse; None lets argparse read ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 1 on any reported error.  Invalid
        usage (including a missing input) exits through ``parser.error``.
    """
    parser = _build_arg_parser()
    args = parser.parse_args(argv)

    if args.list_backends:
        print("Available backends:")
        for name in BACKEND_ORDER:
            status = "available" if _FILE_CONVERTERS.get(name) else "not available"
            print(f" {name}: {status}")
        return 0

    # --batch mode: convert multiple files in one COM session
    if args.batch:
        return _run_batch_cli(args)

    if not args.input:
        parser.error("the following arguments are required: input (or --batch)")

    return _run_single_cli(args)
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
|