cdxml-toolkit 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cdxml_toolkit/__init__.py +18 -0
- cdxml_toolkit/_jre/__init__.py +2 -0
- cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
- cdxml_toolkit/analysis/__init__.py +35 -0
- cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
- cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
- cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
- cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
- cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
- cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
- cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
- cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
- cdxml_toolkit/analysis/extract_nmr.py +47 -0
- cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
- cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
- cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
- cdxml_toolkit/cdxml_builder.py +920 -0
- cdxml_toolkit/cdxml_utils.py +342 -0
- cdxml_toolkit/chemdraw/__init__.py +5 -0
- cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
- cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
- cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
- cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
- cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
- cdxml_toolkit/constants.py +304 -0
- cdxml_toolkit/coord_normalizer.py +438 -0
- cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
- cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
- cdxml_toolkit/image/__init__.py +15 -0
- cdxml_toolkit/image/reaction_from_image.py +2103 -0
- cdxml_toolkit/image/structure_from_image.py +1711 -0
- cdxml_toolkit/layout/__init__.py +5 -0
- cdxml_toolkit/layout/alignment.py +1642 -0
- cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
- cdxml_toolkit/layout/scheme_merger.py +2260 -0
- cdxml_toolkit/mcp_server/__init__.py +0 -0
- cdxml_toolkit/mcp_server/__main__.py +5 -0
- cdxml_toolkit/mcp_server/server.py +1567 -0
- cdxml_toolkit/naming/__init__.py +6 -0
- cdxml_toolkit/naming/aligned_namer.py +2342 -0
- cdxml_toolkit/naming/mol_builder.py +3722 -0
- cdxml_toolkit/naming/name_decomposer.py +2843 -0
- cdxml_toolkit/naming/reactions_datamol.json +2414 -0
- cdxml_toolkit/office/__init__.py +5 -0
- cdxml_toolkit/office/doc_from_template.py +722 -0
- cdxml_toolkit/office/ole_embedder.py +808 -0
- cdxml_toolkit/office/ole_extractor.py +272 -0
- cdxml_toolkit/perception/__init__.py +10 -0
- cdxml_toolkit/perception/compound_search.py +229 -0
- cdxml_toolkit/perception/eln_csv_parser.py +240 -0
- cdxml_toolkit/perception/rdf_parser.py +664 -0
- cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
- cdxml_toolkit/perception/reaction_parser.py +2150 -0
- cdxml_toolkit/perception/scheme_reader.py +2948 -0
- cdxml_toolkit/perception/scheme_refine.py +1404 -0
- cdxml_toolkit/perception/scheme_segmenter.py +619 -0
- cdxml_toolkit/perception/spatial_assignment.py +1013 -0
- cdxml_toolkit/rdkit_utils.py +605 -0
- cdxml_toolkit/render/__init__.py +17 -0
- cdxml_toolkit/render/auto_layout.py +229 -0
- cdxml_toolkit/render/compact_parser.py +632 -0
- cdxml_toolkit/render/parser.py +706 -0
- cdxml_toolkit/render/render_scheme.py +267 -0
- cdxml_toolkit/render/renderer.py +2387 -0
- cdxml_toolkit/render/schema.py +90 -0
- cdxml_toolkit/render/scheme_maker.py +1043 -0
- cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
- cdxml_toolkit/resolve/__init__.py +13 -0
- cdxml_toolkit/resolve/cas_resolver.py +430 -0
- cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
- cdxml_toolkit/resolve/condensed_formula.py +493 -0
- cdxml_toolkit/resolve/jre_manager.py +195 -0
- cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
- cdxml_toolkit/resolve/reagent_db.py +285 -0
- cdxml_toolkit/resolve/superatom_data.json +2856 -0
- cdxml_toolkit/resolve/superatom_table.py +146 -0
- cdxml_toolkit/text_formatting.py +298 -0
- cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
- cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
- cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
- cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
- cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,509 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
ELN CDX Reaction Cleanup Tool
|
|
4
|
+
|
|
5
|
+
Cleans up reaction schemes exported from Findmolecule ELN (.cdx files):
|
|
6
|
+
- Converts CDX to CDXML
|
|
7
|
+
- Scales coordinates to match ACS Document 1996 style
|
|
8
|
+
- Applies ACS Document 1996 document settings
|
|
9
|
+
- Cleans up individual structures (bond lengths, angles)
|
|
10
|
+
- Sets all text labels to Arial 10pt Bold
|
|
11
|
+
- Cleans up reaction layout (arrow alignment, reagent/condition placement)
|
|
12
|
+
|
|
13
|
+
Uses a two-pass ChemDraw COM approach:
|
|
14
|
+
Pass 1: Convert CDX -> CDXML, scale coordinates, apply style, clean structures, fix fonts
|
|
15
|
+
Pass 2: Reopen and clean reaction layout (requires fresh document load)
|
|
16
|
+
|
|
17
|
+
ChemDraw must be CLOSED before running this tool.
|
|
18
|
+
ChemDraw is launched minimized, restored to normal before quitting
|
|
19
|
+
(so toolbar state is preserved), and closed automatically when done.
|
|
20
|
+
|
|
21
|
+
Usage:
|
|
22
|
+
python eln_cdx_cleanup.py input.cdx [-o output.cdxml] [--scale 0.5]
|
|
23
|
+
python eln_cdx_cleanup.py input1.cdx input2.cdx input3.cdx
|
|
24
|
+
python eln_cdx_cleanup.py *.cdx --output-dir cleaned/
|
|
25
|
+
|
|
26
|
+
Python API:
|
|
27
|
+
from .eln_cdx_cleanup import cleanup_eln_cdx
|
|
28
|
+
cleanup_eln_cdx("KL-CC-001.cdx", "KL-CC-001-cleaned.cdxml", scale_factor=0.5)
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
import argparse
|
|
32
|
+
import json
|
|
33
|
+
import os
|
|
34
|
+
import re
|
|
35
|
+
import sys
|
|
36
|
+
import time
|
|
37
|
+
import tempfile
|
|
38
|
+
import xml.etree.ElementTree as ET
|
|
39
|
+
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
# XML-based coordinate scaling
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
def _parse_point(s):
    """Convert a whitespace-separated coordinate string to a list of floats.

    Parameters
    ----------
    s : str
        Coordinate text, e.g. ``"12.5 40"``.

    Returns
    -------
    list of float
        One float per whitespace-delimited token, in input order.
    """
    tokens = s.split()
    return list(map(float, tokens))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _format_point(vals):
    """Join numbers into a space-separated string, two decimals each.

    Parameters
    ----------
    vals : iterable of float
        Coordinate components to render.

    Returns
    -------
    str
        The components formatted with ``{:.2f}`` and separated by single
        spaces; an empty iterable gives an empty string.
    """
    two_decimals = '{:.2f}'.format
    return ' '.join(map(two_decimals, vals))
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _scale_point(x, y, cx, cy, factor):
    """Scale the point (x, y) about the centre (cx, cy).

    The offset of the point from the centre is multiplied by ``factor``:
    a factor below 1 pulls the point toward the centre, a factor of 1
    leaves it where it is.

    Returns
    -------
    tuple
        The scaled ``(x, y)`` pair.
    """
    scaled_x = cx + (x - cx) * factor
    scaled_y = cy + (y - cy) * factor
    return scaled_x, scaled_y
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _sanitize_cdxml(filepath):
    """
    Sanitize a CDXML file in place by removing characters invalid in XML 1.0.

    ChemDraw COM exports may include binary data in objecttag Value
    attributes (e.g. Findmolecule ELN metadata). These contain bytes
    that are not valid in XML 1.0 and cause parsing failures.

    The file is decoded as UTF-8; undecodable bytes become U+FFFD (which
    is itself a legal XML character) and every character outside the
    XML 1.0 ``Char`` production is dropped. The result is written back
    to the same path as UTF-8.

    Parameters
    ----------
    filepath : str
        Path of the CDXML file to rewrite.
    """
    with open(filepath, 'rb') as f:
        raw = f.read()

    text = raw.decode('utf-8', errors='replace')

    # XML 1.0 valid characters:
    #   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
    # The supplementary-plane range must be listed explicitly, otherwise
    # legitimate non-BMP characters are stripped along with the junk.
    cleaned = re.sub(
        r'[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]',
        '', text)

    # newline='' disables newline translation on write, so existing CRLF
    # line endings are not turned into CR CR LF on Windows.
    with open(filepath, 'w', encoding='utf-8', newline='') as f:
        f.write(cleaned)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def scale_cdxml_coordinates(input_path, output_path, factor=0.5):
    """
    Scale the coordinates in a CDXML file about the centroid of its
    node/text positions and write the result to a new file.

    The centroid is the mean of the ``p`` attributes of all ``n`` and
    ``t`` elements. Every ``p``, ``BoundingBox`` and arrow 3D-point
    attribute is then moved toward (or away from) that centroid by
    ``factor``. Font sizes are not touched, so structures shrink while
    text keeps its size, preparing the drawing for ChemDraw's Clean Up
    Structure to normalize to the target bond length.

    Parameters
    ----------
    input_path : str
        CDXML file to read.
    output_path : str
        Destination for the scaled CDXML (UTF-8, with XML declaration).
    factor : float, optional
        Scale factor applied to offsets from the centroid (default 0.5).
    """
    tree = ET.parse(input_path)
    root = tree.getroot()

    # Gather x and y of every node/text anchor to locate the centroid.
    xs, ys = [], []
    for node in root.iter():
        if node.tag in ('n', 't') and 'p' in node.attrib:
            coords = _parse_point(node.attrib['p'])
            xs.append(coords[0])
            ys.append(coords[1])

    if xs:
        cx = sum(xs) / len(xs)
        cy = sum(ys) / len(ys)
        point_attrs = ('Head3D', 'Tail3D', 'Center3D',
                       'MajorAxisEnd3D', 'MinorAxisEnd3D')

        for node in root.iter():
            attrs = node.attrib

            # p="x y" -- node and text positions
            if 'p' in attrs:
                vals = _parse_point(attrs['p'])
                attrs['p'] = _format_point(
                    _scale_point(vals[0], vals[1], cx, cy, factor))

            # BoundingBox="x1 y1 x2 y2" -- both corners are scaled;
            # boxes with fewer than four values are left untouched.
            if 'BoundingBox' in attrs:
                box = _parse_point(attrs['BoundingBox'])
                if len(box) >= 4:
                    first = _scale_point(box[0], box[1], cx, cy, factor)
                    second = _scale_point(box[2], box[3], cx, cy, factor)
                    attrs['BoundingBox'] = _format_point(first + second)

            # 3D points on arrows: only x and y are scaled, a third
            # component (if present) is carried over unchanged.
            for key in point_attrs:
                if key not in attrs:
                    continue
                vals = _parse_point(attrs[key])
                moved = list(_scale_point(vals[0], vals[1], cx, cy, factor))
                if len(vals) >= 3:
                    moved.append(vals[2])
                attrs[key] = _format_point(moved)

    # With no positions found there is nothing to scale and the tree is
    # written out as parsed.
    tree.write(output_path, xml_declaration=True, encoding='UTF-8')
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# ---------------------------------------------------------------------------
|
|
142
|
+
# ChemDraw COM helpers
|
|
143
|
+
# ---------------------------------------------------------------------------
|
|
144
|
+
|
|
145
|
+
def _find_chemdraw_windows():
    """Find all ChemDraw window handles.

    Walks the windows reported by ``win32gui.EnumWindows`` and collects
    those whose title text contains ``'ChemDraw'``.

    Returns
    -------
    list
        Matching window handles (possibly empty).
    """
    import win32gui

    def callback(hwnd, results):
        try:
            title = win32gui.GetWindowText(hwnd)
        except Exception:
            # Best effort: a window whose title cannot be read is skipped.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            return
        if 'ChemDraw' in title:
            results.append(hwnd)

    results = []
    win32gui.EnumWindows(callback, results)
    return results
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _minimize_chemdraw():
    """Minimize every ChemDraw window so the user is not disrupted.

    Returns
    -------
    list
        The window handles that were minimized.
    """
    import win32gui
    import win32con

    handles = _find_chemdraw_windows()
    for handle in handles:
        win32gui.ShowWindow(handle, win32con.SW_MINIMIZE)
    return handles
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _restore_chemdraw_window():
    """
    Un-minimize all ChemDraw windows; call this before quitting ChemDraw.

    ChemDraw saves toolbar/window state to the registry on Quit().
    Quitting while minimized saves a 'no toolbars' state, so the windows
    are restored first and given half a second to settle.
    """
    import win32gui
    import win32con

    for handle in _find_chemdraw_windows():
        win32gui.ShowWindow(handle, win32con.SW_RESTORE)
    # The pause happens even when no window was found.
    time.sleep(0.5)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _get_chemdraw():
    """
    Get or launch a ChemDraw COM instance.

    An already-running instance is reused when ``GetActiveObject``
    succeeds; otherwise a new one is started via ``Dispatch``.

    Returns
    -------
    tuple
        ``(cdApp, launched_new)`` where ``launched_new`` is True only
        when this call had to start ChemDraw.
    """
    import win32com.client
    try:
        cdApp = win32com.client.GetActiveObject('ChemDraw.Application')
        return cdApp, False
    except Exception:
        # Any failure to attach falls back to launching ChemDraw.
        # Exception (not a bare except) is caught so KeyboardInterrupt
        # and SystemExit are not swallowed.
        cdApp = win32com.client.Dispatch('ChemDraw.Application')
        return cdApp, True
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _chemdraw_open(cdApp, filepath):
    """Open ``filepath`` in ChemDraw, keep the window minimized, and
    activate the document.

    Parameters
    ----------
    cdApp
        ChemDraw COM application object.
    filepath : str
        File to open.

    Returns
    -------
    The opened (and activated) ChemDraw document object.
    """
    def _pause_then_minimize():
        # Wait a second, then minimize whatever ChemDraw windows exist.
        time.sleep(1)
        _minimize_chemdraw()

    cdApp.Visible = True
    _pause_then_minimize()

    document = cdApp.Documents.Open(filepath)
    _pause_then_minimize()

    document.Activate()
    time.sleep(0.5)
    return document
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# ---------------------------------------------------------------------------
|
|
216
|
+
# CDX to CDXML conversion via COM
|
|
217
|
+
# ---------------------------------------------------------------------------
|
|
218
|
+
|
|
219
|
+
def _cdx_to_cdxml_com(cdx_path, cdxml_path):
    """Convert CDX to CDXML using ChemDraw COM.

    Opens ``cdx_path`` in ChemDraw and saves it as ``cdxml_path``. The
    document is always closed without saving, and ChemDraw is quit if
    this call launched it, even when opening or saving fails. Without
    that, a failed conversion would leave a tool-launched ChemDraw
    running, which later calls would treat as a pre-existing instance
    and never close.

    Parameters
    ----------
    cdx_path : str
        Source .cdx file.
    cdxml_path : str
        Destination .cdxml file.

    Returns
    -------
    str
        ``cdxml_path``.
    """
    cdApp, launched = _get_chemdraw()
    try:
        doc = _chemdraw_open(cdApp, cdx_path)
        try:
            doc.SaveAs(cdxml_path)
            time.sleep(0.5)
        finally:
            doc.Close(False)
    finally:
        if launched:
            # Restore before Quit() so ChemDraw saves a sane toolbar state.
            _restore_chemdraw_window()
            cdApp.Quit()
    return cdxml_path
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# ---------------------------------------------------------------------------
|
|
234
|
+
# Main cleanup workflow
|
|
235
|
+
# ---------------------------------------------------------------------------
|
|
236
|
+
|
|
237
|
+
# Style sheet used when the caller does not supply one: the
# "ACS Document 1996" .cds file under the ChemOffice2016 ProgramData folder.
ACS_STYLE_PATH = os.path.join(
    r'C:\ProgramData\PerkinElmerInformatics\ChemOffice2016',
    r'ChemDraw\ChemDraw Items\ACS Document 1996.cds',
)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _edit_in_chemdraw(src_path, dst_path, edit, settle_after_quit=0):
    """Open ``src_path`` in ChemDraw, run ``edit(cdApp, doc)``, save to ``dst_path``.

    The document is always closed without saving, and ChemDraw is quit
    if this call launched it, even when ``edit`` or the save fails.
    ``settle_after_quit`` is an optional pause (seconds) after quitting.
    """
    cdApp, launched = _get_chemdraw()
    try:
        doc = _chemdraw_open(cdApp, src_path)
        try:
            edit(cdApp, doc)
            doc.SaveAs(dst_path)
            time.sleep(0.5)
        finally:
            doc.Close(False)
    finally:
        if launched:
            # Restore before Quit() so ChemDraw saves a sane toolbar state.
            _restore_chemdraw_window()
            cdApp.Quit()
            if settle_after_quit:
                time.sleep(settle_after_quit)


def _style_and_clean_structures(cdApp, doc, style_path):
    """Pass 1 edits: apply the style sheet, clean structures, set fonts."""
    if style_path:
        doc.Settings.ApplySettings(style_path, style_path)
        time.sleep(0.5)

    # Clean Up Structure: select everything, then run the menu command
    # by position (menu 5, item 6 of the first menu bar).
    doc.Objects.Select()
    time.sleep(0.5)
    cdApp.MenuBars(1).Menus(5).MenuItems(6).Execute()
    time.sleep(1)

    # Restyle every caption. The COM collection is indexed from 1.
    captions = doc.Objects.Captions
    for i in range(1, captions.Count + 1):
        cap = captions.Item(i)
        cap.Family = "Arial"
        cap.Size = 10.0
        # NOTE(review): the original comment labels 96 as "Bold". In the
        # CDX font-face bit mask 96 (0x60) appears to be the "Formula"
        # face and Bold is 1 -- confirm which is intended.
        cap.Face = 96

    # Also set document-level label defaults
    doc.Settings.LabelFont = "Arial"
    doc.Settings.LabelSize = 10.0
    doc.Settings.LabelFace = 96


def _clean_reaction_layout(cdApp, doc):
    """Pass 2 edits: select everything and run Clean Up Reaction."""
    doc.Objects.Select()
    time.sleep(1)
    # Clean Up Reaction, addressed by position (menu 5, item 7).
    cdApp.MenuBars(1).Menus(5).MenuItems(7).Execute()
    time.sleep(1)


def cleanup_eln_cdx(input_path, output_path=None, scale_factor=0.5,
                    style_path=None):
    """
    Clean up a reaction scheme exported from Findmolecule ELN.

    Workflow: convert CDX to CDXML if needed (then sanitize it), scale
    the coordinates, then run two ChemDraw passes. Pass 1 applies the
    style sheet, cleans up structures and sets fonts; pass 2 reopens
    the pass-1 result and cleans up the reaction layout. Intermediate
    files live in a temporary directory that is removed afterwards.

    Parameters
    ----------
    input_path : str
        Path to input .cdx or .cdxml file.
    output_path : str, optional
        Path for output .cdxml file. Defaults to input stem + '-cleaned.cdxml'.
    scale_factor : float, optional
        Factor to scale coordinates before cleanup (default 0.5).
        Set to 1.0 to skip scaling.
    style_path : str, optional
        Path to .cds style sheet (default: ACS Document 1996). If the
        file does not exist a warning is printed and no style is applied.

    Returns
    -------
    str
        Absolute path to the cleaned output file.

    Raises
    ------
    ValueError
        If the input extension is neither .cdx nor .cdxml.
    """
    import shutil

    if style_path is None:
        style_path = ACS_STYLE_PATH

    if not os.path.exists(style_path):
        print("WARNING: Style sheet not found: {}".format(style_path))
        print(" Skipping style application.")
        style_path = None

    input_path = os.path.abspath(input_path)
    input_stem, input_ext = os.path.splitext(input_path)
    input_ext = input_ext.lower()

    # Reject unsupported inputs before creating any temporary files.
    if input_ext not in ('.cdx', '.cdxml'):
        raise ValueError("Unsupported file format: {}".format(input_ext))

    if output_path is None:
        output_path = input_stem + '-cleaned.cdxml'
    output_path = os.path.abspath(output_path)

    # Create temp directory for intermediate files
    tmpdir = tempfile.mkdtemp(prefix='eln_cleanup_')

    try:
        # --- Step 0: Convert CDX to CDXML if needed ---
        if input_ext == '.cdx':
            cdxml_path = os.path.join(tmpdir, 'converted.cdxml')
            print(" Converting CDX to CDXML...")
            _cdx_to_cdxml_com(input_path, cdxml_path)
            # Sanitize: remove invalid XML chars from ELN metadata
            _sanitize_cdxml(cdxml_path)
        else:
            cdxml_path = input_path

        # --- Step 1: Scale coordinates in XML ---
        if scale_factor != 1.0:
            scaled_path = os.path.join(tmpdir, 'scaled.cdxml')
            print(" Scaling coordinates by {}...".format(scale_factor))
            scale_cdxml_coordinates(cdxml_path, scaled_path,
                                    factor=scale_factor)
        else:
            scaled_path = cdxml_path

        # --- Pass 1: Apply style + Clean Structure + Change fonts ---
        print(" Pass 1: Style + Clean Structure + Fonts...")
        pass1_path = os.path.join(tmpdir, 'pass1.cdxml')
        _edit_in_chemdraw(
            scaled_path, pass1_path,
            lambda app, doc: _style_and_clean_structures(app, doc, style_path),
            settle_after_quit=1)

        # --- Pass 2: Reopen fresh + Clean Up Reaction ---
        print(" Pass 2: Clean Up Reaction...")
        _edit_in_chemdraw(pass1_path, output_path, _clean_reaction_layout)

    finally:
        # Best-effort removal of the intermediate files.
        shutil.rmtree(tmpdir, ignore_errors=True)

    return output_path
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def cleanup_multiple(input_paths, output_dir=None, scale_factor=0.5,
                     style_path=None):
    """
    Run cleanup_eln_cdx over several CDX/CDXML files.

    Each input produces '<stem>-cleaned.cdxml', either in ``output_dir``
    (created on demand) or next to the input. A failure on one file is
    printed and recorded as None; processing continues with the next.

    Parameters
    ----------
    input_paths : list of str
        Paths to input files.
    output_dir : str, optional
        Directory for output files. Defaults to same directory as each input.
    scale_factor : float
        Coordinate scale factor (default 0.5).
    style_path : str, optional
        Path to .cds style sheet.

    Returns
    -------
    list
        One entry per input, in order: the cleaned file path, or None
        if that input failed.
    """
    outcomes = []
    for src in input_paths:
        file_name = os.path.basename(src)
        cleaned_name = os.path.splitext(file_name)[0] + '-cleaned.cdxml'

        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
            target_dir = output_dir
        else:
            target_dir = os.path.dirname(src)
        dest = os.path.join(target_dir, cleaned_name)

        print("Processing: {}".format(file_name))
        try:
            produced = cleanup_eln_cdx(src, dest, scale_factor=scale_factor,
                                       style_path=style_path)
            print(" -> {}\n".format(produced))
            outcomes.append(produced)
        except Exception as e:
            print(" ERROR: {}\n".format(e))
            outcomes.append(None)

    return outcomes
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
# ---------------------------------------------------------------------------
|
|
423
|
+
# CLI
|
|
424
|
+
# ---------------------------------------------------------------------------
|
|
425
|
+
|
|
426
|
+
def main(argv=None) -> int:
    """Command-line entry point.

    With exactly one input and ``-o/--output`` the file is processed in
    single-file mode; otherwise all inputs go through cleanup_multiple.
    With ``--json`` the status prints are captured, echoed to stderr,
    and a JSON document is written to stdout: one object in single-file
    mode, a list of objects in batch mode. Each object carries the
    captured lines that contain 'WARNING'.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse; defaults to ``sys.argv[1:]``.

    Returns
    -------
    int
        Always 0; an exception from single-file mode propagates.
    """
    parser = argparse.ArgumentParser(
        description='Clean up ELN-exported CDX reaction schemes to ACS 1996 style.',
        epilog='Examples:\n'
               ' python eln_cdx_cleanup.py KL-CC-001.cdx\n'
               ' python eln_cdx_cleanup.py *.cdx --output-dir cleaned/\n'
               ' python eln_cdx_cleanup.py input.cdx -o output.cdxml --scale 0.5\n',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('input', nargs='+',
                        help='Input .cdx or .cdxml file(s)')
    parser.add_argument('-o', '--output',
                        help='Output file path (single file mode only)')
    parser.add_argument('--output-dir',
                        help='Output directory (batch mode)')
    parser.add_argument('--scale', type=float, default=0.5,
                        help='Coordinate scale factor (default: 0.5)')
    parser.add_argument('--style',
                        help='Path to .cds style sheet '
                             '(default: ACS Document 1996)')
    parser.add_argument('--json', action='store_true',
                        help='Output result as JSON to stdout')

    args = parser.parse_args(argv)

    single_file = len(args.input) == 1 and bool(args.output)

    # When --json, capture status prints so stdout carries only the JSON.
    if args.json:
        import io
        real_stdout = sys.stdout
        capture = io.StringIO()
        sys.stdout = capture

    captured = ''
    try:
        if single_file:
            outputs = [cleanup_eln_cdx(args.input[0], args.output,
                                       scale_factor=args.scale,
                                       style_path=args.style)]
        else:
            outputs = cleanup_multiple(args.input, output_dir=args.output_dir,
                                       scale_factor=args.scale,
                                       style_path=args.style)
    finally:
        # Restore stdout and forward the captured status text to stderr on
        # every exit path, so it is not lost when an exception propagates.
        if args.json:
            sys.stdout = real_stdout
            captured = capture.getvalue()
            if captured.strip():
                print(captured, file=sys.stderr, end='')

    if args.json:
        warning_lines = [line.strip() for line in captured.splitlines()
                         if 'WARNING' in line.upper()]
        # The warnings are collected from the whole run, so in batch mode
        # every record carries the same list.
        records = [
            {
                "input": os.path.abspath(inp),
                "output": os.path.abspath(out) if out else None,
                "warnings": warning_lines,
            }
            for inp, out in zip(args.input, outputs)
        ]
        payload = records[0] if single_file else records
        print(json.dumps(payload, indent=2))

    return 0
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
# Script entry point: run the CLI and use its return value as the exit code.
if __name__ == '__main__':
    raise SystemExit(main())
|