cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. cdxml_toolkit/__init__.py +18 -0
  2. cdxml_toolkit/_jre/__init__.py +2 -0
  3. cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
  4. cdxml_toolkit/analysis/__init__.py +35 -0
  5. cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
  6. cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
  7. cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
  8. cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
  9. cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
  10. cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
  11. cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
  12. cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
  13. cdxml_toolkit/analysis/extract_nmr.py +47 -0
  14. cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
  15. cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
  16. cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
  17. cdxml_toolkit/cdxml_builder.py +920 -0
  18. cdxml_toolkit/cdxml_utils.py +342 -0
  19. cdxml_toolkit/chemdraw/__init__.py +5 -0
  20. cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
  21. cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
  22. cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
  23. cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
  24. cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
  25. cdxml_toolkit/constants.py +304 -0
  26. cdxml_toolkit/coord_normalizer.py +438 -0
  27. cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
  28. cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
  29. cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
  30. cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
  31. cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
  32. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
  33. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
  34. cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
  35. cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
  36. cdxml_toolkit/image/__init__.py +15 -0
  37. cdxml_toolkit/image/reaction_from_image.py +2103 -0
  38. cdxml_toolkit/image/structure_from_image.py +1711 -0
  39. cdxml_toolkit/layout/__init__.py +5 -0
  40. cdxml_toolkit/layout/alignment.py +1642 -0
  41. cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
  42. cdxml_toolkit/layout/scheme_merger.py +2260 -0
  43. cdxml_toolkit/mcp_server/__init__.py +0 -0
  44. cdxml_toolkit/mcp_server/__main__.py +5 -0
  45. cdxml_toolkit/mcp_server/server.py +1567 -0
  46. cdxml_toolkit/naming/__init__.py +6 -0
  47. cdxml_toolkit/naming/aligned_namer.py +2342 -0
  48. cdxml_toolkit/naming/mol_builder.py +3722 -0
  49. cdxml_toolkit/naming/name_decomposer.py +2843 -0
  50. cdxml_toolkit/naming/reactions_datamol.json +2414 -0
  51. cdxml_toolkit/office/__init__.py +5 -0
  52. cdxml_toolkit/office/doc_from_template.py +722 -0
  53. cdxml_toolkit/office/ole_embedder.py +808 -0
  54. cdxml_toolkit/office/ole_extractor.py +272 -0
  55. cdxml_toolkit/perception/__init__.py +10 -0
  56. cdxml_toolkit/perception/compound_search.py +229 -0
  57. cdxml_toolkit/perception/eln_csv_parser.py +240 -0
  58. cdxml_toolkit/perception/rdf_parser.py +664 -0
  59. cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
  60. cdxml_toolkit/perception/reaction_parser.py +2150 -0
  61. cdxml_toolkit/perception/scheme_reader.py +2948 -0
  62. cdxml_toolkit/perception/scheme_refine.py +1404 -0
  63. cdxml_toolkit/perception/scheme_segmenter.py +619 -0
  64. cdxml_toolkit/perception/spatial_assignment.py +1013 -0
  65. cdxml_toolkit/rdkit_utils.py +605 -0
  66. cdxml_toolkit/render/__init__.py +17 -0
  67. cdxml_toolkit/render/auto_layout.py +229 -0
  68. cdxml_toolkit/render/compact_parser.py +632 -0
  69. cdxml_toolkit/render/parser.py +706 -0
  70. cdxml_toolkit/render/render_scheme.py +267 -0
  71. cdxml_toolkit/render/renderer.py +2387 -0
  72. cdxml_toolkit/render/schema.py +90 -0
  73. cdxml_toolkit/render/scheme_maker.py +1043 -0
  74. cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
  75. cdxml_toolkit/resolve/__init__.py +13 -0
  76. cdxml_toolkit/resolve/cas_resolver.py +430 -0
  77. cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
  78. cdxml_toolkit/resolve/condensed_formula.py +493 -0
  79. cdxml_toolkit/resolve/jre_manager.py +195 -0
  80. cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
  81. cdxml_toolkit/resolve/reagent_db.py +285 -0
  82. cdxml_toolkit/resolve/superatom_data.json +2856 -0
  83. cdxml_toolkit/resolve/superatom_table.py +146 -0
  84. cdxml_toolkit/text_formatting.py +298 -0
  85. cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
  86. cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
  87. cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
  88. cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
  89. cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
  90. cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
  91. cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,509 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ ELN CDX Reaction Cleanup Tool
4
+
5
+ Cleans up reaction schemes exported from Findmolecule ELN (.cdx files):
6
+ - Converts CDX to CDXML
7
+ - Scales coordinates to match ACS Document 1996 style
8
+ - Applies ACS Document 1996 document settings
9
+ - Cleans up individual structures (bond lengths, angles)
10
+ - Sets all text labels to Arial 10pt Bold
11
+ - Cleans up reaction layout (arrow alignment, reagent/condition placement)
12
+
13
+ Uses a two-pass ChemDraw COM approach:
14
+ Pass 1: Convert CDX -> CDXML, scale coordinates, apply style, clean structures, fix fonts
15
+ Pass 2: Reopen and clean reaction layout (requires fresh document load)
16
+
17
+ ChemDraw must be CLOSED before running this tool.
18
+ ChemDraw is launched minimized, restored to normal before quitting
19
+ (so toolbar state is preserved), and closed automatically when done.
20
+
21
+ Usage:
22
+ python eln_cdx_cleanup.py input.cdx [-o output.cdxml] [--scale 0.5]
23
+ python eln_cdx_cleanup.py input1.cdx input2.cdx input3.cdx
24
+ python eln_cdx_cleanup.py *.cdx --output-dir cleaned/
25
+
26
+ Python API:
27
+ from .eln_cdx_cleanup import cleanup_eln_cdx
28
+ cleanup_eln_cdx("KL-CC-001.cdx", "KL-CC-001-cleaned.cdxml", scale_factor=0.5)
29
+ """
30
+
31
+ import argparse
32
+ import json
33
+ import os
34
+ import re
35
+ import sys
36
+ import time
37
+ import tempfile
38
+ import xml.etree.ElementTree as ET
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # XML-based coordinate scaling
42
+ # ---------------------------------------------------------------------------
43
+
44
+ def _parse_point(s):
45
+ """Parse space-separated coordinate string into list of floats."""
46
+ return [float(v) for v in s.split()]
47
+
48
+
49
+ def _format_point(vals):
50
+ """Format list of floats to space-separated string (2 decimal places)."""
51
+ return ' '.join('{:.2f}'.format(v) for v in vals)
52
+
53
+
54
+ def _scale_point(x, y, cx, cy, factor):
55
+ """Scale point (x,y) toward centroid (cx,cy) by factor."""
56
+ return cx + (x - cx) * factor, cy + (y - cy) * factor
57
+
58
+
59
+ def _sanitize_cdxml(filepath):
60
+ """
61
+ Sanitize a CDXML file by removing invalid XML characters.
62
+
63
+ ChemDraw COM exports may include binary data in objecttag Value
64
+ attributes (e.g. Findmolecule ELN metadata). These contain bytes
65
+ that are not valid in XML 1.0 and cause parsing failures.
66
+
67
+ Replaces invalid characters with empty string in-place.
68
+ """
69
+ with open(filepath, 'rb') as f:
70
+ raw = f.read()
71
+
72
+ # XML 1.0 valid characters:
73
+ # #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
74
+ # Decode as UTF-8 (with replacement for truly broken bytes),
75
+ # then strip invalid XML chars.
76
+ text = raw.decode('utf-8', errors='replace')
77
+ # Remove control chars except tab, newline, carriage return
78
+ cleaned = re.sub(r'[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD]', '', text)
79
+
80
+ with open(filepath, 'w', encoding='utf-8') as f:
81
+ f.write(cleaned)
82
+
83
+
84
def scale_cdxml_coordinates(input_path, output_path, factor=0.5):
    """
    Rescale the coordinates stored in a CDXML file about a centroid.

    The centroid is the mean of the ``p`` positions of all ``n`` and ``t``
    elements. Every ``p``, ``BoundingBox`` and arrow ``*3D`` attribute in
    the document is then scaled relative to that centroid by ``factor``
    and the tree is written to ``output_path``. Only coordinate attributes
    are rewritten, so structures shrink while text sizes are preserved,
    preparing the drawing for ChemDraw's Clean Up Structure. If no ``n``/``t``
    element carries a position, the document is written out unscaled.
    """
    doc = ET.parse(input_path)
    top = doc.getroot()

    # Gather the x / y components that define the centroid.
    xs = []
    ys = []
    for node in top.iter():
        if node.tag not in ('n', 't') or 'p' not in node.attrib:
            continue
        coords = _parse_point(node.attrib['p'])
        x0, y0 = coords[0], coords[1]
        xs.append(x0)
        ys.append(y0)

    if xs:
        cx = sum(xs) / len(xs)
        cy = sum(ys) / len(ys)

        for node in top.iter():
            attrs = node.attrib

            # p="x y": node and text positions
            if 'p' in attrs:
                coords = _parse_point(attrs['p'])
                attrs['p'] = _format_point(
                    _scale_point(coords[0], coords[1], cx, cy, factor))

            # BoundingBox="x1 y1 x2 y2": both corners are scaled
            if 'BoundingBox' in attrs:
                coords = _parse_point(attrs['BoundingBox'])
                if len(coords) >= 4:
                    left, top_y = _scale_point(coords[0], coords[1],
                                               cx, cy, factor)
                    right, bottom = _scale_point(coords[2], coords[3],
                                                 cx, cy, factor)
                    attrs['BoundingBox'] = _format_point(
                        [left, top_y, right, bottom])

            # 3D points on arrows: x/y are scaled, a third value (if any)
            # is carried over unchanged.
            for key in ('Head3D', 'Tail3D', 'Center3D',
                        'MajorAxisEnd3D', 'MinorAxisEnd3D'):
                if key not in attrs:
                    continue
                coords = _parse_point(attrs[key])
                sx, sy = _scale_point(coords[0], coords[1], cx, cy, factor)
                if len(coords) >= 3:
                    scaled = [sx, sy, coords[2]]
                else:
                    scaled = [sx, sy]
                attrs[key] = _format_point(scaled)

    doc.write(output_path, xml_declaration=True, encoding='UTF-8')
139
+
140
+
141
+ # ---------------------------------------------------------------------------
142
+ # ChemDraw COM helpers
143
+ # ---------------------------------------------------------------------------
144
+
145
def _find_chemdraw_windows():
    """
    Find all ChemDraw window handles.

    Returns
    -------
    list
        Handles of every window enumerated by ``win32gui.EnumWindows``
        whose title text contains 'ChemDraw'.
    """
    import win32gui

    def callback(hwnd, results):
        try:
            title = win32gui.GetWindowText(hwnd)
            if 'ChemDraw' in title:
                results.append(hwnd)
        except Exception:
            # Best effort: a window whose title cannot be read is skipped
            # rather than aborting the enumeration. Only Exception is
            # caught so KeyboardInterrupt / SystemExit are not swallowed.
            pass

    results = []
    win32gui.EnumWindows(callback, results)
    return results
159
+
160
+
161
def _minimize_chemdraw():
    """Minimize every ChemDraw window and return the list of their handles."""
    import win32gui
    import win32con
    minimize = win32con.SW_MINIMIZE
    handles = _find_chemdraw_windows()
    for handle in handles:
        win32gui.ShowWindow(handle, minimize)
    return handles
169
+
170
+
171
def _restore_chemdraw_window():
    """
    Un-minimize all ChemDraw windows; intended to be called before Quit().

    ChemDraw saves toolbar/window state to the registry on Quit().
    Quitting while minimized saves a 'no toolbars' state, so the windows
    are restored first to make sure a proper state is saved.
    """
    import win32gui
    import win32con
    show_normal = win32con.SW_RESTORE
    for handle in _find_chemdraw_windows():
        win32gui.ShowWindow(handle, show_normal)
    # NOTE(review): the original indentation of this sleep was not
    # recoverable; it is assumed to run once, after all windows are restored.
    time.sleep(0.5)
185
+
186
+
187
def _get_chemdraw():
    """
    Get or launch a ChemDraw COM instance.

    Returns
    -------
    tuple
        ``(cdApp, launched_new)``. ``launched_new`` is False when an
        already-registered 'ChemDraw.Application' object was found and
        reused, True when a new one had to be dispatched.
    """
    import win32com.client
    try:
        cdApp = win32com.client.GetActiveObject('ChemDraw.Application')
        return cdApp, False
    except Exception:
        # Any failure to attach is treated as "no running instance" and
        # falls back to dispatching a new one (presumably pywin32 raises a
        # COM error here; confirm). Only Exception is caught so
        # KeyboardInterrupt / SystemExit still propagate.
        cdApp = win32com.client.Dispatch('ChemDraw.Application')
        return cdApp, True
200
+
201
+
202
def _chemdraw_open(cdApp, filepath):
    """Open filepath in ChemDraw, keep its windows minimized, and return
    the activated document object."""

    def _pause_then_minimize(seconds):
        # Wait for ChemDraw to settle, then push its windows out of the way.
        time.sleep(seconds)
        _minimize_chemdraw()

    cdApp.Visible = True
    _pause_then_minimize(1)
    document = cdApp.Documents.Open(filepath)
    _pause_then_minimize(1)
    document.Activate()
    time.sleep(0.5)
    return document
213
+
214
+
215
+ # ---------------------------------------------------------------------------
216
+ # CDX to CDXML conversion via COM
217
+ # ---------------------------------------------------------------------------
218
+
219
def _cdx_to_cdxml_com(cdx_path, cdxml_path):
    """
    Convert CDX to CDXML using ChemDraw COM.

    Opens ``cdx_path`` in ChemDraw and saves it to ``cdxml_path``. The
    document is always closed without saving, and ChemDraw is quit if this
    call launched it, even when opening or saving raises, so a failed
    conversion does not leave a stray ChemDraw session behind.

    Returns
    -------
    str
        ``cdxml_path``.
    """
    cdApp, launched = _get_chemdraw()
    doc = None
    try:
        doc = _chemdraw_open(cdApp, cdx_path)
        doc.SaveAs(cdxml_path)
        time.sleep(0.5)
    finally:
        try:
            if doc is not None:
                doc.Close(False)
        finally:
            if launched:
                # Restore before quitting so ChemDraw saves a sane
                # toolbar/window state.
                _restore_chemdraw_window()
                cdApp.Quit()
    return cdxml_path
231
+
232
+
233
+ # ---------------------------------------------------------------------------
234
+ # Main cleanup workflow
235
+ # ---------------------------------------------------------------------------
236
+
237
# Location of the "ACS Document 1996" style sheet that is used when the
# caller does not supply a style sheet path.
ACS_STYLE_PATH = os.path.join(
    r'C:\ProgramData\PerkinElmerInformatics\ChemOffice2016',
    r'ChemDraw\ChemDraw Items\ACS Document 1996.cds',
)
242
+
243
+
244
def _close_chemdraw_session(cdApp, doc, launched):
    """Close doc without saving (if one was opened) and quit ChemDraw when
    this tool launched it; used from ``finally`` blocks so it also runs
    after a failed pass."""
    try:
        if doc is not None:
            doc.Close(False)
    finally:
        if launched:
            # Restore before quitting so ChemDraw saves a sane
            # toolbar/window state.
            _restore_chemdraw_window()
            cdApp.Quit()


def cleanup_eln_cdx(input_path, output_path=None, scale_factor=0.5,
                    style_path=None):
    """
    Clean up a reaction scheme exported from Findmolecule ELN.

    Workflow: convert CDX to CDXML if needed, scale coordinates in the XML,
    then drive ChemDraw over COM in two passes (style + structure cleanup +
    fonts, then reaction-layout cleanup on a freshly reopened document).
    Intermediate files live in a temporary directory that is always removed.
    Each ChemDraw pass closes its document, and quits ChemDraw if this call
    launched it, even when the pass fails.

    Parameters
    ----------
    input_path : str
        Path to input .cdx or .cdxml file.
    output_path : str, optional
        Path for output .cdxml file. Defaults to input stem + '-cleaned.cdxml'.
    scale_factor : float, optional
        Factor to scale coordinates before cleanup (default 0.5).
        Set to 1.0 to skip scaling.
    style_path : str, optional
        Path to .cds style sheet (default: ACS Document 1996). If the file
        does not exist a warning is printed and no style is applied.

    Returns
    -------
    str
        Absolute path to the cleaned output file.

    Raises
    ------
    ValueError
        If the input extension is neither .cdx nor .cdxml.
    """
    import shutil
    # Not referenced directly below; importing it here makes a missing
    # pywin32 fail before any temporary files are created.
    import win32com.client  # noqa: F401

    if style_path is None:
        style_path = ACS_STYLE_PATH

    if not os.path.exists(style_path):
        print("WARNING: Style sheet not found: {}".format(style_path))
        print(" Skipping style application.")
        style_path = None

    input_path = os.path.abspath(input_path)
    input_stem, input_ext = os.path.splitext(input_path)
    input_ext = input_ext.lower()

    if output_path is None:
        output_path = input_stem + '-cleaned.cdxml'
    output_path = os.path.abspath(output_path)

    # Create temp directory for intermediate files
    tmpdir = tempfile.mkdtemp(prefix='eln_cleanup_')

    try:
        # --- Step 0: Convert CDX to CDXML if needed ---
        if input_ext == '.cdx':
            cdxml_path = os.path.join(tmpdir, 'converted.cdxml')
            print(" Converting CDX to CDXML...")
            _cdx_to_cdxml_com(input_path, cdxml_path)
            # Sanitize: remove invalid XML chars from ELN metadata
            _sanitize_cdxml(cdxml_path)
        elif input_ext == '.cdxml':
            cdxml_path = input_path
        else:
            raise ValueError("Unsupported file format: {}".format(input_ext))

        # --- Step 1: Scale coordinates in XML ---
        if scale_factor != 1.0:
            scaled_path = os.path.join(tmpdir, 'scaled.cdxml')
            print(" Scaling coordinates by {}...".format(scale_factor))
            scale_cdxml_coordinates(cdxml_path, scaled_path,
                                    factor=scale_factor)
        else:
            scaled_path = cdxml_path

        # --- Pass 1: Apply style + Clean Structure + Change fonts ---
        print(" Pass 1: Style + Clean Structure + Fonts...")
        pass1_path = os.path.join(tmpdir, 'pass1.cdxml')
        cdApp, launched = _get_chemdraw()
        doc = None
        try:
            doc = _chemdraw_open(cdApp, scaled_path)

            # Apply style
            if style_path:
                doc.Settings.ApplySettings(style_path, style_path)
                time.sleep(0.5)

            # Clean Up Structure. The command is addressed by its position
            # in the menu bar, so it depends on the installed ChemDraw's
            # menu layout.
            doc.Objects.Select()
            time.sleep(0.5)
            cdApp.MenuBars(1).Menus(5).MenuItems(6).Execute()
            time.sleep(1)

            # Change all caption text to Arial 10pt Bold (COM collections
            # are indexed from 1).
            captions = doc.Objects.Captions
            for i in range(1, captions.Count + 1):
                cap = captions.Item(i)
                cap.Family = "Arial"
                cap.Size = 10.0
                cap.Face = 96  # Bold

            # Also set document-level label defaults
            doc.Settings.LabelFont = "Arial"
            doc.Settings.LabelSize = 10.0
            doc.Settings.LabelFace = 96

            # Save pass 1 result
            doc.SaveAs(pass1_path)
            time.sleep(0.5)
        finally:
            # Close ChemDraw between passes if we launched it
            _close_chemdraw_session(cdApp, doc, launched)
        if launched:
            time.sleep(1)

        # --- Pass 2: Reopen fresh + Clean Up Reaction ---
        print(" Pass 2: Clean Up Reaction...")
        cdApp, launched = _get_chemdraw()
        doc = None
        try:
            doc = _chemdraw_open(cdApp, pass1_path)

            doc.Objects.Select()
            time.sleep(1)
            cdApp.MenuBars(1).Menus(5).MenuItems(7).Execute()
            time.sleep(1)

            # Save final output
            doc.SaveAs(output_path)
            time.sleep(0.5)
        finally:
            _close_chemdraw_session(cdApp, doc, launched)

    finally:
        # Best-effort removal of the intermediate files.
        shutil.rmtree(tmpdir, ignore_errors=True)

    return output_path
377
+
378
+
379
def cleanup_multiple(input_paths, output_dir=None, scale_factor=0.5,
                     style_path=None):
    """
    Run cleanup_eln_cdx over several CDX/CDXML files.

    A failure on one file is reported on stdout and does not stop the
    remaining files from being processed.

    Parameters
    ----------
    input_paths : list of str
        Paths to input files.
    output_dir : str, optional
        Directory for output files (created if missing). Defaults to the
        directory of each input.
    scale_factor : float
        Coordinate scale factor (default 0.5).
    style_path : str, optional
        Path to .cds style sheet.

    Returns
    -------
    list
        One entry per input, in order: the cleaned output path, or None
        if processing that input raised an exception.
    """
    cleaned = []
    for src in input_paths:
        base = os.path.basename(src)
        stem = os.path.splitext(base)[0]

        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
            target_dir = output_dir
        else:
            target_dir = os.path.dirname(src)
        dest = os.path.join(target_dir, stem + '-cleaned.cdxml')

        print("Processing: {}".format(base))
        try:
            outcome = cleanup_eln_cdx(src, dest, scale_factor=scale_factor,
                                      style_path=style_path)
            print(" -> {}\n".format(outcome))
            cleaned.append(outcome)
        except Exception as e:
            print(" ERROR: {}\n".format(e))
            cleaned.append(None)

    return cleaned
420
+
421
+
422
+ # ---------------------------------------------------------------------------
423
+ # CLI
424
+ # ---------------------------------------------------------------------------
425
+
426
def main(argv=None) -> int:
    """
    Command-line entry point.

    With exactly one input and ``-o/--output`` the file is cleaned in
    single-file mode; otherwise every input is processed in batch mode.
    With ``--json``, status output printed to stdout during processing is
    captured, replayed on stderr afterwards, and a JSON document is printed
    to stdout (one object in single-file mode, a list of objects in batch
    mode) with the keys "input", "output" and "warnings".

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse; defaults to ``sys.argv[1:]``.

    Returns
    -------
    int
        Always 0; errors in single-file mode propagate as exceptions.
    """
    parser = argparse.ArgumentParser(
        description='Clean up ELN-exported CDX reaction schemes to ACS 1996 style.',
        epilog='Examples:\n'
               ' python eln_cdx_cleanup.py KL-CC-001.cdx\n'
               ' python eln_cdx_cleanup.py *.cdx --output-dir cleaned/\n'
               ' python eln_cdx_cleanup.py input.cdx -o output.cdxml --scale 0.5\n',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('input', nargs='+',
                        help='Input .cdx or .cdxml file(s)')
    parser.add_argument('-o', '--output',
                        help='Output file path (single file mode only)')
    parser.add_argument('--output-dir',
                        help='Output directory (batch mode)')
    parser.add_argument('--scale', type=float, default=0.5,
                        help='Coordinate scale factor (default: 0.5)')
    parser.add_argument('--style',
                        help='Path to .cds style sheet '
                             '(default: ACS Document 1996)')
    parser.add_argument('--json', action='store_true',
                        help='Output result as JSON to stdout')

    args = parser.parse_args(argv)

    single_file = len(args.input) == 1 and bool(args.output)

    # When --json, capture status prints so stdout carries only the JSON.
    real_stdout = sys.stdout
    capture = None
    captured = ''
    if args.json:
        import io
        capture = io.StringIO()
        sys.stdout = capture

    try:
        if args.output and not single_file:
            # -o only applies to a single input; say so instead of silently
            # writing the results somewhere the user did not ask for.
            print("WARNING: --output is ignored when multiple inputs are "
                  "given; use --output-dir instead.")

        if single_file:
            outputs = [cleanup_eln_cdx(args.input[0], args.output,
                                       scale_factor=args.scale,
                                       style_path=args.style)]
        else:
            outputs = cleanup_multiple(args.input,
                                       output_dir=args.output_dir,
                                       scale_factor=args.scale,
                                       style_path=args.style)
    finally:
        # Runs on every exit path (including KeyboardInterrupt/SystemExit)
        # so stdout is never left redirected and the captured status text
        # is still shown when processing fails.
        if capture is not None:
            sys.stdout = real_stdout
            captured = capture.getvalue()
            if captured.strip():
                print(captured, file=sys.stderr, end='')

    if args.json:
        warning_lines = [line.strip() for line in captured.splitlines()
                         if 'WARNING' in line.upper()]
        entries = [
            {
                "input": os.path.abspath(inp),
                "output": os.path.abspath(out) if out else None,
                "warnings": warning_lines,
            }
            for inp, out in zip(args.input, outputs)
        ]
        # Single-file mode reports one object, batch mode a list.
        print(json.dumps(entries[0] if single_file else entries, indent=2))

    return 0
506
+
507
+
508
# Script entry point: run the CLI and use its return value as exit status.
if __name__ == '__main__':
    raise SystemExit(main())