cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. cdxml_toolkit/__init__.py +18 -0
  2. cdxml_toolkit/_jre/__init__.py +2 -0
  3. cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
  4. cdxml_toolkit/analysis/__init__.py +35 -0
  5. cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
  6. cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
  7. cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
  8. cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
  9. cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
  10. cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
  11. cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
  12. cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
  13. cdxml_toolkit/analysis/extract_nmr.py +47 -0
  14. cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
  15. cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
  16. cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
  17. cdxml_toolkit/cdxml_builder.py +920 -0
  18. cdxml_toolkit/cdxml_utils.py +342 -0
  19. cdxml_toolkit/chemdraw/__init__.py +5 -0
  20. cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
  21. cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
  22. cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
  23. cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
  24. cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
  25. cdxml_toolkit/constants.py +304 -0
  26. cdxml_toolkit/coord_normalizer.py +438 -0
  27. cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
  28. cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
  29. cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
  30. cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
  31. cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
  32. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
  33. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
  34. cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
  35. cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
  36. cdxml_toolkit/image/__init__.py +15 -0
  37. cdxml_toolkit/image/reaction_from_image.py +2103 -0
  38. cdxml_toolkit/image/structure_from_image.py +1711 -0
  39. cdxml_toolkit/layout/__init__.py +5 -0
  40. cdxml_toolkit/layout/alignment.py +1642 -0
  41. cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
  42. cdxml_toolkit/layout/scheme_merger.py +2260 -0
  43. cdxml_toolkit/mcp_server/__init__.py +0 -0
  44. cdxml_toolkit/mcp_server/__main__.py +5 -0
  45. cdxml_toolkit/mcp_server/server.py +1567 -0
  46. cdxml_toolkit/naming/__init__.py +6 -0
  47. cdxml_toolkit/naming/aligned_namer.py +2342 -0
  48. cdxml_toolkit/naming/mol_builder.py +3722 -0
  49. cdxml_toolkit/naming/name_decomposer.py +2843 -0
  50. cdxml_toolkit/naming/reactions_datamol.json +2414 -0
  51. cdxml_toolkit/office/__init__.py +5 -0
  52. cdxml_toolkit/office/doc_from_template.py +722 -0
  53. cdxml_toolkit/office/ole_embedder.py +808 -0
  54. cdxml_toolkit/office/ole_extractor.py +272 -0
  55. cdxml_toolkit/perception/__init__.py +10 -0
  56. cdxml_toolkit/perception/compound_search.py +229 -0
  57. cdxml_toolkit/perception/eln_csv_parser.py +240 -0
  58. cdxml_toolkit/perception/rdf_parser.py +664 -0
  59. cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
  60. cdxml_toolkit/perception/reaction_parser.py +2150 -0
  61. cdxml_toolkit/perception/scheme_reader.py +2948 -0
  62. cdxml_toolkit/perception/scheme_refine.py +1404 -0
  63. cdxml_toolkit/perception/scheme_segmenter.py +619 -0
  64. cdxml_toolkit/perception/spatial_assignment.py +1013 -0
  65. cdxml_toolkit/rdkit_utils.py +605 -0
  66. cdxml_toolkit/render/__init__.py +17 -0
  67. cdxml_toolkit/render/auto_layout.py +229 -0
  68. cdxml_toolkit/render/compact_parser.py +632 -0
  69. cdxml_toolkit/render/parser.py +706 -0
  70. cdxml_toolkit/render/render_scheme.py +267 -0
  71. cdxml_toolkit/render/renderer.py +2387 -0
  72. cdxml_toolkit/render/schema.py +90 -0
  73. cdxml_toolkit/render/scheme_maker.py +1043 -0
  74. cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
  75. cdxml_toolkit/resolve/__init__.py +13 -0
  76. cdxml_toolkit/resolve/cas_resolver.py +430 -0
  77. cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
  78. cdxml_toolkit/resolve/condensed_formula.py +493 -0
  79. cdxml_toolkit/resolve/jre_manager.py +195 -0
  80. cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
  81. cdxml_toolkit/resolve/reagent_db.py +285 -0
  82. cdxml_toolkit/resolve/superatom_data.json +2856 -0
  83. cdxml_toolkit/resolve/superatom_table.py +146 -0
  84. cdxml_toolkit/text_formatting.py +298 -0
  85. cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
  86. cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
  87. cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
  88. cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
  89. cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
  90. cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
  91. cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,527 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CDX ↔ CDXML Converter
4
+ Converts between ChemDraw CDX (binary) and CDXML (XML) formats.
5
+
6
+ Backends (tried in order):
7
+ 1. ChemDraw COM automation (most reliable, requires ChemDraw installed)
8
+ 2. pycdxml library (good open-source fallback)
9
+ 3. Open Babel CLI (last resort, patchy for complex structures)
10
+
11
+ Usage:
12
+ python cdx_converter.py input.cdx [-o output.cdxml] [--method com|pycdxml|obabel]
13
+ python cdx_converter.py input.cdxml [-o output.cdx] [--method com|pycdxml|obabel]
14
+
15
+ Python API:
16
+ from cdxml_toolkit.chemdraw.cdx_converter import convert_cdx_to_cdxml, convert_file
17
+ cdxml_str = convert_cdx_to_cdxml(cdx_bytes)
18
+ convert_file("input.cdx", "output.cdxml")
19
+ """
20
+
21
+ import argparse
22
+ import json
23
+ import os
24
+ import sys
25
+ import subprocess
26
+ import tempfile
27
+ from typing import Optional
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Backend availability detection
31
+ # ---------------------------------------------------------------------------
32
+
33
# Availability flags. Each starts False and is flipped to True only when the
# corresponding backend proves usable during import of this module.
HAS_COM = False
HAS_PYCDXML = False
HAS_OBABEL = False

try:
    import win32com.client
    # NOTE: this only proves that pywin32 is importable; it does not check
    # that ChemDraw itself is installed.
    HAS_COM = True
except ImportError:
    pass

try:
    from pycdxml import cdxml_converter as _pycdxml
    HAS_PYCDXML = True
except ImportError:
    pass

try:
    # Probe the Open Babel CLI once; a zero exit status from `obabel -V`
    # is treated as "backend available".
    result = subprocess.run(
        ["obabel", "-V"], capture_output=True, timeout=5
    )
    if result.returncode == 0:
        HAS_OBABEL = True
except (OSError, subprocess.TimeoutExpired):
    # OSError covers FileNotFoundError (executable not on PATH) as well as
    # PermissionError and other launch failures, so an unusable `obabel`
    # can never abort the import of this module.
    pass

# Priority used by the "auto" method: first available backend wins.
BACKEND_ORDER = ["com", "pycdxml", "obabel"]
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # CDXML sanitiser
62
+ # ---------------------------------------------------------------------------
63
+
64
+ import re as _re
65
+
66
+
67
# Bytes that XML 1.0 forbids: every C0 control code except tab, LF and CR.
_XML_ILLEGAL_CONTROL_BYTES = bytes(
    b for b in range(0x20) if b not in (0x09, 0x0A, 0x0D)
)


def sanitise_cdxml(cdxml: str) -> str:
    """Remove content that makes ChemDraw's strict XML parser reject the file.

    Findmolecule embeds internal GUIDs as raw binary bytes inside
    <objecttag Name="Molecule ID" Value="..."/> attributes. These bytes
    include XML-illegal control characters (< 0x09, 0x0B-0x0C, 0x0E-0x1F)
    that cause ChemDraw to report "not well-formed (invalid token)".

    The Molecule ID tags carry no chemistry information, so the whole
    self-closing element is dropped. Any remaining stray control characters
    are removed as well so the result is clean XML 1.0.

    Args:
        cdxml: CDXML document text (possibly containing illegal characters).

    Returns:
        The sanitised document text.
    """
    # Work on UTF-8 bytes so the filtering below is a plain byte operation.
    cdxml_bytes = cdxml.encode("utf-8", errors="replace")

    # 1. Strip every self-closing <objecttag ... Name="Molecule ID" .../>.
    #    The pattern allows attributes before and after Name, so attribute
    #    order does not matter.
    cdxml_bytes = _re.sub(
        rb'<objecttag\s[^>]*Name="Molecule ID"[^>]*/\s*>',
        b"",
        cdxml_bytes,
    )

    # 2. Delete XML-illegal control bytes in one C-level pass. Only bytes
    #    below 0x20 are removed, so multi-byte UTF-8 sequences stay intact.
    cdxml_bytes = cdxml_bytes.translate(None, _XML_ILLEGAL_CONTROL_BYTES)

    return cdxml_bytes.decode("utf-8", errors="replace")
97
+
98
+
99
def sanitise_cdxml_file(path: str) -> None:
    """Rewrite the CDXML file at *path* with its sanitised content.

    The file is read as UTF-8 (undecodable bytes are replaced), passed
    through sanitise_cdxml(), and written back to the same path.
    """
    with open(path, "r", encoding="utf-8", errors="replace") as source:
        original_text = source.read()

    # Compute the cleaned text before reopening, so the file is only
    # truncated once the replacement content is ready.
    sanitised_text = sanitise_cdxml(original_text)

    with open(path, "w", encoding="utf-8") as target:
        target.write(sanitised_text)
106
+
107
+
108
+ # ---------------------------------------------------------------------------
109
+ # COM backend
110
+ # ---------------------------------------------------------------------------
111
+
112
def _get_chemdraw():
    """Get a ChemDraw COM instance, reusing an existing session if available.

    Returns:
        (app, launched): the COM application object, and a flag that is
        True when no running instance could be attached to and a new one
        was created via Dispatch.

    This function does not change the application's visibility; callers
    hide and restore the window themselves.
    """
    try:
        # Prefer attaching to a ChemDraw instance that is already running.
        return win32com.client.GetActiveObject("ChemDraw.Application"), False
    except Exception:
        # Nothing to attach to (or attach failed): start our own instance.
        return win32com.client.Dispatch("ChemDraw.Application"), True
125
+
126
+
127
def _com_convert_file(input_path: str, output_path: str) -> None:
    """Convert a file using ChemDraw COM automation.

    Opens *input_path* in ChemDraw and saves it to *output_path*. The
    application window is hidden for the duration; afterwards a newly
    launched instance is quit and a reused one gets its previous
    visibility back. CDXML output is sanitised in place.

    Args:
        input_path: Path of the file to open.
        output_path: Path passed to ChemDraw's SaveAs.

    Raises:
        Exception: whatever the COM layer raises on open/save failure.
    """
    app, launched = _get_chemdraw()
    was_visible = app.Visible
    app.Visible = False
    try:
        doc = app.Documents.Open(os.path.abspath(input_path))
        try:
            doc.SaveAs(os.path.abspath(output_path))
        finally:
            # Close even when SaveAs fails, so a reused ChemDraw session is
            # not left holding an orphaned open document.
            doc.Close()
    finally:
        if launched:
            app.Quit()
        else:
            app.Visible = was_visible
    if output_path.lower().endswith(".cdxml"):
        sanitise_cdxml_file(output_path)
143
+
144
+
145
def _com_cdx_to_cdxml(cdx_data: bytes) -> str:
    """Convert CDX bytes → CDXML string via COM (uses temp files).

    Args:
        cdx_data: Raw CDX document bytes.

    Returns:
        The CDXML text; it has already been sanitised by
        _com_convert_file().
    """
    with tempfile.NamedTemporaryFile(suffix=".cdx", delete=False) as tmp_in:
        tmp_in.write(cdx_data)
        tmp_in_path = tmp_in.name
    # Swap only the extension; str.replace would also rewrite any ".cdx"
    # occurring earlier in the temp directory path.
    tmp_out_path = os.path.splitext(tmp_in_path)[0] + ".cdxml"
    try:
        _com_convert_file(tmp_in_path, tmp_out_path)
        with open(tmp_out_path, "r", encoding="utf-8") as f:
            return f.read()
    finally:
        # Remove both temp files whether or not the conversion succeeded.
        for p in (tmp_in_path, tmp_out_path):
            if os.path.exists(p):
                os.unlink(p)
159
+
160
+
161
def _com_cdxml_to_cdx(cdxml_data: str) -> bytes:
    """Convert CDXML string → CDX bytes via COM (uses temp files).

    Args:
        cdxml_data: CDXML document text.

    Returns:
        The raw bytes of the CDX file written by ChemDraw.
    """
    with tempfile.NamedTemporaryFile(
        suffix=".cdxml", delete=False, mode="w", encoding="utf-8"
    ) as tmp_in:
        tmp_in.write(cdxml_data)
        tmp_in_path = tmp_in.name
    # Swap only the extension; str.replace would also rewrite any ".cdxml"
    # occurring earlier in the temp directory path.
    tmp_out_path = os.path.splitext(tmp_in_path)[0] + ".cdx"
    try:
        _com_convert_file(tmp_in_path, tmp_out_path)
        with open(tmp_out_path, "rb") as f:
            return f.read()
    finally:
        # Remove both temp files whether or not the conversion succeeded.
        for p in (tmp_in_path, tmp_out_path):
            if os.path.exists(p):
                os.unlink(p)
177
+
178
+ # ---------------------------------------------------------------------------
179
+ # pycdxml backend
180
+ # ---------------------------------------------------------------------------
181
+
182
def _pycdxml_convert_file(input_path: str, output_path: str) -> None:
    """Convert a file with the pycdxml library.

    The direction is chosen from the input extension: ".cdx" is read and
    written out as CDXML text, ".cdxml" is read and written out as CDX.

    Raises:
        ValueError: if the input extension is neither .cdx nor .cdxml.
    """
    suffix = os.path.splitext(input_path)[1].lower()
    if suffix not in (".cdx", ".cdxml"):
        raise ValueError(f"Unsupported input extension: {suffix}")

    if suffix == ".cdx":
        cdxml_text = _pycdxml.read_cdx(input_path).to_cdxml()
        with open(output_path, "w", encoding="utf-8") as out_file:
            out_file.write(cdxml_text)
        return

    # Remaining case: CDXML in, binary CDX out.
    document = _pycdxml.read_cdxml(input_path)
    _pycdxml.write_cdx_file(document, output_path)
195
+
196
+
197
def _pycdxml_cdx_to_cdxml(cdx_data: bytes) -> str:
    """Convert CDX bytes → CDXML string via pycdxml (uses a temp file).

    The bytes are spooled to a temporary .cdx file because the pycdxml
    reader is called with a path; the file is removed afterwards.
    """
    with tempfile.NamedTemporaryFile(suffix=".cdx", delete=False) as spool:
        spool.write(cdx_data)
        spool_path = spool.name
    try:
        return _pycdxml.read_cdx(spool_path).to_cdxml()
    finally:
        if os.path.exists(spool_path):
            os.unlink(spool_path)
208
+
209
+
210
def _pycdxml_cdxml_to_cdx(cdxml_data: str) -> bytes:
    """Convert CDXML string → CDX bytes via pycdxml (uses temp files).

    Args:
        cdxml_data: CDXML document text.

    Returns:
        The raw bytes of the CDX file written by pycdxml.
    """
    with tempfile.NamedTemporaryFile(
        suffix=".cdxml", delete=False, mode="w", encoding="utf-8"
    ) as tmp_in:
        tmp_in.write(cdxml_data)
        tmp_in_path = tmp_in.name
    # Swap only the extension; str.replace would also rewrite any ".cdxml"
    # occurring earlier in the temp directory path.
    tmp_out_path = os.path.splitext(tmp_in_path)[0] + ".cdx"
    try:
        doc = _pycdxml.read_cdxml(tmp_in_path)
        _pycdxml.write_cdx_file(doc, tmp_out_path)
        with open(tmp_out_path, "rb") as f:
            return f.read()
    finally:
        # Remove both temp files whether or not the conversion succeeded.
        for p in (tmp_in_path, tmp_out_path):
            if os.path.exists(p):
                os.unlink(p)
227
+
228
+ # ---------------------------------------------------------------------------
229
+ # Open Babel backend
230
+ # ---------------------------------------------------------------------------
231
+
232
def _obabel_convert_file(input_path: str, output_path: str) -> None:
    """Convert a file by invoking the Open Babel command-line tool.

    Runs ``obabel <input> -O <output>`` with absolute paths and a
    30-second timeout.

    Raises:
        RuntimeError: if obabel exits with a non-zero status; the message
            carries the captured stderr.
    """
    command = [
        "obabel",
        os.path.abspath(input_path),
        "-O",
        os.path.abspath(output_path),
    ]
    completed = subprocess.run(
        command, capture_output=True, text=True, timeout=30
    )
    if completed.returncode:
        raise RuntimeError(f"obabel failed: {completed.stderr}")
240
+
241
+
242
def _obabel_cdx_to_cdxml(cdx_data: bytes) -> str:
    """Convert CDX bytes → CDXML string via obabel (uses temp files).

    Args:
        cdx_data: Raw CDX document bytes.

    Returns:
        The CDXML text produced by Open Babel.
    """
    with tempfile.NamedTemporaryFile(suffix=".cdx", delete=False) as tmp_in:
        tmp_in.write(cdx_data)
        tmp_in_path = tmp_in.name
    # Swap only the extension; str.replace would also rewrite any ".cdx"
    # occurring earlier in the temp directory path.
    tmp_out_path = os.path.splitext(tmp_in_path)[0] + ".cdxml"
    try:
        _obabel_convert_file(tmp_in_path, tmp_out_path)
        with open(tmp_out_path, "r", encoding="utf-8") as f:
            return f.read()
    finally:
        # Remove both temp files whether or not the conversion succeeded.
        for p in (tmp_in_path, tmp_out_path):
            if os.path.exists(p):
                os.unlink(p)
256
+
257
+
258
def _obabel_cdxml_to_cdx(cdxml_data: str) -> bytes:
    """Convert CDXML string → CDX bytes via obabel (uses temp files).

    Args:
        cdxml_data: CDXML document text.

    Returns:
        The raw bytes of the CDX file produced by Open Babel.
    """
    with tempfile.NamedTemporaryFile(
        suffix=".cdxml", delete=False, mode="w", encoding="utf-8"
    ) as tmp_in:
        tmp_in.write(cdxml_data)
        tmp_in_path = tmp_in.name
    # Swap only the extension; str.replace would also rewrite any ".cdxml"
    # occurring earlier in the temp directory path.
    tmp_out_path = os.path.splitext(tmp_in_path)[0] + ".cdx"
    try:
        _obabel_convert_file(tmp_in_path, tmp_out_path)
        with open(tmp_out_path, "rb") as f:
            return f.read()
    finally:
        # Remove both temp files whether or not the conversion succeeded.
        for p in (tmp_in_path, tmp_out_path):
            if os.path.exists(p):
                os.unlink(p)
274
+
275
+ # ---------------------------------------------------------------------------
276
+ # Backend dispatch
277
+ # ---------------------------------------------------------------------------
278
+
279
# Dispatch tables: backend name -> implementation, or None when that backend
# was not detected at import time. Key order follows the backend priority.

# Path-to-path converters: fn(input_path, output_path).
_FILE_CONVERTERS = dict(
    com=_com_convert_file if HAS_COM else None,
    pycdxml=_pycdxml_convert_file if HAS_PYCDXML else None,
    obabel=_obabel_convert_file if HAS_OBABEL else None,
)

# In-memory converters: fn(cdx_bytes) -> cdxml_str.
_CDX_TO_CDXML = dict(
    com=_com_cdx_to_cdxml if HAS_COM else None,
    pycdxml=_pycdxml_cdx_to_cdxml if HAS_PYCDXML else None,
    obabel=_obabel_cdx_to_cdxml if HAS_OBABEL else None,
)

# In-memory converters: fn(cdxml_str) -> cdx_bytes.
_CDXML_TO_CDX = dict(
    com=_com_cdxml_to_cdx if HAS_COM else None,
    pycdxml=_pycdxml_cdxml_to_cdx if HAS_PYCDXML else None,
    obabel=_obabel_cdxml_to_cdx if HAS_OBABEL else None,
)
296
+
297
+
298
def _pick_backend(method: str, dispatch_table: dict):
    """Select a backend function from *dispatch_table*.

    Args:
        method: "auto" to take the first available backend in
            BACKEND_ORDER, or an explicit backend name.
        dispatch_table: Mapping of backend name to callable, with None
            marking an unavailable backend.

    Returns:
        (name, fn): the chosen backend's name and its callable.

    Raises:
        RuntimeError: if no backend is available ("auto"), or the requested
            backend is missing from the table or mapped to None.
    """
    if method == "auto":
        for name in BACKEND_ORDER:
            fn = dispatch_table.get(name)
            if fn is not None:
                return name, fn
        raise RuntimeError(
            "No conversion backend available. "
            "Install ChemDraw (COM), pycdxml, or Open Babel."
        )

    fn = dispatch_table.get(method)
    if fn is None:
        # Use the same "is not None" availability test as the selection
        # above, so the listed names are exactly the selectable ones.
        available = [k for k, v in dispatch_table.items() if v is not None]
        raise RuntimeError(
            f"Backend '{method}' not available. "
            f"Available: {available or 'none'}"
        )
    return method, fn
317
+
318
+ # ---------------------------------------------------------------------------
319
+ # Public API
320
+ # ---------------------------------------------------------------------------
321
+
322
def convert_cdx_to_cdxml(cdx_data: bytes, method: str = "auto") -> str:
    """Convert raw CDX bytes to a CDXML string.

    *method* is "auto" or an explicit backend name; the backend is looked
    up in the CDX→CDXML dispatch table.
    """
    _, backend = _pick_backend(method, _CDX_TO_CDXML)
    return backend(cdx_data)
326
+
327
+
328
def convert_cdxml_to_cdx(cdxml_data: str, method: str = "auto") -> bytes:
    """Convert a CDXML string to raw CDX bytes.

    *method* is "auto" or an explicit backend name; the backend is looked
    up in the CDXML→CDX dispatch table.
    """
    _, backend = _pick_backend(method, _CDXML_TO_CDX)
    return backend(cdxml_data)
332
+
333
+
334
def batch_convert_files(
    input_paths: list, method: str = "auto"
) -> dict:
    """Convert multiple CDX/CDXML files, sharing one COM session if possible.

    Each output is written next to its input with the extension swapped
    (.cdx <-> .cdxml).

    For the COM backend: one GetActiveObject/Dispatch, a loop over all
    files, and one conditional Quit. For non-COM backends: falls back to
    per-file convert_file().

    Args:
        input_paths: Paths of the files to convert.
        method: "auto" or an explicit backend name.

    Returns:
        Dict mapping input_path -> {"output": path, "error": None} on
        success, or {"output": None, "error": message} on failure.
        An empty *input_paths* yields an empty dict.

    Raises:
        RuntimeError: if the requested backend is unavailable. Per-file
            failures are recorded in the result instead of raised.
    """
    results = {}
    if not input_paths:
        return results

    name, _ = _pick_backend(method, _FILE_CONVERTERS)

    if name != "com":
        # Non-COM: fall back to per-file conversion.
        for inp in input_paths:
            try:
                out = convert_file(inp, method=method)
                results[inp] = {"output": out, "error": None}
            except Exception as e:
                results[inp] = {"output": None, "error": str(e)}
        return results

    swapped_ext = {".cdx": ".cdxml", ".cdxml": ".cdx"}
    app, launched = _get_chemdraw()
    was_visible = app.Visible
    app.Visible = False
    try:
        for inp in input_paths:
            stem, in_ext = os.path.splitext(inp)
            in_ext = in_ext.lower()
            out_ext = swapped_ext.get(in_ext)
            if out_ext is None:
                results[inp] = {
                    "output": None,
                    "error": f"Unsupported extension: {in_ext}",
                }
                continue
            out = stem + out_ext
            try:
                doc = app.Documents.Open(os.path.abspath(inp))
                try:
                    doc.SaveAs(os.path.abspath(out))
                finally:
                    # Close even when SaveAs fails; otherwise each failed
                    # file would stay open in the shared session.
                    doc.Close()
                if out_ext == ".cdxml":
                    sanitise_cdxml_file(out)
                results[inp] = {"output": out, "error": None}
            except Exception as e:
                # One bad file must not abort the rest of the batch.
                results[inp] = {"output": None, "error": str(e)}
    finally:
        if launched:
            app.Quit()
        else:
            app.Visible = was_visible

    return results
394
+
395
+
396
def convert_file(
    input_path: str, output_path: Optional[str] = None, method: str = "auto"
) -> str:
    """Convert a file between CDX and CDXML and return the output path.

    When *output_path* is None, the output is the input path with its
    extension swapped (.cdx <-> .cdxml).

    Raises:
        ValueError: if the input extension is neither .cdx nor .cdxml.
        RuntimeError: if no suitable backend is available.
    """
    stem, extension = os.path.splitext(input_path)
    extension = extension.lower()
    opposite = {".cdx": ".cdxml", ".cdxml": ".cdx"}
    if extension not in opposite:
        raise ValueError(f"Unsupported file extension: {extension}. Use .cdx or .cdxml.")

    target = output_path if output_path is not None else stem + opposite[extension]

    _, backend = _pick_backend(method, _FILE_CONVERTERS)
    backend(input_path, target)
    return target
414
+
415
+ # ---------------------------------------------------------------------------
416
+ # CLI
417
+ # ---------------------------------------------------------------------------
418
+
419
def main(argv=None) -> int:
    """Command-line entry point; returns the process exit status.

    Modes, checked in this order: --list-backends, --batch FILE..., then
    single-file conversion of the positional input. Returns 0 on success
    and 1 on any reported error.
    """
    cli = argparse.ArgumentParser(
        description="Convert between ChemDraw CDX (binary) and CDXML (XML) formats."
    )
    cli.add_argument("input", nargs="?", help="Input file (.cdx or .cdxml)")
    cli.add_argument(
        "-o", "--output",
        help="Output file (default: same name with swapped extension)",
    )
    cli.add_argument(
        "--method",
        choices=["auto", "com", "pycdxml", "obabel"],
        default="auto",
        help="Conversion backend (default: auto — tries com, pycdxml, obabel)",
    )
    cli.add_argument(
        "--batch",
        nargs="+",
        metavar="FILE",
        help="Batch-convert multiple files in one COM session",
    )
    cli.add_argument(
        "--list-backends",
        action="store_true",
        help="Show available backends and exit",
    )
    cli.add_argument(
        "--json",
        action="store_true",
        help="Output result as JSON to stdout",
    )
    opts = cli.parse_args(argv)

    # Mode 1: report backend availability.
    if opts.list_backends:
        print("Available backends:")
        for backend in BACKEND_ORDER:
            state = "available" if _FILE_CONVERTERS.get(backend) else "not available"
            print(f" {backend}: {state}")
        return 0

    # Mode 2: batch conversion.
    if opts.batch:
        absent = [path for path in opts.batch if not os.path.isfile(path)]
        if absent:
            for path in absent:
                print(f"Error: file not found: {path}", file=sys.stderr)
            return 1
        try:
            outcomes = batch_convert_files(opts.batch, opts.method)
            backend_name, _ = _pick_backend(opts.method, _FILE_CONVERTERS)
            if opts.json:
                report = []
                for source, info in outcomes.items():
                    record = {"input": os.path.abspath(source), "method": backend_name}
                    if info["error"]:
                        record["error"] = info["error"]
                    else:
                        record["output"] = os.path.abspath(info["output"])
                    report.append(record)
                print(json.dumps(report, indent=2))
            else:
                ok = sum(1 for info in outcomes.values() if info["error"] is None)
                fail = len(outcomes) - ok
                for source, info in outcomes.items():
                    if info["error"]:
                        print(f" FAIL: {source} — {info['error']}")
                    else:
                        size = os.path.getsize(info["output"])
                        print(f" OK: {source} -> {info['output']} ({size:,} bytes)")
                print(f"Batch: {ok} converted, {fail} failed [backend: {backend_name}]")
            # Any per-file error turns the whole batch into a failure.
            return 1 if any(info["error"] for info in outcomes.values()) else 0
        except Exception as exc:
            print(f"Error: {exc}", file=sys.stderr)
            return 1

    # Mode 3: single-file conversion.
    if not opts.input:
        cli.error("the following arguments are required: input (or --batch)")

    if not os.path.isfile(opts.input):
        print(f"Error: file not found: {opts.input}", file=sys.stderr)
        return 1

    try:
        out = convert_file(opts.input, opts.output, opts.method)
        backend_name, _ = _pick_backend(opts.method, _FILE_CONVERTERS)
        if opts.json:
            summary = {
                "input": os.path.abspath(opts.input),
                "output": os.path.abspath(out),
                "input_format": os.path.splitext(opts.input)[1].lower().lstrip("."),
                "output_format": os.path.splitext(out)[1].lower().lstrip("."),
                "method": backend_name,
            }
            print(json.dumps(summary, indent=2))
        else:
            size = os.path.getsize(out)
            print(f"Converted: {opts.input} -> {out} ({size:,} bytes) [backend: {backend_name}]")
        return 0
    except Exception as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 1
524
+
525
+
526
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())