cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. cdxml_toolkit/__init__.py +18 -0
  2. cdxml_toolkit/_jre/__init__.py +2 -0
  3. cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
  4. cdxml_toolkit/analysis/__init__.py +35 -0
  5. cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
  6. cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
  7. cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
  8. cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
  9. cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
  10. cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
  11. cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
  12. cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
  13. cdxml_toolkit/analysis/extract_nmr.py +47 -0
  14. cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
  15. cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
  16. cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
  17. cdxml_toolkit/cdxml_builder.py +920 -0
  18. cdxml_toolkit/cdxml_utils.py +342 -0
  19. cdxml_toolkit/chemdraw/__init__.py +5 -0
  20. cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
  21. cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
  22. cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
  23. cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
  24. cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
  25. cdxml_toolkit/constants.py +304 -0
  26. cdxml_toolkit/coord_normalizer.py +438 -0
  27. cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
  28. cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
  29. cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
  30. cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
  31. cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
  32. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
  33. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
  34. cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
  35. cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
  36. cdxml_toolkit/image/__init__.py +15 -0
  37. cdxml_toolkit/image/reaction_from_image.py +2103 -0
  38. cdxml_toolkit/image/structure_from_image.py +1711 -0
  39. cdxml_toolkit/layout/__init__.py +5 -0
  40. cdxml_toolkit/layout/alignment.py +1642 -0
  41. cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
  42. cdxml_toolkit/layout/scheme_merger.py +2260 -0
  43. cdxml_toolkit/mcp_server/__init__.py +0 -0
  44. cdxml_toolkit/mcp_server/__main__.py +5 -0
  45. cdxml_toolkit/mcp_server/server.py +1567 -0
  46. cdxml_toolkit/naming/__init__.py +6 -0
  47. cdxml_toolkit/naming/aligned_namer.py +2342 -0
  48. cdxml_toolkit/naming/mol_builder.py +3722 -0
  49. cdxml_toolkit/naming/name_decomposer.py +2843 -0
  50. cdxml_toolkit/naming/reactions_datamol.json +2414 -0
  51. cdxml_toolkit/office/__init__.py +5 -0
  52. cdxml_toolkit/office/doc_from_template.py +722 -0
  53. cdxml_toolkit/office/ole_embedder.py +808 -0
  54. cdxml_toolkit/office/ole_extractor.py +272 -0
  55. cdxml_toolkit/perception/__init__.py +10 -0
  56. cdxml_toolkit/perception/compound_search.py +229 -0
  57. cdxml_toolkit/perception/eln_csv_parser.py +240 -0
  58. cdxml_toolkit/perception/rdf_parser.py +664 -0
  59. cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
  60. cdxml_toolkit/perception/reaction_parser.py +2150 -0
  61. cdxml_toolkit/perception/scheme_reader.py +2948 -0
  62. cdxml_toolkit/perception/scheme_refine.py +1404 -0
  63. cdxml_toolkit/perception/scheme_segmenter.py +619 -0
  64. cdxml_toolkit/perception/spatial_assignment.py +1013 -0
  65. cdxml_toolkit/rdkit_utils.py +605 -0
  66. cdxml_toolkit/render/__init__.py +17 -0
  67. cdxml_toolkit/render/auto_layout.py +229 -0
  68. cdxml_toolkit/render/compact_parser.py +632 -0
  69. cdxml_toolkit/render/parser.py +706 -0
  70. cdxml_toolkit/render/render_scheme.py +267 -0
  71. cdxml_toolkit/render/renderer.py +2387 -0
  72. cdxml_toolkit/render/schema.py +90 -0
  73. cdxml_toolkit/render/scheme_maker.py +1043 -0
  74. cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
  75. cdxml_toolkit/resolve/__init__.py +13 -0
  76. cdxml_toolkit/resolve/cas_resolver.py +430 -0
  77. cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
  78. cdxml_toolkit/resolve/condensed_formula.py +493 -0
  79. cdxml_toolkit/resolve/jre_manager.py +195 -0
  80. cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
  81. cdxml_toolkit/resolve/reagent_db.py +285 -0
  82. cdxml_toolkit/resolve/superatom_data.json +2856 -0
  83. cdxml_toolkit/resolve/superatom_table.py +146 -0
  84. cdxml_toolkit/text_formatting.py +298 -0
  85. cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
  86. cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
  87. cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
  88. cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
  89. cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
  90. cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
  91. cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,262 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ cdxml_to_image.py — Render a CDXML file to PNG or SVG via ChemDraw COM.
4
+
5
+ Requires ChemDraw to be installed (ChemDraw Professional 16+).
6
+ ChemDraw does NOT need to be open — a running session is reused if one exists;
7
+ otherwise it is launched as a hidden background process and closed after export.
8
+
9
+ Usage
10
+ -----
11
+ python cdxml_to_image.py input.cdxml # PNG alongside input file
12
+ python cdxml_to_image.py input.cdxml -o out.png # explicit output path
13
+ python cdxml_to_image.py input.cdxml -o out.svg # SVG output
14
+ python cdxml_to_image.py input.cdxml --dpi 150 # lower resolution PNG
15
+ python cdxml_to_image.py --batch f1.cdxml f2.cdxml # batch render (one COM session)
16
+ """
17
+
18
+ import argparse
19
+ import json
20
+ import sys
21
+ from pathlib import Path
22
+ from typing import Optional
23
+
24
+
25
+ # ---------------------------------------------------------------------------
26
+ # ChemDraw COM helpers
27
+ # ---------------------------------------------------------------------------
28
+
29
def _get_chemdraw():
    """Return a ChemDraw COM application object, preferring a running session.

    An attempt is made to attach to an already-active ChemDraw instance; if
    that fails for any reason a new instance is created through COM.

    Returns (app, launched) where launched is False when an existing session
    was reused and True when this call had to start a new instance.
    """
    import win32com.client as com_client

    prog_id = "ChemDraw.Application"
    try:
        existing = com_client.GetActiveObject(prog_id)
    except Exception:
        # Nothing to attach to — start an instance of our own.
        return com_client.Dispatch(prog_id), True
    return existing, False
42
+
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # ChemDraw COM backend
46
+ # ---------------------------------------------------------------------------
47
+
48
def cdxml_to_image(
    cdxml_path: str,
    output_path: Optional[str] = None,
    png_dpi: int = 300,
) -> str:
    """
    Render a CDXML file to PNG or SVG using ChemDraw via COM automation.

    ChemDraw infers the output format from the file extension (.png, .svg,
    .emf, .cdxml, …).  TransparentPNGs is forced off so the background is
    solid white rather than a transparent checkerboard.  The two ChemDraw
    preferences touched here (TransparentPNGs, PNGResolution) are left at
    the values set by this call.

    The ChemDraw window is hidden for the duration of the export.  If this
    call had to launch ChemDraw it is quit afterwards; if an existing
    session was reused its previous visibility is restored instead.

    Parameters
    ----------
    cdxml_path  : path to the source .cdxml file
    output_path : destination file; if None, derived from cdxml_path as .png
    png_dpi     : resolution for PNG export (default 300 dpi)

    Returns
    -------
    Absolute path to the written output file.

    Raises
    ------
    FileNotFoundError : cdxml_path does not exist
    Exception         : any error raised by the COM calls is propagated
    """
    src = Path(cdxml_path)
    if not src.exists():
        raise FileNotFoundError(f"CDXML file not found: {cdxml_path}")

    if output_path is None:
        output_path = str(src.with_suffix(".png"))

    # COM receives plain strings, so hand it absolute paths.
    cdxml_abs = str(src.resolve())
    out_abs = str(Path(output_path).resolve())

    app, launched = _get_chemdraw()
    was_visible = None
    doc = None
    try:
        # Everything after acquiring the app lives inside the try so that a
        # failure here still quits an instance we launched ourselves.
        was_visible = app.Visible
        app.Visible = False

        prefs = app.Preferences
        prefs.TransparentPNGs = False   # solid white background
        prefs.PNGResolution = png_dpi

        doc = app.Documents.Open(cdxml_abs)
        doc.SaveAs(out_abs)

        return out_abs

    finally:
        # Cleanup is best-effort: it must never hide the real export error.
        try:
            if doc is not None:
                doc.Close(False)
        except Exception:
            pass
        if launched:
            try:
                app.Quit()
            except Exception:
                pass
        elif was_visible is not None:
            try:
                app.Visible = was_visible
            except Exception:
                pass
107
+
108
+
109
def batch_render(
    cdxml_paths: list,
    png_dpi: int = 300,
) -> dict:
    """Render multiple CDXML files to PNG in a single COM session.

    Each input is written next to itself with a .png suffix.  A failure on
    one file is recorded and does not stop the remaining files.

    Parameters
    ----------
    cdxml_paths : input .cdxml paths; an empty sequence returns {} without
                  touching ChemDraw
    png_dpi     : resolution for PNG export (default 300 dpi)

    Returns
    -------
    dict mapping input_path -> {"output": path, "error": None} on success,
    or {"output": None, "error": message} on failure.
    """
    results = {}
    if not cdxml_paths:
        return results

    app, launched = _get_chemdraw()
    was_visible = None
    try:
        # Kept inside the try so a failure here still quits an instance
        # that this call launched.
        was_visible = app.Visible
        app.Visible = False

        prefs = app.Preferences
        prefs.TransparentPNGs = False
        prefs.PNGResolution = png_dpi

        for cdxml_path in cdxml_paths:
            src = Path(cdxml_path)
            cdxml_abs = str(src.resolve())
            out_abs = str(Path(str(src.with_suffix(".png"))).resolve())
            doc = None
            try:
                doc = app.Documents.Open(cdxml_abs)
                doc.SaveAs(out_abs)
                results[cdxml_path] = {"output": out_abs, "error": None}
            except Exception as e:
                results[cdxml_path] = {"output": None, "error": str(e)}
            finally:
                # Always close the document, including after a failed
                # export, so it does not stay open in the shared session.
                if doc is not None:
                    try:
                        doc.Close(False)
                    except Exception:
                        pass
    finally:
        if launched:
            try:
                app.Quit()
            except Exception:
                pass
        elif was_visible is not None:
            try:
                app.Visible = was_visible
            except Exception:
                pass

    return results
152
+
153
+
154
+ # ---------------------------------------------------------------------------
155
+ # CLI
156
+ # ---------------------------------------------------------------------------
157
+
158
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the ChemDraw renderer.

    The positional input is optional because --batch can supply the files
    instead; the module docstring is shown verbatim as the help epilog.
    """
    cli = argparse.ArgumentParser(
        epilog=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Render a CDXML file to PNG or SVG using ChemDraw.",
    )

    # (flags, keyword settings) in the order they should appear in --help.
    argument_table = (
        (("input",), dict(nargs="?", help="Input CDXML file")),
        (
            ("--output", "-o"),
            dict(
                default=None,
                help="Output file path (default: <input>.png). "
                     "Extension determines format: .png or .svg",
            ),
        ),
        (
            ("--dpi",),
            dict(type=int, default=300,
                 help="PNG resolution in DPI (default: 300)"),
        ),
        (
            ("--batch",),
            dict(nargs="+", metavar="FILE",
                 help="Batch-render multiple CDXML files in one COM session"),
        ),
        (
            ("--json",),
            dict(action="store_true",
                 help="Output result as JSON to stdout"),
        ),
    )
    for flags, settings in argument_table:
        cli.add_argument(*flags, **settings)

    return cli
192
+
193
+
194
def main(argv: Optional[list] = None) -> int:
    """Command-line entry point for the ChemDraw renderer.

    With --batch, every listed file is rendered in one COM session and a
    per-file report (plain text or JSON) is printed.  Otherwise the single
    positional input is rendered and the output path (or a JSON record with
    format and, when Pillow can read the file, pixel size) is printed.

    Returns 0 on success and 1 when any input is missing or any render
    fails.  A missing input argument is reported through the parser's own
    usage-error mechanism.
    """
    cli = _build_arg_parser()
    opts = cli.parse_args(argv)

    # ---- batch mode -------------------------------------------------------
    if opts.batch:
        absent = [name for name in opts.batch if not Path(name).exists()]
        for name in absent:
            print(f"Error: file not found: {name}", file=sys.stderr)
        if absent:
            return 1

        outcomes = batch_render(opts.batch, png_dpi=opts.dpi)

        if opts.json:
            payload = []
            for source, status in outcomes.items():
                record = {"input": str(Path(source).resolve())}
                key = "error" if status["error"] else "output"
                record[key] = status[key]
                payload.append(record)
            print(json.dumps(payload, indent=2))
        else:
            rendered = len(
                [s for s in outcomes.values() if s["error"] is None]
            )
            failed = len(outcomes) - rendered
            for source, status in outcomes.items():
                if not status["error"]:
                    print(f" OK: {source} -> {status['output']}")
                else:
                    print(f" FAIL: {source} — {status['error']}")
            print(f"Batch: {rendered} rendered, {failed} failed")

        for status in outcomes.values():
            if status["error"]:
                return 1
        return 0

    # ---- single-file mode -------------------------------------------------
    if not opts.input:
        cli.error("the following arguments are required: input (or --batch)")

    try:
        written = cdxml_to_image(
            opts.input,
            output_path=opts.output,
            png_dpi=opts.dpi,
        )
        if not opts.json:
            print(written)
            return 0

        extension = Path(written).suffix.lstrip(".").lower()
        # Pixel dimensions are optional extras: any problem (Pillow missing,
        # unreadable format such as SVG) simply leaves them as None.
        try:
            from PIL import Image
            with Image.open(written) as picture:
                width, height = picture.size
        except Exception:
            width, height = None, None

        print(json.dumps(
            {
                "input": str(Path(opts.input).resolve()),
                "output": written,
                "format": extension,
                "width": width,
                "height": height,
            },
            indent=2,
        ))
        return 0
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1
259
+
260
+
261
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
@@ -0,0 +1,296 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ cdxml_to_image_rdkit.py — BACKUP renderer for CDXML → PNG/SVG using RDKit.
4
+
5
+ ⚠️ USE cdxml_to_image.py (ChemDraw COM) INSTEAD WHENEVER POSSIBLE. ⚠️
6
+
7
+ This script exists only as a fallback for environments where ChemDraw is not
8
+ installed (e.g. a remote server, CI, or a colleague's machine).
9
+
10
+ Known limitations vs ChemDraw COM
11
+ ----------------------------------
12
+ - Single molecules only — reaction schemes with arrows are NOT supported.
13
+ - Only the atom positions are carried over from the CDXML; the drawing itself
14
+ is produced by RDKit and will not look like ChemDraw's own rendering.
15
+ - Aromatic systems are re-perceived by RDKit, which may differ from the
16
+ Kekulé form stored in the CDXML.
17
+ - Superatom / nickname nodes (R-groups, OTs, Boc, etc.) are not rendered;
18
+ the script will abort with an error if any are present.
19
+ - Stereo wedges are not transferred from the CDXML.
20
+ - No reaction conditions text, no yield labels, no compound numbering.
21
+ - PNG output needs either an RDKit build with Cairo support or the cairosvg
22
+ package; if neither is available an SVG file is written instead.
23
+
24
+ Usage
25
+ -----
26
+ python cdxml_to_image_rdkit.py input.cdxml
27
+ python cdxml_to_image_rdkit.py input.cdxml -o out.svg
28
+ python cdxml_to_image_rdkit.py input.cdxml -o out.png
29
+ """
30
+
31
+ import argparse
32
+ import re
33
+ import sys
34
+ import xml.etree.ElementTree as ET
35
+ from pathlib import Path
36
+ from typing import Dict, List, Optional, Tuple
37
+
38
+
39
+ # ---------------------------------------------------------------------------
40
+ # CDXML → atom/bond parser (minimal — just what RDKit needs)
41
+ # ---------------------------------------------------------------------------
42
+
43
# Atomic number → element symbol for the elements this fallback renderer
# recognises; any other atomic number is reported as "?" by the parser.
ELEMENT_SYMBOLS: Dict[int, str] = {
    1: "H",
    5: "B",
    6: "C",
    7: "N",
    8: "O",
    9: "F",
    14: "Si",
    15: "P",
    16: "S",
    17: "Cl",
    34: "Se",
    35: "Br",
    53: "I",
}

# Text of a bond's "Order" attribute → integer bond code.  Code 4 is the
# aromatic bond type in the renderer; an empty attribute counts as single.
_BOND_ORDER_MAP = {
    "1": 1,
    "2": 2,
    "3": 3,
    "1.5": 4,
    "": 1,
}
50
+
51
+
52
def _parse_cdxml(path: str):
    """
    Parse a CDXML file and return (atoms, bonds).

    atoms : dict id → {"symbol": str, "x": float, "y": float, "charge": int}
    bonds : list of {"b": int, "e": int, "order": int}

    "charge" is the node's formal charge taken from its ``Charge``
    attribute (0 when the attribute is absent or not an integer).  Nodes
    without an ``Element`` attribute are treated as carbon; atomic numbers
    missing from ELEMENT_SYMBOLS become "?".  Coordinates are returned
    exactly as stored in the file.

    Raises ValueError if reaction arrows or unsupported node types are found.
    """
    with open(path, "rb") as fh:
        raw_bytes = fh.read()

    # Remove <objecttag> elements (self-closing form first, then the paired
    # form) before the document reaches the XML parser.
    raw_bytes = re.sub(rb'<objecttag\b[^/]*/>', b'', raw_bytes)
    raw_bytes = re.sub(rb'<objecttag\b.*?</objecttag>', b'', raw_bytes,
                       flags=re.DOTALL)
    # latin-1 maps every byte to a character, so this decode cannot fail.
    raw = raw_bytes.decode("latin-1", errors="replace")
    # Strip control characters (and U+FFFE / U+FFFF) from the text.
    raw = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f\ufffe\uffff]', '', raw)
    # Drop the DOCTYPE declaration; it is not needed for parsing.
    raw = re.sub(r'<!DOCTYPE[^>]*>', '', raw)

    root = ET.fromstring(raw)

    atoms: Dict[int, dict] = {}
    bonds: List[dict] = []
    has_arrows = False

    def walk(elem):
        nonlocal has_arrows
        tag = elem.tag

        if tag == "arrow":
            has_arrows = True

        elif tag == "n":
            aid = int(elem.get("id", "0"))
            px, py = elem.get("p", "0 0").split()[:2]

            el_num = elem.get("Element")
            symbol = ELEMENT_SYMBOLS.get(int(el_num), "?") if el_num else "C"

            node_type = elem.get("NodeType", "")
            if node_type in ("Fragment", "Nickname", "Unspecified"):
                raise ValueError(
                    f"Superatom / nickname node (id={aid}) found — "
                    "RDKit cannot render abbreviated groups. "
                    "Use cdxml_to_image.py (ChemDraw COM) instead."
                )

            # Keep the formal charge so charged atoms are not silently
            # treated as neutral; a malformed value falls back to 0 rather
            # than rejecting a file that parsed before.
            charge_text = elem.get("Charge")
            try:
                charge = int(charge_text) if charge_text else 0
            except ValueError:
                charge = 0

            atoms[aid] = {
                "symbol": symbol,
                "x": float(px),
                "y": float(py),
                "charge": charge,
            }

        elif tag == "b":
            order_str = elem.get("Order", "1")
            bonds.append({
                "b": int(elem.get("B", "0")),
                "e": int(elem.get("E", "0")),
                "order": _BOND_ORDER_MAP.get(order_str, 1),
            })

        for child in elem:
            # Direct <n>/<t> children of a node (e.g. its text label) are
            # not descended into.
            if tag == "n" and child.tag in ("n", "t"):
                continue
            walk(child)

    walk(root)

    # Checked after the walk, so node-type errors take precedence.
    if has_arrows:
        raise ValueError(
            "Reaction scheme detected (arrow elements present). "
            "RDKit cannot render multi-fragment reaction schemes. "
            "Use cdxml_to_image.py (ChemDraw COM) instead."
        )

    return atoms, bonds
124
+
125
+
126
+ # ---------------------------------------------------------------------------
127
+ # RDKit renderer
128
+ # ---------------------------------------------------------------------------
129
+
130
def _render(atoms: dict, bonds: list, output_path: str,
            width: int, height: int) -> str:
    """
    Build an RDKit molecule from parsed CDXML atoms/bonds and draw it.

    Parameters
    ----------
    atoms       : dict id → {"symbol", "x", "y"}; an optional integer
                  "charge" entry is applied as the atom's formal charge
    bonds       : list of {"b", "e", "order"} where b/e are atom ids and
                  order is 1, 2, 3 or 4 (aromatic)
    output_path : destination file; a ".svg" suffix selects SVG output,
                  any other suffix is treated as a PNG request
    width, height : canvas size in pixels

    Returns
    -------
    Path of the file actually written.  For a PNG request with no PNG
    backend available this is the same path with a ".svg" suffix.

    Raises
    ------
    RuntimeError : RDKit cannot be imported
    ValueError   : unrecognised element symbol, a bond that references an
                   unknown atom id, or RDKit sanitization failure
    """
    try:
        from rdkit import Chem
        from rdkit.Chem.Draw import rdMolDraw2D
    except ImportError as exc:
        raise RuntimeError(
            "RDKit is not installed in this environment."
        ) from exc

    rw = Chem.RWMol()
    atom_idx: Dict[int, int] = {}

    bond_type_map = {
        1: Chem.BondType.SINGLE,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE,
        4: Chem.BondType.AROMATIC,
    }

    for aid, atom in atoms.items():
        sym = atom["symbol"]
        try:
            rd_atom = Chem.Atom(sym)
        except Exception as exc:
            raise ValueError(
                f"RDKit does not recognise element '{sym}'."
            ) from exc
        # Without the formal charge a charged atom is drawn as neutral
        # (extra implicit H) or fails valence checks during sanitization.
        charge = atom.get("charge", 0)
        if charge:
            rd_atom.SetFormalCharge(int(charge))
        atom_idx[aid] = rw.AddAtom(rd_atom)

    for bond in bonds:
        b = atom_idx.get(bond["b"])
        e = atom_idx.get(bond["e"])
        if b is None or e is None:
            raise ValueError(f"Bond references unknown atom id: {bond}")
        rw.AddBond(b, e, bond_type_map.get(bond["order"], Chem.BondType.SINGLE))

    mol = rw.GetMol()
    try:
        Chem.SanitizeMol(mol)
    except Exception as exc:
        raise ValueError(f"RDKit sanitization failed: {exc}") from exc

    # Attach the original 2D coordinates from the CDXML.  CDXML measures y
    # downwards from the top of the page while RDKit depiction coordinates
    # have y pointing up, so y is negated to keep the drawing the same way
    # up as in ChemDraw.
    conf = Chem.Conformer(mol.GetNumAtoms())
    for aid, rd_idx in atom_idx.items():
        conf.SetAtomPosition(
            rd_idx, (atoms[aid]["x"], -atoms[aid]["y"], 0.0)
        )
    mol.AddConformer(conf, assignId=True)

    def _draw(drawer):
        """Draw mol with the shared options and return the drawing data."""
        drawer.drawOptions().addStereoAnnotation = False
        drawer.DrawMolecule(mol)
        drawer.FinishDrawing()
        return drawer.GetDrawingText()

    out = Path(output_path)
    ext = out.suffix.lower()

    if ext == ".svg":
        out.write_text(
            _draw(rdMolDraw2D.MolDraw2DSVG(width, height)), encoding="utf-8"
        )
        return str(out)

    # PNG — try RDKit's Cairo drawer, then cairosvg, then fall back to SVG.
    if hasattr(rdMolDraw2D, "MolDraw2DCairo"):
        out.write_bytes(_draw(rdMolDraw2D.MolDraw2DCairo(width, height)))
        return str(out)

    svg_text = _draw(rdMolDraw2D.MolDraw2DSVG(width, height))

    # Importing cairosvg raises OSError (not ImportError) when the package
    # is installed but the native cairo library cannot be loaded; both
    # cases mean "no PNG backend" and must reach the SVG fallback.
    try:
        import cairosvg
    except (ImportError, OSError):
        cairosvg = None
    if cairosvg is not None:
        cairosvg.svg2png(bytestring=svg_text.encode(), write_to=str(out))
        return str(out)

    # Final fallback: write SVG and warn
    svg_out = out.with_suffix(".svg")
    svg_out.write_text(svg_text, encoding="utf-8")
    print(
        f"[warning] No PNG renderer available (RDKit Cairo / cairosvg not installed).\n"
        f" Saved as SVG instead: {svg_out}\n"
        f" Use cdxml_to_image.py (ChemDraw COM) for proper PNG output.",
        file=sys.stderr,
    )
    return str(svg_out)
219
+
220
+
221
+ # ---------------------------------------------------------------------------
222
+ # Public entry point
223
+ # ---------------------------------------------------------------------------
224
+
225
def cdxml_to_image_rdkit(
    cdxml_path: str,
    output_path: Optional[str] = None,
    width: int = 600,
    height: int = 400,
) -> str:
    """
    Render a single-molecule CDXML to PNG or SVG using RDKit.

    PREFER cdxml_to_image.py (ChemDraw COM) over this function.
    See module docstring for limitations.

    Parameters
    ----------
    cdxml_path  : path to the source .cdxml file
    output_path : destination file; defaults to cdxml_path with a .png suffix
    width, height : canvas size in pixels

    Returns
    -------
    Path of the file written, as reported by the renderer.

    Raises
    ------
    FileNotFoundError : cdxml_path does not exist
    ValueError        : the file contains no atoms (parser and renderer
                        may raise ValueError for other problems too)
    """
    source = Path(cdxml_path)
    if not source.exists():
        raise FileNotFoundError(f"CDXML file not found: {cdxml_path}")

    target = (
        output_path
        if output_path is not None
        else str(source.with_suffix(".png"))
    )

    atoms, bonds = _parse_cdxml(str(source))
    if len(atoms) == 0:
        raise ValueError("No atoms found in CDXML.")

    return _render(atoms, bonds, target, width, height)
250
+
251
+
252
+ # ---------------------------------------------------------------------------
253
+ # CLI
254
+ # ---------------------------------------------------------------------------
255
+
256
def main(argv: Optional[list] = None) -> int:
    """Command-line entry point for the RDKit backup renderer.

    Parses the arguments, prints a backup-renderer notice to stderr, renders
    the input and prints the path of the written file to stdout.

    Returns 0 on success, or 1 after printing ``ERROR: …`` to stderr when
    anything in the render step raises.
    """
    cli = argparse.ArgumentParser(
        epilog=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "⚠ BACKUP ONLY — render CDXML to PNG/SVG using RDKit.\n"
            " Use cdxml_to_image.py (ChemDraw COM) whenever possible."
        ),
    )
    cli.add_argument("input", help="Input CDXML file (single molecule only)")
    cli.add_argument(
        "--output", "-o",
        default=None,
        help="Output file (default: <input>.png). Extension sets format.",
    )
    # The two canvas options differ only in name and default size.
    for dimension, pixels in (("width", 600), ("height", 400)):
        cli.add_argument(
            f"--{dimension}",
            type=int,
            default=pixels,
            help=f"Canvas {dimension} px (default {pixels})",
        )
    opts = cli.parse_args(argv)

    notice = (
        "⚠ cdxml_to_image_rdkit.py: backup renderer — output quality is limited.\n"
        " Use cdxml_to_image.py (ChemDraw COM) for production use."
    )
    print(notice, file=sys.stderr)

    try:
        written = cdxml_to_image_rdkit(
            opts.input,
            output_path=opts.output,
            width=opts.width,
            height=opts.height,
        )
        print(written)
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1
    return 0
293
+
294
+
295
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())