cdxml-toolkit 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cdxml_toolkit/__init__.py +18 -0
- cdxml_toolkit/_jre/__init__.py +2 -0
- cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
- cdxml_toolkit/analysis/__init__.py +35 -0
- cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
- cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
- cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
- cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
- cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
- cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
- cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
- cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
- cdxml_toolkit/analysis/extract_nmr.py +47 -0
- cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
- cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
- cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
- cdxml_toolkit/cdxml_builder.py +920 -0
- cdxml_toolkit/cdxml_utils.py +342 -0
- cdxml_toolkit/chemdraw/__init__.py +5 -0
- cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
- cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
- cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
- cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
- cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
- cdxml_toolkit/constants.py +304 -0
- cdxml_toolkit/coord_normalizer.py +438 -0
- cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
- cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
- cdxml_toolkit/image/__init__.py +15 -0
- cdxml_toolkit/image/reaction_from_image.py +2103 -0
- cdxml_toolkit/image/structure_from_image.py +1711 -0
- cdxml_toolkit/layout/__init__.py +5 -0
- cdxml_toolkit/layout/alignment.py +1642 -0
- cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
- cdxml_toolkit/layout/scheme_merger.py +2260 -0
- cdxml_toolkit/mcp_server/__init__.py +0 -0
- cdxml_toolkit/mcp_server/__main__.py +5 -0
- cdxml_toolkit/mcp_server/server.py +1567 -0
- cdxml_toolkit/naming/__init__.py +6 -0
- cdxml_toolkit/naming/aligned_namer.py +2342 -0
- cdxml_toolkit/naming/mol_builder.py +3722 -0
- cdxml_toolkit/naming/name_decomposer.py +2843 -0
- cdxml_toolkit/naming/reactions_datamol.json +2414 -0
- cdxml_toolkit/office/__init__.py +5 -0
- cdxml_toolkit/office/doc_from_template.py +722 -0
- cdxml_toolkit/office/ole_embedder.py +808 -0
- cdxml_toolkit/office/ole_extractor.py +272 -0
- cdxml_toolkit/perception/__init__.py +10 -0
- cdxml_toolkit/perception/compound_search.py +229 -0
- cdxml_toolkit/perception/eln_csv_parser.py +240 -0
- cdxml_toolkit/perception/rdf_parser.py +664 -0
- cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
- cdxml_toolkit/perception/reaction_parser.py +2150 -0
- cdxml_toolkit/perception/scheme_reader.py +2948 -0
- cdxml_toolkit/perception/scheme_refine.py +1404 -0
- cdxml_toolkit/perception/scheme_segmenter.py +619 -0
- cdxml_toolkit/perception/spatial_assignment.py +1013 -0
- cdxml_toolkit/rdkit_utils.py +605 -0
- cdxml_toolkit/render/__init__.py +17 -0
- cdxml_toolkit/render/auto_layout.py +229 -0
- cdxml_toolkit/render/compact_parser.py +632 -0
- cdxml_toolkit/render/parser.py +706 -0
- cdxml_toolkit/render/render_scheme.py +267 -0
- cdxml_toolkit/render/renderer.py +2387 -0
- cdxml_toolkit/render/schema.py +90 -0
- cdxml_toolkit/render/scheme_maker.py +1043 -0
- cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
- cdxml_toolkit/resolve/__init__.py +13 -0
- cdxml_toolkit/resolve/cas_resolver.py +430 -0
- cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
- cdxml_toolkit/resolve/condensed_formula.py +493 -0
- cdxml_toolkit/resolve/jre_manager.py +195 -0
- cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
- cdxml_toolkit/resolve/reagent_db.py +285 -0
- cdxml_toolkit/resolve/superatom_data.json +2856 -0
- cdxml_toolkit/resolve/superatom_table.py +146 -0
- cdxml_toolkit/text_formatting.py +298 -0
- cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
- cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
- cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
- cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
- cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,920 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
cdxml_builder.py — Build valid ChemDraw 16 CDXML from structured atom/bond data.
|
|
4
|
+
|
|
5
|
+
Produces CDXML that opens correctly in ChemDraw 16 using ACS Document 1996 style:
|
|
6
|
+
BondLength=14.40, ChainAngle=120, Arial 10 pt captions / 9 pt labels.
|
|
7
|
+
|
|
8
|
+
Modes
|
|
9
|
+
-----
|
|
10
|
+
Single molecule
|
|
11
|
+
python cdxml_builder.py --input molecule.json --output molecule.cdxml
|
|
12
|
+
|
|
13
|
+
Reaction scheme
|
|
14
|
+
python cdxml_builder.py --input reaction.json --mode reaction --output scheme.cdxml
|
|
15
|
+
|
|
16
|
+
Input JSON — single molecule
|
|
17
|
+
{
|
|
18
|
+
"atoms": [
|
|
19
|
+
{"index": 1, "symbol": "C", "x": 150.0, "y": 300.0},
|
|
20
|
+
{"index": 2, "symbol": "N", "x": 164.4, "y": 308.2, "num_hydrogens": 0},
|
|
21
|
+
...
|
|
22
|
+
],
|
|
23
|
+
"bonds": [
|
|
24
|
+
{"index": 1, "order": 1, "atom1": 1, "atom2": 2},
|
|
25
|
+
{"index": 2, "order": 2, "atom1": 2, "atom2": 3},
|
|
26
|
+
...
|
|
27
|
+
]
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
Input JSON — reaction
|
|
31
|
+
{
|
|
32
|
+
"reactants": [ <molecule>, ... ],
|
|
33
|
+
"products": [ <molecule>, ... ],
|
|
34
|
+
"conditions": {
|
|
35
|
+
"above": ["Pd2dba3 (5 mol%)", "BINAP (10 mol%)"],
|
|
36
|
+
"below": ["Cs2CO3 (2 eq.)", "dioxane", "100 °C, 24 h"]
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
Coordinates must already be in CDXML points (run coord_normalizer.py first).
|
|
41
|
+
|
|
42
|
+
Atom dict keys
|
|
43
|
+
index int atom number (1-based, must be unique)
|
|
44
|
+
symbol str element symbol ("C", "N", "Br", …)
|
|
45
|
+
x, y float position in CDXML points
|
|
46
|
+
num_hydrogens int explicit H count (omit or None for C to get implicit)
|
|
47
|
+
cfg int stereo flag (1=wedge up, 6=wedge down, 4=either)
|
|
48
|
+
charge int formal charge (0 = omit)
|
|
49
|
+
|
|
50
|
+
Bond dict keys
|
|
51
|
+
index int bond number (1-based, must be unique)
|
|
52
|
+
order int 1=single, 2=double, 3=triple, 4=aromatic
|
|
53
|
+
atom1, atom2 int atom indices
|
|
54
|
+
cfg int stereo: 1=up, 4=either, 6=down (wedge/dash)
|
|
55
|
+
double_pos str "Right" | "Left" (for double bonds in rings)
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
import argparse
|
|
59
|
+
import json
|
|
60
|
+
import math
|
|
61
|
+
import sys
|
|
62
|
+
from copy import deepcopy
|
|
63
|
+
from typing import Dict, List, Optional, Tuple
|
|
64
|
+
from xml.sax.saxutils import escape as xml_escape
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# Constants — ACS Document 1996 (from shared constants.py)
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
from .constants import (
|
|
72
|
+
ACS_BOND_LENGTH_STR as ACS_BOND_LENGTH,
|
|
73
|
+
ACS_CHAIN_ANGLE_STR as ACS_CHAIN_ANGLE,
|
|
74
|
+
ACS_LABEL_FONT, ACS_LABEL_SIZE, ACS_LABEL_FACE,
|
|
75
|
+
ACS_CAPTION_SIZE, ACS_CAPTION_FACE,
|
|
76
|
+
ACS_LINE_WIDTH, ACS_BOLD_WIDTH, ACS_BOND_SPACING,
|
|
77
|
+
ACS_HASH_SPACING, ACS_MARGIN_WIDTH,
|
|
78
|
+
CDXML_HEADER as _CDXML_HEADER,
|
|
79
|
+
CDXML_FOOTER as _CDXML_FOOTER,
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# Element numbers for heteroatoms we care about
|
|
83
|
+
ELEMENT_NUMBERS: Dict[str, int] = dict(
    H=1, B=5, C=6, N=7, O=8,
    F=9, Si=14, P=15, S=16, Cl=17,
    Se=34, Br=35, I=53, Cs=55,
)

# Two-character element symbols (need special handling in label alignment).
# Every two-letter symbol listed in ELEMENT_NUMBERS belongs to this set.
WIDE_SYMBOLS = {sym for sym in ELEMENT_NUMBERS if len(sym) == 2}

# Bond order -> value of the CDXML Order attribute.
# None means "do not write the attribute" (single bonds are the default).
BOND_ORDER_ATTR: Dict[int, Optional[str]] = {
    1: None,    # single
    2: "2",     # double
    3: "3",     # triple
    4: "1.5",   # aromatic, written as order 1.5
}

# Stereo bond config -> value written to the bond's Display attribute.
BOND_STEREO_ATTR: Dict[int, str] = {
    1: "WedgeBegin",        # solid wedge (up)
    4: "WedgeBegin",        # either / unknown, emitted the same as a solid wedge
    6: "WedgedHashBegin",   # hashed wedge (down)
}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# ---------------------------------------------------------------------------
|
|
109
|
+
# ID counter
|
|
110
|
+
# ---------------------------------------------------------------------------
|
|
111
|
+
|
|
112
|
+
class _IDGen:
|
|
113
|
+
"""Simple incrementing integer ID generator."""
|
|
114
|
+
def __init__(self, start: int = 1000):
|
|
115
|
+
self._n = start
|
|
116
|
+
|
|
117
|
+
def next(self) -> int:
|
|
118
|
+
v = self._n
|
|
119
|
+
self._n += 1
|
|
120
|
+
return v
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# ---------------------------------------------------------------------------
|
|
124
|
+
# Label position helper
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
|
|
127
|
+
def _label_offset(symbol: str) -> Tuple[float, float]:
    """
    Return the (dx, dy) offset from an atom position to the top-left corner
    of its <t> label.

    dx is ``-7.0 + 0.75`` for symbols in WIDE_SYMBOLS and ``-3.5 + 0.75``
    for every other symbol; dy is always ``-7.5``.
    """
    # Wide (two-character) symbols are shifted further to the left.
    if symbol in WIDE_SYMBOLS:
        glyph_width = 7.0
    else:
        glyph_width = 3.5
    dx = -glyph_width + 0.75
    dy = -7.5
    return dx, dy
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _label_bbox(x: float, y: float, symbol: str) -> str:
    """
    Return the BoundingBox attribute value ("left top right bottom") for an
    atom label whose atom sits at (*x*, *y*).

    The box is 7.0 pt wide for symbols in WIDE_SYMBOLS and 6.0 pt otherwise,
    centred horizontally on *x*; it spans from 7.52 pt above *y* down to *y*.
    """
    width = 6.0
    if symbol in WIDE_SYMBOLS:
        width = 7.0

    left = x - width / 2.0
    right = left + width
    top = y - 7.52
    bottom = y  # the label's bottom edge is taken to be the atom's y

    return " ".join(f"{edge:.2f}" for edge in (left, top, right, bottom))
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ---------------------------------------------------------------------------
|
|
150
|
+
# Inner fragment builder for abbreviation nodes
|
|
151
|
+
# ---------------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
def _build_abbrev_inner_fragment(
    label_smiles: str,
    anchor_x: float,
    anchor_y: float,
    ids: _IDGen,
) -> str:
    """Build inner ``<fragment>`` XML for a ``NodeType="Fragment"`` abbreviation.

    Atom and bond dicts are obtained from ``smiles_to_coords(label_smiles)``
    and passed through ``normalize_coords`` centred on
    (*anchor_x*, *anchor_y*) with ``flip_y=True``.  One ``<n>`` is written per
    atom and one ``<b>`` per bond, followed by an ``ExternalConnectionPoint``
    node bonded to the first atom (the attachment point).

    Parameters
    ----------
    label_smiles : SMILES string describing the abbreviated group
    anchor_x, anchor_y : centre passed to ``normalize_coords``
    ids : id generator; every emitted element consumes ids from it

    Returns
    -------
    The ``<fragment>...</fragment>`` XML string, or ``""`` when the helper
    modules cannot be imported or no atoms are produced.
    """
    # Imported lazily so that a missing optional module only disables the
    # inner fragment instead of breaking the whole builder.
    try:
        from .image.structure_from_image import smiles_to_coords
        from .coord_normalizer import normalize_coords
    except ImportError:
        return ""

    mol_data = smiles_to_coords(label_smiles, offset_index=0)
    if not mol_data or not mol_data.get("atoms"):
        return ""

    atoms, bonds = normalize_coords(
        mol_data["atoms"], mol_data["bonds"],
        center_x=anchor_x, center_y=anchor_y,
        flip_y=True,
    )
    if not atoms:
        return ""

    frag_id = ids.next()
    lines: List[str] = [f'<fragment id="{frag_id}">']

    # atom index (as used by the bond dicts) -> XML id of the emitted <n>
    inner_map: Dict[int, int] = {}
    for a in atoms:
        aid = ids.next()
        inner_map[a["index"]] = aid
        sym = a.get("symbol", "C")
        ax, ay = a["x"], a["y"]
        z = ids.next()
        attrs = [f'id="{aid}"', f'p="{ax:.2f} {ay:.2f}"', f'Z="{z}"']
        if sym != "C":
            el_num = ELEMENT_NUMBERS.get(sym, 0)
            if el_num:
                attrs.append(f'Element="{el_num}"')
            # An explicit None means "not specified"; treat it like a missing
            # key so the attribute never becomes NumHydrogens="None".
            nh = a.get("num_hydrogens") or 0
            attrs.append(f'NumHydrogens="{nh}"')
            attrs.append('NeedsClean="yes"')
        lines.append(f'<n {" ".join(attrs)}/>')

    for b in bonds:
        bid = ids.next()
        z = ids.next()
        # Bonds that reference an unknown atom index fall back to id 0.
        a1 = inner_map.get(b["atom1"], 0)
        a2 = inner_map.get(b["atom2"], 0)
        order = b.get("order", 1)
        attrs = [f'id="{bid}"', f'Z="{z}"', f'B="{a1}"', f'E="{a2}"']
        order_attr = BOND_ORDER_ATTR.get(order)
        if order_attr:
            attrs.append(f'Order="{order_attr}"')
        lines.append(f'<b {" ".join(attrs)}/>')

    # ExternalConnectionPoint — bonded to first atom (attachment point),
    # placed 14.4 pt to its left at the same height.
    ecp_id = ids.next()
    ecp_z = ids.next()
    first_atom = atoms[0]
    ecp_x = first_atom["x"] - 14.4
    ecp_y = first_atom["y"]
    first_inner_id = inner_map.get(first_atom["index"], 0)
    lines.append(
        f'<n id="{ecp_id}" NodeType="ExternalConnectionPoint" '
        f'p="{ecp_x:.2f} {ecp_y:.2f}" Z="{ecp_z}" '
        f'ExternalConnectionNum="1"/>'
    )
    ecp_bond_id = ids.next()
    ecp_bond_z = ids.next()
    lines.append(
        f'<b id="{ecp_bond_id}" Z="{ecp_bond_z}" '
        f'B="{ecp_id}" E="{first_inner_id}"/>'
    )

    lines.append('</fragment>')
    return "\n".join(lines)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# ---------------------------------------------------------------------------
|
|
241
|
+
# Fragment (molecule) builder
|
|
242
|
+
# ---------------------------------------------------------------------------
|
|
243
|
+
|
|
244
|
+
def _group_label_xml(label: str, ax: float, ay: float, ids: _IDGen) -> List[str]:
    """Return the ``<t>``, ``<s>`` and ``</t>`` lines labelling a group node at (ax, ay)."""
    # Estimate the label width from its length: 5.5 pt per character,
    # never narrower than 6 pt.
    label_w = max(len(label) * 5.5, 6.0)
    lx = ax - 3.25
    ly = ay + 3.52
    lbx1 = ax - label_w / 2.0
    lby1 = ay - 7.52
    lbx2 = ax + label_w / 2.0
    lby2 = ay
    tid = ids.next()
    return [
        f'<t id="{tid}" p="{lx:.2f} {ly:.2f}" '
        f'BoundingBox="{lbx1:.2f} {lby1:.2f} {lbx2:.2f} {lby2:.2f}" '
        f'LabelJustification="Left">',
        f'<s font="{ACS_LABEL_FONT}" size="{ACS_LABEL_SIZE}" '
        f'color="0" face="{ACS_LABEL_FACE}">'
        f'{xml_escape(label)}</s>',
        '</t>',
    ]


def _build_fragment(
    atoms: List[Dict],
    bonds: List[Dict],
    ids: _IDGen,
    atom_id_map: Optional[Dict[int, int]] = None,  # out-param: atom index → xml id
) -> Tuple[str, Dict[int, int], int]:
    """
    Build a <fragment> XML string.

    Supports three atom types via optional dict keys:

    * **Normal atoms** — standard CDXML atoms (carbon or heteroatom with label).
    * **Abbreviation atoms** (``is_abbreviation=True``) — rendered as
      ``NodeType="Fragment"`` with an inner ``<fragment>`` and a text label.
      Uses ``abbrev_label`` (default ``"?"``); ``abbrev_smiles``, when given,
      is used to build the inner fragment.
    * **Generic group atoms** (``is_generic=True``) — rendered as
      ``NodeType="GenericNickname"`` (or other *node_type*) with a text label
      taken from ``generic_label`` (default ``"R"``).

    A ``num_hydrogens`` value of ``None`` is treated the same as a missing
    key, i.e. as 0.

    Parameters
    ----------
    atoms : list of atom dicts (each needs "index", "x" and "y")
    bonds : list of bond dicts (each needs "atom1" and "atom2")
    ids : id generator; every emitted element consumes ids from it
    atom_id_map : optional dict that is filled in place with
        atom index → XML id; a new dict is created when omitted

    Returns
    -------
    (xml_string, atom_id_map, fragment_xml_id).
    atom_id_map maps caller's atom index → the XML element id used.

    Raises
    ------
    ValueError
        If *atoms* is empty (the bounding box cannot be computed).
    """
    if atom_id_map is None:
        atom_id_map = {}

    frag_id = ids.next()

    # Bounding box over the atom positions only (labels are not included).
    xs = [a["x"] for a in atoms]
    ys = [a["y"] for a in atoms]
    bb_x1, bb_y1 = min(xs), min(ys)
    bb_x2, bb_y2 = max(xs), max(ys)

    lines: List[str] = []
    lines.append(
        f'<fragment id="{frag_id}" '
        f'BoundingBox="{bb_x1:.2f} {bb_y1:.2f} {bb_x2:.2f} {bb_y2:.2f}" '
        f'Z="{ids.next()}">'
    )

    # Atoms
    for a in atoms:
        atom_xml_id = ids.next()
        atom_id_map[a["index"]] = atom_xml_id

        ax, ay = a["x"], a["y"]
        z = ids.next()

        # ---- Abbreviation group (NodeType="Fragment") ----
        if a.get("is_abbreviation"):
            label = a.get("abbrev_label", "?")
            label_smiles = a.get("abbrev_smiles")

            lines.append(
                f'<n id="{atom_xml_id}" NodeType="Fragment" '
                f'p="{ax:.2f} {ay:.2f}" Z="{z}" AS="N">'
            )

            # Inner fragment from SMILES; skipped when no SMILES is given or
            # when the inner builder returns an empty string.
            if label_smiles:
                inner_xml = _build_abbrev_inner_fragment(
                    label_smiles, ax, ay, ids)
                if inner_xml:
                    lines.append(inner_xml)

            # The label id is drawn after the inner fragment's ids.
            lines.extend(_group_label_xml(label, ax, ay, ids))
            lines.append('</n>')
            continue

        # ---- Generic variable group (R, X, Ar, R1, …) ----
        if a.get("is_generic"):
            label = a.get("generic_label", "R")
            node_type = a.get("node_type", "GenericNickname")

            attrs = [
                f'id="{atom_xml_id}"',
                f'NodeType="{node_type}"',
                f'p="{ax:.2f} {ay:.2f}"',
                f'Z="{z}"',
                'AS="N"',
            ]
            if node_type == "GenericNickname":
                attrs.append(f'GenericNickname="{xml_escape(label)}"')

            lines.append(f'<n {" ".join(attrs)}>')
            lines.extend(_group_label_xml(label, ax, ay, ids))
            lines.append('</n>')
            continue

        # ---- Normal atom ----
        sym = a.get("symbol", "C")

        # Base attributes
        attrs = [f'id="{atom_xml_id}"', f'p="{ax:.2f} {ay:.2f}"', f'Z="{z}"']

        is_carbon = (sym == "C")
        charge = a.get("charge", 0)
        # An explicit None means "not specified"; treat it like a missing key
        # so it neither leaks into the XML nor breaks the comparisons below.
        nh = a.get("num_hydrogens") or 0

        if not is_carbon:
            el_num = ELEMENT_NUMBERS.get(sym, 0)
            if el_num:
                attrs.append(f'Element="{el_num}"')
            attrs.append(f'NumHydrogens="{nh}"')
            attrs.append('NeedsClean="yes"')
            attrs.append('AS="N"')

        if charge:
            attrs.append(f'Charge="{charge}"')

        # Stereo cfg (atom)
        cfg = a.get("cfg", 0)
        if cfg:
            attrs.append(f'Stereo="{cfg}"')

        if is_carbon and not charge:
            # Carbon: no Element, no label, no NumHydrogens
            lines.append(f'<n {" ".join(attrs)}/>')
        else:
            # Heteroatom (or charged carbon): needs a <t> child label
            lx = ax - 3.25
            ly = ay + 3.52
            bbox = _label_bbox(ax, ay, sym)

            # Build label text including hydrogens: "N" → "NH", "N" → "NH2"
            if nh == 1:
                label_text = xml_escape(sym) + "H"
            elif nh > 1:
                label_text = xml_escape(sym) + "H" + str(nh)
            else:
                label_text = xml_escape(sym)

            lines.append(f'<n {" ".join(attrs)}>')
            lines.append(
                f'<t p="{lx:.2f} {ly:.2f}" BoundingBox="{bbox}" '
                f'LabelJustification="Left">'
            )
            lines.append(
                f'<s font="{ACS_LABEL_FONT}" size="{ACS_LABEL_SIZE}" '
                f'color="0" face="{ACS_LABEL_FACE}">{label_text}</s>'
            )
            lines.append("</t>")
            lines.append("</n>")

    # Bonds
    for b in bonds:
        bond_xml_id = ids.next()
        z = ids.next()
        # Bonds that reference an unknown atom index fall back to id 0.
        a1_xml = atom_id_map.get(b["atom1"], 0)
        a2_xml = atom_id_map.get(b["atom2"], 0)
        order = b.get("order", 1)
        cfg = b.get("cfg", 0)

        attrs = [
            f'id="{bond_xml_id}"',
            f'Z="{z}"',
            f'B="{a1_xml}"',
            f'E="{a2_xml}"',
        ]

        order_attr = BOND_ORDER_ATTR.get(order)
        if order_attr:
            attrs.append(f'Order="{order_attr}"')

        double_pos = b.get("double_pos", "")
        if double_pos:
            attrs.append(f'DoublePosition="{double_pos}"')

        if cfg and cfg in BOND_STEREO_ATTR:
            attrs.append(f'Display="{BOND_STEREO_ATTR[cfg]}"')
        elif order == 1:
            # Default single bond gets BS="N" (normal, no stereo)
            attrs.append('BS="N"')

        lines.append(f'<b {" ".join(attrs)}/>')

    lines.append("</fragment>")
    return "\n".join(lines), atom_id_map, frag_id
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
# ---------------------------------------------------------------------------
|
|
471
|
+
# Conditions text builder
|
|
472
|
+
# ---------------------------------------------------------------------------
|
|
473
|
+
|
|
474
|
+
def _build_conditions_text(
    lines: List[str],
    x: float,
    y: float,
    ids: _IDGen,
    justification: str = "Center",
) -> Tuple[str, int]:
    """
    Build a standalone <t> element holding the reaction-condition lines.

    The text lines are XML-escaped and joined with newlines inside a single
    <s> run.  *justification* is written to both the CaptionJustification and
    Justification attributes.

    Returns (xml_string, text_xml_id).
    """
    text_id = ids.next()
    z_order = ids.next()

    # Rough bounding box: 5.8 pt per character of the longest line (a width
    # of one character is assumed when there are no lines), 12 pt per line.
    longest = max(map(len, lines), default=1)
    width = longest * 5.8
    height = len(lines) * 12.0

    half_width = width / 2.0
    left, right = x - half_width, x + half_width
    top, bottom = y - height, y

    opening_tag = (
        f'<t id="{text_id}" p="{x:.2f} {y:.2f}" '
        f'BoundingBox="{left:.2f} {top:.2f} {right:.2f} {bottom:.2f}" '
        f'Z="{z_order}" '
        f'CaptionJustification="{justification}" '
        f'Justification="{justification}" '
        f'LineHeight="auto">'
    )
    escaped_text = "\n".join(map(xml_escape, lines))
    styled_run = (
        f'<s font="{ACS_LABEL_FONT}" size="{ACS_CAPTION_SIZE}" '
        f'color="0" face="{ACS_CAPTION_FACE}">{escaped_text}</s>'
    )
    return "\n".join((opening_tag, styled_run, "</t>")), text_id
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
# ---------------------------------------------------------------------------
|
|
517
|
+
# Arrow builder
|
|
518
|
+
# ---------------------------------------------------------------------------
|
|
519
|
+
|
|
520
|
+
def _build_arrow(
    tail_x: float, tail_y: float,
    head_x: float, head_y: float,
    ids: _IDGen,
) -> Tuple[str, int]:
    """
    Build a self-closing <arrow> element running from the tail point to the
    head point (ArrowheadHead="Full", ArrowheadType="Solid").

    Returns (xml_string, arrow_xml_id).
    """
    arrow_id = ids.next()
    z_order = ids.next()

    # Bounding box: the x-extent of the shaft, padded by 4 pt above and below.
    left = min(tail_x, head_x)
    right = max(tail_x, head_x)
    top = min(tail_y, head_y) - 4.0
    bottom = max(tail_y, head_y) + 4.0

    # Centre / axis end points: x is the shaft midpoint, y lies 100 pt below
    # the tail; each axis end is offset 80 pt from that centre.
    centre_x = (tail_x + head_x) / 2.0
    centre_y = tail_y + 100.0

    attributes = [
        ("id", f"{arrow_id}"),
        ("BoundingBox", f"{left:.2f} {top:.2f} {right:.2f} {bottom:.2f}"),
        ("Z", f"{z_order}"),
        ("FillType", "None"),
        ("ArrowheadHead", "Full"),
        ("ArrowheadType", "Solid"),
        ("HeadSize", "1000"),
        ("ArrowheadCenterSize", "875"),
        ("ArrowheadWidth", "250"),
        ("Head3D", f"{head_x:.2f} {head_y:.2f} 0"),
        ("Tail3D", f"{tail_x:.2f} {tail_y:.2f} 0"),
        ("Center3D", f"{centre_x:.2f} {centre_y:.2f} 0"),
        ("MajorAxisEnd3D", f"{centre_x + 80:.2f} {centre_y:.2f} 0"),
        ("MinorAxisEnd3D", f"{centre_x:.2f} {centre_y + 80:.2f} 0"),
    ]
    rendered = " ".join(f'{key}="{value}"' for key, value in attributes)
    return f"<arrow {rendered}/>", arrow_id
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
# ---------------------------------------------------------------------------
|
|
563
|
+
# Page templates
|
|
564
|
+
# ---------------------------------------------------------------------------
|
|
565
|
+
|
|
566
|
+
_PAGE_OPEN = (
|
|
567
|
+
'<page id="{page_id}" BoundingBox="0 0 1620 2160" '
|
|
568
|
+
'HeaderPosition="36" FooterPosition="36" '
|
|
569
|
+
'PrintTrimMarks="yes" HeightPages="3" WidthPages="3">'
|
|
570
|
+
)
|
|
571
|
+
_PAGE_CLOSE = "</page>"
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
def _header(bbox: str) -> str:
    """Return the CDXML header template filled with *bbox* and the ACS style constants."""
    style_fields = {
        "label_font": ACS_LABEL_FONT,
        "label_size": ACS_LABEL_SIZE,
        "label_face": ACS_LABEL_FACE,
        "caption_size": ACS_CAPTION_SIZE,
        "hash_spacing": ACS_HASH_SPACING,
        "margin_width": ACS_MARGIN_WIDTH,
        "line_width": ACS_LINE_WIDTH,
        "bold_width": ACS_BOLD_WIDTH,
        "bond_length": ACS_BOND_LENGTH,
        "bond_spacing": ACS_BOND_SPACING,
        "chain_angle": ACS_CHAIN_ANGLE,
    }
    return _CDXML_HEADER.format(bbox=bbox, **style_fields)
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
# ---------------------------------------------------------------------------
|
|
592
|
+
# Public API — single molecule
|
|
593
|
+
# ---------------------------------------------------------------------------
|
|
594
|
+
|
|
595
|
+
def build_molecule_cdxml(
    atoms: List[Dict],
    bonds: List[Dict],
    start_id: int = 1000,
) -> str:
    """
    Build a complete CDXML document that holds one molecule fragment.

    Parameters
    ----------
    atoms : list of atom dicts (coordinates already in CDXML pts)
    bonds : list of bond dicts
    start_id : first XML element id to use

    Returns
    -------
    CDXML document as a string: header, opening page tag, the fragment,
    closing page tag and footer, joined by newlines.
    """
    ids = _IDGen(start_id)

    # The fragment is built first so that it receives the lowest ids.
    fragment_xml, _, _ = _build_fragment(atoms, bonds, ids, {})

    # Document bounding box: the extent of the atom positions.
    x_coords = [atom["x"] for atom in atoms]
    y_coords = [atom["y"] for atom in atoms]
    bbox = (
        f"{min(x_coords):.2f} {min(y_coords):.2f} "
        f"{max(x_coords):.2f} {max(y_coords):.2f}"
    )

    # The page takes the next free id after the fragment's elements.
    page_open = _PAGE_OPEN.format(page_id=ids.next())

    document_parts = (
        _header(bbox),
        page_open,
        fragment_xml,
        _PAGE_CLOSE,
        _CDXML_FOOTER,
    )
    return "\n".join(document_parts)
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
# ---------------------------------------------------------------------------
|
|
636
|
+
# Public API — reaction scheme
|
|
637
|
+
# ---------------------------------------------------------------------------
|
|
638
|
+
|
|
639
|
+
def build_reaction_cdxml(
    reactants: List[Dict],
    products: List[Dict],
    conditions: Optional[Dict] = None,
    arrow_y: Optional[float] = None,
    arrow_tail_x: Optional[float] = None,
    arrow_head_x: Optional[float] = None,
    start_id: int = 1000,
) -> str:
    """
    Assemble a CDXML document for a single reaction step.

    Every entry of ``reactants`` / ``products`` is a molecule dict::

        {
            "atoms": [...],
            "bonds": [...],
            # optional: "name", "role"
        }

    ``conditions`` carries the text placed around the arrow::

        {
            "above": ["Pd2dba3 (5 mol%)", "BINAP (10 mol%)"],
            "below": ["Cs2CO3 (2 eq.)", "dioxane", "100 °C, 24 h"]
        }

    Any arrow coordinate left as None is derived from the atom positions:
    the shaft sits at the vertical midpoint of all atoms, and the ends are
    inset from the right edge of the reactants and the left edge of the
    products by max(10.0, 15 % of the gap between them).  A side without
    atoms falls back to x = 100.0 (reactants) or x = 300.0 (products).

    Parameters
    ----------
    reactants : list of molecule dicts
    products  : list of molecule dicts
    conditions: dict with optional "above" and "below" lists of strings
    arrow_y   : y-coordinate of arrow shaft (auto if None)
    arrow_tail_x, arrow_head_x : x-coords of arrow ends (auto if None)
    start_id  : first XML element id

    Returns
    -------
    CDXML document string

    Raises
    ------
    ValueError
        If neither reactants nor products contain any atoms.
    """
    conditions = {} if conditions is None else conditions
    ids = _IDGen(start_id)

    # ---- Fragments: reactants first, then products ----
    # Ids are handed out sequentially, so the build order below is part of
    # the output and must not be rearranged.
    fragment_xml: List[str] = []
    reactant_frag_ids: List[int] = []
    product_frag_ids: List[int] = []
    all_xs: List[float] = []
    all_ys: List[float] = []

    for molecules, side_ids in (
        (reactants, reactant_frag_ids),
        (products, product_frag_ids),
    ):
        for mol in molecules:
            xml, _, frag_id = _build_fragment(
                mol.get("atoms", []), mol.get("bonds", []), ids, {}
            )
            fragment_xml.append(xml)
            side_ids.append(frag_id)
            for atom in mol.get("atoms", []):
                all_xs.append(atom["x"])
                all_ys.append(atom["y"])

    if not all_xs:
        raise ValueError("No atoms found in reactants or products")

    # ---- Arrow geometry ----
    lowest_y, highest_y = min(all_ys), max(all_ys)
    if arrow_y is None:
        # Vertical midpoint of every atom in the scheme.
        arrow_y = (lowest_y + highest_y) / 2.0

    reactant_xs = [a["x"] for mol in reactants for a in mol.get("atoms", [])]
    product_xs = [a["x"] for mol in products for a in mol.get("atoms", [])]
    reactant_right = max(reactant_xs) if reactant_xs else 100.0
    product_left = min(product_xs) if product_xs else 300.0

    # Inset the arrow from both molecules; never less than 10.0.
    margin = max(10.0, (product_left - reactant_right) * 0.15)
    if arrow_tail_x is None:
        arrow_tail_x = reactant_right + margin
    if arrow_head_x is None:
        arrow_head_x = product_left - margin
    arrow_mid_x = (arrow_tail_x + arrow_head_x) / 2.0

    # ---- Conditions text (above first, then below) ----
    above_ids: List[int] = []
    below_ids: List[int] = []
    above_xml_parts: List[str] = []
    below_xml_parts: List[str] = []

    for key, y_offset, xml_parts, text_ids in (
        ("above", -8.0, above_xml_parts, above_ids),   # above arrow shaft
        ("below", 20.0, below_xml_parts, below_ids),   # below arrow shaft
    ):
        text_lines = conditions.get(key, [])
        if not text_lines:
            continue
        txt_xml, txt_id = _build_conditions_text(
            text_lines,
            x=arrow_mid_x,
            y=arrow_y + y_offset,
            ids=ids,
        )
        xml_parts.append(txt_xml)
        text_ids.append(txt_id)

    # ---- Arrow ----
    arrow_xml, arrow_id = _build_arrow(
        tail_x=arrow_tail_x,
        tail_y=arrow_y,
        head_x=arrow_head_x,
        head_y=arrow_y,
        ids=ids,
    )

    # ---- Scheme / step ----
    scheme_id = ids.next()
    step_id = ids.next()

    step_attrs = [
        f'id="{step_id}"',
        f'ReactionStepReactants="{" ".join(map(str, reactant_frag_ids))}"',
        f'ReactionStepProducts="{" ".join(map(str, product_frag_ids))}"',
        f'ReactionStepArrows="{arrow_id}"',
    ]
    # The above/below attributes are emitted only when text exists.
    above_str = " ".join(map(str, above_ids))
    below_str = " ".join(map(str, below_ids))
    if above_str:
        step_attrs.append(f'ReactionStepObjectsAboveArrow="{above_str}"')
    if below_str:
        step_attrs.append(f'ReactionStepObjectsBelowArrow="{below_str}"')

    step_attr_text = " ".join(step_attrs)
    scheme_xml = f'<scheme id="{scheme_id}"><step {step_attr_text}/></scheme>'

    # ---- Document bounding box: atom extents padded by 20.0 per side ----
    extra_margin = 20.0
    doc_bbox = (
        f"{min(all_xs) - extra_margin:.2f} {lowest_y - extra_margin:.2f} "
        f"{max(all_xs) + extra_margin:.2f} {highest_y + extra_margin:.2f}"
    )

    page_id = ids.next()

    # ---- Assemble document ----
    sections = [
        _header(doc_bbox),
        _PAGE_OPEN.format(page_id=page_id),
        *fragment_xml,
        *above_xml_parts,
        *below_xml_parts,
        arrow_xml,
        scheme_xml,
        _PAGE_CLOSE,
        _CDXML_FOOTER,
    ]
    return "\n".join(sections)
|
|
835
|
+
|
|
836
|
+
|
|
837
|
+
# ---------------------------------------------------------------------------
|
|
838
|
+
# Helpers for loading from JSON
|
|
839
|
+
# ---------------------------------------------------------------------------
|
|
840
|
+
|
|
841
|
+
def _load_json(path: str) -> Dict:
    """Parse and return JSON from *path*; the path "-" reads standard input."""
    if path != "-":
        # Regular file: decoded explicitly as UTF-8.
        with open(path, encoding="utf-8") as source:
            return json.load(source)
    return json.load(sys.stdin)
|
|
846
|
+
|
|
847
|
+
|
|
848
|
+
# ---------------------------------------------------------------------------
|
|
849
|
+
# CLI
|
|
850
|
+
# ---------------------------------------------------------------------------
|
|
851
|
+
|
|
852
|
+
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser (input, output, mode, start id).

    The module docstring is reused verbatim as the help epilog, which is why
    the raw-description formatter is selected.
    """
    parser = argparse.ArgumentParser(
        description="Build CDXML from structured atom/bond JSON (ACS Document 1996 style).",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    # (flags, keyword settings) in the order they appear in --help.
    option_specs = (
        (
            ("--input", "-i"),
            {"default": "-", "help": "Input JSON file (default: stdin)"},
        ),
        (
            ("--output", "-o"),
            {"default": "-", "help": "Output CDXML file (default: stdout)"},
        ),
        (
            ("--mode", "-m"),
            {
                "choices": ["molecule", "reaction"],
                "default": "molecule",
                "help": "Output mode: 'molecule' (single fragment) or 'reaction' (scheme with arrow)",
            },
        ),
        (
            ("--start-id",),
            {
                "type": int,
                "default": 1000,
                "help": "First XML element id to use (default: 1000)",
            },
        ),
    )
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)

    return parser
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point: read JSON, build CDXML, emit the result.

    Parameters
    ----------
    argv : argument list to parse; None lets argparse use ``sys.argv``.

    Returns
    -------
    Process exit code: 0 on success, 1 when the input lacks the data the
    selected mode needs (the reason is printed to stderr).
    """
    args = _build_arg_parser().parse_args(argv)
    data = _load_json(args.input)

    if args.mode == "molecule":
        atoms = data.get("atoms", [])
        bonds = data.get("bonds", [])
        if not atoms:
            print("ERROR: no atoms in input", file=sys.stderr)
            return 1
        cdxml = build_molecule_cdxml(atoms, bonds, start_id=args.start_id)
    else:
        # The parser restricts --mode, so anything else is "reaction".
        reactants = data.get("reactants", [])
        products = data.get("products", [])
        conditions = data.get("conditions", {})
        if not (reactants and products):
            print("ERROR: reaction mode requires 'reactants' and 'products'", file=sys.stderr)
            return 1
        cdxml = build_reaction_cdxml(
            reactants,
            products,
            conditions,
            start_id=args.start_id,
        )

    if args.output != "-":
        with open(args.output, "w", encoding="utf-8") as sink:
            sink.write(cdxml)
        # Status goes to stderr so it never mixes with piped document output.
        print(f"Written to {args.output}", file=sys.stderr)
    else:
        print(cdxml)

    return 0
|
|
917
|
+
|
|
918
|
+
|
|
919
|
+
if __name__ == "__main__":
    # Run the CLI when executed as a script and use main()'s return value as
    # the process exit status.
    raise SystemExit(main())
|