cdxml-toolkit 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cdxml_toolkit/__init__.py +18 -0
- cdxml_toolkit/_jre/__init__.py +2 -0
- cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
- cdxml_toolkit/analysis/__init__.py +35 -0
- cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
- cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
- cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
- cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
- cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
- cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
- cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
- cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
- cdxml_toolkit/analysis/extract_nmr.py +47 -0
- cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
- cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
- cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
- cdxml_toolkit/cdxml_builder.py +920 -0
- cdxml_toolkit/cdxml_utils.py +342 -0
- cdxml_toolkit/chemdraw/__init__.py +5 -0
- cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
- cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
- cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
- cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
- cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
- cdxml_toolkit/constants.py +304 -0
- cdxml_toolkit/coord_normalizer.py +438 -0
- cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
- cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
- cdxml_toolkit/image/__init__.py +15 -0
- cdxml_toolkit/image/reaction_from_image.py +2103 -0
- cdxml_toolkit/image/structure_from_image.py +1711 -0
- cdxml_toolkit/layout/__init__.py +5 -0
- cdxml_toolkit/layout/alignment.py +1642 -0
- cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
- cdxml_toolkit/layout/scheme_merger.py +2260 -0
- cdxml_toolkit/mcp_server/__init__.py +0 -0
- cdxml_toolkit/mcp_server/__main__.py +5 -0
- cdxml_toolkit/mcp_server/server.py +1567 -0
- cdxml_toolkit/naming/__init__.py +6 -0
- cdxml_toolkit/naming/aligned_namer.py +2342 -0
- cdxml_toolkit/naming/mol_builder.py +3722 -0
- cdxml_toolkit/naming/name_decomposer.py +2843 -0
- cdxml_toolkit/naming/reactions_datamol.json +2414 -0
- cdxml_toolkit/office/__init__.py +5 -0
- cdxml_toolkit/office/doc_from_template.py +722 -0
- cdxml_toolkit/office/ole_embedder.py +808 -0
- cdxml_toolkit/office/ole_extractor.py +272 -0
- cdxml_toolkit/perception/__init__.py +10 -0
- cdxml_toolkit/perception/compound_search.py +229 -0
- cdxml_toolkit/perception/eln_csv_parser.py +240 -0
- cdxml_toolkit/perception/rdf_parser.py +664 -0
- cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
- cdxml_toolkit/perception/reaction_parser.py +2150 -0
- cdxml_toolkit/perception/scheme_reader.py +2948 -0
- cdxml_toolkit/perception/scheme_refine.py +1404 -0
- cdxml_toolkit/perception/scheme_segmenter.py +619 -0
- cdxml_toolkit/perception/spatial_assignment.py +1013 -0
- cdxml_toolkit/rdkit_utils.py +605 -0
- cdxml_toolkit/render/__init__.py +17 -0
- cdxml_toolkit/render/auto_layout.py +229 -0
- cdxml_toolkit/render/compact_parser.py +632 -0
- cdxml_toolkit/render/parser.py +706 -0
- cdxml_toolkit/render/render_scheme.py +267 -0
- cdxml_toolkit/render/renderer.py +2387 -0
- cdxml_toolkit/render/schema.py +90 -0
- cdxml_toolkit/render/scheme_maker.py +1043 -0
- cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
- cdxml_toolkit/resolve/__init__.py +13 -0
- cdxml_toolkit/resolve/cas_resolver.py +430 -0
- cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
- cdxml_toolkit/resolve/condensed_formula.py +493 -0
- cdxml_toolkit/resolve/jre_manager.py +195 -0
- cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
- cdxml_toolkit/resolve/reagent_db.py +285 -0
- cdxml_toolkit/resolve/superatom_data.json +2856 -0
- cdxml_toolkit/resolve/superatom_table.py +146 -0
- cdxml_toolkit/text_formatting.py +298 -0
- cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
- cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
- cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
- cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
- cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,2387 @@
|
|
|
1
|
+
"""
|
|
2
|
+
renderer.py — Render a SchemeDescriptor to a CDXML document.
|
|
3
|
+
|
|
4
|
+
Supports:
|
|
5
|
+
- linear: single step (substrates → products)
|
|
6
|
+
- sequential: multi-step in a single row
|
|
7
|
+
- wrap: repeat — multi-row L→R with repeated structures
|
|
8
|
+
- wrap: serpentine — zigzag layout (L→R, R→L, L→R, ...) with vertical arrows
|
|
9
|
+
|
|
10
|
+
Uses RDKit for SMILES → 2D coords (no ChemDraw COM dependency).
|
|
11
|
+
Uses cdxml_builder infrastructure for fragment/arrow/text XML generation.
|
|
12
|
+
Uses reaction_cleanup-style layout logic for positioning.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import math
|
|
19
|
+
import os
|
|
20
|
+
from copy import deepcopy
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
23
|
+
from xml.sax.saxutils import escape as xml_escape
|
|
24
|
+
|
|
25
|
+
from ..constants import (
|
|
26
|
+
ACS_BOND_LENGTH,
|
|
27
|
+
ACS_BOND_LENGTH_STR,
|
|
28
|
+
ACS_BOND_SPACING,
|
|
29
|
+
ACS_BOLD_WIDTH,
|
|
30
|
+
ACS_CAPTION_FACE,
|
|
31
|
+
ACS_CAPTION_SIZE,
|
|
32
|
+
ACS_CHAIN_ANGLE_STR,
|
|
33
|
+
ACS_HASH_SPACING,
|
|
34
|
+
ACS_LABEL_FACE,
|
|
35
|
+
ACS_LABEL_FONT,
|
|
36
|
+
ACS_LABEL_SIZE,
|
|
37
|
+
ACS_LINE_WIDTH,
|
|
38
|
+
ACS_MARGIN_WIDTH,
|
|
39
|
+
CDXML_FOOTER,
|
|
40
|
+
CDXML_HEADER,
|
|
41
|
+
LAYOUT_ABOVE_GAP,
|
|
42
|
+
LAYOUT_BELOW_GAP,
|
|
43
|
+
LAYOUT_FRAG_GAP_BONDS,
|
|
44
|
+
LAYOUT_INTER_GAP_BONDS,
|
|
45
|
+
)
|
|
46
|
+
from ..text_formatting import build_formatted_s_xml
|
|
47
|
+
|
|
48
|
+
from .schema import (
|
|
49
|
+
ArrowContent,
|
|
50
|
+
RunArrowEntry,
|
|
51
|
+
SchemeDescriptor,
|
|
52
|
+
SectionDescriptor,
|
|
53
|
+
StepDescriptor,
|
|
54
|
+
StepRunArrows,
|
|
55
|
+
StructureRef,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ---------------------------------------------------------------------------
# Text metrics for Arial 10pt Bold in ChemDraw
# (measured via bbox investigation: 135 fragments, 237 texts, 83 arrows)
# ---------------------------------------------------------------------------

# Average character width (Arial is proportional, so this is only a mean).
_CHAR_WIDTH = 4.7
# Distance from one text line to the next (ChemDraw ~1.15× multiplier).
_LINE_ADVANCE = 11.5
# Distance from the baseline up to the top of uppercase letters.
_CAP_HEIGHT = 9.1
# Distance from the baseline down to the bottom of descenders.
_DESCENT = 2.0

# Fragment bbox padding beyond atom center positions; each value is half of
# the measured excess, i.e. the amount applied per side.
_FRAG_PAD_W = 3.3   # ±3.3 pt per side (6.6 pt total width excess)
_FRAG_PAD_H = 1.45  # ±1.45 pt per side (2.9 pt total height excess)


# ---------------------------------------------------------------------------
# Multi-row constants
# ---------------------------------------------------------------------------

# Vertical gap between the bottom of row N (run arrows included) and the top
# of row N+1.  ~55 pts matches real Report-scheme-extr-2 inter-row spacing.
ROW_GAP = 55.0
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# ---------------------------------------------------------------------------
|
|
84
|
+
# Source JSON loader (reaction_parser output)
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
|
|
87
|
+
def _load_source_json(path: str) -> Dict[str, Dict]:
    """
    Load reaction_parser JSON and index its species for flexible lookup.

    Parameters
    ----------
    path : str
        Path to a UTF-8 encoded JSON file whose top-level object may hold a
        ``"species"`` list of dicts.

    Returns
    -------
    dict
        Maps lookup keys to species dicts.  For every species the following
        keys are registered, in this order (later writes win on collision):

        - species ID (the ``"id"`` value, e.g. "sp_0", "sp_1", ...)
        - "SM" when ``is_sm`` is truthy, "DP" when ``is_dp`` is truthy
        - lowercase ``name``
        - lowercase ``csv_name``

        A missing or ``null`` species list yields an empty dict.
    """
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    lookup: Dict[str, Dict] = {}
    # ``or []`` also covers an explicit JSON ``null``, which ``.get`` with a
    # default would hand back as None and crash the loop.
    for sp in data.get("species") or []:
        sp_id = sp.get("id", "")
        if sp_id:
            lookup[sp_id] = sp
        if sp.get("is_sm"):
            lookup["SM"] = sp
        if sp.get("is_dp"):
            lookup["DP"] = sp
        # Register by lowercase name and CSV name for flexible matching.
        # Non-string values (null, numbers) cannot be lowercased and are
        # skipped instead of aborting the whole load.
        for alias_field in ("name", "csv_name"):
            alias = sp.get(alias_field, "")
            if isinstance(alias, str) and alias:
                lookup[alias.lower()] = sp
    return lookup
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# ---------------------------------------------------------------------------
|
|
120
|
+
# ID generator (same pattern as cdxml_builder)
|
|
121
|
+
# ---------------------------------------------------------------------------
|
|
122
|
+
|
|
123
|
+
class _IDGen:
    """Hand out consecutive integer IDs, beginning at ``start``."""

    def __init__(self, start: int = 1000):
        # Value the next call to ``next()`` will return.
        self._n = start

    def next(self) -> int:
        """Return the current ID, then advance the counter by one."""
        issued, self._n = self._n, self._n + 1
        return issued
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# ---------------------------------------------------------------------------
|
|
135
|
+
# Resolved structure: SMILES → atoms/bonds in CDXML points
|
|
136
|
+
# ---------------------------------------------------------------------------
|
|
137
|
+
|
|
138
|
+
@dataclass
class ResolvedFragment:
    """One structure after resolution: its atom/bond data plus rendered XML."""

    # Required fields -------------------------------------------------------
    ref: StructureRef        # schema entry this fragment belongs to
    atoms: List[Dict]        # atom dicts
    bonds: List[Dict]        # bond dicts

    # Filled in later (defaults are placeholders) ---------------------------
    xml: str = ""            # <fragment> XML string
    frag_id: int = 0         # XML element ID
    # Bounding box in CDXML points, ordered (min_x, min_y, max_x, max_y).
    bbox: Tuple[float, float, float, float] = (0, 0, 0, 0)
    # Current center position.
    cx: float = 0
    cy: float = 0
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@dataclass
class ResolvedStep:
    """One reaction step whose structures are resolved and laid out."""

    # The schema-level description this step was built from.
    descriptor: StepDescriptor

    # Resolved structures and text, grouped by where they belong in the step
    # (presumably relative to the reaction arrow — confirm against the
    # layout code, which is outside this view).
    substrates: List[ResolvedFragment]
    products: List[ResolvedFragment]
    above_structures: List[ResolvedFragment]
    above_text: List[str]
    below_text: List[str]
    below_structures: List[ResolvedFragment]

    # Arrow geometry; zero until it is set during layout.
    arrow_tail_x: float = 0
    arrow_tail_y: float = 0
    arrow_head_x: float = 0
    arrow_head_y: float = 0
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
# SMILES → atom/bond dicts (using structure_from_image + normalize)
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
def _smiles_to_fragment_data(
    smiles: str,
    center_x: float = 200.0,
    center_y: float = 300.0,
) -> Optional[Tuple[List[Dict], List[Dict]]]:
    """
    Convert SMILES to atom/bond dicts in CDXML point coordinates.

    Parameters
    ----------
    smiles : str
        SMILES string to depict.
    center_x, center_y : float
        Passed to ``normalize_for_cdxml`` as the target center.

    Returns
    -------
    (atoms, bonds), or None when the SMILES cannot be parsed or describes
    no atoms at all.

    Raises
    ------
    RuntimeError
        If RDKit is not installed (the ImportError is chained as the cause).
    """
    try:
        from rdkit import Chem
        from rdkit.Chem import AllChem
    except ImportError as exc:
        raise RuntimeError(
            "RDKit is required for SMILES→structure conversion"
        ) from exc

    mol = Chem.MolFromSmiles(smiles)
    # An empty SMILES parses to a zero-atom molecule rather than None; there
    # is nothing to draw, so report it as a failure like any other bad input.
    if mol is None or mol.GetNumAtoms() == 0:
        return None

    AllChem.Compute2DCoords(mol)

    # Kekulize for explicit single/double bonds.
    # Work on a copy because clearAromaticFlags=True corrupts the mol on failure.
    mol_kek = Chem.RWMol(mol)
    try:
        Chem.Kekulize(mol_kek, clearAromaticFlags=True)
    except Exception:
        # Deliberate best-effort: keep the original mol with aromatic bonds.
        # _rdkit_mol_to_atom_bond_dicts handles AROMATIC → order 2 fallback.
        pass
    else:
        mol = mol_kek

    from ..image.structure_from_image import (
        _rdkit_mol_to_atom_bond_dicts,
        normalize_for_cdxml,
    )

    # Extract atom/bond dicts, then normalize to CDXML coordinate space
    # (scale + flip y + center).
    atoms, bonds = _rdkit_mol_to_atom_bond_dicts(mol)
    atoms, bonds = normalize_for_cdxml(atoms, bonds, center_x, center_y)

    return atoms, bonds
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _align_mol_to_reference(
    target_mol,
    ref_mol,
    center_x: float = 200.0,
    center_y: float = 300.0,
) -> Optional[Tuple[List[Dict], List[Dict]]]:
    """
    Depict ``target_mol`` in the orientation of ``ref_mol`` and return its
    atom/bond dicts in CDXML coordinates.

    The maximum common substructure (MCS) of the two molecules supplies the
    atom pairing handed to RDKit's GenerateDepictionMatching2DStructure.
    Both molecules may be modified in place: ``ref_mol`` gets 2D coordinates
    if it has no conformer, and ``target_mol`` gets new 2D coordinates.

    Returns (atoms, bonds), or None when RDKit is unavailable, either
    molecule is None, the MCS search fails, the MCS has fewer than three
    atoms, or the MCS cannot be matched back onto both molecules.
    """
    try:
        from rdkit import Chem
        from rdkit.Chem import AllChem, rdFMCS
    except ImportError:
        return None

    if target_mol is None or ref_mol is None:
        return None

    # Shared scaffold between target and reference.
    try:
        mcs = rdFMCS.FindMCS(
            [target_mol, ref_mol],
            timeout=5,
            ringMatchesRingOnly=True,
            completeRingsOnly=True,
        )
    except Exception:
        return None

    # Fewer than three atoms is too small for a meaningful alignment.
    if mcs.numAtoms < 3:
        return None

    scaffold = Chem.MolFromSmarts(mcs.smartsString)
    if scaffold is None:
        return None

    ref_hits = ref_mol.GetSubstructMatch(scaffold)
    target_hits = target_mol.GetSubstructMatch(scaffold)
    if not (ref_hits and target_hits):
        return None

    # (ref_idx, target_idx) pairs for the scaffold atoms.
    pairing = list(zip(ref_hits, target_hits))

    # The reference needs 2D coordinates to align against.
    if ref_mol.GetNumConformers() == 0:
        AllChem.Compute2DCoords(ref_mol)

    try:
        AllChem.Compute2DCoords(target_mol)
        AllChem.GenerateDepictionMatching2DStructure(
            target_mol, ref_mol, atomMap=pairing
        )
    except Exception:
        # Alignment failed — settle for the standard depiction.
        AllChem.Compute2DCoords(target_mol)

    # Kekulize a copy so a failure leaves the aligned molecule untouched.
    editable = Chem.RWMol(target_mol)
    try:
        Chem.Kekulize(editable, clearAromaticFlags=True)
    except Exception:
        pass
    else:
        target_mol = editable

    from ..image.structure_from_image import _rdkit_mol_to_atom_bond_dicts
    from ..image.structure_from_image import normalize_for_cdxml

    atoms, bonds = _rdkit_mol_to_atom_bond_dicts(target_mol)
    # Normalize to CDXML coordinate space.
    atoms, bonds = normalize_for_cdxml(atoms, bonds, center_x, center_y)

    return atoms, bonds
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
# ---------------------------------------------------------------------------
|
|
305
|
+
# Fragment XML builder (adapted from cdxml_builder._build_fragment)
|
|
306
|
+
# ---------------------------------------------------------------------------
|
|
307
|
+
|
|
308
|
+
# Element number lookup: symbol → atomic number for the whole periodic table.
# The symbols are listed in atomic-number order, ten per row, so the number
# is simply the 1-based position.  A complete table matters because
# _build_fragment falls back to 6 (carbon) for any symbol it cannot find,
# which would silently write e.g. Sc, Y, Sm, Yb or Hf atoms as carbon.
ELEMENT_NUMBERS = {
    symbol: number
    for number, symbol in enumerate(
        (
            "H He Li Be B C N O F Ne "
            "Na Mg Al Si P S Cl Ar K Ca "
            "Sc Ti V Cr Mn Fe Co Ni Cu Zn "
            "Ga Ge As Se Br Kr Rb Sr Y Zr "
            "Nb Mo Tc Ru Rh Pd Ag Cd In Sn "
            "Sb Te I Xe Cs Ba La Ce Pr Nd "
            "Pm Sm Eu Gd Tb Dy Ho Er Tm Yb "
            "Lu Hf Ta W Re Os Ir Pt Au Hg "
            "Tl Pb Bi Po At Rn Fr Ra Ac Th "
            "Pa U Np Pu Am Cm Bk Cf Es Fm "
            "Md No Lr Rf Db Sg Bh Hs Mt Ds "
            "Rg Cn Nh Fl Mc Lv Ts Og"
        ).split(),
        start=1,
    )
}

# Bond stereo: maps a bond dict's "cfg" value to the CDXML Display attribute
# written by _build_fragment.  Values not listed get no Display attribute.
BOND_STEREO_ATTR = {
    1: "WedgeBegin",
    4: "WedgeBegin",
    6: "WedgedHashBegin",
}
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _build_fragment(
    atoms: List[Dict],
    bonds: List[Dict],
    ids: _IDGen,
) -> Tuple[str, Dict[int, int], int]:
    """
    Serialize atom/bond dicts into a CDXML <fragment> element.

    Every atom dict must carry "index", "x" and "y"; "symbol" (default "C"),
    "num_hydrogens", "charge" and "isotope" are optional.  Every bond dict
    must carry "atom1" and "atom2" (atom indices); "order", "cfg" and
    "double_pos" are optional.

    Returns (xml_string, atom_id_map, fragment_xml_id), where atom_id_map
    maps each atom's "index" to the XML id assigned to its <n> node.
    """
    frag_id = ids.next()

    # Fragment bounding box spans the atom centers only.
    x_coords = [atom["x"] for atom in atoms]
    y_coords = [atom["y"] for atom in atoms]
    left, top = min(x_coords), min(y_coords)
    right, bottom = max(x_coords), max(y_coords)

    frag_z = ids.next()
    out: List[str] = [
        f'<fragment id="{frag_id}" '
        f'BoundingBox="{left:.2f} {top:.2f} {right:.2f} {bottom:.2f}" '
        f'Z="{frag_z}">'
    ]

    atom_id_map: Dict[int, int] = {}
    for atom in atoms:
        node_id = ids.next()
        atom_id_map[atom["index"]] = node_id
        px, py = atom["x"], atom["y"]
        node_z = ids.next()

        symbol = atom.get("symbol", "C")
        # Symbols missing from the table are written as carbon (6).
        atomic_number = ELEMENT_NUMBERS.get(symbol, 6)
        h_count = atom.get("num_hydrogens")
        charge = atom.get("charge", 0)
        isotope = atom.get("isotope")

        node_attrs = [
            f'id="{node_id}"',
            f'p="{px:.2f} {py:.2f}"',
            f'Z="{node_z}"',
        ]

        # An uncharged, unlabelled-isotope carbon is a bare self-closing
        # node with no element attribute and no text label.
        if symbol == "C" and not charge and not isotope:
            out.append(f'<n {" ".join(node_attrs)}/>')
            continue

        node_attrs.append(f'Element="{atomic_number}"')
        if h_count is not None:
            node_attrs.append(f'NumHydrogens="{h_count}"')
        if isotope:
            node_attrs.append(f'Isotope="{isotope}"')
        node_attrs.append('NeedsClean="yes"')
        if charge:
            node_attrs.append(f'Charge="{charge}"')

        # Everything else needs a nested <t> text label.
        out.append(f'<n {" ".join(node_attrs)}>')
        half_w = max(len(symbol) * 5.5, 6.0) / 2.0
        label_id = ids.next()
        out.append(
            f'<t id="{label_id}" p="{px - 3.25:.2f} {py + 3.52:.2f}" '
            f'BoundingBox="{px - half_w:.2f} {py - 7.52:.2f} '
            f'{px + half_w:.2f} {py:.2f}" '
            f'LabelJustification="Left">'
        )

        # Hydrogen isotopes 2 and 3 are displayed as D and T.
        if symbol == "H" and isotope == 2:
            shown = "D"
        elif symbol == "H" and isotope == 3:
            shown = "T"
        else:
            shown = symbol
        if h_count is not None and h_count > 0:
            shown += "H" if h_count == 1 else f"H{h_count}"

        out.append(
            f'<s font="{ACS_LABEL_FONT}" size="{ACS_LABEL_SIZE}" '
            f'color="0" face="{ACS_LABEL_FACE}">{xml_escape(shown)}</s>'
        )
        out.append('</t>')
        out.append('</n>')

    # Bonds
    for bond in bonds:
        bond_id = ids.next()
        bond_z = ids.next()
        # An endpoint whose index was never seen among the atoms becomes 0.
        begin_id = atom_id_map.get(bond["atom1"], 0)
        end_id = atom_id_map.get(bond["atom2"], 0)

        bond_attrs = [
            f'id="{bond_id}"',
            f'Z="{bond_z}"',
            f'B="{begin_id}"',
            f'E="{end_id}"',
        ]

        # Only double and triple bonds get an explicit Order attribute.
        order = bond.get("order", 1)
        if order == 2:
            bond_attrs.append('Order="2"')
        elif order == 3:
            bond_attrs.append('Order="3"')

        display = BOND_STEREO_ATTR.get(bond.get("cfg", 0))
        if display is not None:
            bond_attrs.append(f'Display="{display}"')

        double_pos = bond.get("double_pos")
        if double_pos:
            bond_attrs.append(f'DoublePosition="{double_pos}"')

        out.append(f'<b {" ".join(bond_attrs)}/>')

    out.append('</fragment>')
    return "\n".join(out), atom_id_map, frag_id
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
# ---------------------------------------------------------------------------
|
|
452
|
+
# Text builder
|
|
453
|
+
# ---------------------------------------------------------------------------
|
|
454
|
+
|
|
455
|
+
def _build_text_element(
    text_lines: List[str],
    x: float,
    y: float,
    ids: _IDGen,
    justification: str = "Center",
    use_formatting: bool = True,
    font_size: Optional[float] = None,
) -> Tuple[str, int]:
    """
    Build a standalone <t> element for condition text.

    Parameters
    ----------
    text_lines : list of str
        One entry per displayed line.
    x, y : float
        Anchor written to the ``p`` attribute; ``y`` is the first line's
        baseline.  ``x`` is treated as the left edge for "Left"
        justification, the right edge for "Right", and the horizontal
        center otherwise.
    ids : _IDGen
        Source of the element id and Z value (two IDs are consumed).
    justification : str
        Written to both ``CaptionJustification`` and ``Justification``.
    use_formatting : bool
        If True, each line goes through ``build_formatted_s_xml``;
        otherwise the escaped lines are emitted in one plain <s> run.
    font_size : float, optional
        Override font size (default: ACS_CAPTION_SIZE, typically 10pt).

    Returns (xml_string, text_xml_id).
    """
    size = float(font_size if font_size is not None else ACS_CAPTION_SIZE)
    # The text metrics were measured at the caption size; scale them linearly.
    scale = size / float(ACS_CAPTION_SIZE)
    char_w = _CHAR_WIDTH * scale
    cap_h = _CAP_HEIGHT * scale
    descent = _DESCENT * scale
    line_adv = _LINE_ADVANCE * scale

    tid = ids.next()
    z = ids.next()

    max_chars = max((len(ln) for ln in text_lines), default=1)
    n = len(text_lines)
    w = max_chars * char_w

    # The estimated box must extend from the anchor in the direction the
    # text actually runs; centering it for edge-anchored text would shift
    # the box half a width away from where the text is drawn.
    if justification == "Left":
        bx1, bx2 = x, x + w
    elif justification == "Right":
        bx1, bx2 = x - w, x
    else:
        bx1, bx2 = x - w / 2.0, x + w / 2.0
    by1 = y - cap_h
    by2 = y + max(0, n - 1) * line_adv + descent

    parts = [
        f'<t id="{tid}" p="{x:.2f} {y:.2f}" '
        f'BoundingBox="{bx1:.2f} {by1:.2f} {bx2:.2f} {by2:.2f}" '
        f'Z="{z}" '
        f'CaptionJustification="{justification}" '
        f'Justification="{justification}" '
        f'LineHeight="auto">'
    ]

    if use_formatting:
        # Use chemistry-aware text formatting for each line.
        # ChemDraw requires \n INSIDE <s> text content for line breaks —
        # newlines between XML elements are treated as whitespace.
        last_index = len(text_lines) - 1
        formatted_runs = []
        for i, line in enumerate(text_lines):
            run = build_formatted_s_xml(
                line,
                font=ACS_LABEL_FONT,
                size=size,
                color="0",
            )
            if i < last_index:
                # Inject \n before the closing </s> of this line's last run
                # so ChemDraw renders a line break.
                last_close = run.rfind("</s>")
                if last_close >= 0:
                    run = run[:last_close] + "\n" + run[last_close:]
            formatted_runs.append(run)
        parts.append("".join(formatted_runs))
    else:
        text = "\n".join(xml_escape(ln) for ln in text_lines)
        parts.append(
            f'<s font="{ACS_LABEL_FONT}" size="{size}" '
            f'color="0" face="{ACS_CAPTION_FACE}">{text}</s>'
        )

    parts.append("</t>")
    return "\n".join(parts), tid
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def _build_label_element(
    label: str,
    x: float,
    y: float,
    ids: _IDGen,
) -> Tuple[str, int]:
    """
    Build a compound number label (e.g. "1", "2") centered below a structure.

    The label is emitted as a single-line, center-justified text element
    without chemistry-aware formatting.  Returns (xml_string, text_xml_id).
    """
    single_line = [label]
    return _build_text_element(
        single_line,
        x,
        y,
        ids,
        justification="Center",
        use_formatting=False,
    )
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
# ---------------------------------------------------------------------------
|
|
546
|
+
# Arrow builder
|
|
547
|
+
# ---------------------------------------------------------------------------
|
|
548
|
+
|
|
549
|
+
def _build_arrow(
    tail_x: float,
    tail_y: float,
    head_x: float,
    head_y: float,
    ids: _IDGen,
    dashed: bool = False,
    nogo: bool = False,
) -> Tuple[str, int]:
    """Build a self-closing <arrow> element from tail to head.

    Parameters
    ----------
    tail_x, tail_y, head_x, head_y : float
        End points of the arrow shaft.
    ids : _IDGen
        Source of the arrow id and Z value (two IDs are consumed).
    dashed : bool
        If True, adds ``LineType="Dashed"``.
    nogo : bool
        If True, adds ``NoGo="Cross"`` — ChemDraw's native failed-arrow
        rendering (bold X through the arrow shaft).

    Returns (xml_string, arrow_xml_id).
    """
    arrow_id = ids.next()
    z_order = ids.next()

    # Bounding box: the shaft extent, padded by 4 pt above and below.
    box_left = min(tail_x, head_x)
    box_top = min(tail_y, head_y) - 4.0
    box_right = max(tail_x, head_x)
    box_bottom = max(tail_y, head_y) + 4.0

    # Reference point for the Center3D / axis-end attributes.
    mid_x = (tail_x + head_x) / 2.0
    ref_y = tail_y + 100.0

    pieces = [
        f'id="{arrow_id}"',
        f'BoundingBox="{box_left:.2f} {box_top:.2f} {box_right:.2f} {box_bottom:.2f}"',
        f'Z="{z_order}"',
        'FillType="None"',
        'ArrowheadHead="Full"',
        'ArrowheadType="Solid"',
        'HeadSize="1000"',
        'ArrowheadCenterSize="875"',
        'ArrowheadWidth="250"',
        f'Head3D="{head_x:.2f} {head_y:.2f} 0"',
        f'Tail3D="{tail_x:.2f} {tail_y:.2f} 0"',
        f'Center3D="{mid_x:.2f} {ref_y:.2f} 0"',
        f'MajorAxisEnd3D="{mid_x + 80:.2f} {ref_y:.2f} 0"',
        f'MinorAxisEnd3D="{mid_x:.2f} {ref_y + 80:.2f} 0"',
    ]
    # Optional styling goes last, dashed before no-go.
    pieces += ['LineType="Dashed"'] if dashed else []
    pieces += ['NoGo="Cross"'] if nogo else []

    return f'<arrow {" ".join(pieces)}/>', arrow_id
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def _build_failed_x(
    cx: float,
    cy: float,
    ids: _IDGen,
) -> str:
    """Build a bold 'X' text element centered on an arrow midpoint.

    Two IDs are consumed from ``ids`` (element id, then Z value).  Returns
    the XML string only.

    .. deprecated::
        Use ``NoGo="Cross"`` on the ``<arrow>`` element instead.
        This function is kept for backwards compatibility.
    """
    text_id = ids.next()
    z_order = ids.next()
    # The text anchor is its baseline, so it is shifted 3.5 pt in +y to make
    # the glyph sit centered on the arrow line.
    baseline_y = cy + 3.5
    opening = (
        f'<t id="{text_id}" p="{cx:.2f} {baseline_y:.2f}" Z="{z_order}" '
        f'Justification="Center" InterpretChemically="no" '
        f'CaptionJustification="Center">'
    )
    run = f'<s font="{ACS_LABEL_FONT}" size="12" color="0" face="1">X</s>'
    return "\n".join((opening, run, '</t>'))
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
def _is_letter_condition(text_lines: List[str]) -> bool:
    """Tell whether below-arrow text is a letter condition like 'a' or 'b,c'.

    True only for exactly one line that, once surrounding whitespace is
    stripped, is a comma-separated list of single lowercase letters
    (whitespace is allowed after each comma).
    """
    import re

    if len(text_lines) == 1:
        candidate = text_lines[0].strip()
        return re.match(r'^[a-z](,\s*[a-z])*$', candidate) is not None
    return False
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def _build_letter_label(
    letter: str,
    cx: float,
    y: float,
    ids: _IDGen,
) -> Tuple[str, int]:
    """Build a small (size 8) letter label centered above an arrow.

    ``y`` is the arrow line; the label anchor is placed 6 pt above it.
    Two IDs are consumed from ``ids``.  Returns (xml_string, text_xml_id).
    """
    text_id = ids.next()
    z_order = ids.next()
    # Lift the anchor slightly above the arrow line (y grows downward).
    anchor_y = y - 6.0
    opening = (
        f'<t id="{text_id}" p="{cx:.2f} {anchor_y:.2f}" Z="{z_order}" '
        f'Justification="Center" InterpretChemically="no" '
        f'CaptionJustification="Center">'
    )
    run = (
        f'<s font="{ACS_LABEL_FONT}" size="8" color="0" face="1">'
        f'{xml_escape(letter)}</s>'
    )
    return "\n".join((opening, run, '</t>')), text_id
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
def _build_condition_key(
    condition_key: Dict[str, str],
    left_x: float,
    top_y: float,
    ids: _IDGen,
) -> Tuple[str, float]:
    """Build the condition key block below the scheme.

    Emits one <t> element per entry, in sorted letter order, starting at
    ``top_y`` and stepping 14 pt down per entry.  Each element holds the
    parenthesized letter in its own run followed by the condition text
    produced by ``build_formatted_s_xml``.

    Returns (xml_string, bottom_y), where bottom_y is the y position one
    step below the last entry (``top_y`` itself for an empty key).
    """
    line_step = 14.0  # line spacing
    entries: List[str] = []
    cursor_y = top_y
    for letter, conditions in sorted(condition_key.items()):
        text_id = ids.next()
        z_order = ids.next()
        # Format: "(a) conditions text".  The letter gets its own run with
        # face="2"; the rest goes through build_formatted_s_xml for
        # chemistry-aware formatting.
        letter_run = (
            f'<s font="{ACS_LABEL_FONT}" size="{ACS_CAPTION_SIZE}" '
            f'color="0" face="2">({xml_escape(letter)})</s>'
        )
        text_run = build_formatted_s_xml(
            f" {conditions}",
            font=ACS_LABEL_FONT,
            size=ACS_CAPTION_SIZE,
            color="0",
        )
        opening = (
            f'<t id="{text_id}" p="{left_x:.2f} {cursor_y:.2f}" Z="{z_order}" '
            f'InterpretChemically="no">'
        )
        entries.append("\n".join((opening, f'{letter_run}{text_run}', '</t>')))
        cursor_y += line_step
    return "\n".join(entries), cursor_y
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
def _build_vertical_arrow(
    x: float,
    top_y: float,
    bottom_y: float,
    ids: _IDGen,
    condition_lines: Optional[List[str]] = None,
    condition_side: str = "right",
    dashed: bool = False,
    nogo: bool = False,
) -> Tuple[str, int]:
    """
    Build a vertical arrow at ``x`` and, optionally, a text block beside it.

    Parameters
    ----------
    x : float
        Horizontal position of the arrow.
    top_y : float
        Y coordinate of the arrow tail (top, in CDXML y-down coords).
    bottom_y : float
        Y coordinate of the arrow head (bottom).
    ids : _IDGen
        ID generator passed on to the arrow and text builders.
    condition_lines : list of str, optional
        Condition text lines placed beside the arrow.  When empty or None
        only the arrow is emitted.
    condition_side : "right" or "left"
        Which side of the arrow to place condition text.  Any value other
        than ``"right"`` is treated as left.
    dashed : bool
        Forwarded to ``_build_arrow``.
    nogo : bool
        Forwarded to ``_build_arrow`` (``NoGo="Cross"`` for a failed arrow).

    Returns (xml_string, arrow_xml_id).
    """
    shaft_xml, shaft_id = _build_arrow(
        x, top_y, x, bottom_y, ids, dashed=dashed, nogo=nogo,
    )
    if not condition_lines:
        return shaft_xml, shaft_id

    gap = 12.0  # horizontal distance between the shaft and the text anchor
    on_right = condition_side == "right"
    anchor_x = x + gap if on_right else x - gap
    # Text grows away from the arrow: left-justified on the right side,
    # right-justified on the left side.
    side_justification = "Left" if on_right else "Right"

    # p.y is the baseline of the first line.  Centering the visual block
    # (cap-height above the first baseline down to the last descent) on the
    # arrow midpoint gives: p.y = mid_y - block_height/2 + _CAP_HEIGHT.
    midpoint_y = (top_y + bottom_y) / 2.0
    block_height = _estimate_text_height(condition_lines)
    baseline_y = midpoint_y - block_height / 2.0 + _CAP_HEIGHT

    text_xml, _ = _build_text_element(
        condition_lines, anchor_x, baseline_y, ids,
        justification=side_justification,
    )
    return "\n".join((shaft_xml, text_xml)), shaft_id
|
|
754
|
+
|
|
755
|
+
|
|
756
|
+
def _build_run_arrow(
    tail_x: float,
    head_x: float,
    y: float,
    input_label: str,
    output_label: str,
    ids: _IDGen,
) -> Tuple[str, List[int]]:
    """
    Build a horizontal run arrow with a label at each end.

    The input label is always emitted, right-justified 5 pt left of the
    tail.  The output label is emitted only when non-empty, left-justified
    5 pt right of the head.  Both share one baseline chosen so the text is
    vertically centered on the arrow.

    Returns (xml_string, [element_ids]) with ids in the order arrow, input
    label, output label.
    """
    shaft_xml, shaft_id = _build_arrow(tail_x, y, head_x, y, ids)
    fragments = [shaft_xml]
    element_ids = [shaft_id]

    # p.y is the baseline; the visual center of a line sits
    # (_CAP_HEIGHT - _DESCENT)/2 above it, so shift the baseline down by
    # that amount to center the text on the arrow.
    baseline_y = y + (_CAP_HEIGHT - _DESCENT) / 2.0

    # (text, anchor x, justification) for every label to draw, in order.
    label_specs = [(input_label, tail_x - 5.0, "Right")]
    if output_label:
        label_specs.append((output_label, head_x + 5.0, "Left"))

    for caption, anchor_x, side in label_specs:
        caption_xml, caption_id = _build_text_element(
            [caption], anchor_x, baseline_y, ids,
            justification=side, use_formatting=False,
        )
        fragments.append(caption_xml)
        element_ids.append(caption_id)

    return "\n".join(fragments), element_ids
|
|
798
|
+
|
|
799
|
+
|
|
800
|
+
# ---------------------------------------------------------------------------
|
|
801
|
+
# Layout helpers
|
|
802
|
+
# ---------------------------------------------------------------------------
|
|
803
|
+
|
|
804
|
+
def _fragment_bbox(atoms: List[Dict]) -> Tuple[float, float, float, float]:
    """Return the padded bounding box (min_x, min_y, max_x, max_y) of atoms.

    The box spans the ``"x"``/``"y"`` values of the atom dicts and is then
    grown by ``_FRAG_PAD_W`` horizontally and ``_FRAG_PAD_H`` vertically on
    each side, to allow for bond lines and heteroatom labels that extend
    beyond the atom centers (measured: +6.6pt width, +2.9pt height).
    """
    x_coords = [atom["x"] for atom in atoms]
    y_coords = [atom["y"] for atom in atoms]
    left, top = min(x_coords), min(y_coords)
    right, bottom = max(x_coords), max(y_coords)
    return (
        left - _FRAG_PAD_W,
        top - _FRAG_PAD_H,
        right + _FRAG_PAD_W,
        bottom + _FRAG_PAD_H,
    )
|
|
818
|
+
|
|
819
|
+
|
|
820
|
+
def _bbox_width(bbox: Tuple[float, float, float, float]) -> float:
    """Return the horizontal extent (max_x - min_x) of a bounding box."""
    min_x, max_x = bbox[0], bbox[2]
    return max_x - min_x
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
def _bbox_height(bbox: Tuple[float, float, float, float]) -> float:
    """Return the vertical extent (max_y - min_y) of a bounding box."""
    min_y, max_y = bbox[1], bbox[3]
    return max_y - min_y
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def _bbox_center(bbox: Tuple[float, float, float, float]) -> Tuple[float, float]:
    """Return the (x, y) midpoint of a (min_x, min_y, max_x, max_y) box."""
    mid_x = (bbox[0] + bbox[2]) / 2.0
    mid_y = (bbox[1] + bbox[3]) / 2.0
    return (mid_x, mid_y)
|
|
830
|
+
|
|
831
|
+
|
|
832
|
+
def _shift_atoms(atoms: List[Dict], dx: float, dy: float) -> None:
    """Translate every atom dict in place by (dx, dy).

    Only the ``"x"`` and ``"y"`` entries are touched; nothing is returned.
    """
    offsets = (("x", dx), ("y", dy))
    for atom in atoms:
        for axis, delta in offsets:
            atom[axis] += delta
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def _estimate_text_width(lines: List[str]) -> float:
    """Estimate text block width in CDXML points.

    The estimate is the character count of the longest line multiplied by
    ``_CHAR_WIDTH``.  An empty list yields ``0.0`` (a float, matching the
    annotated return type and ``_estimate_text_height``).
    """
    if not lines:
        return 0.0
    return max(len(line) for line in lines) * _CHAR_WIDTH
|
|
844
|
+
|
|
845
|
+
|
|
846
|
+
def _estimate_text_height(lines: List[str]) -> float:
    """Estimate the visual height of a text block.

    For a non-empty block this is the cap-height of the first line, one
    line advance per additional line, and the descent of the last line.
    An empty block has height 0.0.
    """
    line_count = len(lines)
    if not line_count:
        return 0.0
    extra_lines = max(0, line_count - 1)
    return _CAP_HEIGHT + extra_lines * _LINE_ADVANCE + _DESCENT
|
|
852
|
+
|
|
853
|
+
|
|
854
|
+
# ---------------------------------------------------------------------------
|
|
855
|
+
# Structure resolver
|
|
856
|
+
# ---------------------------------------------------------------------------
|
|
857
|
+
|
|
858
|
+
def _resolve_structure(
    ref: StructureRef,
    center_x: float = 200.0,
    center_y: float = 300.0,
    source_data: Optional[Dict[str, Dict]] = None,
    reference_mol: Optional[Any] = None,
) -> ResolvedFragment:
    """
    Resolve a StructureRef to atoms/bonds in CDXML coordinates.

    Resolution order (four tiers):
      1. Source JSON — species looked up by ``ref.id`` (exact, then
         lowercase) when the ref carries no SMILES
      2. Declared SMILES — from StructureRef.smiles (or from tier 1)
      3. Name resolution — ``ref.name`` (or the source-JSON name) looked up
         in reagent_db
      4. ID as name — ``ref.id`` itself looked up in reagent_db

    Parameters
    ----------
    ref : StructureRef
        The structure to resolve.
    center_x, center_y : float
        Target center passed to the coordinate generators.
    source_data : dict, optional
        Species records keyed by id; ``"smiles"``, ``"label"`` and
        ``"name"`` entries are read from the matching record.
    reference_mol : RDKit Mol, optional
        When provided, the structure is aligned to this reference via MCS.
        This orients shared scaffolds to match the reference (product) layout.
        Any failure during alignment falls back to standard coordinates.

    Returns
    -------
    ResolvedFragment
        Its ``ref`` is a copy of the input ref carrying the resolved SMILES
        and label; ``name``, ``file`` and ``cdxml_id`` are preserved on
        every resolution path.

    Raises
    ------
    ValueError
        If coordinates cannot be generated for the SMILES, if a name is
        present but cannot be resolved, or if nothing identifies the
        structure.
    NotImplementedError
        If only ``ref.file`` is available.
    """
    smiles = ref.smiles
    label = ref.label

    def lookup_species() -> Optional[Dict]:
        # Source-JSON record for this ref: exact id first, then lowercase.
        if not source_data:
            return None
        return source_data.get(ref.id) or source_data.get(ref.id.lower())

    # Tier 1: Source JSON lookup (only when the ref has no SMILES of its own)
    if not smiles:
        species = lookup_species()
        if species:
            smiles = species.get("smiles")
            # JSON label is used only when the ref has none
            if label is None and species.get("label"):
                label = species["label"]

    def make_fragment(atoms: List[Dict], bonds: Any, resolved_smiles: str) -> ResolvedFragment:
        # Wrap atoms/bonds with their bbox and a ref copy holding the result.
        bbox = _fragment_bbox(atoms)
        cx, cy = _bbox_center(bbox)
        resolved_ref = StructureRef(
            id=ref.id, smiles=resolved_smiles, name=ref.name, file=ref.file,
            cdxml_id=ref.cdxml_id, label=label,
        )
        return ResolvedFragment(
            ref=resolved_ref, atoms=atoms, bonds=bonds,
            bbox=bbox, cx=cx, cy=cy,
        )

    def fragment_from_smiles(smi: str) -> ResolvedFragment:
        # Generate coordinates for a SMILES, MCS-aligned when possible.
        if reference_mol is not None:
            try:
                from rdkit import Chem
                target_mol = Chem.MolFromSmiles(smi)
                if target_mol is not None:
                    aligned = _align_mol_to_reference(
                        target_mol, reference_mol, center_x, center_y,
                    )
                    if aligned is not None:
                        atoms, bonds = aligned
                        return make_fragment(atoms, bonds, smi)
            except Exception:
                # Alignment is best-effort: fall through to standard coords
                pass

        result = _smiles_to_fragment_data(smi, center_x, center_y)
        if result is None:
            raise ValueError(f"Failed to generate 2D coords for '{ref.id}' (SMILES: {smi})")
        atoms, bonds = result
        return make_fragment(atoms, bonds, smi)

    def reagent_db_smiles(query: str) -> Optional[str]:
        # SMILES for a (lowercased) name in reagent_db; first one if a list.
        from ..reagent_db import get_reagent_db
        entry = get_reagent_db().entry_for_name(query.lower())
        if entry and entry.get("smiles"):
            smi = entry["smiles"]
            return smi[0] if isinstance(smi, list) else smi
        return None

    # Tier 2: Declared SMILES
    if smiles:
        return fragment_from_smiles(smiles)

    # Tier 3: Name resolution (reagent_db)
    name = ref.name
    # If source_data had a species with a name, use it for resolution
    if not name:
        species = lookup_species()
        if species:
            name = species.get("name")

    if name:
        failure: Optional[Exception] = None
        try:
            db_smiles = reagent_db_smiles(name)
            if db_smiles:
                return fragment_from_smiles(db_smiles)
        except Exception as exc:
            # Keep the underlying error so it is chained onto the
            # ValueError below instead of being lost.
            failure = exc
        raise ValueError(
            f"Cannot resolve structure '{ref.id}' by name '{name}'. "
            f"Provide a SMILES string instead."
        ) from failure

    if ref.file:
        raise NotImplementedError(
            f"File-based structure loading not yet implemented for '{ref.id}'"
        )

    # Tier 4: Try the ID itself as a compound name in reagent_db
    try:
        db_smiles = reagent_db_smiles(ref.id)
        if db_smiles:
            return fragment_from_smiles(db_smiles)
    except ImportError:
        pass

    raise ValueError(
        f"Structure '{ref.id}' has no smiles, name, or file — cannot resolve. "
        f"Provide explicit smiles: or name: in the structures block."
    )
|
|
991
|
+
|
|
992
|
+
|
|
993
|
+
# ---------------------------------------------------------------------------
|
|
994
|
+
# Layout engine: linear (single step)
|
|
995
|
+
# ---------------------------------------------------------------------------
|
|
996
|
+
|
|
997
|
+
def _layout_linear(
    scheme: SchemeDescriptor,
    ids: _IDGen,
    source_data: Optional[Dict[str, Dict]] = None,
    reference_mol: Optional[Any] = None,
) -> Tuple[str, float]:
    """
    Lay out all of the scheme's steps as one horizontal row.

    Delegates to ``_layout_steps_row`` with the row starting at x=100 and
    the arrow line at y=300, and discards the last two values it returns.

    Returns (inner_xml, lowest_y).
    """
    inner_xml, bottom_y, _row_edge, _row_info = _layout_steps_row(
        scheme,
        scheme.steps,
        ids,
        start_x=100.0,
        arrow_y=300.0,
        source_data=source_data,
        reference_mol=reference_mol,
    )
    return inner_xml, bottom_y
|
|
1012
|
+
|
|
1013
|
+
|
|
1014
|
+
# ---------------------------------------------------------------------------
|
|
1015
|
+
# Layout engine: sequential (multi-step, with wrap:repeat support)
|
|
1016
|
+
# ---------------------------------------------------------------------------
|
|
1017
|
+
|
|
1018
|
+
def _split_into_rows(
    steps: List[StepDescriptor],
    steps_per_row: Optional[int],
) -> List[List[StepDescriptor]]:
    """Split steps into row groups for wrap:repeat.

    Parameters
    ----------
    steps : list
        Steps in scheme order.
    steps_per_row : int or None
        Maximum number of steps per row.  ``None``, or a value at least as
        large as ``len(steps)``, keeps everything in one row.

    Returns
    -------
    list of list
        Consecutive chunks of ``steps``; only the last chunk may be shorter
        than ``steps_per_row``.

    Raises
    ------
    ValueError
        If ``steps_per_row`` is zero or negative while wrapping is needed.
        (A negative value used to return no rows at all, silently dropping
        every step.)
    """
    if steps_per_row is None or steps_per_row >= len(steps):
        return [steps]
    if steps_per_row < 1:
        raise ValueError(
            f"steps_per_row must be a positive integer, got {steps_per_row!r}"
        )
    return [
        steps[start:start + steps_per_row]
        for start in range(0, len(steps), steps_per_row)
    ]
|
|
1029
|
+
|
|
1030
|
+
|
|
1031
|
+
def _split_serpentine_rows(
    steps: List[StepDescriptor],
    steps_per_row: int,
) -> Tuple[List[List[StepDescriptor]], List[Optional[StepDescriptor]]]:
    """
    Split steps into horizontal rows + transition steps for serpentine layout.

    Steps are consumed in order: up to ``steps_per_row`` steps form a row,
    then the next single step (if any) becomes the transition that follows
    that row.

    Returns (rows, transitions) where:
      - rows[i] = list of steps rendered horizontally in row i
      - transitions[i] = the step following row i (rendered as a vertical
        arrow), or None if no step is left after row i.

    Both lists always have the same length.  Note that the final entry of
    ``transitions`` can be a real step with no row after it, when the steps
    run out exactly on a transition.

    Raises
    ------
    ValueError
        If ``steps_per_row`` is less than 1.  (A negative value previously
        made the loop index move backwards and never terminate; zero
        produced empty rows with every step turned into a transition.)
    """
    if steps_per_row < 1:
        raise ValueError(
            f"steps_per_row must be a positive integer, got {steps_per_row!r}"
        )

    rows: List[List[StepDescriptor]] = []
    transitions: List[Optional[StepDescriptor]] = []
    total = len(steps)
    i = 0
    while i < total:
        rows.append(steps[i:i + steps_per_row])
        i += steps_per_row
        if i < total:
            # The next step becomes the vertical transition arrow
            transitions.append(steps[i])
            i += 1
        else:
            transitions.append(None)
    return rows, transitions
|
|
1057
|
+
|
|
1058
|
+
|
|
1059
|
+
def _layout_sequential(
    scheme: SchemeDescriptor,
    ids: _IDGen,
    source_data: Optional[Dict[str, Dict]] = None,
    reference_mol: Optional[Any] = None,
) -> Tuple[str, float]:
    """
    Lay out a multi-step sequential scheme.

    Three modes, chosen from ``scheme.wrap`` and ``scheme.steps_per_row``:

    * single row — wrap is neither "repeat" nor "serpentine", or
      steps_per_row is None or not smaller than the number of steps;
    * wrap:serpentine — delegated to ``_layout_serpentine``;
    * wrap:repeat — several left-to-right rows stacked vertically, each
      row's arrow line placed ``ROW_GAP`` below the previous row's lowest
      point.

    Returns (inner_xml, lowest_y).
    """
    per_row = scheme.steps_per_row
    wraps = scheme.wrap in ("repeat", "serpentine")

    # Single row: no wrapping needed
    if not wraps or per_row is None or per_row >= len(scheme.steps):
        single_xml, single_bottom, _edge, _info = _layout_steps_row(
            scheme, scheme.steps, ids, start_x=80.0, arrow_y=300.0,
            source_data=source_data, reference_mol=reference_mol)
        return single_xml, single_bottom

    if scheme.wrap == "serpentine":
        return _layout_serpentine(scheme, ids, source_data=source_data,
                                  reference_mol=reference_mol)

    # --- Multi-row wrap:repeat ---
    row_groups = _split_into_rows(scheme.steps, per_row)

    # Bucket run arrows by row and renumber their step to be 1-indexed
    # within that row: overall step N lives in row (N-1)//per_row and
    # becomes step N - row*per_row there.
    arrows_by_row: Dict[int, List[StepRunArrows]] = {}
    for run_arrow in scheme.run_arrows:
        target_row = (run_arrow.step - 1) // per_row
        local_step = run_arrow.step - target_row * per_row
        renumbered = StepRunArrows(step=local_step, runs=run_arrow.runs)
        arrows_by_row.setdefault(target_row, []).append(renumbered)

    row_xmls: List[str] = []
    current_arrow_y = 300.0
    bottom_y = current_arrow_y

    for index, group in enumerate(row_groups):
        row_xml, bottom_y, _edge, _info = _layout_steps_row(
            scheme, group, ids,
            start_x=80.0, arrow_y=current_arrow_y,
            source_data=source_data,
            run_arrows=arrows_by_row.get(index, []),
            reference_mol=reference_mol,
        )
        row_xmls.append(row_xml)
        # Next row starts below this row's lowest point
        current_arrow_y = bottom_y + ROW_GAP

    return "\n".join(row_xmls), bottom_y
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
def _layout_serpentine(
    scheme: SchemeDescriptor,
    ids: _IDGen,
    source_data: Optional[Dict[str, Dict]] = None,
    reference_mol: Optional[Any] = None,
) -> Tuple[str, float]:
    """
    Layout a serpentine (zigzag) multi-row scheme.

    Row 1 flows L→R, vertical down arrow at right edge, Row 2 flows R→L,
    vertical down arrow at left edge, Row 3 flows L→R, etc.

    Transition steps between rows are rendered as vertical arrows with
    their conditions placed beside the arrow.  The first product of each
    transition step is drawn below its arrow and handed to the next row as
    an already-placed first substrate.

    Returns (inner_xml, lowest_y).  ``lowest_y`` covers every row and every
    transition product (including its label position), and is the initial
    arrow line (300.0) when the scheme has no steps.
    """
    steps_per_row = scheme.steps_per_row
    left_margin = 80.0

    rows, transitions = _split_serpentine_rows(scheme.steps, steps_per_row)

    bond_len = ACS_BOND_LENGTH
    frag_gap = bond_len * LAYOUT_FRAG_GAP_BONDS
    inter_gap = bond_len * LAYOUT_INTER_GAP_BONDS

    xml_parts: List[str] = []
    arrow_y = 300.0
    # Initialised up front so an empty scheme still returns a value.
    lowest_y = arrow_y
    right_edge = 0.0  # will be set from first row's width
    last_product_frag_id: Optional[int] = None
    last_product_cursor_x: Optional[float] = None  # cursor pos after pre-placed substrate

    def _get_ref(sid: str) -> StructureRef:
        # Declared structure if present, otherwise a bare ref by id.
        if sid in scheme.structures:
            return scheme.structures[sid]
        return StructureRef(id=sid)

    for row_idx, row_steps in enumerate(rows):
        direction = "ltr" if row_idx % 2 == 0 else "rtl"
        start_x = left_margin if direction == "ltr" else right_edge

        # For rows after the first, the first substrate was placed by
        # the transition step's product.  Pass skip_first_substrate_id
        # so _layout_steps_row doesn't redraw it.
        skip_id = last_product_frag_id if row_idx > 0 else None
        skip_cursor = last_product_cursor_x if row_idx > 0 else None

        row_xml, row_lowest_y, row_edge, row_info = _layout_steps_row(
            scheme, row_steps, ids,
            start_x=start_x, arrow_y=arrow_y,
            source_data=source_data,
            direction=direction,
            skip_first_substrate_id=skip_id,
            skip_first_substrate_cursor_x=skip_cursor,
            reference_mol=reference_mol,
        )
        xml_parts.append(row_xml)
        lowest_y = max(lowest_y, row_lowest_y)

        # After row 1, record the right edge for RTL alignment
        if row_idx == 0:
            right_edge = row_edge

        # --- Transition: vertical arrow + product of transition step ---
        transition = transitions[row_idx]
        if transition is None:
            last_product_frag_id = None
            last_product_cursor_x = None
            continue

        # Vertical arrow position: below the CENTER of the last product
        # in the current row (not at the row edge), for both directions.
        vert_x = row_info["last_product_cx"]

        # Vertical arrow geometry:
        #  - tail starts BELOW the last product (below its compound label)
        #  - head ends with enough room for the next row's product
        last_prod_bottom = row_info["last_product_bottom"]
        vert_top_y = last_prod_bottom + 8.0   # below the last product + label
        vert_bottom_y = vert_top_y + ROW_GAP  # adequate vertical span

        # Condition text from the transition step: above-arrow lines,
        # then below-arrow lines, then the yield.  New lists are built so
        # the step's own text lists are never mutated.
        cond_lines: List[str] = []
        if transition.below_arrow and transition.below_arrow.text:
            cond_lines = transition.below_arrow.text[:]
        if transition.above_arrow and transition.above_arrow.text:
            cond_lines = transition.above_arrow.text + cond_lines
        if transition.yield_:
            cond_lines.append(transition.yield_)

        # Condition text placement: right side for right-edge arrows,
        # left side for left-edge arrows
        cond_side = "right" if direction == "ltr" else "left"

        vert_xml, _ = _build_vertical_arrow(
            vert_x, vert_top_y, vert_bottom_y, ids,
            condition_lines=cond_lines if cond_lines else None,
            condition_side=cond_side,
        )
        xml_parts.append(vert_xml)

        # Place the transition step's product below the vertical
        # arrow.  This becomes the first substrate of the next row.
        prod_ref = _get_ref(transition.products[0])
        prod_frag = _resolve_structure(
            prod_ref,
            center_x=vert_x,
            center_y=vert_bottom_y + 25.0,
            source_data=source_data,
            reference_mol=reference_mol,
        )
        # Shift so product top is below arrowhead with clearance
        prod_h = _bbox_height(prod_frag.bbox)
        target_cy = vert_bottom_y + frag_gap * 1.0 + prod_h / 2.0
        dx = vert_x - prod_frag.cx
        dy = target_cy - prod_frag.cy
        _shift_atoms(prod_frag.atoms, dx, dy)
        prod_frag.bbox = _fragment_bbox(prod_frag.atoms)
        prod_frag.cx, prod_frag.cy = _bbox_center(prod_frag.bbox)

        frag_xml, _, frag_id = _build_fragment(
            prod_frag.atoms, prod_frag.bonds, ids,
        )
        xml_parts.append(frag_xml)
        # The transition product is drawn here rather than by
        # _layout_steps_row, so account for it in the overall extent.
        lowest_y = max(lowest_y, prod_frag.bbox[3])

        # Compound label
        if prod_frag.ref.label:
            label_y = prod_frag.bbox[3] + 14.0
            lbl_xml, _ = _build_label_element(
                prod_frag.ref.label, prod_frag.cx, label_y, ids,
            )
            xml_parts.append(lbl_xml)
            lowest_y = max(lowest_y, label_y)

        last_product_frag_id = frag_id

        # Compute cursor position for the next row's first step
        # (after the pre-placed substrate).
        # Next row direction is opposite of current.
        next_direction = "rtl" if direction == "ltr" else "ltr"
        if next_direction == "rtl":
            # RTL: cursor is at the LEFT edge of the substrate
            last_product_cursor_x = prod_frag.bbox[0] - inter_gap
        else:
            # LTR: cursor is at the RIGHT edge of the substrate
            last_product_cursor_x = prod_frag.bbox[2] + inter_gap

        # Next row's arrow line runs through the transition product's center
        arrow_y = prod_frag.cy

    return "\n".join(xml_parts), lowest_y
|
|
1283
|
+
|
|
1284
|
+
|
|
1285
|
+
# ---------------------------------------------------------------------------
|
|
1286
|
+
# Layout engine: divergent (one SM → multiple products via vertical branching)
|
|
1287
|
+
# ---------------------------------------------------------------------------
|
|
1288
|
+
|
|
1289
|
+
# Gap between the bottom of one branch and the top of the next.
|
|
1290
|
+
# Defined as 2.5 times ACS_BOND_LENGTH.
_DIVERGENT_BRANCH_GAP = 2.5 * ACS_BOND_LENGTH
|
|
1291
|
+
|
|
1292
|
+
|
|
1293
|
+
def _layout_divergent(
|
|
1294
|
+
scheme: SchemeDescriptor,
|
|
1295
|
+
ids: _IDGen,
|
|
1296
|
+
source_data: Optional[Dict[str, Dict]] = None,
|
|
1297
|
+
reference_mol: Optional[Any] = None,
|
|
1298
|
+
) -> Tuple[str, float]:
|
|
1299
|
+
"""
|
|
1300
|
+
Layout a divergent scheme: one SM gives multiple products.
|
|
1301
|
+
|
|
1302
|
+
Detects the shared substrate (appears in multiple steps) and renders it
|
|
1303
|
+
once on the left, with the first step going horizontally and subsequent
|
|
1304
|
+
steps branching downward with vertical arrows.
|
|
1305
|
+
|
|
1306
|
+
Returns (inner_xml, lowest_y).
|
|
1307
|
+
"""
|
|
1308
|
+
steps = scheme.steps
|
|
1309
|
+
if not steps:
|
|
1310
|
+
return "", 300.0
|
|
1311
|
+
|
|
1312
|
+
# --- Identify shared substrate ---
|
|
1313
|
+
# Count how many steps each substrate ID appears in
|
|
1314
|
+
from collections import Counter
|
|
1315
|
+
sub_counts: Counter = Counter()
|
|
1316
|
+
for step in steps:
|
|
1317
|
+
for sid in step.substrates:
|
|
1318
|
+
sub_counts[sid] += 1
|
|
1319
|
+
|
|
1320
|
+
# The shared substrate is the one appearing in the most steps
|
|
1321
|
+
shared_id = sub_counts.most_common(1)[0][0] if sub_counts else None
|
|
1322
|
+
|
|
1323
|
+
# If no substrate appears more than once, just stack steps vertically
|
|
1324
|
+
if shared_id is None or sub_counts[shared_id] < 2:
|
|
1325
|
+
# Fall back to stacking as independent rows
|
|
1326
|
+
return _layout_divergent_stacked(scheme, ids, source_data,
|
|
1327
|
+
reference_mol=reference_mol)
|
|
1328
|
+
|
|
1329
|
+
# Separate steps: those sharing the substrate vs others
|
|
1330
|
+
shared_steps = [s for s in steps if shared_id in s.substrates]
|
|
1331
|
+
other_steps = [s for s in steps if shared_id not in s.substrates]
|
|
1332
|
+
|
|
1333
|
+
# --- Resolve shared substrate once ---
|
|
1334
|
+
def _get_ref(sid: str) -> StructureRef:
|
|
1335
|
+
if sid in scheme.structures:
|
|
1336
|
+
return scheme.structures[sid]
|
|
1337
|
+
return StructureRef(id=sid)
|
|
1338
|
+
|
|
1339
|
+
bond_len = ACS_BOND_LENGTH
|
|
1340
|
+
frag_gap = bond_len * LAYOUT_FRAG_GAP_BONDS
|
|
1341
|
+
inter_gap = bond_len * LAYOUT_INTER_GAP_BONDS
|
|
1342
|
+
min_arrow_len = 5.0 * bond_len
|
|
1343
|
+
|
|
1344
|
+
start_x = 100.0
|
|
1345
|
+
arrow_y = 300.0
|
|
1346
|
+
|
|
1347
|
+
sm_ref = _get_ref(shared_id)
|
|
1348
|
+
sm_frag = _resolve_structure(sm_ref, center_x=200.0, center_y=arrow_y,
|
|
1349
|
+
source_data=source_data,
|
|
1350
|
+
reference_mol=reference_mol)
|
|
1351
|
+
# Position SM at start
|
|
1352
|
+
sm_w = _bbox_width(sm_frag.bbox)
|
|
1353
|
+
cx_target = start_x + sm_w / 2.0
|
|
1354
|
+
dx = cx_target - sm_frag.cx
|
|
1355
|
+
dy = arrow_y - sm_frag.cy
|
|
1356
|
+
_shift_atoms(sm_frag.atoms, dx, dy)
|
|
1357
|
+
sm_frag.bbox = _fragment_bbox(sm_frag.atoms)
|
|
1358
|
+
sm_frag.cx, sm_frag.cy = _bbox_center(sm_frag.bbox)
|
|
1359
|
+
|
|
1360
|
+
xml_parts: List[str] = []
|
|
1361
|
+
all_frag_ids: List[int] = []
|
|
1362
|
+
step_metadata: List[Dict] = []
|
|
1363
|
+
|
|
1364
|
+
# Build SM fragment
|
|
1365
|
+
sm_xml, _, sm_frag_id = _build_fragment(sm_frag.atoms, sm_frag.bonds, ids)
|
|
1366
|
+
xml_parts.append(sm_xml)
|
|
1367
|
+
all_frag_ids.append(sm_frag_id)
|
|
1368
|
+
|
|
1369
|
+
# SM label
|
|
1370
|
+
if sm_frag.ref.label:
|
|
1371
|
+
lbl_xml, _ = _build_label_element(
|
|
1372
|
+
sm_frag.ref.label, sm_frag.cx, sm_frag.bbox[3] + 14.0, ids)
|
|
1373
|
+
xml_parts.append(lbl_xml)
|
|
1374
|
+
|
|
1375
|
+
sm_right_x = sm_frag.bbox[2]
|
|
1376
|
+
lowest_y = arrow_y + 60.0
|
|
1377
|
+
|
|
1378
|
+
# --- First step: horizontal (same Y as SM) ---
|
|
1379
|
+
first_step = shared_steps[0]
|
|
1380
|
+
branch_y = arrow_y
|
|
1381
|
+
|
|
1382
|
+
for branch_idx, step in enumerate(shared_steps):
|
|
1383
|
+
step_meta: Dict[str, Any] = {
|
|
1384
|
+
"reactant_ids": [sm_frag_id],
|
|
1385
|
+
"product_ids": [],
|
|
1386
|
+
"arrow_id": 0,
|
|
1387
|
+
"above_ids": [],
|
|
1388
|
+
"below_ids": [],
|
|
1389
|
+
}
|
|
1390
|
+
|
|
1391
|
+
if branch_idx == 0:
|
|
1392
|
+
# Horizontal arrow from SM
|
|
1393
|
+
arrow_tail_x = sm_right_x + frag_gap * 0.3
|
|
1394
|
+
|
|
1395
|
+
# Compute arrow length from content
|
|
1396
|
+
below_text = list(step.below_arrow.text) if step.below_arrow else []
|
|
1397
|
+
above_text = list(step.above_arrow.text) if step.above_arrow else []
|
|
1398
|
+
if step.yield_:
|
|
1399
|
+
below_text.append(step.yield_)
|
|
1400
|
+
content_w = max(
|
|
1401
|
+
_estimate_text_width(below_text),
|
|
1402
|
+
_estimate_text_width(above_text),
|
|
1403
|
+
0,
|
|
1404
|
+
)
|
|
1405
|
+
arrow_len = max(content_w + 10.0, min_arrow_len)
|
|
1406
|
+
arrow_head_x = arrow_tail_x + arrow_len
|
|
1407
|
+
arrow_mid_x = (arrow_tail_x + arrow_head_x) / 2.0
|
|
1408
|
+
|
|
1409
|
+
dashed = (step.arrow_style == "dashed")
|
|
1410
|
+
failed = (step.arrow_style == "failed")
|
|
1411
|
+
arrow_xml, arrow_id = _build_arrow(
|
|
1412
|
+
arrow_tail_x, branch_y, arrow_head_x, branch_y, ids,
|
|
1413
|
+
dashed=dashed, nogo=failed)
|
|
1414
|
+
xml_parts.append(arrow_xml)
|
|
1415
|
+
step_meta["arrow_id"] = arrow_id
|
|
1416
|
+
|
|
1417
|
+
# Above-arrow text (p.y = baseline of first line)
|
|
1418
|
+
# Position so visual bottom sits LAYOUT_BELOW_GAP above arrow
|
|
1419
|
+
# (same gap as below-arrow content for symmetry)
|
|
1420
|
+
if above_text:
|
|
1421
|
+
n_abt = len(above_text)
|
|
1422
|
+
text_below_baseline = max(0, n_abt - 1) * _LINE_ADVANCE + _DESCENT
|
|
1423
|
+
text_y = branch_y - LAYOUT_BELOW_GAP - text_below_baseline
|
|
1424
|
+
txt_xml, txt_id = _build_text_element(
|
|
1425
|
+
above_text, arrow_mid_x, text_y, ids,
|
|
1426
|
+
use_formatting=False,
|
|
1427
|
+
)
|
|
1428
|
+
xml_parts.append(txt_xml)
|
|
1429
|
+
step_meta["above_ids"].append(txt_id)
|
|
1430
|
+
|
|
1431
|
+
# Below-arrow text (p.y = baseline; visual top = p.y - _CAP_HEIGHT)
|
|
1432
|
+
if below_text:
|
|
1433
|
+
text_y = branch_y + LAYOUT_BELOW_GAP + _CAP_HEIGHT
|
|
1434
|
+
txt_xml, txt_id = _build_text_element(below_text, arrow_mid_x, text_y, ids)
|
|
1435
|
+
xml_parts.append(txt_xml)
|
|
1436
|
+
step_meta["below_ids"].append(txt_id)
|
|
1437
|
+
n_blt = len(below_text)
|
|
1438
|
+
lowest_y = max(
|
|
1439
|
+
lowest_y,
|
|
1440
|
+
text_y + max(0, n_blt - 1) * _LINE_ADVANCE + _DESCENT,
|
|
1441
|
+
)
|
|
1442
|
+
|
|
1443
|
+
# Products
|
|
1444
|
+
cursor_x = arrow_head_x + frag_gap * 1.0
|
|
1445
|
+
for prod_sid in step.products:
|
|
1446
|
+
prod_ref = _get_ref(prod_sid)
|
|
1447
|
+
prod_frag = _resolve_structure(prod_ref, source_data=source_data,
|
|
1448
|
+
reference_mol=reference_mol)
|
|
1449
|
+
pw = _bbox_width(prod_frag.bbox)
|
|
1450
|
+
pcx = cursor_x + pw / 2.0
|
|
1451
|
+
ddx = pcx - prod_frag.cx
|
|
1452
|
+
ddy = branch_y - prod_frag.cy
|
|
1453
|
+
_shift_atoms(prod_frag.atoms, ddx, ddy)
|
|
1454
|
+
prod_frag.bbox = _fragment_bbox(prod_frag.atoms)
|
|
1455
|
+
prod_frag.cx, prod_frag.cy = _bbox_center(prod_frag.bbox)
|
|
1456
|
+
|
|
1457
|
+
pfrag_xml, _, pfrag_id = _build_fragment(
|
|
1458
|
+
prod_frag.atoms, prod_frag.bonds, ids)
|
|
1459
|
+
xml_parts.append(pfrag_xml)
|
|
1460
|
+
step_meta["product_ids"].append(pfrag_id)
|
|
1461
|
+
all_frag_ids.append(pfrag_id)
|
|
1462
|
+
|
|
1463
|
+
if prod_frag.ref.label:
|
|
1464
|
+
lbl_xml, _ = _build_label_element(
|
|
1465
|
+
prod_frag.ref.label, prod_frag.cx,
|
|
1466
|
+
prod_frag.bbox[3] + 14.0, ids)
|
|
1467
|
+
xml_parts.append(lbl_xml)
|
|
1468
|
+
|
|
1469
|
+
cursor_x = prod_frag.bbox[2] + inter_gap
|
|
1470
|
+
|
|
1471
|
+
# Track lowest for below-text of this horizontal step
|
|
1472
|
+
lowest_y = max(lowest_y, branch_y + 60.0)
|
|
1473
|
+
|
|
1474
|
+
else:
|
|
1475
|
+
# Vertical branch downward from SM
|
|
1476
|
+
# Vertical arrow starts below the previous branch's content
|
|
1477
|
+
vert_top_y = lowest_y + 8.0
|
|
1478
|
+
|
|
1479
|
+
# Condition text
|
|
1480
|
+
cond_lines: List[str] = []
|
|
1481
|
+
if step.above_arrow and step.above_arrow.text:
|
|
1482
|
+
cond_lines.extend(step.above_arrow.text)
|
|
1483
|
+
if step.below_arrow and step.below_arrow.text:
|
|
1484
|
+
cond_lines.extend(step.below_arrow.text)
|
|
1485
|
+
if step.yield_:
|
|
1486
|
+
cond_lines.append(step.yield_)
|
|
1487
|
+
|
|
1488
|
+
# Compute vertical arrow length: enough for condition text + product
|
|
1489
|
+
prod_ref = _get_ref(step.products[0])
|
|
1490
|
+
prod_frag = _resolve_structure(prod_ref, source_data=source_data,
|
|
1491
|
+
reference_mol=reference_mol)
|
|
1492
|
+
prod_h = _bbox_height(prod_frag.bbox)
|
|
1493
|
+
|
|
1494
|
+
cond_text_h = _estimate_text_height(cond_lines) if cond_lines else 0
|
|
1495
|
+
vert_len = max(cond_text_h + 20.0, prod_h + 30.0, 3.0 * bond_len)
|
|
1496
|
+
vert_bottom_y = vert_top_y + vert_len
|
|
1497
|
+
|
|
1498
|
+
dashed = (step.arrow_style == "dashed")
|
|
1499
|
+
failed = (step.arrow_style == "failed")
|
|
1500
|
+
|
|
1501
|
+
vert_xml, vert_aid = _build_vertical_arrow(
|
|
1502
|
+
sm_frag.cx, vert_top_y, vert_bottom_y, ids,
|
|
1503
|
+
condition_lines=cond_lines if cond_lines else None,
|
|
1504
|
+
condition_side="right",
|
|
1505
|
+
dashed=dashed,
|
|
1506
|
+
nogo=failed,
|
|
1507
|
+
)
|
|
1508
|
+
xml_parts.append(vert_xml)
|
|
1509
|
+
step_meta["arrow_id"] = vert_aid
|
|
1510
|
+
|
|
1511
|
+
# Product below the vertical arrow, centered on SM's x.
|
|
1512
|
+
prod_w = _bbox_width(prod_frag.bbox)
|
|
1513
|
+
prod_y = vert_bottom_y + frag_gap * 1.0 + prod_h / 2.0
|
|
1514
|
+
prod_target_cx = sm_frag.cx
|
|
1515
|
+
ddx = prod_target_cx - prod_frag.cx
|
|
1516
|
+
ddy = prod_y - prod_frag.cy
|
|
1517
|
+
_shift_atoms(prod_frag.atoms, ddx, ddy)
|
|
1518
|
+
prod_frag.bbox = _fragment_bbox(prod_frag.atoms)
|
|
1519
|
+
prod_frag.cx, prod_frag.cy = _bbox_center(prod_frag.bbox)
|
|
1520
|
+
|
|
1521
|
+
pfrag_xml, _, pfrag_id = _build_fragment(
|
|
1522
|
+
prod_frag.atoms, prod_frag.bonds, ids)
|
|
1523
|
+
xml_parts.append(pfrag_xml)
|
|
1524
|
+
step_meta["product_ids"].append(pfrag_id)
|
|
1525
|
+
all_frag_ids.append(pfrag_id)
|
|
1526
|
+
|
|
1527
|
+
if prod_frag.ref.label:
|
|
1528
|
+
lbl_xml, _ = _build_label_element(
|
|
1529
|
+
prod_frag.ref.label, prod_frag.cx,
|
|
1530
|
+
prod_frag.bbox[3] + 14.0, ids)
|
|
1531
|
+
xml_parts.append(lbl_xml)
|
|
1532
|
+
|
|
1533
|
+
lowest_y = prod_frag.bbox[3] + 20.0
|
|
1534
|
+
|
|
1535
|
+
step_metadata.append(step_meta)
|
|
1536
|
+
|
|
1537
|
+
# --- Build <scheme> ---
|
|
1538
|
+
scheme_id = ids.next()
|
|
1539
|
+
scheme_parts = [f'<scheme id="{scheme_id}">']
|
|
1540
|
+
for meta in step_metadata:
|
|
1541
|
+
step_id = ids.next()
|
|
1542
|
+
attrs = [f'id="{step_id}"']
|
|
1543
|
+
if meta["reactant_ids"]:
|
|
1544
|
+
attrs.append(f'ReactionStepReactants="{" ".join(str(x) for x in meta["reactant_ids"])}"')
|
|
1545
|
+
if meta["product_ids"]:
|
|
1546
|
+
attrs.append(f'ReactionStepProducts="{" ".join(str(x) for x in meta["product_ids"])}"')
|
|
1547
|
+
attrs.append(f'ReactionStepArrows="{meta["arrow_id"]}"')
|
|
1548
|
+
if meta["above_ids"]:
|
|
1549
|
+
attrs.append(f'ReactionStepObjectsAboveArrow="{" ".join(str(x) for x in meta["above_ids"])}"')
|
|
1550
|
+
if meta["below_ids"]:
|
|
1551
|
+
attrs.append(f'ReactionStepObjectsBelowArrow="{" ".join(str(x) for x in meta["below_ids"])}"')
|
|
1552
|
+
scheme_parts.append(f'<step {" ".join(attrs)}/>')
|
|
1553
|
+
scheme_parts.append('</scheme>')
|
|
1554
|
+
xml_parts.append("\n".join(scheme_parts))
|
|
1555
|
+
|
|
1556
|
+
return "\n".join(xml_parts), lowest_y
|
|
1557
|
+
|
|
1558
|
+
|
|
1559
|
+
def _layout_divergent_stacked(
    scheme: SchemeDescriptor,
    ids: _IDGen,
    source_data: Optional[Dict[str, Dict]] = None,
    reference_mol: Optional[Any] = None,
) -> Tuple[str, float]:
    """Fallback: stack divergent steps vertically when no shared substrate.

    Every step of ``scheme`` is rendered as its own single-step row via
    ``_layout_steps_row``; consecutive rows are separated by
    ``_DIVERGENT_BRANCH_GAP`` measured from the previous row's bottom.

    The running ``arrow_y`` is passed to the row renderer so that each row
    is really drawn below the previous one (previously it was advanced but
    never used, so every row landed on the same default y).

    Returns (inner_xml, lowest_y); ``lowest_y`` is 300.0 when the scheme has
    no steps.
    """
    xml_parts: List[str] = []
    arrow_y = 300.0
    lowest_y = arrow_y

    for step in scheme.steps:
        # One-step sub-scheme that shares the parent's structure table.
        sub_scheme = SchemeDescriptor(
            structures=scheme.structures,
            steps=[step],
            layout="linear",
        )
        row_xml, row_lowest, _, _ = _layout_steps_row(
            sub_scheme, [step], ids,
            arrow_y=arrow_y,
            source_data=source_data,
            reference_mol=reference_mol,
        )
        xml_parts.append(row_xml)
        lowest_y = max(lowest_y, row_lowest)
        # Next row starts a fixed gap below everything drawn so far.
        arrow_y = row_lowest + _DIVERGENT_BRANCH_GAP

    return "\n".join(xml_parts), lowest_y
|
|
1583
|
+
|
|
1584
|
+
|
|
1585
|
+
# ---------------------------------------------------------------------------
|
|
1586
|
+
# Layout engine: stacked-rows (multiple independent sub-schemes)
|
|
1587
|
+
# ---------------------------------------------------------------------------
|
|
1588
|
+
|
|
1589
|
+
# Vertical gap inserted between stacked sections, expressed as a multiple of
# the ACS bond length (~79.2 pt; chosen for extra clearance between rows).
SECTION_GAP = ACS_BOND_LENGTH * 5.5
# X coordinate (left margin) at which section labels are anchored.
SECTION_LABEL_X = 40.0
|
|
1591
|
+
|
|
1592
|
+
|
|
1593
|
+
def _layout_stacked_rows(
    scheme: SchemeDescriptor,
    ids: _IDGen,
    source_data: Optional[Dict[str, Dict]] = None,
    reference_mol: Optional[Any] = None,
) -> Tuple[str, float]:
    """
    Render the sections of ``scheme`` as rows stacked top to bottom.

    Each section becomes one row produced by ``_layout_steps_row``. A
    section's label, when present, is drawn at ``SECTION_LABEL_X`` next to
    the row. Run arrows declared on the scheme use global 1-based step
    numbers (counted across all sections); they are re-numbered to be local
    to the section that owns the step before being handed to the row
    renderer.

    When the scheme defines no sections the call is delegated to
    ``_layout_linear``.

    Returns (inner_xml, lowest_y).
    """
    sections = scheme.sections
    if not sections:
        # Nothing to stack: treat the whole scheme as a single linear row.
        return _layout_linear(scheme, ids, source_data=source_data,
                              reference_mol=reference_mol)

    # --- Re-key run arrows: global step number -> (section, local step) ---
    arrows_by_section: Dict[int, List[StepRunArrows]] = {}
    global_no = 0
    for sec_idx, sec in enumerate(sections):
        for local_no in range(1, len(sec.steps) + 1):
            global_no += 1
            for sra in scheme.run_arrows:
                if sra.step != global_no:
                    continue
                arrows_by_section.setdefault(sec_idx, []).append(
                    StepRunArrows(step=local_no, runs=sra.runs))

    pieces: List[str] = []
    row_y = 300.0
    bottom_y = row_y
    # Row content begins to the right of the label column.
    left_edge = 100.0

    for sec_idx, sec in enumerate(sections):
        if sec.label:
            # Text y is a baseline, so nudge it slightly below the arrow line
            # to make the label look vertically centred on the row.
            label_xml, _ = _build_text_element(
                [sec.label], SECTION_LABEL_X, row_y + 4.0, ids,
                justification="Left", use_formatting=False,
            )
            pieces.append(label_xml)

        # Sections are independent reactions: align against this section's
        # own product, falling back to the caller-supplied reference.
        align_mol = _product_mol_for_steps(
            sec.steps, scheme.structures, source_data,
        )
        if align_mol is None:
            align_mol = reference_mol

        # Temporary scheme that shares the structure table but holds only
        # this section's steps.
        section_scheme = SchemeDescriptor(
            structures=scheme.structures,
            steps=sec.steps,
            layout=sec.layout or "linear",
        )
        row_xml, bottom_y, _, _ = _layout_steps_row(
            section_scheme, sec.steps, ids,
            start_x=left_edge, arrow_y=row_y,
            source_data=source_data,
            run_arrows=arrows_by_section.get(sec_idx, []),
            reference_mol=align_mol,
        )
        pieces.append(row_xml)

        # The next section's arrow line sits a fixed gap below this row.
        row_y = bottom_y + SECTION_GAP

    return "\n".join(pieces), bottom_y
|
|
1679
|
+
|
|
1680
|
+
|
|
1681
|
+
# ---------------------------------------------------------------------------
|
|
1682
|
+
# Core row layout: positions steps in a single row (LTR or RTL)
|
|
1683
|
+
# ---------------------------------------------------------------------------
|
|
1684
|
+
|
|
1685
|
+
def _layout_steps_row(
    scheme: SchemeDescriptor,
    steps: List[StepDescriptor],
    ids: _IDGen,
    start_x: float = 100.0,
    arrow_y: float = 300.0,
    source_data: Optional[Dict[str, Dict]] = None,
    run_arrows: Optional[List[StepRunArrows]] = None,
    direction: str = "ltr",
    skip_first_substrate_id: Optional[int] = None,
    skip_first_substrate_cursor_x: Optional[float] = None,
    reference_mol: Optional[Any] = None,
) -> Tuple[str, float, float, Dict]:
    """
    Layout multiple steps in a single row.

    Parameters
    ----------
    scheme : SchemeDescriptor
        Supplies the declared structures, ``condition_key`` and (when
        ``run_arrows`` is None) the run arrows.
    steps : list of StepDescriptor
        The steps to place in this row, in order.
    ids : _IDGen
        ID generator shared by every element that is emitted.
    start_x : float
        Starting cursor position (left edge for LTR, right edge for RTL).
    arrow_y : float
        Y coordinate of the reaction arrows; substrates and products are
        centred on it.
    source_data : optional
        Passed through to ``_resolve_structure``.
    run_arrows : optional
        Run arrows for this row (step numbers are 1-indexed within this row).
        If None, uses scheme.run_arrows with original step numbers.
    direction : str
        "ltr" for left-to-right (default), "rtl" for right-to-left.
        For RTL, start_x is the RIGHT edge; cursor moves leftward.
        Arrows point left, substrates on right, products on left.
    skip_first_substrate_id : optional int
        If set, the first step's substrate is already placed (e.g. by a
        vertical arrow in serpentine mode).  This frag_id is used in the
        scheme metadata but the structure is not drawn or positioned.
    skip_first_substrate_cursor_x : optional float
        When skip_first_substrate_id is set, the cursor position after the
        pre-placed substrate.  For LTR this is the substrate's right_x + gap;
        for RTL this is the substrate's left_x - gap.
    reference_mol : optional
        Passed through to ``_resolve_structure``.

    Returns (xml_string, lowest_y, row_edge_x, row_info) where:
      lowest_y   = bottom extent of this row including run arrows
      row_edge_x = rightmost x for LTR, leftmost x for RTL
      row_info   = dict with extra metadata:
        "last_product_cx"     : float — center x of the last product placed
        "last_product_bottom" : float — bottom y extent of last product + label
        "first_substrate_cx"  : float — center x of the first substrate

    Note: ``lowest_y`` accounts for the baseline at which below-arrow text
    was actually drawn.  When a step has below-arrow structures the text is
    pushed underneath them, and the row bottom is extended accordingly.
    """
    is_rtl = (direction == "rtl")
    bond_len = ACS_BOND_LENGTH
    frag_gap = bond_len * LAYOUT_FRAG_GAP_BONDS    # gap between fragment and arrow
    inter_gap = bond_len * LAYOUT_INTER_GAP_BONDS  # gap between adjacent fragments

    # --- Phase 1: Resolve all structures ---
    def _get_ref(sid: str) -> StructureRef:
        """Get StructureRef from declared structures or create a bare one for source lookup."""
        if sid in scheme.structures:
            return scheme.structures[sid]
        # Not declared — create a bare ref (will be resolved via source_data)
        return StructureRef(id=sid)

    resolved_steps: List[ResolvedStep] = []
    for step in steps:
        subs = [_resolve_structure(_get_ref(sid), source_data=source_data,
                                   reference_mol=reference_mol)
                for sid in step.substrates]
        prods = [_resolve_structure(_get_ref(pid), source_data=source_data,
                                    reference_mol=reference_mol)
                 for pid in step.products]

        above_structs = []
        above_text = []
        below_text = []
        below_structs = []

        if step.above_arrow:
            for sid in step.above_arrow.structures:
                above_structs.append(
                    _resolve_structure(_get_ref(sid), source_data=source_data,
                                       reference_mol=reference_mol))
            # Copy so the descriptor's own list is never mutated.
            above_text = step.above_arrow.text[:]
        if step.below_arrow:
            for sid in step.below_arrow.structures:
                below_structs.append(
                    _resolve_structure(_get_ref(sid), source_data=source_data,
                                       reference_mol=reference_mol))
            below_text = step.below_arrow.text[:]

        # Add yield to below-arrow text if present
        if step.yield_:
            below_text.append(step.yield_)

        resolved_steps.append(ResolvedStep(
            descriptor=step,
            substrates=subs,
            products=prods,
            above_structures=above_structs,
            above_text=above_text,
            below_text=below_text,
            below_structures=below_structs,
        ))

    # --- Phase 2: Compute arrow lengths from content ---
    min_arrow_len = 5.0 * bond_len
    use_letter_conditions = bool(scheme.condition_key)

    for rs in resolved_steps:
        # Check if this step uses letter conditions
        is_letter = (use_letter_conditions
                     and _is_letter_condition(rs.below_text))
        rs._is_letter_cond = is_letter

        # Width of above-arrow content
        above_width = 0.0
        for af in rs.above_structures:
            above_width += _bbox_width(af.bbox) + inter_gap
        above_text_w = _estimate_text_width(rs.above_text)
        above_width = max(above_width, above_text_w)

        # Width of below-arrow content (skip if using letter label)
        below_width = 0.0
        if not is_letter:
            for bf in rs.below_structures:
                below_width += _bbox_width(bf.bbox) + inter_gap
            below_text_w = _estimate_text_width(rs.below_text)
            below_width = max(below_width, below_text_w)

        # Arrow must span the widest annotation plus a little padding.
        content_width = max(above_width, below_width)
        rs._arrow_len = max(content_width + 10.0, min_arrow_len)

    # --- Phase 3: Position everything ---
    # For LTR: cursor_x = left edge of next element, moves right.
    # For RTL: cursor_x = right edge of next element, moves left.
    cursor_x = start_x

    xml_parts: List[str] = []
    step_metadata: List[Dict] = []  # for <scheme><step> elements

    # Baseline at which below-arrow text was actually drawn, keyed by step
    # index.  Only filled for steps whose text is rendered below the arrow.
    below_text_baselines: Dict[int, float] = {}

    # Track extra info for serpentine/multi-row callers
    _last_product_cx: float = start_x
    _last_product_bottom: float = arrow_y
    _first_substrate_cx: float = start_x

    for step_idx, rs in enumerate(resolved_steps):
        step_meta: Dict[str, Any] = {
            "reactant_ids": [],
            "product_ids": [],
            "arrow_id": 0,
            "above_ids": [],
            "below_ids": [],
        }

        # -- Substrates --
        # Skip if: (a) pre-placed by vertical arrow (serpentine), or
        #          (b) shared intermediate from the previous step's product.
        skip_substrate = False
        if step_idx == 0 and skip_first_substrate_id is not None:
            skip_substrate = True
            step_meta["reactant_ids"].append(skip_first_substrate_id)
            if skip_first_substrate_cursor_x is not None:
                cursor_x = skip_first_substrate_cursor_x
        elif step_idx > 0:
            prev_products = resolved_steps[step_idx - 1].descriptor.products
            curr_substrates = rs.descriptor.substrates
            if (len(curr_substrates) == 1 and len(prev_products) >= 1
                    and curr_substrates[0] == prev_products[-1]):
                skip_substrate = True
                # Use the previous step's product fragment ID
                prev_meta = step_metadata[step_idx - 1]
                if prev_meta["product_ids"]:
                    step_meta["reactant_ids"].append(prev_meta["product_ids"][-1])

        if not skip_substrate:
            for i, sub in enumerate(rs.substrates):
                bbox = sub.bbox
                w = _bbox_width(bbox)
                # Shift to position
                if is_rtl:
                    cx_target = cursor_x - w / 2.0
                else:
                    cx_target = cursor_x + w / 2.0
                cy_target = arrow_y
                dx = cx_target - sub.cx
                dy = cy_target - sub.cy
                _shift_atoms(sub.atoms, dx, dy)
                sub.bbox = _fragment_bbox(sub.atoms)
                sub.cx, sub.cy = _bbox_center(sub.bbox)

                frag_xml, _, frag_id = _build_fragment(sub.atoms, sub.bonds, ids)
                sub.xml = frag_xml
                sub.frag_id = frag_id
                xml_parts.append(frag_xml)
                step_meta["reactant_ids"].append(frag_id)

                # Track first substrate center
                if step_idx == 0 and i == 0:
                    _first_substrate_cx = sub.cx

                # Compound label below structure
                if sub.ref.label:
                    label_x = sub.cx
                    label_y = sub.bbox[3] + 14.0  # below structure
                    lbl_xml, _ = _build_label_element(
                        sub.ref.label, label_x, label_y, ids,
                    )
                    xml_parts.append(lbl_xml)

                if is_rtl:
                    cursor_x = sub.bbox[0] - inter_gap
                else:
                    cursor_x = sub.bbox[2] + inter_gap

        # -- Arrow --
        if is_rtl:
            # RTL: tail on right (near substrate), head on left (near product)
            arrow_tail_x = cursor_x - frag_gap * 0.3
            arrow_head_x = arrow_tail_x - rs._arrow_len
        else:
            arrow_tail_x = cursor_x + frag_gap * 0.3
            arrow_head_x = arrow_tail_x + rs._arrow_len
        arrow_mid_x = (arrow_tail_x + arrow_head_x) / 2.0

        dashed = (rs.descriptor.arrow_style == "dashed")
        failed = (rs.descriptor.arrow_style == "failed")
        arrow_xml, arrow_id = _build_arrow(
            arrow_tail_x, arrow_y, arrow_head_x, arrow_y, ids,
            dashed=dashed, nogo=failed,
        )
        xml_parts.append(arrow_xml)
        step_meta["arrow_id"] = arrow_id

        # Remembered so Phase 5 can draw run arrows with the same x-span.
        rs.arrow_tail_x = arrow_tail_x
        rs.arrow_tail_y = arrow_y
        rs.arrow_head_x = arrow_head_x
        rs.arrow_head_y = arrow_y

        # -- Above-arrow content --
        # Structures above arrow
        above_cursor_x = arrow_mid_x
        if rs.above_structures:
            total_above_w = sum(_bbox_width(af.bbox) for af in rs.above_structures)
            total_above_w += inter_gap * max(0, len(rs.above_structures) - 1)
            above_cursor_x = arrow_mid_x - total_above_w / 2.0

        # If there's text below above-arrow structures (e.g. "(1.2 eq)"),
        # push structures higher to make room for the text between
        # structure bottom and arrow.
        # Gap layout:  struct -(4pt)- text -(2pt)- arrow
        above_text_height = 0.0
        if rs.above_text:
            n_abt = len(rs.above_text)
            # Visual text height + struct-to-text gap (4pt)
            above_text_height = (
                _CAP_HEIGHT + max(0, n_abt - 1) * _LINE_ADVANCE + _DESCENT + 4.0
            )

        for af in rs.above_structures:
            af_w = _bbox_width(af.bbox)
            af_h = _bbox_height(af.bbox)
            # Position above the arrow, with extra room for text below structure
            target_cx = above_cursor_x + af_w / 2.0
            if rs.above_text:
                # text sits LAYOUT_BELOW_GAP above arrow; struct sits above text
                target_cy = arrow_y - LAYOUT_BELOW_GAP - above_text_height - af_h / 2.0
            else:
                # no text: struct sits LAYOUT_ABOVE_GAP above arrow
                target_cy = arrow_y - LAYOUT_ABOVE_GAP - af_h / 2.0
            dx = target_cx - af.cx
            dy = target_cy - af.cy
            _shift_atoms(af.atoms, dx, dy)
            af.bbox = _fragment_bbox(af.atoms)
            af.cx, af.cy = _bbox_center(af.bbox)

            frag_xml, _, frag_id = _build_fragment(af.atoms, af.bonds, ids)
            af.xml = frag_xml
            af.frag_id = frag_id
            xml_parts.append(frag_xml)
            step_meta["above_ids"].append(frag_id)

            # Label for above-arrow structure
            if af.ref.label:
                lbl_xml, _ = _build_label_element(
                    af.ref.label, af.cx, af.bbox[3] + 14.0, ids,
                )
                xml_parts.append(lbl_xml)

            above_cursor_x += af_w + inter_gap

        # Above-arrow text (equiv text for above structures, or condition text)
        # p.y is the BASELINE of the first line.
        # Position so that the visual bottom (last baseline + descent) sits
        # LAYOUT_BELOW_GAP above the arrow (same gap as below-arrow content).
        # Any above-arrow structures were already pushed higher to make room,
        # so the same text_y is used with or without them.
        if rs.above_text:
            n_abt = len(rs.above_text)
            # Distance from first baseline to visual bottom
            text_below_baseline = max(0, n_abt - 1) * _LINE_ADVANCE + _DESCENT
            text_y = arrow_y - LAYOUT_BELOW_GAP - text_below_baseline
            txt_xml, txt_id = _build_text_element(
                rs.above_text, arrow_mid_x, text_y, ids,
                use_formatting=False,
            )
            xml_parts.append(txt_xml)
            step_meta["above_ids"].append(txt_id)

        # -- Below-arrow content --
        # p.y is the BASELINE of the first line.
        # Visual top = p.y - _CAP_HEIGHT, so set p.y so that
        # (p.y - _CAP_HEIGHT) = arrow_y + LAYOUT_BELOW_GAP.
        below_y = arrow_y + LAYOUT_BELOW_GAP + _CAP_HEIGHT

        # Below-arrow structures first
        if rs.below_structures:
            below_struct_y = arrow_y + LAYOUT_BELOW_GAP
            total_below_w = sum(_bbox_width(bf.bbox) for bf in rs.below_structures)
            total_below_w += inter_gap * max(0, len(rs.below_structures) - 1)
            below_cursor_x = arrow_mid_x - total_below_w / 2.0

            for bf in rs.below_structures:
                bf_w = _bbox_width(bf.bbox)
                bf_h = _bbox_height(bf.bbox)
                target_cx = below_cursor_x + bf_w / 2.0
                target_cy = below_struct_y + bf_h / 2.0 + 4.0
                dx = target_cx - bf.cx
                dy = target_cy - bf.cy
                _shift_atoms(bf.atoms, dx, dy)
                bf.bbox = _fragment_bbox(bf.atoms)
                bf.cx, bf.cy = _bbox_center(bf.bbox)

                frag_xml, _, frag_id = _build_fragment(bf.atoms, bf.bonds, ids)
                bf.xml = frag_xml
                bf.frag_id = frag_id
                xml_parts.append(frag_xml)
                step_meta["below_ids"].append(frag_id)

                if bf.ref.label:
                    lbl_xml, _ = _build_label_element(
                        bf.ref.label, bf.cx, bf.bbox[3] + 14.0, ids,
                    )
                    xml_parts.append(lbl_xml)

                below_cursor_x += bf_w + inter_gap

            # Adjust text Y to be below the structures
            below_y = max(bf.bbox[3] for bf in rs.below_structures) + 14.0

        # Below-arrow text (or letter label for condition key mode)
        if rs.below_text:
            if rs._is_letter_cond:
                # Letter condition: render small italic label above arrow
                letter_text = rs.below_text[0].strip()
                lbl_xml, lbl_id = _build_letter_label(
                    letter_text, arrow_mid_x, arrow_y, ids,
                )
                xml_parts.append(lbl_xml)
                step_meta["above_ids"].append(lbl_id)
            else:
                txt_xml, txt_id = _build_text_element(
                    rs.below_text, arrow_mid_x, below_y, ids,
                )
                xml_parts.append(txt_xml)
                step_meta["below_ids"].append(txt_id)
                # Remember where the text really went for the Phase 5
                # row-bottom computation.
                below_text_baselines[step_idx] = below_y

        if is_rtl:
            cursor_x = arrow_head_x - frag_gap * 1.0
        else:
            cursor_x = arrow_head_x + frag_gap * 1.0

        # -- Products --
        for i, prod in enumerate(rs.products):
            bbox = prod.bbox
            w = _bbox_width(bbox)
            if is_rtl:
                cx_target = cursor_x - w / 2.0
            else:
                cx_target = cursor_x + w / 2.0
            cy_target = arrow_y
            dx = cx_target - prod.cx
            dy = cy_target - prod.cy
            _shift_atoms(prod.atoms, dx, dy)
            prod.bbox = _fragment_bbox(prod.atoms)
            prod.cx, prod.cy = _bbox_center(prod.bbox)

            frag_xml, _, frag_id = _build_fragment(prod.atoms, prod.bonds, ids)
            prod.xml = frag_xml
            prod.frag_id = frag_id
            xml_parts.append(frag_xml)
            step_meta["product_ids"].append(frag_id)

            prod_bottom = prod.bbox[3]
            if prod.ref.label:
                lbl_xml, _ = _build_label_element(
                    prod.ref.label, prod.cx, prod.bbox[3] + 14.0, ids,
                )
                xml_parts.append(lbl_xml)
                prod_bottom = prod.bbox[3] + 14.0 + 6.0  # label baseline + descent

            # Track last product info for serpentine callers
            _last_product_cx = prod.cx
            _last_product_bottom = prod_bottom

            if is_rtl:
                cursor_x = prod.bbox[0] - inter_gap
            else:
                cursor_x = prod.bbox[2] + inter_gap

        step_metadata.append(step_meta)

    # --- Phase 4: Build <scheme> with <step> elements ---
    scheme_id = ids.next()
    scheme_parts = [f'<scheme id="{scheme_id}">']
    for meta in step_metadata:
        step_id = ids.next()
        attrs = [f'id="{step_id}"']
        if meta["reactant_ids"]:
            attrs.append(f'ReactionStepReactants="{" ".join(str(x) for x in meta["reactant_ids"])}"')
        if meta["product_ids"]:
            attrs.append(f'ReactionStepProducts="{" ".join(str(x) for x in meta["product_ids"])}"')
        attrs.append(f'ReactionStepArrows="{meta["arrow_id"]}"')
        if meta["above_ids"]:
            attrs.append(f'ReactionStepObjectsAboveArrow="{" ".join(str(x) for x in meta["above_ids"])}"')
        if meta["below_ids"]:
            attrs.append(f'ReactionStepObjectsBelowArrow="{" ".join(str(x) for x in meta["below_ids"])}"')
        scheme_parts.append(f'<step {" ".join(attrs)}/>')
    scheme_parts.append('</scheme>')
    xml_parts.append("\n".join(scheme_parts))

    # --- Phase 5: Run arrows ---
    # Compute lowest_y from row content (always needed for return value)
    lowest_y = arrow_y + 60.0  # baseline estimate
    for idx, rs in enumerate(resolved_steps):
        if rs.below_text:
            n_blt = len(rs.below_text)
            # Nominal first baseline directly under the arrow; if the text
            # was pushed underneath below-arrow structures, use the baseline
            # it was actually drawn at so the row bottom is not underestimated.
            nominal_baseline = arrow_y + LAYOUT_BELOW_GAP + _CAP_HEIGHT
            first_baseline = max(
                nominal_baseline,
                below_text_baselines.get(idx, nominal_baseline),
            )
            # first baseline + distance to visual bottom + margin
            below_vis_bottom = (
                first_baseline
                + max(0, n_blt - 1) * _LINE_ADVANCE + _DESCENT  # to visual bottom
                + 4.0  # margin
            )
            lowest_y = max(lowest_y, below_vis_bottom)
        for bf in rs.below_structures:
            lowest_y = max(lowest_y, bf.bbox[3] + 20.0)

    # Determine which run arrows to render
    effective_run_arrows = run_arrows if run_arrows is not None else scheme.run_arrows

    if effective_run_arrows:
        run_y = lowest_y + 6.0  # tighter gap between conditions text and run arrows

        for sra in effective_run_arrows:
            step_idx = sra.step - 1  # 0-indexed
            if step_idx < 0 or step_idx >= len(resolved_steps):
                # Run arrow refers to a step that is not in this row.
                continue
            rs = resolved_steps[step_idx]

            # Run arrow matches the reaction arrow exactly (same tail/head X),
            # just translated vertically below the scheme content.
            run_tail_x = rs.arrow_tail_x
            run_head_x = rs.arrow_head_x

            for run_entry in sra.runs:
                if run_entry.note:
                    # Text centered above this specific run arrow
                    note_y = run_y - 1.0
                    note_xml, _ = _build_text_element(
                        [run_entry.note],
                        (run_tail_x + run_head_x) / 2.0,
                        note_y, ids,
                        justification="Center",
                        use_formatting=False,
                    )
                    xml_parts.append(note_xml)
                    run_y += 10.0  # space for note text
                run_xml, _ = _build_run_arrow(
                    run_tail_x, run_head_x,
                    run_y,
                    run_entry.input_label,
                    run_entry.output_label,
                    ids,
                )
                xml_parts.append(run_xml)
                run_y += 18.0  # stack multiple runs

        lowest_y = run_y

    # Compute row edge (rightmost for LTR, leftmost for RTL)
    row_edge_x = cursor_x

    row_info = {
        "last_product_cx": _last_product_cx,
        "last_product_bottom": _last_product_bottom,
        "first_substrate_cx": _first_substrate_cx,
    }
    return "\n".join(xml_parts), lowest_y, row_edge_x, row_info
|
|
2181
|
+
|
|
2182
|
+
|
|
2183
|
+
# ---------------------------------------------------------------------------
|
|
2184
|
+
# Document assembly
|
|
2185
|
+
# ---------------------------------------------------------------------------
|
|
2186
|
+
|
|
2187
|
+
def _format_header(bbox: str) -> str:
    """Fill the ``CDXML_HEADER`` template with *bbox* and the ACS style constants.

    Parameters
    ----------
    bbox : str
        Text substituted for the template's ``bbox`` field.

    Returns
    -------
    str
        The formatted header text.
    """
    # Keyword names are the template's field names; values come from the
    # module-level ACS_* constants.
    header_fields = {
        "bbox": bbox,
        "label_font": ACS_LABEL_FONT,
        "label_size": ACS_LABEL_SIZE,
        "label_face": ACS_LABEL_FACE,
        "caption_size": ACS_CAPTION_SIZE,
        "hash_spacing": ACS_HASH_SPACING,
        "margin_width": ACS_MARGIN_WIDTH,
        "line_width": ACS_LINE_WIDTH,
        "bold_width": ACS_BOLD_WIDTH,
        "bond_length": ACS_BOND_LENGTH_STR,
        "bond_spacing": ACS_BOND_SPACING,
        "chain_angle": ACS_CHAIN_ANGLE_STR,
    }
    return CDXML_HEADER.format(**header_fields)
|
|
2203
|
+
|
|
2204
|
+
|
|
2205
|
+
# Opening <page> tag template; the ``page_id`` field is filled in with
# str.format() by the caller.  Attributes are joined with single spaces.
_PAGE_OPEN = " ".join((
    '<page id="{page_id}"',
    'BoundingBox="0 0 1620 2160"',
    'HeaderPosition="36"',
    'FooterPosition="36"',
    'PrintTrimMarks="yes"',
    'HeightPages="3"',
    'WidthPages="3">',
))
# Closing tag that pairs with _PAGE_OPEN.
_PAGE_CLOSE = "</page>"
|
|
2211
|
+
|
|
2212
|
+
|
|
2213
|
+
def _product_mol_for_steps(
    steps: List[StepDescriptor],
    structures: Dict[str, StructureRef],
    source_data: Optional[Dict[str, Dict]] = None,
    pick: str = "last",
) -> Optional[Any]:
    """
    Create an RDKit Mol with 2D coordinates for one product of *steps*.

    Parameters
    ----------
    steps : list of StepDescriptor
        Steps from which the product is chosen.
    structures : dict mapping structure IDs to StructureRef
        Looked up first; its ``smiles`` is used when non-empty.
    source_data : optional reaction_parser JSON species lookup
        Fallback when ``structures`` yields no SMILES.  The product ID is
        tried as given, then lower-cased; the entry's ``"smiles"`` is used.
    pick : "first" selects the first step's first product (for divergent);
        any other value (default "last") selects the last step's last
        product.

    Returns
    -------
    An RDKit Mol with a 2D conformer, or None when RDKit cannot be
    imported, the selected step has no products, no SMILES is resolved,
    or RDKit fails to parse the SMILES.
    """
    # RDKit is optional: without it there is no reference molecule.
    try:
        from rdkit import Chem
        from rdkit.Chem import AllChem
    except ImportError:
        return None

    if not steps:
        return None

    # The same index addresses both the step and the product inside it:
    # 0/0 for "first", -1/-1 for everything else.
    index = 0 if pick == "first" else -1
    candidates = steps[index].products
    if not candidates:
        return None
    product_id = candidates[index]
    if product_id is None:
        return None

    # An explicit structure entry wins; source_data is only a fallback.
    smiles = None
    structure = structures.get(product_id)
    if structure and structure.smiles:
        smiles = structure.smiles
    elif source_data:
        species = (
            source_data.get(product_id)
            or source_data.get(product_id.lower())
        )
        if species:
            smiles = species.get("smiles")

    if not smiles:
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is not None:
        AllChem.Compute2DCoords(mol)
    # mol is None here when the SMILES could not be parsed.
    return mol
|
|
2267
|
+
|
|
2268
|
+
|
|
2269
|
+
def _identify_product_mol(
    scheme: SchemeDescriptor,
    source_data: Optional[Dict[str, Dict]] = None,
) -> Optional[Any]:
    """
    Pick the scheme's main product and build its RDKit Mol with 2D coords.

    The returned mol is the reference for MCS alignment of the other
    structures in the scheme, so shared scaffolds follow the product's
    orientation.

    Which product is used depends on ``scheme.layout``:
      - "divergent": first product of the FIRST step
      - "stacked-rows": last product of the first section's last step
        (None when the scheme has no sections)
      - anything else (linear / sequential): last product of the LAST step

    Returns an RDKit Mol with 2D conformer, or None if RDKit is
    unavailable or no product SMILES can be resolved.
    """
    layout = scheme.layout

    if layout == "stacked-rows":
        if not scheme.sections:
            return None
        chosen_steps, which = scheme.sections[0].steps, "last"
    elif layout == "divergent":
        chosen_steps, which = scheme.steps, "first"
    else:
        # linear / sequential
        chosen_steps, which = scheme.steps, "last"

    return _product_mol_for_steps(
        chosen_steps, scheme.structures, source_data, pick=which,
    )
|
|
2303
|
+
|
|
2304
|
+
|
|
2305
|
+
def render(scheme: SchemeDescriptor, yaml_dir: Optional[str] = None) -> str:
    """
    Render a SchemeDescriptor to a CDXML document string.

    Parameters
    ----------
    scheme : SchemeDescriptor
        Parsed scheme from YAML.
    yaml_dir : str, optional
        Directory of the YAML file (for resolving relative source paths).

    Returns
    -------
    str
        Complete CDXML document.

    Raises
    ------
    NotImplementedError
        If ``scheme.layout`` is not one of the supported layouts.  This is
        checked first, before any source file is read.
    """
    # Reject unknown layouts up front so a typo in the layout name does not
    # cost a source-file read and a reference-molecule build first.
    supported = ("linear", "sequential", "divergent", "stacked-rows")
    if scheme.layout not in supported:
        raise NotImplementedError(
            f"Layout '{scheme.layout}' is not yet implemented. "
            f"Supported: {', '.join(supported)}"
        )

    ids = _IDGen(1000)

    # Load source JSON if specified; relative paths are resolved against
    # yaml_dir when one is given.
    source_data = None
    if scheme.source:
        source_path = scheme.source
        if not os.path.isabs(source_path) and yaml_dir:
            source_path = os.path.join(yaml_dir, source_path)
        source_data = _load_source_json(source_path)

    # --- Identify product reference Mol for alignment ---
    # Which product serves as reference depends on the layout (see
    # _identify_product_mol); it is handed to the layout function as
    # reference_mol and may be None.
    ref_mol = _identify_product_mol(scheme, source_data)

    # Every layout function takes the same arguments and returns
    # (inner_xml, lowest_y).
    layout_fns = {
        "linear": _layout_linear,
        "sequential": _layout_sequential,
        "divergent": _layout_divergent,
        "stacked-rows": _layout_stacked_rows,
    }
    inner_xml, lowest_y = layout_fns[scheme.layout](
        scheme, ids, source_data=source_data, reference_mol=ref_mol,
    )

    # Condition key block below the scheme
    if scheme.condition_key:
        key_y = lowest_y + 20.0
        key_xml, _ = _build_condition_key(
            scheme.condition_key, 80.0, key_y, ids,
        )
        inner_xml += "\n" + key_xml

    # Wrap in document.  The page id is drawn after all content ids.
    page_id = ids.next()

    # Use a generous bounding box (same extent as the one in _PAGE_OPEN)
    bbox = "0 0 1620 2160"

    doc_parts = [
        _format_header(bbox),
        _PAGE_OPEN.format(page_id=page_id),
        inner_xml,
        _PAGE_CLOSE,
        CDXML_FOOTER,
    ]
    return "\n".join(doc_parts)
|
|
2377
|
+
|
|
2378
|
+
|
|
2379
|
+
def render_to_file(
    scheme: SchemeDescriptor,
    output_path: str,
    yaml_dir: Optional[str] = None,
) -> None:
    """Render *scheme* with ``render`` and write the result to *output_path*.

    The file is opened in text mode "w" with UTF-8 encoding, so an
    existing file at that path is overwritten.  *yaml_dir* is forwarded
    to ``render`` unchanged.
    """
    document = render(scheme, yaml_dir=yaml_dir)
    with open(output_path, mode="w", encoding="utf-8") as out_file:
        out_file.write(document)
|