cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. cdxml_toolkit/__init__.py +18 -0
  2. cdxml_toolkit/_jre/__init__.py +2 -0
  3. cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
  4. cdxml_toolkit/analysis/__init__.py +35 -0
  5. cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
  6. cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
  7. cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
  8. cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
  9. cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
  10. cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
  11. cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
  12. cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
  13. cdxml_toolkit/analysis/extract_nmr.py +47 -0
  14. cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
  15. cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
  16. cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
  17. cdxml_toolkit/cdxml_builder.py +920 -0
  18. cdxml_toolkit/cdxml_utils.py +342 -0
  19. cdxml_toolkit/chemdraw/__init__.py +5 -0
  20. cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
  21. cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
  22. cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
  23. cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
  24. cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
  25. cdxml_toolkit/constants.py +304 -0
  26. cdxml_toolkit/coord_normalizer.py +438 -0
  27. cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
  28. cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
  29. cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
  30. cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
  31. cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
  32. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
  33. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
  34. cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
  35. cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
  36. cdxml_toolkit/image/__init__.py +15 -0
  37. cdxml_toolkit/image/reaction_from_image.py +2103 -0
  38. cdxml_toolkit/image/structure_from_image.py +1711 -0
  39. cdxml_toolkit/layout/__init__.py +5 -0
  40. cdxml_toolkit/layout/alignment.py +1642 -0
  41. cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
  42. cdxml_toolkit/layout/scheme_merger.py +2260 -0
  43. cdxml_toolkit/mcp_server/__init__.py +0 -0
  44. cdxml_toolkit/mcp_server/__main__.py +5 -0
  45. cdxml_toolkit/mcp_server/server.py +1567 -0
  46. cdxml_toolkit/naming/__init__.py +6 -0
  47. cdxml_toolkit/naming/aligned_namer.py +2342 -0
  48. cdxml_toolkit/naming/mol_builder.py +3722 -0
  49. cdxml_toolkit/naming/name_decomposer.py +2843 -0
  50. cdxml_toolkit/naming/reactions_datamol.json +2414 -0
  51. cdxml_toolkit/office/__init__.py +5 -0
  52. cdxml_toolkit/office/doc_from_template.py +722 -0
  53. cdxml_toolkit/office/ole_embedder.py +808 -0
  54. cdxml_toolkit/office/ole_extractor.py +272 -0
  55. cdxml_toolkit/perception/__init__.py +10 -0
  56. cdxml_toolkit/perception/compound_search.py +229 -0
  57. cdxml_toolkit/perception/eln_csv_parser.py +240 -0
  58. cdxml_toolkit/perception/rdf_parser.py +664 -0
  59. cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
  60. cdxml_toolkit/perception/reaction_parser.py +2150 -0
  61. cdxml_toolkit/perception/scheme_reader.py +2948 -0
  62. cdxml_toolkit/perception/scheme_refine.py +1404 -0
  63. cdxml_toolkit/perception/scheme_segmenter.py +619 -0
  64. cdxml_toolkit/perception/spatial_assignment.py +1013 -0
  65. cdxml_toolkit/rdkit_utils.py +605 -0
  66. cdxml_toolkit/render/__init__.py +17 -0
  67. cdxml_toolkit/render/auto_layout.py +229 -0
  68. cdxml_toolkit/render/compact_parser.py +632 -0
  69. cdxml_toolkit/render/parser.py +706 -0
  70. cdxml_toolkit/render/render_scheme.py +267 -0
  71. cdxml_toolkit/render/renderer.py +2387 -0
  72. cdxml_toolkit/render/schema.py +90 -0
  73. cdxml_toolkit/render/scheme_maker.py +1043 -0
  74. cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
  75. cdxml_toolkit/resolve/__init__.py +13 -0
  76. cdxml_toolkit/resolve/cas_resolver.py +430 -0
  77. cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
  78. cdxml_toolkit/resolve/condensed_formula.py +493 -0
  79. cdxml_toolkit/resolve/jre_manager.py +195 -0
  80. cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
  81. cdxml_toolkit/resolve/reagent_db.py +285 -0
  82. cdxml_toolkit/resolve/superatom_data.json +2856 -0
  83. cdxml_toolkit/resolve/superatom_table.py +146 -0
  84. cdxml_toolkit/text_formatting.py +298 -0
  85. cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
  86. cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
  87. cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
  88. cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
  89. cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
  90. cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
  91. cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2387 @@
1
+ """
2
+ renderer.py — Render a SchemeDescriptor to a CDXML document.
3
+
4
+ Supports:
5
+ - linear: single step (substrates → products)
6
+ - sequential: multi-step in a single row
7
+ - wrap: repeat — multi-row L→R with repeated structures
8
+ - wrap: serpentine — zigzag layout (L→R, R→L, L→R, ...) with vertical arrows
9
+
10
+ Uses RDKit for SMILES → 2D coords (no ChemDraw COM dependency).
11
+ Uses cdxml_builder infrastructure for fragment/arrow/text XML generation.
12
+ Uses reaction_cleanup-style layout logic for positioning.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import math
19
+ import os
20
+ from copy import deepcopy
21
+ from dataclasses import dataclass, field
22
+ from typing import Any, Dict, List, Optional, Tuple
23
+ from xml.sax.saxutils import escape as xml_escape
24
+
25
+ from ..constants import (
26
+ ACS_BOND_LENGTH,
27
+ ACS_BOND_LENGTH_STR,
28
+ ACS_BOND_SPACING,
29
+ ACS_BOLD_WIDTH,
30
+ ACS_CAPTION_FACE,
31
+ ACS_CAPTION_SIZE,
32
+ ACS_CHAIN_ANGLE_STR,
33
+ ACS_HASH_SPACING,
34
+ ACS_LABEL_FACE,
35
+ ACS_LABEL_FONT,
36
+ ACS_LABEL_SIZE,
37
+ ACS_LINE_WIDTH,
38
+ ACS_MARGIN_WIDTH,
39
+ CDXML_FOOTER,
40
+ CDXML_HEADER,
41
+ LAYOUT_ABOVE_GAP,
42
+ LAYOUT_BELOW_GAP,
43
+ LAYOUT_FRAG_GAP_BONDS,
44
+ LAYOUT_INTER_GAP_BONDS,
45
+ )
46
+ from ..text_formatting import build_formatted_s_xml
47
+
48
+ from .schema import (
49
+ ArrowContent,
50
+ RunArrowEntry,
51
+ SchemeDescriptor,
52
+ SectionDescriptor,
53
+ StepDescriptor,
54
+ StepRunArrows,
55
+ StructureRef,
56
+ )
57
+
58
+
59
+ # ---------------------------------------------------------------------------
60
+ # Text metrics for Arial 10pt Bold in ChemDraw
61
+ # (measured via bbox investigation: 135 fragments, 237 texts, 83 arrows)
62
+ # ---------------------------------------------------------------------------
63
+
64
# Text-metric estimates, in CDXML points, for the font described in the
# section header above. _build_text_element rescales them linearly by
# (font size / ACS_CAPTION_SIZE) when another size is requested.
_CHAR_WIDTH = 4.7  # average character width (proportional Arial)
_LINE_ADVANCE = 11.5  # line-to-line distance (ChemDraw ~1.15× multiplier)
_CAP_HEIGHT = 9.1  # baseline to top of uppercase letters
_DESCENT = 2.0  # baseline to bottom of descenders

# Fragment bbox padding beyond atom center positions (half of measured excess).
# _fragment_bbox subtracts/adds these on each side of the atom extents.
_FRAG_PAD_W = 3.3  # ±3.3 pt per side (6.6 pt total width excess)
_FRAG_PAD_H = 1.45  # ±1.45 pt per side (2.9 pt total height excess)
72
+
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # Multi-row constants
76
+ # ---------------------------------------------------------------------------
77
+
78
# Vertical gap (CDXML points) between the bottom of row N (including run
# arrows) and the top of row N+1 in multi-row layouts.
# ~55 pts matches real Report-scheme-extr-2 inter-row spacing.
ROW_GAP = 55.0
81
+
82
+
83
+ # ---------------------------------------------------------------------------
84
+ # Source JSON loader (reaction_parser output)
85
+ # ---------------------------------------------------------------------------
86
+
87
def _load_source_json(path: str) -> Dict[str, Dict]:
    """
    Read a reaction_parser JSON file and index its species for lookup.

    Every entry of the top-level "species" list is registered under each
    of the following keys that applies:
      - its "id" value (when non-empty)
      - "SM" when "is_sm" is truthy, "DP" when "is_dp" is truthy
      - the lowercase form of its "name" and of its "csv_name"

    Later species overwrite earlier ones that share a key. A file with no
    "species" entry yields an empty dict.
    """
    with open(path, encoding="utf-8") as handle:
        payload = json.load(handle)

    index: Dict[str, Dict] = {}
    for species in payload.get("species", []):
        species_id = species.get("id", "")
        if species_id:
            index[species_id] = species

        # Role shortcuts: starting material / desired product.
        for flag, shortcut in (("is_sm", "SM"), ("is_dp", "DP")):
            if species.get(flag):
                index[shortcut] = species

        # Case-insensitive aliases, so callers can match by display name
        # or by the name used in the CSV.
        for alias_field in ("name", "csv_name"):
            alias = species.get(alias_field, "")
            if alias:
                index[alias.lower()] = species
    return index
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # ID generator (same pattern as cdxml_builder)
121
+ # ---------------------------------------------------------------------------
122
+
123
class _IDGen:
    """Hand out consecutive integer IDs, beginning at ``start``."""

    def __init__(self, start: int = 1000):
        # Next value to be returned by next().
        self._n = start

    def next(self) -> int:
        """Return the current ID and advance the counter by one."""
        issued = self._n
        self._n = issued + 1
        return issued
132
+
133
+
134
+ # ---------------------------------------------------------------------------
135
+ # Resolved structure: SMILES → atoms/bonds in CDXML points
136
+ # ---------------------------------------------------------------------------
137
+
138
@dataclass
class ResolvedFragment:
    """A structure reference together with its resolved atom/bond data and XML."""

    # The structure reference this fragment was resolved from.
    ref: StructureRef
    # Atom and bond dicts describing the structure.
    atoms: List[Dict]
    bonds: List[Dict]
    # Serialized <fragment> XML string.
    xml: str = ""
    # XML element ID of the <fragment>.
    frag_id: int = 0
    # Bounding box in CDXML points: (min_x, min_y, max_x, max_y).
    bbox: Tuple[float, float, float, float] = (0, 0, 0, 0)
    # Current center position.
    cx: float = 0
    cy: float = 0
151
+
152
+
153
@dataclass
class ResolvedStep:
    """One reaction step whose structures are resolved and positioned."""

    # The step description this resolved step was built from.
    descriptor: StepDescriptor
    # Resolved structures on either side of the arrow.
    substrates: List[ResolvedFragment]
    products: List[ResolvedFragment]
    # Content placed above the arrow.
    above_structures: List[ResolvedFragment]
    above_text: List[str]
    # Content placed below the arrow.
    below_text: List[str]
    below_structures: List[ResolvedFragment]
    # Arrow geometry (set during layout).
    arrow_tail_x: float = 0
    arrow_tail_y: float = 0
    arrow_head_x: float = 0
    arrow_head_y: float = 0
168
+
169
+
170
+ # ---------------------------------------------------------------------------
171
+ # SMILES → atom/bond dicts (using structure_from_image + normalize)
172
+ # ---------------------------------------------------------------------------
173
+
174
def _smiles_to_fragment_data(
    smiles: str,
    center_x: float = 200.0,
    center_y: float = 300.0,
) -> Optional[Tuple[List[Dict], List[Dict]]]:
    """
    Convert SMILES to atom/bond dicts in CDXML point coordinates.

    Parameters
    ----------
    smiles : str
        SMILES string to parse.
    center_x, center_y : float
        Passed to ``normalize_for_cdxml`` as the target center.

    Returns (atoms, bonds), or None when the SMILES is empty, cannot be
    parsed, or yields a molecule without atoms.

    Raises RuntimeError (chained to the ImportError) if RDKit is missing.
    """
    try:
        from rdkit import Chem
        from rdkit.Chem import AllChem
    except ImportError as exc:
        # Keep the original ImportError as __cause__ so the traceback shows
        # which RDKit module failed to import.
        raise RuntimeError(
            "RDKit is required for SMILES→structure conversion"
        ) from exc

    # An empty/None SMILES carries no structure; report it as a failure
    # instead of handing it to the parser.
    if not smiles:
        return None

    mol = Chem.MolFromSmiles(smiles)
    # A molecule with no atoms has no coordinates; the bounding-box helpers
    # in this module call min()/max() on the atom list and would raise.
    if mol is None or mol.GetNumAtoms() == 0:
        return None

    AllChem.Compute2DCoords(mol)

    # Kekulize for explicit single/double bonds.
    # Use a copy because clearAromaticFlags=True corrupts the mol on failure.
    mol_kek = Chem.RWMol(mol)
    try:
        Chem.Kekulize(mol_kek, clearAromaticFlags=True)
    except Exception:
        # Kekulization failed — use original mol with aromatic bonds.
        # _rdkit_mol_to_atom_bond_dicts handles AROMATIC → order 2 fallback.
        pass
    else:
        mol = mol_kek

    from ..image.structure_from_image import (
        _rdkit_mol_to_atom_bond_dicts,
        normalize_for_cdxml,
    )

    # Extract atom/bond dicts, then normalize to CDXML coordinate space
    # (scale + flip y + center).
    atoms, bonds = _rdkit_mol_to_atom_bond_dicts(mol)
    atoms, bonds = normalize_for_cdxml(atoms, bonds, center_x, center_y)

    return atoms, bonds
216
+
217
+
218
def _align_mol_to_reference(
    target_mol,
    ref_mol,
    center_x: float = 200.0,
    center_y: float = 300.0,
) -> Optional[Tuple[List[Dict], List[Dict]]]:
    """
    Depict ``target_mol`` in the orientation of ``ref_mol`` and return its
    atom/bond dicts in CDXML coordinates.

    The maximum common substructure (MCS) of the two molecules is located
    and used as the atom correspondence for RDKit's
    GenerateDepictionMatching2DStructure. If that call raises, the target
    keeps a freshly computed standard 2D layout.

    Note: 2D coordinates are computed in place on ``target_mol`` (and on
    ``ref_mol`` when it has no conformer yet).

    Returns (atoms, bonds), or None when RDKit is unavailable, either
    molecule is None, the MCS search fails, the MCS has fewer than 3 atoms,
    or the MCS cannot be matched back onto both molecules.
    """
    try:
        from rdkit import Chem
        from rdkit.Chem import AllChem, rdFMCS
    except ImportError:
        return None

    if target_mol is None or ref_mol is None:
        return None

    # Shared scaffold between target and reference.
    try:
        mcs = rdFMCS.FindMCS(
            [target_mol, ref_mol],
            timeout=5,
            ringMatchesRingOnly=True,
            completeRingsOnly=True,
        )
    except Exception:
        return None

    # Too small a scaffold gives no meaningful orientation.
    if mcs.numAtoms < 3:
        return None

    scaffold_query = Chem.MolFromSmarts(mcs.smartsString)
    if scaffold_query is None:
        return None

    ref_hits = ref_mol.GetSubstructMatch(scaffold_query)
    target_hits = target_mol.GetSubstructMatch(scaffold_query)
    if not (ref_hits and target_hits):
        return None

    # (ref_idx, target_idx) pairs for the scaffold atoms.
    index_pairs = list(zip(ref_hits, target_hits))

    # The reference needs coordinates to be aligned against.
    if ref_mol.GetNumConformers() == 0:
        AllChem.Compute2DCoords(ref_mol)

    try:
        AllChem.Compute2DCoords(target_mol)
        AllChem.GenerateDepictionMatching2DStructure(
            target_mol, ref_mol, atomMap=index_pairs
        )
    except Exception:
        # Alignment failed — fall back to standard coords.
        AllChem.Compute2DCoords(target_mol)

    # Kekulize a copy so a failed attempt leaves the aligned mol untouched.
    kekulized = Chem.RWMol(target_mol)
    try:
        Chem.Kekulize(kekulized, clearAromaticFlags=True)
    except Exception:
        depicted = target_mol
    else:
        depicted = kekulized

    # Extract atom/bond dicts
    from ..image.structure_from_image import _rdkit_mol_to_atom_bond_dicts
    atoms, bonds = _rdkit_mol_to_atom_bond_dicts(depicted)

    # Normalize to CDXML coordinate space
    from ..image.structure_from_image import normalize_for_cdxml
    atoms, bonds = normalize_for_cdxml(atoms, bonds, center_x, center_y)

    return atoms, bonds
302
+
303
+
304
+ # ---------------------------------------------------------------------------
305
+ # Fragment XML builder (adapted from cdxml_builder._build_fragment)
306
+ # ---------------------------------------------------------------------------
307
+
308
# Element symbol → atomic number lookup.
#
# Covers every element from H (1) through Bi (83) without gaps. The table is
# derived from an ordered symbol tuple so each atomic number is simply the
# symbol's position, which rules out transcription slips. _build_fragment
# falls back to carbon (6) for symbols missing here, so gaps such as
# Sc, Y, Nb, Hf or the later lanthanides would be written as carbon atoms.
_ELEMENT_SYMBOLS_BY_Z = (
    "H", "He",
    "Li", "Be", "B", "C", "N", "O", "F", "Ne",
    "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar",
    "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
    "Ga", "Ge", "As", "Se", "Br", "Kr",
    "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd",
    "In", "Sn", "Sb", "Te", "I", "Xe",
    "Cs", "Ba",
    "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er",
    "Tm", "Yb", "Lu",
    "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi",
)

ELEMENT_NUMBERS = {
    symbol: atomic_number
    for atomic_number, symbol in enumerate(_ELEMENT_SYMBOLS_BY_Z, start=1)
}
321
+
322
# Bond stereo: maps a bond dict's "cfg" value to the CDXML ``Display``
# attribute emitted by _build_fragment. Bonds whose cfg is not a key here
# get no Display attribute.
# NOTE(review): cfg 1 and 4 both map to "WedgeBegin". If "cfg" follows the
# MOL-file bond-stereo convention (1 = up, 4 = either, 6 = down), 4 would be
# expected to render as a wavy bond — confirm against the code producing "cfg".
BOND_STEREO_ATTR = {
    1: "WedgeBegin",
    4: "WedgeBegin",
    6: "WedgedHashBegin",
}
328
+
329
+
330
def _build_fragment(
    atoms: List[Dict],
    bonds: List[Dict],
    ids: _IDGen,
) -> Tuple[str, Dict[int, int], int]:
    """
    Build a <fragment> XML string from atom/bond dicts.

    Each atom dict must provide "index", "x" and "y"; "symbol" (default
    "C"), "num_hydrogens", "charge" and "isotope" are optional. Each bond
    dict must provide "atom1" and "atom2" (atom "index" values); "order"
    (default 1), "cfg" and "double_pos" are optional.

    Returns (xml_string, atom_id_map, fragment_xml_id), where atom_id_map
    maps each atom's "index" to the XML id assigned to its <n> element.

    Raises ValueError (from min()/max()) when ``atoms`` is empty.

    NOTE(review): the nesting of the attribute-emitting ``if`` statements
    below was reconstructed from a listing without indentation — confirm
    against the packaged source.
    """
    atom_id_map: Dict[int, int] = {}
    frag_id = ids.next()

    # Fragment bounding box from raw atom centers (no padding).
    xs = [a["x"] for a in atoms]
    ys = [a["y"] for a in atoms]
    bb_x1, bb_y1 = min(xs), min(ys)
    bb_x2, bb_y2 = max(xs), max(ys)

    lines: List[str] = []
    lines.append(
        f'<fragment id="{frag_id}" '
        f'BoundingBox="{bb_x1:.2f} {bb_y1:.2f} {bb_x2:.2f} {bb_y2:.2f}" '
        f'Z="{ids.next()}">'
    )

    for a in atoms:
        atom_xml_id = ids.next()
        atom_id_map[a["index"]] = atom_xml_id
        ax, ay = a["x"], a["y"]
        z = ids.next()

        sym = a.get("symbol", "C")
        # Symbols missing from ELEMENT_NUMBERS are written as carbon (6).
        elem_num = ELEMENT_NUMBERS.get(sym, 6)
        nh = a.get("num_hydrogens")
        charge = a.get("charge", 0)
        isotope = a.get("isotope")

        attrs = [
            f'id="{atom_xml_id}"',
            f'p="{ax:.2f} {ay:.2f}"',
            f'Z="{z}"',
        ]

        # Only a neutral, non-isotopic carbon is drawn as a bare node;
        # everything else gets element attributes and a text label.
        is_carbon = (sym == "C" and not charge and not isotope)
        if not is_carbon:
            attrs.append(f'Element="{elem_num}"')
            if nh is not None:
                attrs.append(f'NumHydrogens="{nh}"')
            if isotope:
                attrs.append(f'Isotope="{isotope}"')
            attrs.append('NeedsClean="yes"')
        if charge:
            attrs.append(f'Charge="{charge}"')

        if is_carbon:
            lines.append(f'<n {" ".join(attrs)}/>')
        else:
            # Heteroatom needs a text label
            lines.append(f'<n {" ".join(attrs)}>')
            # Label anchor and bounding box are fixed offsets from the
            # atom position; width scales with the symbol length.
            lx = ax - 3.25
            ly = ay + 3.52
            label_w = max(len(sym) * 5.5, 6.0)
            lbx1 = ax - label_w / 2.0
            lby1 = ay - 7.52
            lbx2 = ax + label_w / 2.0
            lby2 = ay
            tid = ids.next()
            lines.append(
                f'<t id="{tid}" p="{lx:.2f} {ly:.2f}" '
                f'BoundingBox="{lbx1:.2f} {lby1:.2f} {lbx2:.2f} {lby2:.2f}" '
                f'LabelJustification="Left">'
            )
            # Use isotope-specific symbol for display (e.g. D for deuterium)
            if sym == "H" and isotope == 2:
                display_text = "D"
            elif sym == "H" and isotope == 3:
                display_text = "T"
            else:
                display_text = sym
            # Append the hydrogen count to the label: "H" for one, "Hn" otherwise.
            if nh is not None and nh > 0:
                display_text += "H" if nh == 1 else f"H{nh}"
            lines.append(
                f'<s font="{ACS_LABEL_FONT}" size="{ACS_LABEL_SIZE}" '
                f'color="0" face="{ACS_LABEL_FACE}">{xml_escape(display_text)}</s>'
            )
            lines.append('</t>')
            lines.append('</n>')

    # Bonds
    for b in bonds:
        bid = ids.next()
        z = ids.next()
        # Endpoints not present in atom_id_map are written as id 0.
        a1 = atom_id_map.get(b["atom1"], 0)
        a2 = atom_id_map.get(b["atom2"], 0)
        order = b.get("order", 1)

        attrs = [
            f'id="{bid}"',
            f'Z="{z}"',
            f'B="{a1}"',
            f'E="{a2}"',
        ]
        # Only orders 2 and 3 are written; any other order emits no Order attribute.
        if order == 2:
            attrs.append('Order="2"')
        elif order == 3:
            attrs.append('Order="3"')

        cfg = b.get("cfg", 0)
        if cfg in BOND_STEREO_ATTR:
            attrs.append(f'Display="{BOND_STEREO_ATTR[cfg]}"')

        dp = b.get("double_pos")
        if dp:
            attrs.append(f'DoublePosition="{dp}"')

        lines.append(f'<b {" ".join(attrs)}/>')

    lines.append('</fragment>')
    return "\n".join(lines), atom_id_map, frag_id
449
+
450
+
451
+ # ---------------------------------------------------------------------------
452
+ # Text builder
453
+ # ---------------------------------------------------------------------------
454
+
455
def _build_text_element(
    text_lines: List[str],
    x: float,
    y: float,
    ids: _IDGen,
    justification: str = "Center",
    use_formatting: bool = True,
    font_size: Optional[float] = None,
) -> Tuple[str, int]:
    """
    Build a standalone <t> element for condition text.

    Parameters
    ----------
    text_lines : list of str
        One entry per rendered line.
    x, y : float
        Written as the element's ``p`` attribute; the bounding box is
        estimated around this point from the module's text metrics.
    ids : _IDGen
        Supplies the element id and Z value (two IDs are consumed).
    justification : str
        Written to both ``CaptionJustification`` and ``Justification``.
    use_formatting : bool
        When True each line goes through ``build_formatted_s_xml``;
        otherwise all lines are escaped and emitted in one plain <s> run.
    font_size : float, optional
        Override font size (default: ACS_CAPTION_SIZE, typically 10pt).

    Returns (xml_string, text_xml_id).
    """
    size = float(ACS_CAPTION_SIZE if font_size is None else font_size)
    # Metrics are defined at the caption size; rescale for other sizes.
    scale = size / float(ACS_CAPTION_SIZE)
    char_w = _CHAR_WIDTH * scale
    cap_h = _CAP_HEIGHT * scale
    descent = _DESCENT * scale
    line_adv = _LINE_ADVANCE * scale

    text_id = ids.next()
    z_order = ids.next()

    line_count = len(text_lines)
    longest = max((len(ln) for ln in text_lines), default=1)
    width = longest * char_w

    box_left = x - width / 2.0
    box_top = y - cap_h
    box_right = x + width / 2.0
    box_bottom = y + max(0, line_count - 1) * line_adv + descent

    pieces = [
        f'<t id="{text_id}" p="{x:.2f} {y:.2f}" '
        f'BoundingBox="{box_left:.2f} {box_top:.2f} {box_right:.2f} {box_bottom:.2f}" '
        f'Z="{z_order}" '
        f'CaptionJustification="{justification}" '
        f'Justification="{justification}" '
        f'LineHeight="auto">'
    ]

    if not use_formatting:
        plain = "\n".join(xml_escape(ln) for ln in text_lines)
        pieces.append(
            f'<s font="{ACS_LABEL_FONT}" size="{size}" '
            f'color="0" face="{ACS_CAPTION_FACE}">{plain}</s>'
        )
    else:
        # Chemistry-aware formatting, one set of runs per line.
        # ChemDraw requires \n INSIDE <s> text content for line breaks —
        # newlines between XML elements are treated as whitespace.
        final_index = line_count - 1
        runs: List[str] = []
        for idx, line in enumerate(text_lines):
            run_xml = build_formatted_s_xml(
                line,
                font=ACS_LABEL_FONT,
                size=size,
                color="0",
            )
            if idx < final_index:
                # Put the line break just before the last closing </s>
                # of this line's runs.
                head, closing, tail = run_xml.rpartition("</s>")
                if closing:
                    run_xml = head + "\n" + closing + tail
            runs.append(run_xml)
        pieces.append("".join(runs))

    pieces.append("</t>")
    return "\n".join(pieces), text_id
531
+
532
+
533
def _build_label_element(
    label: str,
    x: float,
    y: float,
    ids: _IDGen,
) -> Tuple[str, int]:
    """Build a compound-number label (e.g. "1", "2") as a centered, unformatted text element.

    Returns (xml_string, text_xml_id).
    """
    label_xml, label_id = _build_text_element(
        [label],
        x,
        y,
        ids,
        justification="Center",
        use_formatting=False,
    )
    return label_xml, label_id
543
+
544
+
545
+ # ---------------------------------------------------------------------------
546
+ # Arrow builder
547
+ # ---------------------------------------------------------------------------
548
+
549
def _build_arrow(
    tail_x: float,
    tail_y: float,
    head_x: float,
    head_y: float,
    ids: _IDGen,
    dashed: bool = False,
    nogo: bool = False,
) -> Tuple[str, int]:
    """Build an <arrow> element. Returns (xml_string, arrow_xml_id).

    Parameters
    ----------
    tail_x, tail_y, head_x, head_y : float
        End points, written to ``Tail3D`` / ``Head3D``.
    ids : _IDGen
        Supplies the arrow id and its Z value (two IDs are consumed).
    dashed : bool
        If True, adds ``LineType="Dashed"``.
    nogo : bool
        If True, adds ``NoGo="Cross"`` — ChemDraw's native failed-arrow
        rendering (bold X through the arrow shaft).
    """
    arrow_id = ids.next()
    z_order = ids.next()

    # Bounding box: end-point extents, padded by 4 pt above and below.
    box = (
        min(tail_x, head_x),
        min(tail_y, head_y) - 4.0,
        max(tail_x, head_x),
        max(tail_y, head_y) + 4.0,
    )

    # Center3D sits 100 pt below the tail at the horizontal midpoint; the
    # axis end points are offset 80 pt from it.
    mid_x = (tail_x + head_x) / 2.0
    center_y = tail_y + 100.0

    attrs = [
        f'id="{arrow_id}"',
        f'BoundingBox="{box[0]:.2f} {box[1]:.2f} {box[2]:.2f} {box[3]:.2f}"',
        f'Z="{z_order}"',
        'FillType="None"',
        'ArrowheadHead="Full"',
        'ArrowheadType="Solid"',
        'HeadSize="1000"',
        'ArrowheadCenterSize="875"',
        'ArrowheadWidth="250"',
        f'Head3D="{head_x:.2f} {head_y:.2f} 0"',
        f'Tail3D="{tail_x:.2f} {tail_y:.2f} 0"',
        f'Center3D="{mid_x:.2f} {center_y:.2f} 0"',
        f'MajorAxisEnd3D="{mid_x + 80:.2f} {center_y:.2f} 0"',
        f'MinorAxisEnd3D="{mid_x:.2f} {center_y + 80:.2f} 0"',
    ]
    if dashed:
        attrs.append('LineType="Dashed"')
    if nogo:
        attrs.append('NoGo="Cross"')

    return f'<arrow {" ".join(attrs)}/>', arrow_id
601
+
602
+
603
def _build_failed_x(
    cx: float,
    cy: float,
    ids: _IDGen,
) -> str:
    """Build a bold 'X' text element centered on an arrow midpoint.

    Returns the <t> XML string; two IDs are consumed from ``ids``.

    .. deprecated::
        Use ``NoGo="Cross"`` on the ``<arrow>`` element instead.
        This function is kept for backwards compatibility.
    """
    text_id = ids.next()
    z_order = ids.next()
    # The text anchor is the baseline, so shift it by 3.5 pt to make the
    # glyph sit centered on the arrow line.
    baseline_y = cy + 3.5
    rows = [
        f'<t id="{text_id}" p="{cx:.2f} {baseline_y:.2f}" Z="{z_order}" '
        f'Justification="Center" InterpretChemically="no" '
        f'CaptionJustification="Center">',
        f'<s font="{ACS_LABEL_FONT}" size="12" color="0" face="1">X</s>',
        '</t>',
    ]
    return "\n".join(rows)
625
+
626
+
627
def _is_letter_condition(text_lines: List[str]) -> bool:
    """Return True when the text is a single line of comma-separated lowercase letters.

    Examples that qualify: 'a', 'b,c', 'a, b'. Anything with more or fewer
    than one line never qualifies.
    """
    import re

    if len(text_lines) == 1:
        candidate = text_lines[0].strip()
        return re.match(r'^[a-z](,\s*[a-z])*$', candidate) is not None
    return False
633
+
634
+
635
def _build_letter_label(
    letter: str,
    cx: float,
    y: float,
    ids: _IDGen,
) -> Tuple[str, int]:
    """Build a small (size 8) letter label centered above an arrow.

    ``y`` is the arrow line; the text anchor is placed 6 pt above it.
    Returns (xml_string, text_xml_id); two IDs are consumed from ``ids``.
    """
    text_id = ids.next()
    z_order = ids.next()
    anchor_y = y - 6.0  # sit slightly above the arrow line
    rows = [
        f'<t id="{text_id}" p="{cx:.2f} {anchor_y:.2f}" Z="{z_order}" '
        f'Justification="Center" InterpretChemically="no" '
        f'CaptionJustification="Center">',
        f'<s font="{ACS_LABEL_FONT}" size="8" color="0" face="1">'
        f'{xml_escape(letter)}</s>',
        '</t>',
    ]
    return "\n".join(rows), text_id
655
+
656
+
657
def _build_condition_key(
    condition_key: Dict[str, str],
    left_x: float,
    top_y: float,
    ids: _IDGen,
) -> Tuple[str, float]:
    """Build the condition key block below the scheme.

    One <t> element is emitted per entry, in sorted key order, each reading
    "(letter) conditions". Entries start at ``top_y`` and are spaced 14 pt
    apart, all anchored at ``left_x``.

    Returns (xml_string, bottom_y), where bottom_y is the y position one
    line spacing past the last entry (``top_y`` for an empty key).
    """
    entries: List[str] = []
    cursor_y = top_y
    for letter in sorted(condition_key):
        conditions = condition_key[letter]
        text_id = ids.next()
        z_order = ids.next()

        # "(a)" prefix uses face 2 (italic per the original author's note);
        # the conditions go through the chemistry-aware formatter.
        prefix_run = (
            f'<s font="{ACS_LABEL_FONT}" size="{ACS_CAPTION_SIZE}" '
            f'color="0" face="2">({xml_escape(letter)})</s>'
        )
        body_run = build_formatted_s_xml(
            f" {conditions}",
            font=ACS_LABEL_FONT,
            size=ACS_CAPTION_SIZE,
            color="0",
        )
        rows = [
            f'<t id="{text_id}" p="{left_x:.2f} {cursor_y:.2f}" Z="{z_order}" '
            f'InterpretChemically="no">',
            f'{prefix_run}{body_run}',
            '</t>',
        ]
        entries.append("\n".join(rows))
        cursor_y += 14.0  # line spacing
    return "\n".join(entries), cursor_y
693
+
694
+
695
def _build_vertical_arrow(
    x: float,
    top_y: float,
    bottom_y: float,
    ids: _IDGen,
    condition_lines: Optional[List[str]] = None,
    condition_side: str = "right",
    dashed: bool = False,
    nogo: bool = False,
) -> Tuple[str, int]:
    """
    Build a vertical down arrow with optional condition text beside it.

    Parameters
    ----------
    x : float
        Horizontal position of the arrow.
    top_y : float
        Y coordinate of the arrow tail (top, in CDXML y-down coords).
    bottom_y : float
        Y coordinate of the arrow head (bottom).
    condition_lines : list of str, optional
        Condition text lines placed beside the arrow.
    condition_side : "right" or "left"
        Which side of the arrow to place condition text. Any value other
        than "right" places it on the left.
    dashed : bool
        Forwarded to ``_build_arrow``.
    nogo : bool
        If True, adds ``NoGo="Cross"`` for failed arrow.

    Returns (xml_string, arrow_xml_id).
    """
    shaft_xml, shaft_id = _build_arrow(
        x, top_y, x, bottom_y, ids, dashed=dashed, nogo=nogo
    )
    if not condition_lines:
        return shaft_xml, shaft_id

    gap = 12.0  # horizontal gap from arrow shaft
    on_right = condition_side == "right"
    anchor_x = x + gap if on_right else x - gap
    # Text grows away from the shaft: left-justified on the right side,
    # right-justified on the left side.
    align = "Left" if on_right else "Right"

    # The anchor y is the first line's baseline. The visual block spans
    # from (baseline - cap height) down by the estimated text height, so
    # centering it on the arrow midpoint gives:
    #   baseline = midpoint - height/2 + cap height
    midpoint_y = (top_y + bottom_y) / 2.0
    block_height = _estimate_text_height(condition_lines)
    baseline_y = midpoint_y - block_height / 2.0 + _CAP_HEIGHT

    caption_xml, _ = _build_text_element(
        condition_lines, anchor_x, baseline_y, ids, justification=align,
    )
    return shaft_xml + "\n" + caption_xml, shaft_id
754
+
755
+
756
def _build_run_arrow(
    tail_x: float,
    head_x: float,
    y: float,
    input_label: str,
    output_label: str,
    ids: _IDGen,
) -> Tuple[str, List[int]]:
    """
    Build a simple horizontal run arrow with input/output labels.

    The input label is always emitted, right-justified 5 pt left of the
    tail. The output label is emitted only when non-empty, left-justified
    5 pt right of the head.

    Returns (xml_string, [element_ids]) with the arrow id first, followed
    by the label ids in emission order.
    """
    shaft_xml, shaft_id = _build_arrow(tail_x, y, head_x, y, ids)
    xml_chunks: List[str] = [shaft_xml]
    element_ids: List[int] = [shaft_id]

    # Anchor y is the text baseline; the visual center of a single line is
    # (_CAP_HEIGHT - _DESCENT)/2 above it, so shift the baseline down by
    # that amount to center the labels on the arrow.
    baseline_y = y + (_CAP_HEIGHT - _DESCENT) / 2.0

    label_specs = [(input_label, tail_x - 5.0, "Right")]
    if output_label:
        label_specs.append((output_label, head_x + 5.0, "Left"))

    for text, anchor_x, align in label_specs:
        label_xml, label_id = _build_text_element(
            [text], anchor_x, baseline_y, ids,
            justification=align, use_formatting=False,
        )
        xml_chunks.append(label_xml)
        element_ids.append(label_id)

    return "\n".join(xml_chunks), element_ids
798
+
799
+
800
+ # ---------------------------------------------------------------------------
801
+ # Layout helpers
802
+ # ---------------------------------------------------------------------------
803
+
804
def _fragment_bbox(atoms: List[Dict]) -> Tuple[float, float, float, float]:
    """Return the padded (min_x, min_y, max_x, max_y) of the atom positions.

    The atom-center extents are widened by _FRAG_PAD_W on the left and
    right and by _FRAG_PAD_H on the top and bottom, to account for bond
    lines and heteroatom labels that extend beyond atom centers
    (measured: +6.6pt width, +2.9pt height).

    Raises ValueError (from min()/max()) when ``atoms`` is empty.
    """
    x_coords = [atom["x"] for atom in atoms]
    y_coords = [atom["y"] for atom in atoms]
    left = min(x_coords) - _FRAG_PAD_W
    top = min(y_coords) - _FRAG_PAD_H
    right = max(x_coords) + _FRAG_PAD_W
    bottom = max(y_coords) + _FRAG_PAD_H
    return (left, top, right, bottom)
818
+
819
+
820
def _bbox_width(bbox: Tuple[float, float, float, float]) -> float:
    """Return the horizontal extent (max_x - min_x) of a bounding box."""
    min_x, max_x = bbox[0], bbox[2]
    return max_x - min_x
822
+
823
+
824
def _bbox_height(bbox: Tuple[float, float, float, float]) -> float:
    """Return the vertical extent (max_y - min_y) of a bounding box."""
    min_y, max_y = bbox[1], bbox[3]
    return max_y - min_y
826
+
827
+
828
def _bbox_center(bbox: Tuple[float, float, float, float]) -> Tuple[float, float]:
    """Return the (x, y) midpoint of a (min_x, min_y, max_x, max_y) box."""
    mid_x = (bbox[0] + bbox[2]) / 2.0
    mid_y = (bbox[1] + bbox[3]) / 2.0
    return (mid_x, mid_y)
830
+
831
+
832
+ def _shift_atoms(atoms: List[Dict], dx: float, dy: float) -> None:
833
+ """Translate all atoms in place."""
834
+ for a in atoms:
835
+ a["x"] += dx
836
+ a["y"] += dy
837
+
838
+
839
+ def _estimate_text_width(lines: List[str]) -> float:
840
+ """Estimate text block width in CDXML points."""
841
+ if not lines:
842
+ return 0
843
+ return max(len(ln) for ln in lines) * _CHAR_WIDTH
844
+
845
+
846
+ def _estimate_text_height(lines: List[str]) -> float:
847
+ """Estimate visual text block height (cap-height + line advances + descent)."""
848
+ n = len(lines)
849
+ if n == 0:
850
+ return 0.0
851
+ return _CAP_HEIGHT + max(0, n - 1) * _LINE_ADVANCE + _DESCENT
852
+
853
+
854
+ # ---------------------------------------------------------------------------
855
+ # Structure resolver
856
+ # ---------------------------------------------------------------------------
857
+
858
def _resolve_structure(
    ref: StructureRef,
    center_x: float = 200.0,
    center_y: float = 300.0,
    source_data: Optional[Dict[str, Dict]] = None,
    reference_mol: Optional[Any] = None,
) -> ResolvedFragment:
    """
    Resolve a StructureRef to atoms/bonds in CDXML coordinates.

    Resolution order (four tiers):
      1. Source JSON — when the ref declares no SMILES, ``source_data`` is
         searched by ``ref.id`` (exact, then lowercase) for ``"smiles"``
         and, if the ref has no label, ``"label"``.
      2. SMILES — the declared or source-provided SMILES is converted to 2D
         coordinates (MCS-aligned to ``reference_mol`` when that succeeds).
      3. Name — ``ref.name`` (or the source entry's ``"name"``) is looked up
         in reagent_db and the resulting SMILES is resolved recursively.
      4. ID as name — ``ref.id`` itself is looked up in reagent_db.

    Parameters
    ----------
    ref : StructureRef
        The structure to resolve.
    center_x, center_y : float
        Target centre handed to the coordinate generators.
    source_data : dict, optional
        Species records keyed by id; see tier 1.
    reference_mol : RDKit Mol, optional
        When provided, the structure is aligned to this reference via MCS.
        This orients shared scaffolds to match the reference (product) layout.

    Returns
    -------
    ResolvedFragment
        Atoms, bonds, padded bbox and bbox centre, with a StructureRef copy
        carrying the resolved SMILES and label.

    Raises
    ------
    ValueError
        If 2D coordinates cannot be generated, or no tier yields a structure.
        For a failed name lookup the underlying exception, if any, is
        attached as ``__cause__``.
    NotImplementedError
        If only ``ref.file`` is available.
    """
    smiles = ref.smiles
    label = ref.label

    # Tier 1: Source JSON lookup (by ref.id) — try exact id, then lowercase.
    species = None
    if source_data and not smiles:
        species = source_data.get(ref.id) or source_data.get(ref.id.lower())
        if species:
            smiles = species.get("smiles")
            # JSON label overrides ref label when ref has none
            if label is None and species.get("label"):
                label = species["label"]

    def _to_fragment(atoms, bonds) -> ResolvedFragment:
        """Wrap positioned atoms/bonds together with their bbox and centre."""
        bbox = _fragment_bbox(atoms)
        cx, cy = _bbox_center(bbox)
        resolved_ref = StructureRef(
            id=ref.id, smiles=smiles, name=ref.name, file=ref.file,
            cdxml_id=ref.cdxml_id, label=label,
        )
        return ResolvedFragment(
            ref=resolved_ref, atoms=atoms, bonds=bonds,
            bbox=bbox, cx=cx, cy=cy,
        )

    def _via_reagent_db(query: str) -> Optional[ResolvedFragment]:
        """Resolve *query* through reagent_db; None when it has no SMILES."""
        from ..reagent_db import get_reagent_db
        entry = get_reagent_db().entry_for_name(query.lower())
        if not (entry and entry.get("smiles")):
            return None
        smi = entry["smiles"]
        if isinstance(smi, list):
            smi = smi[0]
        ref_copy = StructureRef(id=ref.id, smiles=smi, label=label)
        return _resolve_structure(
            ref_copy, center_x, center_y,
            reference_mol=reference_mol,
        )

    # Tier 2: Declared SMILES
    if smiles:
        # Try MCS alignment to reference if available
        if reference_mol is not None:
            try:
                from rdkit import Chem
                target_mol = Chem.MolFromSmiles(smiles)
                if target_mol is not None:
                    aligned = _align_mol_to_reference(
                        target_mol, reference_mol, center_x, center_y,
                    )
                    if aligned is not None:
                        return _to_fragment(*aligned)
            except Exception:
                # Alignment is best-effort: any failure falls through to the
                # standard coordinate generation below.
                pass

        result = _smiles_to_fragment_data(smiles, center_x, center_y)
        if result is None:
            raise ValueError(f"Failed to generate 2D coords for '{ref.id}' (SMILES: {smiles})")
        return _to_fragment(*result)

    # Tier 3: Name resolution (reagent_db).  Reaching this point means the
    # ref declared no SMILES, so the tier-1 lookup (if any) already ran and
    # `species` can be reused for the name.
    name = ref.name
    if not name and species:
        name = species.get("name")

    if name:
        lookup_error: Optional[BaseException] = None
        try:
            resolved = _via_reagent_db(name)
            if resolved is not None:
                return resolved
        except Exception as exc:
            # Keep the real failure so it is not lost behind the generic
            # message raised below.
            lookup_error = exc
        raise ValueError(
            f"Cannot resolve structure '{ref.id}' by name '{name}'. "
            f"Provide a SMILES string instead."
        ) from lookup_error

    if ref.file:
        raise NotImplementedError(
            f"File-based structure loading not yet implemented for '{ref.id}'"
        )

    # Tier 4: Try the ID itself as a compound name in reagent_db
    try:
        resolved = _via_reagent_db(ref.id)
        if resolved is not None:
            return resolved
    except ImportError:
        pass

    raise ValueError(
        f"Structure '{ref.id}' has no smiles, name, or file — cannot resolve. "
        f"Provide explicit smiles: or name: in the structures block."
    )
991
+
992
+
993
+ # ---------------------------------------------------------------------------
994
+ # Layout engine: linear (single step)
995
+ # ---------------------------------------------------------------------------
996
+
997
def _layout_linear(
    scheme: SchemeDescriptor,
    ids: _IDGen,
    source_data: Optional[Dict[str, Dict]] = None,
    reference_mol: Optional[Any] = None,
) -> Tuple[str, float]:
    """
    Lay out a single-step scheme (substrates → products) as one row.

    All of ``scheme.steps`` are handed to ``_layout_steps_row`` starting at
    x=100, with the arrow at y=300.

    Returns (inner_xml, lowest_y); the row-edge and row-info values produced
    by the row layout are discarded.
    """
    inner_xml, lowest_y, _row_edge, _row_info = _layout_steps_row(
        scheme,
        scheme.steps,
        ids,
        start_x=100.0,
        arrow_y=300.0,
        source_data=source_data,
        reference_mol=reference_mol,
    )
    return inner_xml, lowest_y
1012
+
1013
+
1014
+ # ---------------------------------------------------------------------------
1015
+ # Layout engine: sequential (multi-step, with wrap:repeat support)
1016
+ # ---------------------------------------------------------------------------
1017
+
1018
+ def _split_into_rows(
1019
+ steps: List[StepDescriptor],
1020
+ steps_per_row: Optional[int],
1021
+ ) -> List[List[StepDescriptor]]:
1022
+ """Split steps into row groups for wrap:repeat."""
1023
+ if steps_per_row is None or steps_per_row >= len(steps):
1024
+ return [steps]
1025
+ rows = []
1026
+ for i in range(0, len(steps), steps_per_row):
1027
+ rows.append(steps[i:i + steps_per_row])
1028
+ return rows
1029
+
1030
+
1031
+ def _split_serpentine_rows(
1032
+ steps: List[StepDescriptor],
1033
+ steps_per_row: int,
1034
+ ) -> Tuple[List[List[StepDescriptor]], List[Optional[StepDescriptor]]]:
1035
+ """
1036
+ Split steps into horizontal rows + transition steps for serpentine layout.
1037
+
1038
+ Returns (rows, transitions) where:
1039
+ - rows[i] = list of steps rendered horizontally in row i
1040
+ - transitions[i] = the step between row i and row i+1 (rendered as
1041
+ a vertical arrow), or None if row i is the last row.
1042
+ """
1043
+ rows: List[List[StepDescriptor]] = []
1044
+ transitions: List[Optional[StepDescriptor]] = []
1045
+ i = 0
1046
+ while i < len(steps):
1047
+ row = steps[i:i + steps_per_row]
1048
+ rows.append(row)
1049
+ i += steps_per_row
1050
+ if i < len(steps):
1051
+ # The next step becomes the vertical transition arrow
1052
+ transitions.append(steps[i])
1053
+ i += 1
1054
+ else:
1055
+ transitions.append(None)
1056
+ return rows, transitions
1057
+
1058
+
1059
def _layout_sequential(
    scheme: SchemeDescriptor,
    ids: _IDGen,
    source_data: Optional[Dict[str, Dict]] = None,
    reference_mol: Optional[Any] = None,
) -> Tuple[str, float]:
    """
    Lay out a multi-step sequential scheme.

    Three modes, chosen from ``scheme.wrap`` and ``scheme.steps_per_row``:
      - single row: wrap is neither "repeat" nor "serpentine", or
        steps_per_row is unset or covers all steps
      - wrap:serpentine: delegated to ``_layout_serpentine``
      - wrap:repeat: consecutive left-to-right rows stacked vertically

    Returns (inner_xml, lowest_y).
    """
    wrap = scheme.wrap
    steps_per_row = scheme.steps_per_row

    wrapping_requested = wrap in ("repeat", "serpentine")
    if (not wrapping_requested
            or steps_per_row is None
            or steps_per_row >= len(scheme.steps)):
        # Everything fits on one row.
        single_xml, single_lowest, _, _ = _layout_steps_row(
            scheme, scheme.steps, ids, start_x=80.0, arrow_y=300.0,
            source_data=source_data, reference_mol=reference_mol)
        return single_xml, single_lowest

    if wrap == "serpentine":
        return _layout_serpentine(scheme, ids, source_data=source_data,
                                  reference_mol=reference_mol)

    # --- Multi-row wrap:repeat ---
    rows = _split_into_rows(scheme.steps, steps_per_row)

    # Bucket the run arrows per row.  A 1-indexed overall step N lives in
    # row (N-1)//spr, and its 1-indexed position inside that row is
    # ((N-1) % spr) + 1.
    arrows_by_row: Dict[int, List[StepRunArrows]] = {}
    for run_arrow in scheme.run_arrows:
        target_row, zero_based_pos = divmod(run_arrow.step - 1, steps_per_row)
        arrows_by_row.setdefault(target_row, []).append(
            StepRunArrows(step=zero_based_pos + 1, runs=run_arrow.runs))

    rendered_rows: List[str] = []
    row_arrow_y = 300.0
    lowest_y = row_arrow_y

    for index, row_steps in enumerate(rows):
        row_xml, lowest_y, _, _ = _layout_steps_row(
            scheme, row_steps, ids,
            start_x=80.0, arrow_y=row_arrow_y,
            source_data=source_data,
            run_arrows=arrows_by_row.get(index, []),
            reference_mol=reference_mol,
        )
        rendered_rows.append(row_xml)

        # The following row is placed ROW_GAP below this row's lowest point.
        row_arrow_y = lowest_y + ROW_GAP

    return "\n".join(rendered_rows), lowest_y
1123
+
1124
+
1125
def _layout_serpentine(
    scheme: SchemeDescriptor,
    ids: _IDGen,
    source_data: Optional[Dict[str, Dict]] = None,
    reference_mol: Optional[Any] = None,
) -> Tuple[str, float]:
    """
    Layout a serpentine (zigzag) multi-row scheme.

    Row 1 flows L→R, vertical down arrow below its last product, Row 2 flows
    R→L, vertical down arrow below its last product, Row 3 flows L→R, etc.

    Transition steps between rows are rendered as vertical arrows with
    their conditions placed beside the arrow (right side after an LTR row,
    left side after an RTL row).  The transition step's first product is
    drawn below the arrow and serves as the pre-placed first substrate of
    the next row.  The transition's ``arrow_style`` ("dashed" / "failed")
    is forwarded to the vertical arrow.

    Returns (inner_xml, lowest_y).  ``lowest_y`` covers the last row and,
    if the scheme ends on a transition step, that step's product and label.
    """
    steps_per_row = scheme.steps_per_row
    left_margin = 80.0

    rows, transitions = _split_serpentine_rows(scheme.steps, steps_per_row)

    bond_len = ACS_BOND_LENGTH
    frag_gap = bond_len * LAYOUT_FRAG_GAP_BONDS
    inter_gap = bond_len * LAYOUT_INTER_GAP_BONDS

    xml_parts: List[str] = []
    arrow_y = 300.0
    lowest_y = arrow_y  # defined even when there are no rows to lay out
    right_edge = 0.0  # will be set from first row's width
    last_product_frag_id: Optional[int] = None
    last_product_cursor_x: Optional[float] = None  # cursor pos after pre-placed substrate

    def _get_ref(sid: str) -> StructureRef:
        """Declared StructureRef for *sid*, or a bare one to be resolved later."""
        if sid in scheme.structures:
            return scheme.structures[sid]
        return StructureRef(id=sid)

    for row_idx, row_steps in enumerate(rows):
        direction = "ltr" if row_idx % 2 == 0 else "rtl"
        start_x = left_margin if direction == "ltr" else right_edge

        # For rows after the first, the first substrate was placed by
        # the transition step's product.  Pass skip_first_substrate_id
        # so _layout_steps_row doesn't redraw it.
        skip_id = last_product_frag_id if row_idx > 0 else None
        skip_cursor = last_product_cursor_x if row_idx > 0 else None

        row_xml, lowest_y, row_edge, row_info = _layout_steps_row(
            scheme, row_steps, ids,
            start_x=start_x, arrow_y=arrow_y,
            source_data=source_data,
            direction=direction,
            skip_first_substrate_id=skip_id,
            skip_first_substrate_cursor_x=skip_cursor,
            reference_mol=reference_mol,
        )
        xml_parts.append(row_xml)

        # After row 1, record the right edge for RTL alignment
        if row_idx == 0:
            right_edge = row_edge

        # --- Transition: vertical arrow + product of transition step ---
        transition = transitions[row_idx]
        if transition is None:
            last_product_frag_id = None
            last_product_cursor_x = None
            continue

        # Vertical arrow position: below the CENTER of the last product
        # in the current row (not at the row edge), for both directions.
        vert_x = row_info["last_product_cx"]

        # Vertical arrow geometry:
        #   - tail starts BELOW the last product (below its compound label)
        #   - head ends ROW_GAP further down
        vert_top_y = row_info["last_product_bottom"] + 8.0
        vert_bottom_y = vert_top_y + ROW_GAP

        # Condition text from the transition step: above-arrow lines,
        # then below-arrow lines, then the yield.
        cond_lines: List[str] = []
        if transition.above_arrow and transition.above_arrow.text:
            cond_lines.extend(transition.above_arrow.text)
        if transition.below_arrow and transition.below_arrow.text:
            cond_lines.extend(transition.below_arrow.text)
        if transition.yield_:
            cond_lines.append(transition.yield_)

        # Condition text placement: right side after an LTR row,
        # left side after an RTL row.
        cond_side = "right" if direction == "ltr" else "left"

        vert_xml, _ = _build_vertical_arrow(
            vert_x, vert_top_y, vert_bottom_y, ids,
            condition_lines=cond_lines if cond_lines else None,
            condition_side=cond_side,
            dashed=(transition.arrow_style == "dashed"),
            nogo=(transition.arrow_style == "failed"),
        )
        xml_parts.append(vert_xml)

        # Place the transition step's product below the vertical
        # arrow.  This becomes the first substrate of the next row.
        prod_ref = _get_ref(transition.products[0])
        prod_frag = _resolve_structure(
            prod_ref,
            center_x=vert_x,
            center_y=vert_bottom_y + 25.0,
            source_data=source_data,
            reference_mol=reference_mol,
        )
        # Shift so product top is below arrowhead with clearance
        prod_h = _bbox_height(prod_frag.bbox)
        target_cy = vert_bottom_y + frag_gap + prod_h / 2.0
        _shift_atoms(
            prod_frag.atoms,
            vert_x - prod_frag.cx,
            target_cy - prod_frag.cy,
        )
        prod_frag.bbox = _fragment_bbox(prod_frag.atoms)
        prod_frag.cx, prod_frag.cy = _bbox_center(prod_frag.bbox)

        frag_xml, _, frag_id = _build_fragment(
            prod_frag.atoms, prod_frag.bonds, ids,
        )
        xml_parts.append(frag_xml)

        # Compound label (baseline 14 pt below the fragment's bbox)
        transition_bottom = prod_frag.bbox[3]
        if prod_frag.ref.label:
            lbl_xml, _ = _build_label_element(
                prod_frag.ref.label, prod_frag.cx,
                prod_frag.bbox[3] + 14.0, ids,
            )
            xml_parts.append(lbl_xml)
            transition_bottom += 14.0 + _DESCENT

        # If another row follows, its layout overwrites lowest_y; this only
        # matters when the scheme ends on this transition step.
        lowest_y = max(lowest_y, transition_bottom)

        last_product_frag_id = frag_id

        # Compute cursor position for the next row's first step
        # (after the pre-placed substrate).
        # Next row direction is opposite of current.
        if direction == "ltr":
            # Next row is RTL: cursor is at the LEFT edge of the substrate
            last_product_cursor_x = prod_frag.bbox[0] - inter_gap
        else:
            # Next row is LTR: cursor is at the RIGHT edge of the substrate
            last_product_cursor_x = prod_frag.bbox[2] + inter_gap

        # Next row's arrow runs through the transition product's centre
        arrow_y = prod_frag.cy

    return "\n".join(xml_parts), lowest_y
1283
+
1284
+
1285
+ # ---------------------------------------------------------------------------
1286
+ # Layout engine: divergent (one SM → multiple products via vertical branching)
1287
+ # ---------------------------------------------------------------------------
1288
+
1289
# Vertical clearance kept between the bottom of one branch and the top of
# the next one: 2.5 ACS bond lengths.
_DIVERGENT_BRANCH_GAP = ACS_BOND_LENGTH * 2.5
1291
+
1292
+
1293
+ def _layout_divergent(
1294
+ scheme: SchemeDescriptor,
1295
+ ids: _IDGen,
1296
+ source_data: Optional[Dict[str, Dict]] = None,
1297
+ reference_mol: Optional[Any] = None,
1298
+ ) -> Tuple[str, float]:
1299
+ """
1300
+ Layout a divergent scheme: one SM gives multiple products.
1301
+
1302
+ Detects the shared substrate (appears in multiple steps) and renders it
1303
+ once on the left, with the first step going horizontally and subsequent
1304
+ steps branching downward with vertical arrows.
1305
+
1306
+ Returns (inner_xml, lowest_y).
1307
+ """
1308
+ steps = scheme.steps
1309
+ if not steps:
1310
+ return "", 300.0
1311
+
1312
+ # --- Identify shared substrate ---
1313
+ # Count how many steps each substrate ID appears in
1314
+ from collections import Counter
1315
+ sub_counts: Counter = Counter()
1316
+ for step in steps:
1317
+ for sid in step.substrates:
1318
+ sub_counts[sid] += 1
1319
+
1320
+ # The shared substrate is the one appearing in the most steps
1321
+ shared_id = sub_counts.most_common(1)[0][0] if sub_counts else None
1322
+
1323
+ # If no substrate appears more than once, just stack steps vertically
1324
+ if shared_id is None or sub_counts[shared_id] < 2:
1325
+ # Fall back to stacking as independent rows
1326
+ return _layout_divergent_stacked(scheme, ids, source_data,
1327
+ reference_mol=reference_mol)
1328
+
1329
+ # Separate steps: those sharing the substrate vs others
1330
+ shared_steps = [s for s in steps if shared_id in s.substrates]
1331
+ other_steps = [s for s in steps if shared_id not in s.substrates]
1332
+
1333
+ # --- Resolve shared substrate once ---
1334
+ def _get_ref(sid: str) -> StructureRef:
1335
+ if sid in scheme.structures:
1336
+ return scheme.structures[sid]
1337
+ return StructureRef(id=sid)
1338
+
1339
+ bond_len = ACS_BOND_LENGTH
1340
+ frag_gap = bond_len * LAYOUT_FRAG_GAP_BONDS
1341
+ inter_gap = bond_len * LAYOUT_INTER_GAP_BONDS
1342
+ min_arrow_len = 5.0 * bond_len
1343
+
1344
+ start_x = 100.0
1345
+ arrow_y = 300.0
1346
+
1347
+ sm_ref = _get_ref(shared_id)
1348
+ sm_frag = _resolve_structure(sm_ref, center_x=200.0, center_y=arrow_y,
1349
+ source_data=source_data,
1350
+ reference_mol=reference_mol)
1351
+ # Position SM at start
1352
+ sm_w = _bbox_width(sm_frag.bbox)
1353
+ cx_target = start_x + sm_w / 2.0
1354
+ dx = cx_target - sm_frag.cx
1355
+ dy = arrow_y - sm_frag.cy
1356
+ _shift_atoms(sm_frag.atoms, dx, dy)
1357
+ sm_frag.bbox = _fragment_bbox(sm_frag.atoms)
1358
+ sm_frag.cx, sm_frag.cy = _bbox_center(sm_frag.bbox)
1359
+
1360
+ xml_parts: List[str] = []
1361
+ all_frag_ids: List[int] = []
1362
+ step_metadata: List[Dict] = []
1363
+
1364
+ # Build SM fragment
1365
+ sm_xml, _, sm_frag_id = _build_fragment(sm_frag.atoms, sm_frag.bonds, ids)
1366
+ xml_parts.append(sm_xml)
1367
+ all_frag_ids.append(sm_frag_id)
1368
+
1369
+ # SM label
1370
+ if sm_frag.ref.label:
1371
+ lbl_xml, _ = _build_label_element(
1372
+ sm_frag.ref.label, sm_frag.cx, sm_frag.bbox[3] + 14.0, ids)
1373
+ xml_parts.append(lbl_xml)
1374
+
1375
+ sm_right_x = sm_frag.bbox[2]
1376
+ lowest_y = arrow_y + 60.0
1377
+
1378
+ # --- First step: horizontal (same Y as SM) ---
1379
+ first_step = shared_steps[0]
1380
+ branch_y = arrow_y
1381
+
1382
+ for branch_idx, step in enumerate(shared_steps):
1383
+ step_meta: Dict[str, Any] = {
1384
+ "reactant_ids": [sm_frag_id],
1385
+ "product_ids": [],
1386
+ "arrow_id": 0,
1387
+ "above_ids": [],
1388
+ "below_ids": [],
1389
+ }
1390
+
1391
+ if branch_idx == 0:
1392
+ # Horizontal arrow from SM
1393
+ arrow_tail_x = sm_right_x + frag_gap * 0.3
1394
+
1395
+ # Compute arrow length from content
1396
+ below_text = list(step.below_arrow.text) if step.below_arrow else []
1397
+ above_text = list(step.above_arrow.text) if step.above_arrow else []
1398
+ if step.yield_:
1399
+ below_text.append(step.yield_)
1400
+ content_w = max(
1401
+ _estimate_text_width(below_text),
1402
+ _estimate_text_width(above_text),
1403
+ 0,
1404
+ )
1405
+ arrow_len = max(content_w + 10.0, min_arrow_len)
1406
+ arrow_head_x = arrow_tail_x + arrow_len
1407
+ arrow_mid_x = (arrow_tail_x + arrow_head_x) / 2.0
1408
+
1409
+ dashed = (step.arrow_style == "dashed")
1410
+ failed = (step.arrow_style == "failed")
1411
+ arrow_xml, arrow_id = _build_arrow(
1412
+ arrow_tail_x, branch_y, arrow_head_x, branch_y, ids,
1413
+ dashed=dashed, nogo=failed)
1414
+ xml_parts.append(arrow_xml)
1415
+ step_meta["arrow_id"] = arrow_id
1416
+
1417
+ # Above-arrow text (p.y = baseline of first line)
1418
+ # Position so visual bottom sits LAYOUT_BELOW_GAP above arrow
1419
+ # (same gap as below-arrow content for symmetry)
1420
+ if above_text:
1421
+ n_abt = len(above_text)
1422
+ text_below_baseline = max(0, n_abt - 1) * _LINE_ADVANCE + _DESCENT
1423
+ text_y = branch_y - LAYOUT_BELOW_GAP - text_below_baseline
1424
+ txt_xml, txt_id = _build_text_element(
1425
+ above_text, arrow_mid_x, text_y, ids,
1426
+ use_formatting=False,
1427
+ )
1428
+ xml_parts.append(txt_xml)
1429
+ step_meta["above_ids"].append(txt_id)
1430
+
1431
+ # Below-arrow text (p.y = baseline; visual top = p.y - _CAP_HEIGHT)
1432
+ if below_text:
1433
+ text_y = branch_y + LAYOUT_BELOW_GAP + _CAP_HEIGHT
1434
+ txt_xml, txt_id = _build_text_element(below_text, arrow_mid_x, text_y, ids)
1435
+ xml_parts.append(txt_xml)
1436
+ step_meta["below_ids"].append(txt_id)
1437
+ n_blt = len(below_text)
1438
+ lowest_y = max(
1439
+ lowest_y,
1440
+ text_y + max(0, n_blt - 1) * _LINE_ADVANCE + _DESCENT,
1441
+ )
1442
+
1443
+ # Products
1444
+ cursor_x = arrow_head_x + frag_gap * 1.0
1445
+ for prod_sid in step.products:
1446
+ prod_ref = _get_ref(prod_sid)
1447
+ prod_frag = _resolve_structure(prod_ref, source_data=source_data,
1448
+ reference_mol=reference_mol)
1449
+ pw = _bbox_width(prod_frag.bbox)
1450
+ pcx = cursor_x + pw / 2.0
1451
+ ddx = pcx - prod_frag.cx
1452
+ ddy = branch_y - prod_frag.cy
1453
+ _shift_atoms(prod_frag.atoms, ddx, ddy)
1454
+ prod_frag.bbox = _fragment_bbox(prod_frag.atoms)
1455
+ prod_frag.cx, prod_frag.cy = _bbox_center(prod_frag.bbox)
1456
+
1457
+ pfrag_xml, _, pfrag_id = _build_fragment(
1458
+ prod_frag.atoms, prod_frag.bonds, ids)
1459
+ xml_parts.append(pfrag_xml)
1460
+ step_meta["product_ids"].append(pfrag_id)
1461
+ all_frag_ids.append(pfrag_id)
1462
+
1463
+ if prod_frag.ref.label:
1464
+ lbl_xml, _ = _build_label_element(
1465
+ prod_frag.ref.label, prod_frag.cx,
1466
+ prod_frag.bbox[3] + 14.0, ids)
1467
+ xml_parts.append(lbl_xml)
1468
+
1469
+ cursor_x = prod_frag.bbox[2] + inter_gap
1470
+
1471
+ # Track lowest for below-text of this horizontal step
1472
+ lowest_y = max(lowest_y, branch_y + 60.0)
1473
+
1474
+ else:
1475
+ # Vertical branch downward from SM
1476
+ # Vertical arrow starts below the previous branch's content
1477
+ vert_top_y = lowest_y + 8.0
1478
+
1479
+ # Condition text
1480
+ cond_lines: List[str] = []
1481
+ if step.above_arrow and step.above_arrow.text:
1482
+ cond_lines.extend(step.above_arrow.text)
1483
+ if step.below_arrow and step.below_arrow.text:
1484
+ cond_lines.extend(step.below_arrow.text)
1485
+ if step.yield_:
1486
+ cond_lines.append(step.yield_)
1487
+
1488
+ # Compute vertical arrow length: enough for condition text + product
1489
+ prod_ref = _get_ref(step.products[0])
1490
+ prod_frag = _resolve_structure(prod_ref, source_data=source_data,
1491
+ reference_mol=reference_mol)
1492
+ prod_h = _bbox_height(prod_frag.bbox)
1493
+
1494
+ cond_text_h = _estimate_text_height(cond_lines) if cond_lines else 0
1495
+ vert_len = max(cond_text_h + 20.0, prod_h + 30.0, 3.0 * bond_len)
1496
+ vert_bottom_y = vert_top_y + vert_len
1497
+
1498
+ dashed = (step.arrow_style == "dashed")
1499
+ failed = (step.arrow_style == "failed")
1500
+
1501
+ vert_xml, vert_aid = _build_vertical_arrow(
1502
+ sm_frag.cx, vert_top_y, vert_bottom_y, ids,
1503
+ condition_lines=cond_lines if cond_lines else None,
1504
+ condition_side="right",
1505
+ dashed=dashed,
1506
+ nogo=failed,
1507
+ )
1508
+ xml_parts.append(vert_xml)
1509
+ step_meta["arrow_id"] = vert_aid
1510
+
1511
+ # Product below the vertical arrow, centered on SM's x.
1512
+ prod_w = _bbox_width(prod_frag.bbox)
1513
+ prod_y = vert_bottom_y + frag_gap * 1.0 + prod_h / 2.0
1514
+ prod_target_cx = sm_frag.cx
1515
+ ddx = prod_target_cx - prod_frag.cx
1516
+ ddy = prod_y - prod_frag.cy
1517
+ _shift_atoms(prod_frag.atoms, ddx, ddy)
1518
+ prod_frag.bbox = _fragment_bbox(prod_frag.atoms)
1519
+ prod_frag.cx, prod_frag.cy = _bbox_center(prod_frag.bbox)
1520
+
1521
+ pfrag_xml, _, pfrag_id = _build_fragment(
1522
+ prod_frag.atoms, prod_frag.bonds, ids)
1523
+ xml_parts.append(pfrag_xml)
1524
+ step_meta["product_ids"].append(pfrag_id)
1525
+ all_frag_ids.append(pfrag_id)
1526
+
1527
+ if prod_frag.ref.label:
1528
+ lbl_xml, _ = _build_label_element(
1529
+ prod_frag.ref.label, prod_frag.cx,
1530
+ prod_frag.bbox[3] + 14.0, ids)
1531
+ xml_parts.append(lbl_xml)
1532
+
1533
+ lowest_y = prod_frag.bbox[3] + 20.0
1534
+
1535
+ step_metadata.append(step_meta)
1536
+
1537
+ # --- Build <scheme> ---
1538
+ scheme_id = ids.next()
1539
+ scheme_parts = [f'<scheme id="{scheme_id}">']
1540
+ for meta in step_metadata:
1541
+ step_id = ids.next()
1542
+ attrs = [f'id="{step_id}"']
1543
+ if meta["reactant_ids"]:
1544
+ attrs.append(f'ReactionStepReactants="{" ".join(str(x) for x in meta["reactant_ids"])}"')
1545
+ if meta["product_ids"]:
1546
+ attrs.append(f'ReactionStepProducts="{" ".join(str(x) for x in meta["product_ids"])}"')
1547
+ attrs.append(f'ReactionStepArrows="{meta["arrow_id"]}"')
1548
+ if meta["above_ids"]:
1549
+ attrs.append(f'ReactionStepObjectsAboveArrow="{" ".join(str(x) for x in meta["above_ids"])}"')
1550
+ if meta["below_ids"]:
1551
+ attrs.append(f'ReactionStepObjectsBelowArrow="{" ".join(str(x) for x in meta["below_ids"])}"')
1552
+ scheme_parts.append(f'<step {" ".join(attrs)}/>')
1553
+ scheme_parts.append('</scheme>')
1554
+ xml_parts.append("\n".join(scheme_parts))
1555
+
1556
+ return "\n".join(xml_parts), lowest_y
1557
+
1558
+
1559
+ def _layout_divergent_stacked(
1560
+ scheme: SchemeDescriptor,
1561
+ ids: _IDGen,
1562
+ source_data: Optional[Dict[str, Dict]] = None,
1563
+ reference_mol: Optional[Any] = None,
1564
+ ) -> Tuple[str, float]:
1565
+ """Fallback: stack divergent steps vertically when no shared substrate."""
1566
+ xml_parts: List[str] = []
1567
+ arrow_y = 300.0
1568
+ lowest_y = arrow_y
1569
+
1570
+ for step in scheme.steps:
1571
+ sub_scheme = SchemeDescriptor(
1572
+ structures=scheme.structures,
1573
+ steps=[step],
1574
+ layout="linear",
1575
+ )
1576
+ row_xml, row_lowest = _layout_linear(sub_scheme, ids, source_data=source_data,
1577
+ reference_mol=reference_mol)
1578
+ xml_parts.append(row_xml)
1579
+ lowest_y = row_lowest
1580
+ arrow_y = row_lowest + _DIVERGENT_BRANCH_GAP
1581
+
1582
+ return "\n".join(xml_parts), lowest_y
1583
+
1584
+
1585
+ # ---------------------------------------------------------------------------
1586
+ # Layout engine: stacked-rows (multiple independent sub-schemes)
1587
+ # ---------------------------------------------------------------------------
1588
+
1589
# Vertical spacing inserted between consecutive stacked sections:
# 5.5 ACS bond lengths (~79.2 pt per the original note; extra clearance).
SECTION_GAP = ACS_BOND_LENGTH * 5.5
# X coordinate at which section labels are anchored (left margin).
SECTION_LABEL_X = 40.0
1591
+
1592
+
1593
def _layout_stacked_rows(
    scheme: SchemeDescriptor,
    ids: _IDGen,
    source_data: Optional[Dict[str, Dict]] = None,
    reference_mol: Optional[Any] = None,
) -> Tuple[str, float]:
    """
    Lay out several independent sub-schemes stacked on top of each other.

    Each entry of ``scheme.sections`` is rendered as one row through
    ``_layout_steps_row``; its label, if any, is written at
    ``SECTION_LABEL_X``.  Consecutive sections are separated by
    ``SECTION_GAP``.  Without sections the scheme is laid out linearly.

    Returns (inner_xml, lowest_y).
    """
    sections = scheme.sections
    if not sections:
        # Nothing to stack: treat the scheme as a plain linear one.
        return _layout_linear(scheme, ids, source_data=source_data,
                              reference_mol=reference_mol)

    # --- Distribute run_arrows over the sections ---
    # Run arrows carry step numbers that are 1-indexed across all sections;
    # each is re-issued with the step number local to its own section.
    arrows_by_section: Dict[int, List[StepRunArrows]] = {}
    first_global_step = 1
    for section_index, section in enumerate(sections):
        step_count = len(section.steps)
        for offset in range(step_count):
            matching = [
                StepRunArrows(step=offset + 1, runs=run_arrow.runs)
                for run_arrow in scheme.run_arrows
                if run_arrow.step == first_global_step + offset
            ]
            if matching:
                arrows_by_section.setdefault(section_index, []).extend(matching)
        first_global_step += step_count

    rendered: List[str] = []
    current_arrow_y = 300.0
    lowest_y = current_arrow_y
    # Rows begin to the right of the section-label column.
    content_start_x = 100.0

    for section_index, section in enumerate(sections):
        # --- Section label ---
        if section.label:
            # Anchored at the left margin; the baseline sits a little below
            # the arrow line so the text looks vertically centred on it.
            label_xml, _ = _build_text_element(
                [section.label], SECTION_LABEL_X, current_arrow_y + 4.0, ids,
                justification="Left", use_formatting=False,
            )
            rendered.append(label_xml)

        # --- Alignment reference ---
        # Sections are independent reactions, so each one prefers its own
        # product as alignment reference and only falls back to the global
        # one when none is available.
        section_ref_mol = _product_mol_for_steps(
            section.steps, scheme.structures, source_data,
        )
        if section_ref_mol is None:
            section_ref_mol = reference_mol

        section_arrows = arrows_by_section.get(section_index, [])

        # Temporary sub-scheme that shares the structure definitions.
        sub_scheme = SchemeDescriptor(
            structures=scheme.structures,
            steps=section.steps,
            layout=section.layout or "linear",
        )

        row_xml, row_lowest, _, _ = _layout_steps_row(
            sub_scheme, section.steps, ids,
            start_x=content_start_x, arrow_y=current_arrow_y,
            source_data=source_data,
            run_arrows=section_arrows,
            reference_mol=section_ref_mol,
        )
        rendered.append(row_xml)

        lowest_y = row_lowest
        # The next section starts SECTION_GAP below this one.
        current_arrow_y = lowest_y + SECTION_GAP

    return "\n".join(rendered), lowest_y
1679
+
1680
+
1681
+ # ---------------------------------------------------------------------------
1682
+ # Core row layout: positions steps in a single row (LTR or RTL)
1683
+ # ---------------------------------------------------------------------------
1684
+
1685
def _layout_steps_row(
    scheme: SchemeDescriptor,
    steps: List[StepDescriptor],
    ids: _IDGen,
    start_x: float = 100.0,
    arrow_y: float = 300.0,
    source_data: Optional[Dict[str, Dict]] = None,
    run_arrows: Optional[List[StepRunArrows]] = None,
    direction: str = "ltr",
    skip_first_substrate_id: Optional[int] = None,
    skip_first_substrate_cursor_x: Optional[float] = None,
    reference_mol: Optional[Any] = None,
) -> Tuple[str, float, float, Dict]:
    """
    Layout multiple steps in a single row.

    The work is done in five phases:

    1. resolve every structure referenced by the steps,
    2. derive each arrow's length from its above/below content,
    3. position substrates, arrow, arrow content and products step by step,
    4. emit the <scheme>/<step> metadata element,
    5. compute the row's bottom extent and stack run arrows under it.

    Parameters
    ----------
    scheme : SchemeDescriptor
        Supplies the declared structures, ``condition_key`` (a non-empty
        key enables letter labels) and the fallback ``run_arrows``.
    steps : list of StepDescriptor
        Steps to place, in order, along the row.
    ids : _IDGen
        ID generator used for every element created here.
    start_x : float
        Starting cursor position (left edge for LTR, right edge for RTL).
    arrow_y : float
        Y coordinate of every reaction arrow in the row; substrates and
        products are centred vertically on it.
    source_data : optional dict
        Passed through to _resolve_structure for undeclared structure IDs.
    run_arrows : optional
        Run arrows for this row (step numbers are 1-indexed within this row).
        If None, uses scheme.run_arrows with original step numbers.
    direction : str
        "ltr" for left-to-right (default), "rtl" for right-to-left.
        Any value other than "rtl" is treated as LTR.
        For RTL, start_x is the RIGHT edge; cursor moves leftward.
        Arrows point left, substrates on right, products on left.
    skip_first_substrate_id : optional int
        If set, the first step's substrate is already placed (e.g. by a
        vertical arrow in serpentine mode).  This frag_id is used in the
        scheme metadata but the structure is not drawn or positioned.
    skip_first_substrate_cursor_x : optional float
        When skip_first_substrate_id is set, the cursor position after the
        pre-placed substrate.  For LTR this is the substrate's right_x + gap;
        for RTL this is the substrate's left_x - gap.
    reference_mol : optional
        Passed through to _resolve_structure for every structure.

    Returns (xml_string, lowest_y, row_edge_x, row_info) where:
      lowest_y = bottom extent of this row including run arrows
      row_edge_x = cursor position after the last element (rightmost x for
        LTR, leftmost x for RTL; includes the trailing inter-fragment gap)
      row_info = dict with extra metadata:
        "last_product_cx" : float — center x of the last product placed
        "last_product_bottom" : float — bottom y extent of last product + label
        "first_substrate_cx" : float — center x of the first substrate
    """
    is_rtl = (direction == "rtl")
    bond_len = ACS_BOND_LENGTH
    frag_gap = bond_len * LAYOUT_FRAG_GAP_BONDS    # gap between fragment and arrow
    inter_gap = bond_len * LAYOUT_INTER_GAP_BONDS  # gap between adjacent fragments

    # --- Phase 1: Resolve all structures ---
    def _get_ref(sid: str) -> StructureRef:
        """Get StructureRef from declared structures or create a bare one for source lookup."""
        if sid in scheme.structures:
            return scheme.structures[sid]
        # Not declared — create a bare ref (will be resolved via source_data)
        return StructureRef(id=sid)

    resolved_steps: List[ResolvedStep] = []
    for step in steps:
        subs = [_resolve_structure(_get_ref(sid), source_data=source_data,
                                   reference_mol=reference_mol)
                for sid in step.substrates]
        prods = [_resolve_structure(_get_ref(pid), source_data=source_data,
                                    reference_mol=reference_mol)
                 for pid in step.products]

        above_structs = []
        above_text = []
        below_text = []
        below_structs = []

        if step.above_arrow:
            for sid in step.above_arrow.structures:
                above_structs.append(
                    _resolve_structure(_get_ref(sid), source_data=source_data,
                                       reference_mol=reference_mol))
            # Copy so the descriptor's own list is never mutated.
            above_text = step.above_arrow.text[:]
        if step.below_arrow:
            for sid in step.below_arrow.structures:
                below_structs.append(
                    _resolve_structure(_get_ref(sid), source_data=source_data,
                                       reference_mol=reference_mol))
            below_text = step.below_arrow.text[:]

        # Add yield to below-arrow text if present
        if step.yield_:
            below_text.append(step.yield_)

        resolved_steps.append(ResolvedStep(
            descriptor=step,
            substrates=subs,
            products=prods,
            above_structures=above_structs,
            above_text=above_text,
            below_text=below_text,
            below_structures=below_structs,
        ))

    # --- Phase 2: Compute arrow lengths from content ---
    min_arrow_len = 5.0 * bond_len
    use_letter_conditions = bool(scheme.condition_key)

    for rs in resolved_steps:
        # Check if this step uses letter conditions
        is_letter = (use_letter_conditions
                     and _is_letter_condition(rs.below_text))
        rs._is_letter_cond = is_letter

        # Width of above-arrow content
        above_width = 0.0
        for af in rs.above_structures:
            above_width += _bbox_width(af.bbox) + inter_gap
        above_text_w = _estimate_text_width(rs.above_text)
        above_width = max(above_width, above_text_w)

        # Width of below-arrow content (skip if using letter label)
        below_width = 0.0
        if not is_letter:
            for bf in rs.below_structures:
                below_width += _bbox_width(bf.bbox) + inter_gap
            below_text_w = _estimate_text_width(rs.below_text)
            below_width = max(below_width, below_text_w)

        content_width = max(above_width, below_width)
        rs._arrow_len = max(content_width + 10.0, min_arrow_len)

    # --- Phase 3: Position everything ---
    # For LTR: cursor_x = left edge of next element, moves right.
    # For RTL: cursor_x = right edge of next element, moves left.
    cursor_x = start_x

    xml_parts: List[str] = []
    all_frag_ids: List[int] = []
    step_metadata: List[Dict] = []  # for <scheme><step> elements

    # Baseline actually used for each step's below-arrow text.  Phase 5 needs
    # it because text under below-arrow structures sits lower than the
    # default position next to the arrow.
    below_text_baselines: List[float] = []

    # Track extra info for serpentine/multi-row callers
    _last_product_cx: float = start_x
    _last_product_bottom: float = arrow_y
    _first_substrate_cx: float = start_x

    for step_idx, rs in enumerate(resolved_steps):
        step_meta: Dict[str, Any] = {
            "reactant_ids": [],
            "product_ids": [],
            "arrow_id": 0,
            "above_ids": [],
            "below_ids": [],
        }

        # -- Substrates --
        # Skip if: (a) pre-placed by vertical arrow (serpentine), or
        #          (b) shared intermediate from the previous step's product.
        skip_substrate = False
        if step_idx == 0 and skip_first_substrate_id is not None:
            skip_substrate = True
            step_meta["reactant_ids"].append(skip_first_substrate_id)
            if skip_first_substrate_cursor_x is not None:
                cursor_x = skip_first_substrate_cursor_x
        elif step_idx > 0:
            prev_products = resolved_steps[step_idx - 1].descriptor.products
            curr_substrates = rs.descriptor.substrates
            if (len(curr_substrates) == 1 and len(prev_products) >= 1
                    and curr_substrates[0] == prev_products[-1]):
                skip_substrate = True
                # Use the previous step's product fragment ID
                prev_meta = step_metadata[step_idx - 1]
                if prev_meta["product_ids"]:
                    step_meta["reactant_ids"].append(prev_meta["product_ids"][-1])

        if not skip_substrate:
            for i, sub in enumerate(rs.substrates):
                w = _bbox_width(sub.bbox)
                # Shift to position
                if is_rtl:
                    cx_target = cursor_x - w / 2.0
                else:
                    cx_target = cursor_x + w / 2.0
                cy_target = arrow_y
                dx = cx_target - sub.cx
                dy = cy_target - sub.cy
                _shift_atoms(sub.atoms, dx, dy)
                sub.bbox = _fragment_bbox(sub.atoms)
                sub.cx, sub.cy = _bbox_center(sub.bbox)

                frag_xml, _, frag_id = _build_fragment(sub.atoms, sub.bonds, ids)
                sub.xml = frag_xml
                sub.frag_id = frag_id
                xml_parts.append(frag_xml)
                step_meta["reactant_ids"].append(frag_id)
                all_frag_ids.append(frag_id)

                # Track first substrate center
                if step_idx == 0 and i == 0:
                    _first_substrate_cx = sub.cx

                # Compound label below structure
                if sub.ref.label:
                    lbl_xml, _ = _build_label_element(
                        sub.ref.label, sub.cx, sub.bbox[3] + 14.0, ids,
                    )
                    xml_parts.append(lbl_xml)

                if is_rtl:
                    cursor_x = sub.bbox[0] - inter_gap
                else:
                    cursor_x = sub.bbox[2] + inter_gap

        # -- Arrow --
        if is_rtl:
            # RTL: tail on right (near substrate), head on left (near product)
            arrow_tail_x = cursor_x - frag_gap * 0.3
            arrow_head_x = arrow_tail_x - rs._arrow_len
        else:
            arrow_tail_x = cursor_x + frag_gap * 0.3
            arrow_head_x = arrow_tail_x + rs._arrow_len
        arrow_mid_x = (arrow_tail_x + arrow_head_x) / 2.0

        dashed = (rs.descriptor.arrow_style == "dashed")
        failed = (rs.descriptor.arrow_style == "failed")
        arrow_xml, arrow_id = _build_arrow(
            arrow_tail_x, arrow_y, arrow_head_x, arrow_y, ids,
            dashed=dashed, nogo=failed,
        )
        xml_parts.append(arrow_xml)
        step_meta["arrow_id"] = arrow_id

        # Remembered so run arrows (Phase 5) can mirror the reaction arrow.
        rs.arrow_tail_x = arrow_tail_x
        rs.arrow_tail_y = arrow_y
        rs.arrow_head_x = arrow_head_x
        rs.arrow_head_y = arrow_y

        # -- Above-arrow content --
        # Structures above arrow, centred as a group on the arrow midpoint
        above_cursor_x = arrow_mid_x
        if rs.above_structures:
            total_above_w = sum(_bbox_width(af.bbox) for af in rs.above_structures)
            total_above_w += inter_gap * max(0, len(rs.above_structures) - 1)
            above_cursor_x = arrow_mid_x - total_above_w / 2.0

        # If there's text below above-arrow structures (e.g. "(1.2 eq)"),
        # push structures higher to make room for the text between
        # structure bottom and arrow.
        # Gap layout: struct -(4pt)- text -(2pt)- arrow
        above_text_height = 0.0
        if rs.above_text:
            n_abt = len(rs.above_text)
            # Visual text height + struct-to-text gap (4pt)
            above_text_height = (
                _CAP_HEIGHT + max(0, n_abt - 1) * _LINE_ADVANCE + _DESCENT + 4.0
            )

        for af in rs.above_structures:
            af_w = _bbox_width(af.bbox)
            af_h = _bbox_height(af.bbox)
            # Position above the arrow, with extra room for text below structure
            target_cx = above_cursor_x + af_w / 2.0
            if rs.above_text:
                # text sits LAYOUT_BELOW_GAP above arrow; struct sits above text
                target_cy = arrow_y - LAYOUT_BELOW_GAP - above_text_height - af_h / 2.0
            else:
                # no text: struct sits LAYOUT_ABOVE_GAP above arrow
                target_cy = arrow_y - LAYOUT_ABOVE_GAP - af_h / 2.0
            dx = target_cx - af.cx
            dy = target_cy - af.cy
            _shift_atoms(af.atoms, dx, dy)
            af.bbox = _fragment_bbox(af.atoms)
            af.cx, af.cy = _bbox_center(af.bbox)

            frag_xml, _, frag_id = _build_fragment(af.atoms, af.bonds, ids)
            af.xml = frag_xml
            af.frag_id = frag_id
            xml_parts.append(frag_xml)
            step_meta["above_ids"].append(frag_id)
            all_frag_ids.append(frag_id)

            # Label for above-arrow structure
            if af.ref.label:
                lbl_xml, _ = _build_label_element(
                    af.ref.label, af.cx, af.bbox[3] + 14.0, ids,
                )
                xml_parts.append(lbl_xml)

            above_cursor_x += af_w + inter_gap

        # Above-arrow text (equiv text for above structures, or condition text)
        # p.y is the BASELINE of the first line.
        # Position so that the visual bottom (last baseline + descent) sits
        # LAYOUT_BELOW_GAP above the arrow (same gap as below-arrow content).
        # When above-arrow structures exist they were already pushed higher
        # to make room, so the same text_y applies in both cases.
        if rs.above_text:
            n_abt = len(rs.above_text)
            # Distance from first baseline to visual bottom
            text_below_baseline = max(0, n_abt - 1) * _LINE_ADVANCE + _DESCENT
            text_y = arrow_y - LAYOUT_BELOW_GAP - text_below_baseline
            txt_xml, txt_id = _build_text_element(
                rs.above_text, arrow_mid_x, text_y, ids,
                use_formatting=False,
            )
            xml_parts.append(txt_xml)
            step_meta["above_ids"].append(txt_id)

        # -- Below-arrow content --
        # p.y is the BASELINE of the first line.
        # Visual top = p.y - _CAP_HEIGHT, so set p.y so that
        # (p.y - _CAP_HEIGHT) = arrow_y + LAYOUT_BELOW_GAP.
        below_y = arrow_y + LAYOUT_BELOW_GAP + _CAP_HEIGHT

        # Below-arrow structures first
        if rs.below_structures:
            below_struct_y = arrow_y + LAYOUT_BELOW_GAP
            total_below_w = sum(_bbox_width(bf.bbox) for bf in rs.below_structures)
            total_below_w += inter_gap * max(0, len(rs.below_structures) - 1)
            below_cursor_x = arrow_mid_x - total_below_w / 2.0

            for bf in rs.below_structures:
                bf_w = _bbox_width(bf.bbox)
                bf_h = _bbox_height(bf.bbox)
                target_cx = below_cursor_x + bf_w / 2.0
                target_cy = below_struct_y + bf_h / 2.0 + 4.0
                dx = target_cx - bf.cx
                dy = target_cy - bf.cy
                _shift_atoms(bf.atoms, dx, dy)
                bf.bbox = _fragment_bbox(bf.atoms)
                bf.cx, bf.cy = _bbox_center(bf.bbox)

                frag_xml, _, frag_id = _build_fragment(bf.atoms, bf.bonds, ids)
                bf.xml = frag_xml
                bf.frag_id = frag_id
                xml_parts.append(frag_xml)
                step_meta["below_ids"].append(frag_id)
                all_frag_ids.append(frag_id)

                if bf.ref.label:
                    lbl_xml, _ = _build_label_element(
                        bf.ref.label, bf.cx, bf.bbox[3] + 14.0, ids,
                    )
                    xml_parts.append(lbl_xml)

                below_cursor_x += bf_w + inter_gap

            # Adjust text Y to be below the structures
            below_y = max(bf.bbox[3] for bf in rs.below_structures) + 14.0

        # Remember where this step's below-arrow text really starts.
        below_text_baselines.append(below_y)

        # Below-arrow text (or letter label for condition key mode)
        if rs.below_text:
            if rs._is_letter_cond:
                # Letter condition: render small italic label above arrow
                letter_text = rs.below_text[0].strip()
                lbl_xml, lbl_id = _build_letter_label(
                    letter_text, arrow_mid_x, arrow_y, ids,
                )
                xml_parts.append(lbl_xml)
                step_meta["above_ids"].append(lbl_id)
            else:
                txt_xml, txt_id = _build_text_element(
                    rs.below_text, arrow_mid_x, below_y, ids,
                )
                xml_parts.append(txt_xml)
                step_meta["below_ids"].append(txt_id)

        if is_rtl:
            cursor_x = arrow_head_x - frag_gap * 1.0
        else:
            cursor_x = arrow_head_x + frag_gap * 1.0

        # -- Products --
        for prod in rs.products:
            w = _bbox_width(prod.bbox)
            if is_rtl:
                cx_target = cursor_x - w / 2.0
            else:
                cx_target = cursor_x + w / 2.0
            cy_target = arrow_y
            dx = cx_target - prod.cx
            dy = cy_target - prod.cy
            _shift_atoms(prod.atoms, dx, dy)
            prod.bbox = _fragment_bbox(prod.atoms)
            prod.cx, prod.cy = _bbox_center(prod.bbox)

            frag_xml, _, frag_id = _build_fragment(prod.atoms, prod.bonds, ids)
            prod.xml = frag_xml
            prod.frag_id = frag_id
            xml_parts.append(frag_xml)
            step_meta["product_ids"].append(frag_id)
            all_frag_ids.append(frag_id)

            prod_bottom = prod.bbox[3]
            if prod.ref.label:
                lbl_xml, _ = _build_label_element(
                    prod.ref.label, prod.cx, prod.bbox[3] + 14.0, ids,
                )
                xml_parts.append(lbl_xml)
                prod_bottom = prod.bbox[3] + 14.0 + 6.0  # label baseline + descent

            # Track last product info for serpentine callers
            _last_product_cx = prod.cx
            _last_product_bottom = prod_bottom

            if is_rtl:
                cursor_x = prod.bbox[0] - inter_gap
            else:
                cursor_x = prod.bbox[2] + inter_gap

        step_metadata.append(step_meta)

    # --- Phase 4: Build <scheme> with <step> elements ---
    scheme_id = ids.next()
    scheme_parts = [f'<scheme id="{scheme_id}">']
    for meta in step_metadata:
        step_id = ids.next()
        attrs = [f'id="{step_id}"']
        if meta["reactant_ids"]:
            attrs.append(f'ReactionStepReactants="{" ".join(str(x) for x in meta["reactant_ids"])}"')
        if meta["product_ids"]:
            attrs.append(f'ReactionStepProducts="{" ".join(str(x) for x in meta["product_ids"])}"')
        attrs.append(f'ReactionStepArrows="{meta["arrow_id"]}"')
        if meta["above_ids"]:
            attrs.append(f'ReactionStepObjectsAboveArrow="{" ".join(str(x) for x in meta["above_ids"])}"')
        if meta["below_ids"]:
            attrs.append(f'ReactionStepObjectsBelowArrow="{" ".join(str(x) for x in meta["below_ids"])}"')
        scheme_parts.append(f'<step {" ".join(attrs)}/>')
    scheme_parts.append('</scheme>')
    xml_parts.append("\n".join(scheme_parts))

    # --- Phase 5: Run arrows ---
    # Compute lowest_y from row content (always needed for return value)
    lowest_y = arrow_y + 60.0  # baseline estimate
    for rs, text_baseline in zip(resolved_steps, below_text_baselines):
        if rs.below_text:
            n_blt = len(rs.below_text)
            # Start from the baseline that was really used in Phase 3: when
            # below-arrow structures exist the text sits under them, not
            # directly under the arrow.
            below_vis_bottom = (
                text_baseline
                + max(0, n_blt - 1) * _LINE_ADVANCE + _DESCENT  # to visual bottom
                + 4.0  # margin
            )
            lowest_y = max(lowest_y, below_vis_bottom)
        for bf in rs.below_structures:
            lowest_y = max(lowest_y, bf.bbox[3] + 20.0)

    # Determine which run arrows to render
    effective_run_arrows = run_arrows if run_arrows is not None else scheme.run_arrows

    if effective_run_arrows:
        run_y = lowest_y + 6.0  # tighter gap between conditions text and run arrows

        for sra in effective_run_arrows:
            target_idx = sra.step - 1  # 0-indexed
            if target_idx < 0 or target_idx >= len(resolved_steps):
                continue
            rs = resolved_steps[target_idx]

            # Run arrow matches the reaction arrow exactly (same tail/head X),
            # just translated vertically below the scheme content.
            run_tail_x = rs.arrow_tail_x
            run_head_x = rs.arrow_head_x

            for run_entry in sra.runs:
                if run_entry.note:
                    # Text centered above this specific run arrow
                    note_y = run_y - 1.0
                    note_xml, _ = _build_text_element(
                        [run_entry.note],
                        (run_tail_x + run_head_x) / 2.0,
                        note_y, ids,
                        justification="Center",
                        use_formatting=False,
                    )
                    xml_parts.append(note_xml)
                    run_y += 10.0  # space for note text
                run_xml, _ = _build_run_arrow(
                    run_tail_x, run_head_x,
                    run_y,
                    run_entry.input_label,
                    run_entry.output_label,
                    ids,
                )
                xml_parts.append(run_xml)
                run_y += 18.0  # stack multiple runs

        lowest_y = run_y

    # Compute row edge (rightmost for LTR, leftmost for RTL)
    row_edge_x = cursor_x

    row_info = {
        "last_product_cx": _last_product_cx,
        "last_product_bottom": _last_product_bottom,
        "first_substrate_cx": _first_substrate_cx,
    }
    return "\n".join(xml_parts), lowest_y, row_edge_x, row_info
+
2182
+
2183
+ # ---------------------------------------------------------------------------
2184
+ # Document assembly
2185
+ # ---------------------------------------------------------------------------
2186
+
2187
def _format_header(bbox: str) -> str:
    """Return CDXML_HEADER with its placeholders filled in.

    ``bbox`` fills the template's ``bbox`` field; every other field is
    taken from the module-level ACS_* style constants.
    """
    acs_style = {
        "label_font": ACS_LABEL_FONT,
        "label_size": ACS_LABEL_SIZE,
        "label_face": ACS_LABEL_FACE,
        "caption_size": ACS_CAPTION_SIZE,
        "hash_spacing": ACS_HASH_SPACING,
        "margin_width": ACS_MARGIN_WIDTH,
        "line_width": ACS_LINE_WIDTH,
        "bold_width": ACS_BOLD_WIDTH,
        "bond_length": ACS_BOND_LENGTH_STR,
        "bond_spacing": ACS_BOND_SPACING,
        "chain_angle": ACS_CHAIN_ANGLE_STR,
    }
    return CDXML_HEADER.format(bbox=bbox, **acs_style)
+
2204
+
2205
+ _PAGE_OPEN = (
2206
+ '<page id="{page_id}" BoundingBox="0 0 1620 2160" '
2207
+ 'HeaderPosition="36" FooterPosition="36" '
2208
+ 'PrintTrimMarks="yes" HeightPages="3" WidthPages="3">'
2209
+ )
2210
+ _PAGE_CLOSE = "</page>"
2211
+
2212
+
2213
+ def _product_mol_for_steps(
2214
+ steps: List[StepDescriptor],
2215
+ structures: Dict[str, StructureRef],
2216
+ source_data: Optional[Dict[str, Dict]] = None,
2217
+ pick: str = "last",
2218
+ ) -> Optional[Any]:
2219
+ """
2220
+ Build an RDKit Mol with 2D coords for the product of given steps.
2221
+
2222
+ Parameters
2223
+ ----------
2224
+ steps : list of StepDescriptor
2225
+ structures : dict mapping structure IDs to StructureRef
2226
+ source_data : optional reaction_parser JSON species lookup
2227
+ pick : "last" (default) = last step's last product;
2228
+ "first" = first step's first product (for divergent).
2229
+
2230
+ Returns an RDKit Mol with 2D conformer, or None.
2231
+ """
2232
+ try:
2233
+ from rdkit import Chem
2234
+ from rdkit.Chem import AllChem
2235
+ except ImportError:
2236
+ return None
2237
+
2238
+ pid: Optional[str] = None
2239
+ if pick == "first":
2240
+ if steps and steps[0].products:
2241
+ pid = steps[0].products[0]
2242
+ else:
2243
+ if steps and steps[-1].products:
2244
+ pid = steps[-1].products[-1]
2245
+
2246
+ if pid is None:
2247
+ return None
2248
+
2249
+ product_smiles: Optional[str] = None
2250
+ ref = structures.get(pid)
2251
+ if ref and ref.smiles:
2252
+ product_smiles = ref.smiles
2253
+ elif source_data:
2254
+ sp = source_data.get(pid) or source_data.get(pid.lower())
2255
+ if sp:
2256
+ product_smiles = sp.get("smiles")
2257
+
2258
+ if not product_smiles:
2259
+ return None
2260
+
2261
+ mol = Chem.MolFromSmiles(product_smiles)
2262
+ if mol is None:
2263
+ return None
2264
+
2265
+ AllChem.Compute2DCoords(mol)
2266
+ return mol
2267
+
2268
+
2269
+ def _identify_product_mol(
2270
+ scheme: SchemeDescriptor,
2271
+ source_data: Optional[Dict[str, Dict]] = None,
2272
+ ) -> Optional[Any]:
2273
+ """
2274
+ Identify the main product and create an RDKit Mol with 2D coords.
2275
+
2276
+ This mol serves as the reference for MCS alignment of all other
2277
+ structures in the scheme — shared scaffolds are oriented to match
2278
+ the product's layout, producing visually consistent schemes.
2279
+
2280
+ Reference product selection:
2281
+ - linear / sequential: product of the LAST step
2282
+ - divergent: product of the FIRST (horizontal) step
2283
+ - stacked-rows: product of the first section's last step
2284
+
2285
+ Returns an RDKit Mol with 2D conformer, or None if RDKit is
2286
+ unavailable or no product SMILES can be resolved.
2287
+ """
2288
+ if scheme.layout == "divergent":
2289
+ return _product_mol_for_steps(
2290
+ scheme.steps, scheme.structures, source_data, pick="first",
2291
+ )
2292
+ elif scheme.layout == "stacked-rows":
2293
+ if scheme.sections:
2294
+ return _product_mol_for_steps(
2295
+ scheme.sections[0].steps, scheme.structures, source_data,
2296
+ )
2297
+ return None
2298
+ else:
2299
+ # linear / sequential — last step's last product
2300
+ return _product_mol_for_steps(
2301
+ scheme.steps, scheme.structures, source_data,
2302
+ )
2303
+
2304
+
2305
def render(scheme: SchemeDescriptor, yaml_dir: Optional[str] = None) -> str:
    """
    Turn a SchemeDescriptor into a complete CDXML document string.

    Parameters
    ----------
    scheme : SchemeDescriptor
        Parsed scheme from YAML.
    yaml_dir : str, optional
        Directory of the YAML file; a relative ``scheme.source`` path is
        resolved against it.

    Returns
    -------
    str
        Complete CDXML document.

    Raises
    ------
    NotImplementedError
        If ``scheme.layout`` is not one of linear, sequential, divergent,
        stacked-rows.
    """
    ids = _IDGen(1000)

    # Optional source JSON, resolved relative to the YAML directory.
    source_data = None
    if scheme.source:
        source_path = scheme.source
        if not os.path.isabs(source_path) and yaml_dir:
            source_path = os.path.join(yaml_dir, source_path)
        source_data = _load_source_json(source_path)

    # Reference product Mol: other structures are aligned (MCS) to match
    # its scaffold orientation.  Which product is used depends on the layout.
    ref_mol = _identify_product_mol(scheme, source_data)

    # Dispatch to the layout routine for this scheme.
    layout_routines = {
        "linear": _layout_linear,
        "sequential": _layout_sequential,
        "divergent": _layout_divergent,
        "stacked-rows": _layout_stacked_rows,
    }
    layout_routine = layout_routines.get(scheme.layout)
    if layout_routine is None:
        raise NotImplementedError(
            f"Layout '{scheme.layout}' is not yet implemented. "
            f"Supported: linear, sequential, divergent, stacked-rows"
        )
    inner_xml, lowest_y = layout_routine(
        scheme, ids, source_data=source_data, reference_mol=ref_mol,
    )

    # Condition key block goes 20pt below the lowest scheme content.
    if scheme.condition_key:
        key_xml, _ = _build_condition_key(
            scheme.condition_key, 80.0, lowest_y + 20.0, ids,
        )
        inner_xml = "\n".join((inner_xml, key_xml))

    # The page ID is allocated only after all content IDs.
    page_id = ids.next()

    # Use a generous bounding box
    header = _format_header("0 0 1620 2160")
    page_open = _PAGE_OPEN.format(page_id=page_id)

    return "\n".join((header, page_open, inner_xml, _PAGE_CLOSE, CDXML_FOOTER))
+
2378
+
2379
def render_to_file(
    scheme: SchemeDescriptor,
    output_path: str,
    yaml_dir: Optional[str] = None,
) -> None:
    """Render ``scheme`` to CDXML and write it to ``output_path`` as UTF-8.

    The document is rendered before the file is opened, so a rendering
    failure leaves any existing file at ``output_path`` untouched.
    """
    document = render(scheme, yaml_dir=yaml_dir)
    with open(output_path, "w", encoding="utf-8") as out_file:
        out_file.write(document)