cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. cdxml_toolkit/__init__.py +18 -0
  2. cdxml_toolkit/_jre/__init__.py +2 -0
  3. cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
  4. cdxml_toolkit/analysis/__init__.py +35 -0
  5. cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
  6. cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
  7. cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
  8. cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
  9. cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
  10. cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
  11. cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
  12. cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
  13. cdxml_toolkit/analysis/extract_nmr.py +47 -0
  14. cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
  15. cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
  16. cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
  17. cdxml_toolkit/cdxml_builder.py +920 -0
  18. cdxml_toolkit/cdxml_utils.py +342 -0
  19. cdxml_toolkit/chemdraw/__init__.py +5 -0
  20. cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
  21. cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
  22. cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
  23. cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
  24. cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
  25. cdxml_toolkit/constants.py +304 -0
  26. cdxml_toolkit/coord_normalizer.py +438 -0
  27. cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
  28. cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
  29. cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
  30. cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
  31. cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
  32. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
  33. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
  34. cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
  35. cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
  36. cdxml_toolkit/image/__init__.py +15 -0
  37. cdxml_toolkit/image/reaction_from_image.py +2103 -0
  38. cdxml_toolkit/image/structure_from_image.py +1711 -0
  39. cdxml_toolkit/layout/__init__.py +5 -0
  40. cdxml_toolkit/layout/alignment.py +1642 -0
  41. cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
  42. cdxml_toolkit/layout/scheme_merger.py +2260 -0
  43. cdxml_toolkit/mcp_server/__init__.py +0 -0
  44. cdxml_toolkit/mcp_server/__main__.py +5 -0
  45. cdxml_toolkit/mcp_server/server.py +1567 -0
  46. cdxml_toolkit/naming/__init__.py +6 -0
  47. cdxml_toolkit/naming/aligned_namer.py +2342 -0
  48. cdxml_toolkit/naming/mol_builder.py +3722 -0
  49. cdxml_toolkit/naming/name_decomposer.py +2843 -0
  50. cdxml_toolkit/naming/reactions_datamol.json +2414 -0
  51. cdxml_toolkit/office/__init__.py +5 -0
  52. cdxml_toolkit/office/doc_from_template.py +722 -0
  53. cdxml_toolkit/office/ole_embedder.py +808 -0
  54. cdxml_toolkit/office/ole_extractor.py +272 -0
  55. cdxml_toolkit/perception/__init__.py +10 -0
  56. cdxml_toolkit/perception/compound_search.py +229 -0
  57. cdxml_toolkit/perception/eln_csv_parser.py +240 -0
  58. cdxml_toolkit/perception/rdf_parser.py +664 -0
  59. cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
  60. cdxml_toolkit/perception/reaction_parser.py +2150 -0
  61. cdxml_toolkit/perception/scheme_reader.py +2948 -0
  62. cdxml_toolkit/perception/scheme_refine.py +1404 -0
  63. cdxml_toolkit/perception/scheme_segmenter.py +619 -0
  64. cdxml_toolkit/perception/spatial_assignment.py +1013 -0
  65. cdxml_toolkit/rdkit_utils.py +605 -0
  66. cdxml_toolkit/render/__init__.py +17 -0
  67. cdxml_toolkit/render/auto_layout.py +229 -0
  68. cdxml_toolkit/render/compact_parser.py +632 -0
  69. cdxml_toolkit/render/parser.py +706 -0
  70. cdxml_toolkit/render/render_scheme.py +267 -0
  71. cdxml_toolkit/render/renderer.py +2387 -0
  72. cdxml_toolkit/render/schema.py +90 -0
  73. cdxml_toolkit/render/scheme_maker.py +1043 -0
  74. cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
  75. cdxml_toolkit/resolve/__init__.py +13 -0
  76. cdxml_toolkit/resolve/cas_resolver.py +430 -0
  77. cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
  78. cdxml_toolkit/resolve/condensed_formula.py +493 -0
  79. cdxml_toolkit/resolve/jre_manager.py +195 -0
  80. cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
  81. cdxml_toolkit/resolve/reagent_db.py +285 -0
  82. cdxml_toolkit/resolve/superatom_data.json +2856 -0
  83. cdxml_toolkit/resolve/superatom_table.py +146 -0
  84. cdxml_toolkit/text_formatting.py +298 -0
  85. cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
  86. cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
  87. cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
  88. cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
  89. cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
  90. cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
  91. cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,428 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ scheme_aligner.py - Align reaction scheme structures using RDKit MCS.
4
+
5
+ Experimental tool. Uses Maximum Common Substructure (MCS) to find shared
6
+ scaffolds between the product and every other drawn structure (reactants,
7
+ reagents) in a CDXML reaction scheme, then aligns each structure's 2D
8
+ coordinates to match the product's orientation via RDKit's
9
+ GenerateDepictionMatching2DStructure.
10
+
11
+ The product is the reference — everything else aligns to it.
12
+
13
+ Inspired by:
14
+ https://greglandrum.github.io/rdkit-blog/posts/2021-08-07-rgd-and-highlighting.html
15
+
16
+ Usage:
17
+ python scheme_aligner.py reaction.cdxml
18
+ python scheme_aligner.py reaction.cdxml -o aligned.cdxml --svg
19
+ """
20
+
21
+ import argparse
22
+ import math
23
+ import sys
24
+ import xml.etree.ElementTree as ET
25
+ from pathlib import Path
26
+
27
+ from ...constants import ACS_BOND_LENGTH
28
+
29
+ try:
30
+ from rdkit import Chem, RDLogger
31
+ from rdkit.Chem import AllChem, rdFMCS, rdDepictor
32
+ from rdkit.Chem.Draw import rdMolDraw2D
33
+ from rdkit.Geometry import Point3D
34
+ RDLogger.logger().setLevel(RDLogger.ERROR)
35
+ except ImportError:
36
+ sys.exit("Error: RDKit is required. Activate the LLMChem environment.")
37
+
38
+
39
+ # ---------------------------------------------------------------------------
40
+ # CDXML parsing
41
+ # ---------------------------------------------------------------------------
42
+
43
def parse_cdxml(path):
    """Load a CDXML document and extract its fragments and reaction steps.

    Returns a 3-tuple ``(tree, fragments, steps)``:

    * ``tree``      - the parsed ElementTree.
    * ``fragments`` - dict mapping the integer ``id`` of each <fragment>
      that is a direct child of the first <page> to its element.
    * ``steps``     - one dict per <step> element found anywhere in the
      document, with the keys 'reactants', 'products', 'above' and
      'below', each holding a list of integer object ids.

    Terminates the process through sys.exit when the document has no
    <page> element.
    """
    tree = ET.parse(str(path))
    root = tree.getroot()

    page = root.find('.//page')
    if page is None:
        sys.exit("No <page> element in CDXML.")

    fragments = {}
    for frag in page.findall('fragment'):
        fragments[int(frag.get('id'))] = frag

    # (result key, CDXML attribute) pairs, in the order the keys are emitted.
    step_fields = (
        ('reactants', 'ReactionStepReactants'),
        ('products', 'ReactionStepProducts'),
        ('above', 'ReactionStepObjectsAboveArrow'),
        ('below', 'ReactionStepObjectsBelowArrow'),
    )
    steps = [
        {key: _ids(step.get(attr, '')) for key, attr in step_fields}
        for step in root.findall('.//step')
    ]

    return tree, fragments, steps
63
+
64
+
65
+ def _ids(s):
66
+ return [int(x) for x in s.split() if x]
67
+
68
+
69
+ # ---------------------------------------------------------------------------
70
+ # Fragment -> RDKit Mol
71
+ # ---------------------------------------------------------------------------
72
+
73
def fragment_to_mol(frag_elem):
    """Convert a CDXML <fragment> to an RDKit Mol (no conformer set).

    Every direct <n> child except ExternalConnectionPoint nodes becomes one
    atom; nodes with NodeType="Fragment" (abbreviations) become dummy atoms
    (atomic number 0).  Direct <b> children become bonds when both of their
    end nodes were kept.  Bond orders 1, 2 and 3 map to single, double and
    triple bonds; any other order is treated as a single bond.

    Returns (mol, atoms_list) where atoms_list holds one dict per atom with
    the keys 'id', 'idx', 'x', 'y', 'elem', 'num_h', 'is_abbrev' and 'xml'
    (original CDXML coordinates plus a reference to the <n> element).

    Sanitization is best-effort: if it fails even with the property check
    disabled, the unsanitized molecule is returned.
    """
    atoms, id_map = [], {}

    for n in frag_elem.findall('n'):
        # Skip connection points before touching their attributes so that
        # an unusual connection-point node cannot abort the conversion.
        if n.get('NodeType') == 'ExternalConnectionPoint':
            continue
        nid = int(n.get('id'))

        # Only the first two components are used, so a position carrying
        # extra components no longer fails to unpack.
        px, py = (float(v) for v in n.get('p', '0 0').split()[:2])
        elem = int(n.get('Element', '6'))
        num_h_attr = n.get('NumHydrogens')
        num_h = int(num_h_attr) if num_h_attr is not None else None
        is_abbrev = n.get('NodeType') == 'Fragment'

        idx = len(atoms)
        id_map[nid] = idx
        atoms.append({
            'id': nid, 'idx': idx,
            'x': px, 'y': py,
            'elem': elem, 'num_h': num_h,
            'is_abbrev': is_abbrev,
            'xml': n,
        })

    bonds = []
    for b in frag_elem.findall('b'):
        bi, ei = int(b.get('B')), int(b.get('E'))
        if bi not in id_map or ei not in id_map:
            continue
        try:
            order = int(b.get('Order', '1'))
        except ValueError:
            # CDXML bond orders are not always integers (e.g. "1.5" or
            # "dative").  Treat them like every other unsupported order,
            # i.e. as a single bond, instead of crashing.
            order = 1
        bonds.append((id_map[bi], id_map[ei], order))

    em = Chem.RWMol()
    for a in atoms:
        ra = Chem.Atom(0 if a['is_abbrev'] else a['elem'])
        if a['num_h'] is not None:
            # An explicit hydrogen count in the file overrides RDKit's
            # implicit-hydrogen perception.
            ra.SetNoImplicit(True)
            ra.SetNumExplicitHs(a['num_h'])
        em.AddAtom(ra)

    bond_types = {
        1: Chem.BondType.SINGLE,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE,
    }
    for bi, ei, order in bonds:
        em.AddBond(bi, ei, bond_types.get(order, Chem.BondType.SINGLE))

    mol = em.GetMol()
    try:
        Chem.SanitizeMol(mol)
    except Exception:
        # Retry with the property check switched off; that is the only
        # step excluded from the second attempt.
        try:
            Chem.SanitizeMol(mol,
                             Chem.SanitizeFlags.SANITIZE_ALL ^
                             Chem.SanitizeFlags.SANITIZE_PROPERTIES)
        except Exception:
            # Deliberate best-effort: hand back the unsanitized molecule.
            pass

    return mol, atoms
133
+
134
+
135
+ # ---------------------------------------------------------------------------
136
+ # Scale helpers
137
+ # ---------------------------------------------------------------------------
138
+
139
def avg_bond_length(atoms_data, mol):
    """Mean bond length of *mol*, measured in CDXML coordinates.

    Bond end points are taken from ``mol.GetBonds()`` and looked up by atom
    index in *atoms_data*, whose entries supply the 'x' and 'y' values.
    Falls back to ACS_BOND_LENGTH when the molecule has no bonds.
    """
    length_sum = 0.0
    n_bonds = 0
    for bond in mol.GetBonds():
        begin = atoms_data[bond.GetBeginAtomIdx()]
        end = atoms_data[bond.GetEndAtomIdx()]
        dx = begin['x'] - end['x']
        dy = begin['y'] - end['y']
        length_sum += math.sqrt(dx * dx + dy * dy)
        n_bonds += 1

    if not n_bonds:
        return ACS_BOND_LENGTH
    return length_sum / n_bonds
149
+
150
+
151
_rdkit_bl_cache = None


def rdkit_bond_length():
    """Return RDKit's default 2D depiction bond length, measured once.

    The value is the distance between the two atoms of 'CC' after
    Compute2DCoords; it is stored in the module-level ``_rdkit_bl_cache``
    and reused on later calls.
    """
    global _rdkit_bl_cache
    if _rdkit_bl_cache is not None:
        return _rdkit_bl_cache

    ethane = Chem.MolFromSmiles('CC')
    AllChem.Compute2DCoords(ethane)
    conformer = ethane.GetConformer()
    first = conformer.GetAtomPosition(0)
    second = conformer.GetAtomPosition(1)
    _rdkit_bl_cache = math.sqrt(
        (second.x - first.x) ** 2 + (second.y - first.y) ** 2)
    return _rdkit_bl_cache
164
+
165
+
166
def set_cdxml_coords(mol, atoms_data, scale=1.0):
    """Replace the conformers of *mol* with one built from CDXML positions.

    For each entry of *atoms_data* the atom at index 'idx' is placed at
    (x * scale, -y * scale, 0.0): the y axis is flipped and both axes are
    scaled.  Any conformers already on *mol* are removed first.
    """
    conformer = Chem.Conformer(mol.GetNumAtoms())
    for atom in atoms_data:
        x = atom['x'] * scale
        y = -atom['y'] * scale
        conformer.SetAtomPosition(atom['idx'], Point3D(x, y, 0.0))

    mol.RemoveAllConformers()
    mol.AddConformer(conformer, assignId=True)
174
+
175
+
176
+ # ---------------------------------------------------------------------------
177
+ # MCS finding
178
+ # ---------------------------------------------------------------------------
179
+
180
def find_mcs(ref_mol, target_mol, timeout=30):
    """Find the maximum common substructure of two molecules.

    Atoms are compared by element and bonds by order; ring bonds only match
    ring bonds and only complete rings are accepted.

    Returns ``(mcs_result, atom_map)`` where atom_map is a list of
    ``(ref_idx, tgt_idx)`` pairs.  Returns ``(None, None)`` when the MCS
    has fewer than three atoms, its SMARTS cannot be parsed, or it cannot
    be matched back onto either molecule.
    """
    no_match = (None, None)

    result = rdFMCS.FindMCS(
        [ref_mol, target_mol],
        timeout=timeout,
        atomCompare=rdFMCS.AtomCompare.CompareElements,
        bondCompare=rdFMCS.BondCompare.CompareOrder,
        ringMatchesRingOnly=True,
        completeRingsOnly=True,
    )
    if result.numAtoms < 3:
        return no_match

    pattern = Chem.MolFromSmarts(result.smartsString)
    if pattern is None:
        return no_match

    ref_hits = ref_mol.GetSubstructMatch(pattern)
    tgt_hits = target_mol.GetSubstructMatch(pattern)
    if ref_hits and tgt_hits:
        # Both matches list atoms in pattern order, so zipping pairs them up.
        return result, list(zip(ref_hits, tgt_hits))
    return no_match
204
+
205
+
206
+ # ---------------------------------------------------------------------------
207
+ # Alignment via GenerateDepictionMatching2DStructure
208
+ # ---------------------------------------------------------------------------
209
+
210
def align_fragment(ref_mol, tgt_mol, atom_map):
    """Redraw *tgt_mol* so its mapped atoms sit on the reference's atoms.

    Uses GenerateDepictionMatching2DStructure with *atom_map*, a sequence
    of ``(ref_idx, tgt_idx)`` pairs.  *ref_mol* must already carry a
    conformer at RDKit scale; the conformer of *tgt_mol* is modified in
    place.

    Returns the RMSD over the mapped atom pairs, in RDKit units (expected
    to be close to zero).
    """
    rdDepictor.GenerateDepictionMatching2DStructure(
        tgt_mol, ref_mol, atom_map)

    ref_conf = ref_mol.GetConformer()
    tgt_conf = tgt_mol.GetConformer()

    squared_devs = []
    for ref_idx, tgt_idx in atom_map:
        ref_pos = ref_conf.GetAtomPosition(ref_idx)
        tgt_pos = tgt_conf.GetAtomPosition(tgt_idx)
        squared_devs.append(
            (ref_pos.x - tgt_pos.x) ** 2 + (ref_pos.y - tgt_pos.y) ** 2)

    return math.sqrt(sum(squared_devs) / len(atom_map))
229
+
230
+
231
+ # ---------------------------------------------------------------------------
232
+ # Coordinate writeback
233
+ # ---------------------------------------------------------------------------
234
+
235
+ def _translate_subtree(elem, dx, dy):
236
+ """Recursively shift all p and BoundingBox attributes by (dx, dy)."""
237
+ p = elem.get('p')
238
+ if p:
239
+ parts = p.split()
240
+ if len(parts) >= 2:
241
+ elem.set('p',
242
+ f"{float(parts[0])+dx:.2f} {float(parts[1])+dy:.2f}")
243
+
244
+ bb = elem.get('BoundingBox')
245
+ if bb:
246
+ parts = bb.split()
247
+ if len(parts) == 4:
248
+ elem.set('BoundingBox',
249
+ f"{float(parts[0])+dx:.2f} {float(parts[1])+dy:.2f} "
250
+ f"{float(parts[2])+dx:.2f} {float(parts[3])+dy:.2f}")
251
+
252
+ for child in elem:
253
+ _translate_subtree(child, dx, dy)
254
+
255
+
256
def write_aligned_coords(frag_elem, mol, atoms_data, scale,
                         original_center):
    """Write the aligned conformer of *mol* back into the CDXML fragment.

    Conformer positions are divided by *scale* and y-flipped to return to
    CDXML space, then shifted so that their centroid equals
    *original_center*.  Each atom's <n> element gets the new 'p' value and
    its child elements are translated by the same per-atom displacement.
    Finally the fragment's BoundingBox is recomputed from its node
    positions with a 15-unit margin.
    """
    conf = mol.GetConformer()
    to_cdxml = 1.0 / scale

    # Aligned positions expressed in CDXML space (rescaled, y flipped).
    targets = []
    for atom in atoms_data:
        pos = conf.GetAtomPosition(atom['idx'])
        targets.append((pos.x * to_cdxml, -pos.y * to_cdxml))

    # Global shift that keeps the fragment centred where it was.
    n_atoms = len(targets)
    shift_x = original_center[0] - sum(x for x, _ in targets) / n_atoms
    shift_y = original_center[1] - sum(y for _, y in targets) / n_atoms

    for atom, (tx, ty) in zip(atoms_data, targets):
        new_x = tx + shift_x
        new_y = ty + shift_y
        move_x = new_x - atom['x']
        move_y = new_y - atom['y']

        node = atom['xml']
        node.set('p', f"{new_x:.2f} {new_y:.2f}")

        # Anything nested under the node moves along with it.
        for child in node:
            _translate_subtree(child, move_x, move_y)

    # Recompute the fragment BoundingBox from the updated node positions.
    xs, ys = [], []
    for node in frag_elem.findall('n'):
        point = node.get('p')
        if node.get('NodeType') == 'ExternalConnectionPoint' or not point:
            continue
        coords = point.split()
        xs.append(float(coords[0]))
        ys.append(float(coords[1]))

    if not (xs and ys):
        return

    margin = 15.0
    frag_elem.set('BoundingBox',
                  f"{min(xs)-margin:.2f} {min(ys)-margin:.2f} "
                  f"{max(xs)+margin:.2f} {max(ys)+margin:.2f}")
301
+
302
+
303
+ # ---------------------------------------------------------------------------
304
+ # Visualization
305
+ # ---------------------------------------------------------------------------
306
+
307
def save_svg(mol, highlight_atoms, label, out_dir, stem):
    """Render *mol* to a 600x450 SVG with *highlight_atoms* highlighted.

    The file is written to ``out_dir / "<stem>-<label>.svg"`` and its path
    is printed.
    """
    canvas = rdMolDraw2D.MolDraw2DSVG(600, 450)
    canvas.drawOptions().addAtomIndices = False
    canvas.DrawMolecule(mol, highlightAtoms=highlight_atoms)
    canvas.FinishDrawing()

    target = out_dir / f"{stem}-{label}.svg"
    svg_text = canvas.GetDrawingText()
    target.write_text(svg_text)
    print(f" SVG: {target}")
316
+
317
+
318
+ # ---------------------------------------------------------------------------
319
+ # Main
320
+ # ---------------------------------------------------------------------------
321
+
322
+ def _centroid(atoms_data):
323
+ n = len(atoms_data)
324
+ return (sum(a['x'] for a in atoms_data) / n,
325
+ sum(a['y'] for a in atoms_data) / n)
326
+
327
+
328
def main(argv=None) -> int:
    """Command-line entry point: align each step's structures to its product.

    For every reaction step the first product is the reference.  Each other
    fragment listed as a reactant or above/below the arrow is matched to
    the product by MCS, re-depicted to match the product's orientation and
    written back to the CDXML tree, which is then saved.

    Args:
        argv: argument list to parse; ``None`` lets argparse read sys.argv.

    Returns:
        0 on success, 1 when the input file does not exist or the document
        contains no reaction steps.
    """
    ap = argparse.ArgumentParser(
        description='Align all structures in a reaction scheme to the '
                    'product orientation via RDKit MCS.',
    )
    ap.add_argument('input', help='Input CDXML file with reaction scheme')
    ap.add_argument('-o', '--output',
                    help='Output CDXML (default: <input>-aligned.cdxml)')
    ap.add_argument('--svg', action='store_true',
                    help='Save SVGs showing MCS-highlighted structures')
    ap.add_argument('--timeout', type=int, default=30,
                    help='MCS timeout in seconds (default: 30)')
    args = ap.parse_args(argv)

    inp = Path(args.input)
    if not inp.exists():
        print(f"File not found: {inp}", file=sys.stderr)
        return 1

    out = Path(args.output) if args.output else \
        inp.parent / (inp.stem + '-aligned.cdxml')

    tree, fragments, steps = parse_cdxml(inp)
    if not steps:
        print("No reaction scheme found in CDXML.", file=sys.stderr)
        return 1

    print(f"Input: {inp}")
    print(f"Fragments: {list(fragments.keys())}")
    print(f"Reaction steps: {len(steps)}")

    for si, step in enumerate(steps):
        if not step['products']:
            print(f"\nStep {si+1}: no products, skipping.")
            continue

        # --- Product is the reference ---
        prod_id = step['products'][0]
        prod_frag = fragments.get(prod_id)
        if prod_frag is None:
            # The step may reference an object that is not one of the
            # parsed fragments; skip it like the other ids instead of
            # failing with KeyError.
            print(f"\nStep {si+1}: product {prod_id} is not a known "
                  f"fragment, skipping.")
            continue
        prod_mol, prod_atoms = fragment_to_mol(prod_frag)

        # Compute scale from product's bond length
        cdxml_bl = avg_bond_length(prod_atoms, prod_mol)
        if cdxml_bl <= 0:
            # All bonded atoms coincide; no meaningful scale exists and
            # the division below would fail.
            print(f"\nStep {si+1}: product {prod_id} has zero bond "
                  f"length, skipping.")
            continue
        rdk_bl = rdkit_bond_length()
        scale = rdk_bl / cdxml_bl

        # Set product conformer at RDKit scale (the reference for all
        # alignments)
        set_cdxml_coords(prod_mol, prod_atoms, scale)

        print(f"\nStep {si+1}:")
        print(f" Product = reference (fragment {prod_id}): "
              f"{prod_mol.GetNumAtoms()} atoms, "
              f"{prod_mol.GetNumBonds()} bonds")
        print(f" Bond length: CDXML {cdxml_bl:.1f} pts -> "
              f"RDKit {rdk_bl:.2f}")

        if args.svg:
            save_svg(prod_mol, list(range(prod_mol.GetNumAtoms())),
                     'product-ref', out.parent, out.stem)

        # --- Collect all other drawn structures in this step ---
        # Order is preserved and duplicates are dropped.
        other_ids = []
        for fid in (step['reactants'] + step['above'] + step['below']):
            if fid in fragments and fid != prod_id and fid not in other_ids:
                other_ids.append(fid)

        for fid in other_ids:
            frag_mol, frag_atoms = fragment_to_mol(fragments[fid])
            # Remember where the fragment sits so it stays in place after
            # being redrawn.
            frag_center = _centroid(frag_atoms)

            print(f"\n Fragment {fid}: "
                  f"{frag_mol.GetNumAtoms()} atoms, "
                  f"{frag_mol.GetNumBonds()} bonds")

            # Find MCS with product
            mcs, amap = find_mcs(prod_mol, frag_mol, args.timeout)
            if mcs is None:
                print(" MCS < 3 atoms, skipping.")
                continue

            print(f" MCS: {mcs.numAtoms} atoms, {mcs.numBonds} bonds")

            # Align this fragment to the product
            rmsd = align_fragment(prod_mol, frag_mol, amap)
            print(f" MCS RMSD: {rmsd:.4f}")

            # Write aligned coords back to CDXML
            write_aligned_coords(
                fragments[fid], frag_mol, frag_atoms, scale, frag_center)
            print(" Coordinates updated.")

            if args.svg:
                hl = [ti for _, ti in amap]
                save_svg(frag_mol, hl, f'frag{fid}', out.parent, out.stem)

    tree.write(str(out), xml_declaration=True, encoding='UTF-8')
    print(f"\nOutput: {out}")
    return 0
425
+
426
+
427
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())