cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. cdxml_toolkit/__init__.py +18 -0
  2. cdxml_toolkit/_jre/__init__.py +2 -0
  3. cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
  4. cdxml_toolkit/analysis/__init__.py +35 -0
  5. cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
  6. cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
  7. cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
  8. cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
  9. cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
  10. cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
  11. cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
  12. cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
  13. cdxml_toolkit/analysis/extract_nmr.py +47 -0
  14. cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
  15. cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
  16. cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
  17. cdxml_toolkit/cdxml_builder.py +920 -0
  18. cdxml_toolkit/cdxml_utils.py +342 -0
  19. cdxml_toolkit/chemdraw/__init__.py +5 -0
  20. cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
  21. cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
  22. cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
  23. cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
  24. cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
  25. cdxml_toolkit/constants.py +304 -0
  26. cdxml_toolkit/coord_normalizer.py +438 -0
  27. cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
  28. cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
  29. cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
  30. cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
  31. cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
  32. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
  33. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
  34. cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
  35. cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
  36. cdxml_toolkit/image/__init__.py +15 -0
  37. cdxml_toolkit/image/reaction_from_image.py +2103 -0
  38. cdxml_toolkit/image/structure_from_image.py +1711 -0
  39. cdxml_toolkit/layout/__init__.py +5 -0
  40. cdxml_toolkit/layout/alignment.py +1642 -0
  41. cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
  42. cdxml_toolkit/layout/scheme_merger.py +2260 -0
  43. cdxml_toolkit/mcp_server/__init__.py +0 -0
  44. cdxml_toolkit/mcp_server/__main__.py +5 -0
  45. cdxml_toolkit/mcp_server/server.py +1567 -0
  46. cdxml_toolkit/naming/__init__.py +6 -0
  47. cdxml_toolkit/naming/aligned_namer.py +2342 -0
  48. cdxml_toolkit/naming/mol_builder.py +3722 -0
  49. cdxml_toolkit/naming/name_decomposer.py +2843 -0
  50. cdxml_toolkit/naming/reactions_datamol.json +2414 -0
  51. cdxml_toolkit/office/__init__.py +5 -0
  52. cdxml_toolkit/office/doc_from_template.py +722 -0
  53. cdxml_toolkit/office/ole_embedder.py +808 -0
  54. cdxml_toolkit/office/ole_extractor.py +272 -0
  55. cdxml_toolkit/perception/__init__.py +10 -0
  56. cdxml_toolkit/perception/compound_search.py +229 -0
  57. cdxml_toolkit/perception/eln_csv_parser.py +240 -0
  58. cdxml_toolkit/perception/rdf_parser.py +664 -0
  59. cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
  60. cdxml_toolkit/perception/reaction_parser.py +2150 -0
  61. cdxml_toolkit/perception/scheme_reader.py +2948 -0
  62. cdxml_toolkit/perception/scheme_refine.py +1404 -0
  63. cdxml_toolkit/perception/scheme_segmenter.py +619 -0
  64. cdxml_toolkit/perception/spatial_assignment.py +1013 -0
  65. cdxml_toolkit/rdkit_utils.py +605 -0
  66. cdxml_toolkit/render/__init__.py +17 -0
  67. cdxml_toolkit/render/auto_layout.py +229 -0
  68. cdxml_toolkit/render/compact_parser.py +632 -0
  69. cdxml_toolkit/render/parser.py +706 -0
  70. cdxml_toolkit/render/render_scheme.py +267 -0
  71. cdxml_toolkit/render/renderer.py +2387 -0
  72. cdxml_toolkit/render/schema.py +90 -0
  73. cdxml_toolkit/render/scheme_maker.py +1043 -0
  74. cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
  75. cdxml_toolkit/resolve/__init__.py +13 -0
  76. cdxml_toolkit/resolve/cas_resolver.py +430 -0
  77. cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
  78. cdxml_toolkit/resolve/condensed_formula.py +493 -0
  79. cdxml_toolkit/resolve/jre_manager.py +195 -0
  80. cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
  81. cdxml_toolkit/resolve/reagent_db.py +285 -0
  82. cdxml_toolkit/resolve/superatom_data.json +2856 -0
  83. cdxml_toolkit/resolve/superatom_table.py +146 -0
  84. cdxml_toolkit/text_formatting.py +298 -0
  85. cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
  86. cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
  87. cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
  88. cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
  89. cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
  90. cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
  91. cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,562 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ ChemScript 32-bit subprocess server.
4
+
5
+ This script runs under the 32-bit chemscript32 conda environment and provides
6
+ JSON-based RPC access to the ChemScript .NET DLL. It reads JSON commands from
7
+ stdin (one per line) and writes JSON responses to stdout.
8
+
9
+ NOT intended for direct use — called by chemscript_bridge.py.
10
+ """
11
+
12
+ import json
13
+ import os
14
+ import sys
15
+ import traceback
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Bootstrap: load .NET runtime and ChemScript DLL
19
+ # ---------------------------------------------------------------------------
20
+
21
# chemscript_bridge.py may pass --dll-dir and --assembly so that different
# ChemDraw versions (e.g. ChemOffice2015 vs ChemOffice2016) can be targeted.
# Both options are stripped from sys.argv; everything else is kept in order.
_option_values = {"--dll-dir": None, "--assembly": None}
_remaining = []
_args = sys.argv[1:]
_i = 0
while _i < len(_args):
    _token = _args[_i]
    # An option is only consumed when a value follows it; a trailing bare
    # option falls through and is kept as an ordinary argument.
    if _token in _option_values and _i + 1 < len(_args):
        _option_values[_token] = _args[_i + 1]
        _i += 2
        continue
    _remaining.append(_token)
    _i += 1
_dll_dir_arg = _option_values["--dll-dir"]
_assembly_arg = _option_values["--assembly"]
sys.argv = [sys.argv[0]] + _remaining

# Precedence: command line, then CHEMSCRIPT_DLL_DIR, then the default
# ChemOffice2016 install location under the 32-bit Program Files folder.
DLL_DIR = _dll_dir_arg or os.environ.get("CHEMSCRIPT_DLL_DIR")
if not DLL_DIR:
    DLL_DIR = os.path.join(
        os.environ.get("PROGRAMFILES(X86)", r"C:\Program Files (x86)"),
        "PerkinElmerInformatics", "ChemOffice2016", "ChemScript", "Lib", "Net",
    )

# Precedence: command line, then CHEMSCRIPT_ASSEMBLY, then the built-in default.
ASSEMBLY = _assembly_arg or os.environ.get("CHEMSCRIPT_ASSEMBLY")
if not ASSEMBLY:
    ASSEMBLY = "CambridgeSoft.ChemScript16"
46
+
47
# Suppress the ChemScript welcome banner (goes to stderr) while the runtime
# and the assembly are loaded.  The try/finally guarantees that stderr is
# restored and the devnull handle is closed even when loading fails, so the
# resulting traceback is not silently discarded.
_real_stderr = sys.stderr
_devnull = open(os.devnull, "w")
sys.stderr = _devnull
try:
    # Add DLL_DIR to Python path for the managed assembly (.NET DLL)
    sys.path.insert(0, DLL_DIR)

    # Also add DLL_DIR (and its parent) to the Windows PATH so the native
    # ChemScript engine DLL (e.g. ChemScript160.dll) can be found at runtime.
    # When DLLs are bundled in a flat directory (portable deployment), both
    # the managed and native DLLs live in the same folder.
    _dll_parent = os.path.dirname(DLL_DIR.rstrip(os.sep))
    _extra_paths = os.pathsep.join(
        p for p in [DLL_DIR, _dll_parent] if os.path.isdir(p)
    )
    if _extra_paths:
        # Only touch PATH when there is something to add; otherwise an empty
        # leading entry would be introduced.
        os.environ["PATH"] = _extra_paths + os.pathsep + os.environ.get("PATH", "")

    from pythonnet import load as _load_runtime

    _load_runtime("netfx")
    import clr

    clr.AddReference(ASSEMBLY)
    _cs_module = __import__(ASSEMBLY, fromlist=["StructureData", "ReactionData"])
    StructureData = _cs_module.StructureData
    ReactionData = _cs_module.ReactionData
finally:
    # Restore stderr
    sys.stderr = _real_stderr
    _devnull.close()
74
+
75
+ # ---------------------------------------------------------------------------
76
+ # Helpers
77
+ # ---------------------------------------------------------------------------
78
+
79
# Extension-to-mimetype mapping for WriteFile when format isn't obvious
EXT_MIME = {
    ".cdxml": "text/xml",
    ".cdx": "chemical/x-cdx",
    ".mol": "chemical/x-mdl-molfile",
    ".sdf": "chemical/x-mdl-molfile",
    ".rxn": "chemical/x-mdl-rxn",
    ".smi": "chemical/x-smiles",
    ".smiles": "chemical/x-smiles",
    ".inchi": "chemical/x-inchi",
}

# Short alias → full mimetype
MIME_ALIASES = {
    "cdxml": "text/xml",
    "cdx": "chemical/x-cdx",
    "smiles": "chemical/x-smiles",
    "smi": "chemical/x-smiles",
    "inchi": "chemical/x-inchi",
    "name": "chemical/x-name",
    "mol": "chemical/x-mdl-molfile",
    "molv3": "chemical/x-mdl-molfile-v3000",
    "rxn": "chemical/x-mdl-rxn",
    "rxnv3": "chemical/x-mdl-rxn-v3000",
    "cml": "chemical/x-cml",
}


def resolve_mime(fmt: str) -> str:
    """Resolve a short alias or extension to a full mimetype.

    Args:
        fmt: A full mimetype (anything containing "/"), a short alias such
            as "cdxml", or a file extension such as ".sdf".  Matching is
            case-insensitive and ignores surrounding whitespace and leading
            dots.

    Returns:
        The full mimetype when one is known.  A value that already contains
        "/" or that matches neither table is returned unchanged.
    """
    if "/" in fmt:
        return fmt
    key = fmt.strip().lower().lstrip(".")
    mime = MIME_ALIASES.get(key)
    if mime is None:
        # Extensions without a short alias (e.g. ".sdf") are only listed in
        # EXT_MIME, so consult it before giving up.
        mime = EXT_MIME.get("." + key)
    return fmt if mime is None else mime
112
+
113
+
114
def _load_structure(source: str, fmt: str = None) -> StructureData:
    """Build a StructureData from *source*.

    *source* is treated as a file path when it names an existing file and as
    inline structure data otherwise.  *fmt* is an optional alias, extension
    or mimetype describing the data.
    """
    if not os.path.isfile(source):
        # Inline data: pass the resolved mimetype along when one was given.
        if fmt:
            mime = resolve_mime(fmt)
            if mime:
                return StructureData.LoadData(source, mime)
        return StructureData.LoadData(source)

    if not fmt:
        return StructureData.LoadFile(source)

    # NOTE(review): with a format hint the file is read through an instance
    # via ReadFile, and `fmt` itself is not forwarded — confirm this is the
    # intended behavior.
    structure = StructureData()
    structure.ReadFile(source)
    return structure
127
+
128
+
129
def _load_reaction(source: str, fmt: str = None):
    """Build a ReactionData from *source*.

    An existing file path is loaded with LoadFile; anything else is treated
    as inline data, defaulting to the SMILES mimetype when *fmt* is not given.
    """
    if os.path.isfile(source):
        return ReactionData.LoadFile(source)

    if fmt:
        mime = resolve_mime(fmt)
    else:
        mime = "chemical/x-smiles"
    return ReactionData.LoadData(source, mime)
136
+
137
+
138
+ # ---------------------------------------------------------------------------
139
+ # Command handlers
140
+ # ---------------------------------------------------------------------------
141
+
142
+
143
def cmd_convert(args: dict) -> dict:
    """Convert the file at args["input"] and write it to args["output"].

    The input is tried as a structure first and as a reaction second; the
    first loader that returns an object wins.
    """
    input_path = args["input"]
    output_path = args["output"]

    for kind, loader_cls in (("structure", StructureData), ("reaction", ReactionData)):
        loaded = loader_cls.LoadFile(input_path)
        if loaded is None:
            continue
        loaded.WriteFile(output_path)
        return {"ok": True, "type": kind, "formula": loaded.Formula()}

    return {"ok": False, "error": f"Could not load: {input_path}"}
160
+
161
+
162
def cmd_name_to_cdxml(args: dict) -> dict:
    """Turn the chemical name in args["name"] into CDXML, SMILES and formula.

    When args["output"] is set, the cleaned-up structure is also written to
    that path.
    """
    name = args["name"]
    structure = StructureData.LoadData(name, "chemical/x-name")
    if structure is None:
        return {"ok": False, "error": f"Could not resolve name: {name}"}

    structure.CleanupStructure()
    response = {
        "ok": True,
        "cdxml": structure.WriteData("text/xml"),
        "smiles": structure.WriteData("chemical/x-smiles"),
        "formula": structure.Formula(),
    }

    destination = args.get("output")
    if destination:
        structure.WriteFile(destination)
    return response
176
+
177
+
178
def cmd_smiles_to_cdxml(args: dict) -> dict:
    """Turn the SMILES in args["smiles"] into CDXML plus formula.

    The returned "smiles" value is the input string as given.  When
    args["output"] is set, the cleaned-up structure is also written there.
    """
    smi = args["smiles"]
    structure = StructureData.LoadData(smi, "chemical/x-smiles")
    if structure is None:
        return {"ok": False, "error": f"Could not parse SMILES: {smi}"}

    structure.CleanupStructure()
    response = {
        "ok": True,
        "cdxml": structure.WriteData("text/xml"),
        "smiles": smi,
        "formula": structure.Formula(),
    }

    destination = args.get("output")
    if destination:
        structure.WriteFile(destination)
    return response
191
+
192
+
193
def cmd_cleanup(args: dict) -> dict:
    """Run CleanupStructure on the file at args["input"] and save the result.

    The result is written to args["output"], or back over the input file
    when no output path is supplied.
    """
    input_path = args["input"]
    output_path = args.get("output", input_path)

    structure = StructureData.LoadFile(input_path)
    if structure is None:
        return {"ok": False, "error": f"Could not load: {input_path}"}

    structure.CleanupStructure()
    structure.WriteFile(output_path)
    return {"ok": True, "formula": structure.Formula()}
203
+
204
+
205
def cmd_get_info(args: dict) -> dict:
    """Describe the structure or reaction given by args["source"].

    The source (file path or data string, with optional args["format"]) is
    tried as a structure first.  If that yields nothing it is tried as a
    reaction, whose reactants and products are described individually.
    """
    source = args["source"]
    fmt = args.get("format")

    def _describe(component) -> dict:
        # SMILES, formula and (best-effort) name of one reaction component.
        entry = {
            "smiles": component.WriteData("chemical/x-smiles"),
            "formula": component.Formula(),
        }
        try:
            entry["name"] = component.ChemicalName()
        except Exception:
            entry["name"] = None
        return entry

    # Try as structure
    structure = _load_structure(source, fmt)
    if structure is not None:
        info = {
            "ok": True,
            "type": "structure",
            "formula": structure.Formula(),
            "smiles": structure.WriteData("chemical/x-smiles"),
        }
        # Name and InChI are optional: a failure is reported as None.
        try:
            info["name"] = structure.ChemicalName()
        except Exception:
            info["name"] = None
        try:
            info["inchi"] = structure.WriteData("chemical/x-inchi")
        except Exception:
            info["inchi"] = None

        # Atoms and Bonds are counted by iterating over them.
        info["atom_count"] = sum(1 for _ in structure.Atoms)
        info["bond_count"] = sum(1 for _ in structure.Bonds)
        return info

    # Try as reaction
    reaction = _load_reaction(source, fmt)
    if reaction is None:
        return {"ok": False, "error": f"Could not load: {source}"}

    reactants = [_describe(rct) for rct in reaction.Reactants]
    products = [_describe(prod) for prod in reaction.Products]
    return {
        "ok": True,
        "type": "reaction",
        "formula": reaction.Formula(),
        "reactants": reactants,
        "products": products,
    }
267
+
268
+
269
def cmd_contains_substructure(args: dict) -> dict:
    """Report whether args["target"] contains args["query"] as a substructure."""
    target = _load_structure(args["target"], args.get("target_format"))
    query = _load_structure(args["query"], args.get("query_format"))

    # Both structures are loaded before either is validated; the target is
    # reported first when both are missing.
    for label, loaded in (("target", target), ("query", query)):
        if loaded is None:
            return {"ok": False, "error": f"Could not load {label}: {args[label]}"}

    found = target.ContainsSubstructure(query)
    return {"ok": True, "contains": bool(found)}
279
+
280
+
281
def cmd_substructure_search(args: dict) -> dict:
    """Perform atom-by-atom substructure search.

    Args:
        args: Must hold "target" and "query" (file path or data string);
            "target_format" / "query_format" are optional format hints.

    Returns:
        On success {"ok": True, "contains": bool, "maps": [...]}, where each
        map is a dict from the Name of a key atom to the Name of the atom it
        maps to.  When a structure cannot be loaded, {"ok": False, "error": ...}.
    """
    target = _load_structure(args["target"], args.get("target_format"))
    query = _load_structure(args["query"], args.get("query_format"))
    if target is None:
        return {"ok": False, "error": f"Could not load target: {args['target']}"}
    if query is None:
        return {"ok": False, "error": f"Could not load query: {args['query']}"}

    maps = query.AtomByAtomSearch(target)
    all_maps = []
    # cmd_substructure_align treats a falsy search result as "no match";
    # do the same here so a None result is reported as an empty match list
    # instead of raising while iterating.
    if maps is not None:
        for atom_map in maps:
            all_maps.append({atom.Name: atom_map[atom].Name for atom in atom_map.Keys})
    return {"ok": True, "contains": len(all_maps) > 0, "maps": all_maps}
298
+
299
+
300
def cmd_get_name(args: dict) -> dict:
    """Return the ChemicalName() of the structure in args["source"].

    A naming failure is reported as an error response rather than raised.
    """
    structure = _load_structure(args["source"], args.get("format"))
    if structure is None:
        return {"ok": False, "error": f"Could not load: {args['source']}"}

    try:
        return {"ok": True, "name": structure.ChemicalName()}
    except Exception as exc:
        return {"ok": False, "error": str(exc)}
310
+
311
+
312
def cmd_get_formula(args: dict) -> dict:
    """Return the Formula() of the structure in args["source"]."""
    structure = _load_structure(args["source"], args.get("format"))
    if structure is not None:
        return {"ok": True, "formula": structure.Formula()}
    return {"ok": False, "error": f"Could not load: {args['source']}"}
318
+
319
+
320
def cmd_write_data(args: dict) -> dict:
    """Serialize the structure in args["source"] as args["target_format"].

    args["source_format"] is an optional hint for reading the source.
    """
    structure = _load_structure(args["source"], args.get("source_format"))
    if structure is None:
        return {"ok": False, "error": f"Could not load: {args['source']}"}

    target_mime = resolve_mime(args["target_format"])
    return {"ok": True, "data": structure.WriteData(target_mime)}
328
+
329
+
330
def cmd_load_reaction(args: dict) -> dict:
    """Load the reaction in args["source"] and describe its components.

    Each reactant and product is reported with SMILES, formula and name
    (None when naming fails).  With args["include_cdxml"] set, each component
    is cleaned up and its CDXML is included as well.  When args["output"] is
    set, the reaction is written to that path after the summary is built.
    """
    reaction = _load_reaction(args["source"], args.get("format"))
    if reaction is None:
        return {"ok": False, "error": f"Could not load reaction: {args['source']}"}

    want_cdxml = args.get("include_cdxml")

    def _describe(component) -> dict:
        # Summary of one reaction component; cleanup happens only when the
        # CDXML was requested, and after SMILES/formula/name were captured.
        entry = {
            "smiles": component.WriteData("chemical/x-smiles"),
            "formula": component.Formula(),
        }
        try:
            entry["name"] = component.ChemicalName()
        except Exception:
            entry["name"] = None
        if want_cdxml:
            component.CleanupStructure()
            entry["cdxml"] = component.WriteData("text/xml")
        return entry

    reactants = [_describe(rct) for rct in reaction.Reactants]
    products = [_describe(prod) for prod in reaction.Products]

    summary = {
        "ok": True,
        "formula": reaction.Formula(),
        "reactants": reactants,
        "products": products,
    }

    destination = args.get("output")
    if destination:
        reaction.WriteFile(destination)

    return summary
378
+
379
+
380
def cmd_largest_common_substructure(args: dict) -> dict:
    """Find the largest common substructure between two molecules.

    Args:
        args: Must hold "mol1" and "mol2" (file path or data string);
            "mol1_format" / "mol2_format" are optional format hints.

    Returns:
        {"ok": True, "atom_map": [...], "common_atom_count": int}, where each
        atom_map entry gives the atom Name in the common structure, in mol1
        and in mol2.  The list is empty and the count is 0 when Compute
        returns nothing.  When a molecule cannot be loaded,
        {"ok": False, "error": ...}.
    """
    # Take the class from the assembly that was actually loaded, so that an
    # --assembly / CHEMSCRIPT_ASSEMBLY override (e.g. a non-16 ChemScript)
    # is honored instead of a hard-coded CambridgeSoft.ChemScript16 import.
    LargestCommonSubstructure = _cs_module.LargestCommonSubstructure

    m1 = _load_structure(args["mol1"], args.get("mol1_format"))
    m2 = _load_structure(args["mol2"], args.get("mol2_format"))
    if m1 is None:
        return {"ok": False, "error": f"Could not load mol1: {args['mol1']}"}
    if m2 is None:
        return {"ok": False, "error": f"Could not load mol2: {args['mol2']}"}

    common = LargestCommonSubstructure.Compute(m1, m2)
    if common is None:
        # Same keys as the non-empty response so callers can read the count
        # unconditionally.
        return {"ok": True, "atom_map": [], "common_atom_count": 0}

    atom_map1 = common.AtomMapM1()
    atom_map2 = common.AtomMapM2()
    mapping = [
        {
            "common": atom.Name,
            "mol1": atom_map1[atom].Name,
            "mol2": atom_map2[atom].Name,
        }
        for atom in atom_map1.Keys
    ]
    return {"ok": True, "atom_map": mapping, "common_atom_count": len(mapping)}
405
+
406
+
407
def cmd_overlay(args: dict) -> dict:
    """Overlay (2D-align) a molecule onto a reference molecule.

    Args:
        source: CDXML string or file path of the molecule to align.
        target: CDXML string or file path of the reference molecule.
        source_format: optional format hint for source (default: auto).
        target_format: optional format hint for target (default: auto).

    Returns:
        aligned_cdxml: CDXML string of the source molecule after the call.
        success: truth value returned by the Overlay call.
    """
    source = args["source"]
    target = args["target"]

    moving = _load_structure(source, args.get("source_format"))
    if moving is None:
        return {"ok": False, "error": "Could not load source structure"}

    reference = _load_structure(target, args.get("target_format"))
    if reference is None:
        return {"ok": False, "error": "Could not load target structure"}

    # The CDXML is emitted whether or not Overlay reports success.
    overlay_ok = bool(moving.Overlay(reference))
    return {
        "ok": True,
        "aligned_cdxml": moving.WriteData("text/xml"),
        "success": overlay_ok,
    }
435
+
436
+
437
def cmd_substructure_align(args: dict) -> dict:
    """Gather what a caller needs to align a query onto a target.

    Runs an atom-by-atom search of the query in the target to report whether
    a match exists, and always returns MOL blocks and CDXML for both
    structures so the caller can compute the atom mapping itself (e.g. via
    RDKit, or an MCS fallback).

    Args:
        query: CDXML string or file path of the small molecule (reagent).
        target: CDXML string or file path of the large molecule (product).
        query_format / target_format: optional format hints.

    Returns:
        ok, contains, query_mol, target_mol, target_cdxml, query_cdxml
    """
    target = _load_structure(args["target"], args.get("target_format"))
    query = _load_structure(args["query"], args.get("query_format"))
    if target is None:
        return {"ok": False, "error": "Could not load target"}
    if query is None:
        return {"ok": False, "error": "Could not load query"}

    # A falsy or empty search result means the query is not contained.
    maps = query.AtomByAtomSearch(target)
    if maps:
        contains = len(maps) > 0
    else:
        contains = False

    response = {"ok": True, "contains": contains}
    response["query_mol"] = query.WriteData("chemical/x-mdl-molfile")
    response["target_mol"] = target.WriteData("chemical/x-mdl-molfile")
    response["target_cdxml"] = target.WriteData("text/xml")
    response["query_cdxml"] = query.WriteData("text/xml")
    return response
478
+
479
+
480
def cmd_mimetypes(args: dict) -> dict:
    """Return the values of StructureData.MimeTypes() as a list (args unused)."""
    return {"ok": True, "mimetypes": list(StructureData.MimeTypes())}
484
+
485
+
486
def cmd_ping(args: dict) -> dict:
    """Answer a liveness probe; *args* is ignored."""
    reply = {"ok": True}
    reply["message"] = "ChemScript server ready"
    return reply
489
+
490
+
491
+ # ---------------------------------------------------------------------------
492
+ # Dispatch table
493
+ # ---------------------------------------------------------------------------
494
+
495
# Maps the "cmd" value of a request to the handler that serves it.
# ("quit" is handled directly by the main loop and is not listed here.)
COMMANDS = dict(
    ping=cmd_ping,
    convert=cmd_convert,
    name_to_cdxml=cmd_name_to_cdxml,
    smiles_to_cdxml=cmd_smiles_to_cdxml,
    cleanup=cmd_cleanup,
    get_info=cmd_get_info,
    get_name=cmd_get_name,
    get_formula=cmd_get_formula,
    contains_substructure=cmd_contains_substructure,
    substructure_search=cmd_substructure_search,
    write_data=cmd_write_data,
    load_reaction=cmd_load_reaction,
    largest_common_substructure=cmd_largest_common_substructure,
    overlay=cmd_overlay,
    substructure_align=cmd_substructure_align,
    mimetypes=cmd_mimetypes,
)
513
+
514
+ # ---------------------------------------------------------------------------
515
+ # Main loop — reads JSON lines from stdin, writes JSON lines to stdout
516
+ # ---------------------------------------------------------------------------
517
+
518
+
519
def main():
    """Serve JSON-line requests from stdin until "quit" or end of input.

    A {"ready": true} line is written first.  Each non-blank input line must
    be a JSON object of the form {"cmd": <name>, "args": {...}}; exactly one
    JSON line is written back per request.  Malformed requests and handler
    exceptions are reported as {"ok": false, ...} responses and never stop
    the loop.
    """

    def _send(payload: dict) -> None:
        # One response per line, flushed at once so the parent process
        # never waits on a buffered reply.
        try:
            text = json.dumps(payload)
        except (TypeError, ValueError) as exc:
            # A handler returned something json cannot encode; report it
            # instead of letting the exception terminate the server.
            text = json.dumps(
                {"ok": False, "error": f"Could not serialize response: {exc}"}
            )
        sys.stdout.write(text + "\n")
        sys.stdout.flush()

    # Signal readiness
    _send({"ready": True})

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue
        try:
            request = json.loads(line)
        except json.JSONDecodeError as e:
            _send({"ok": False, "error": f"Invalid JSON: {e}"})
            continue

        # Valid JSON that is not an object (list, number, string, null)
        # has no .get(); reject it rather than crash.
        if not isinstance(request, dict):
            _send({"ok": False, "error": "Request must be a JSON object"})
            continue

        cmd = request.get("cmd")
        args = request.get("args")
        if args is None:
            # Covers both a missing "args" key and an explicit null.
            args = {}

        if cmd == "quit":
            _send({"ok": True, "message": "bye"})
            break

        # Only strings can name a command; the check also keeps unhashable
        # values (lists, objects) away from the dict lookup.
        handler = COMMANDS.get(cmd) if isinstance(cmd, str) else None
        if handler is None:
            response = {"ok": False, "error": f"Unknown command: {cmd}"}
        else:
            try:
                response = handler(args)
            except Exception as e:
                response = {
                    "ok": False,
                    "error": str(e),
                    "traceback": traceback.format_exc(),
                }

        _send(response)
559
+
560
+
561
+ if __name__ == "__main__":
562
+ main()