cdxml-toolkit 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cdxml_toolkit/__init__.py +18 -0
- cdxml_toolkit/_jre/__init__.py +2 -0
- cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
- cdxml_toolkit/analysis/__init__.py +35 -0
- cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
- cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
- cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
- cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
- cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
- cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
- cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
- cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
- cdxml_toolkit/analysis/extract_nmr.py +47 -0
- cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
- cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
- cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
- cdxml_toolkit/cdxml_builder.py +920 -0
- cdxml_toolkit/cdxml_utils.py +342 -0
- cdxml_toolkit/chemdraw/__init__.py +5 -0
- cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
- cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
- cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
- cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
- cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
- cdxml_toolkit/constants.py +304 -0
- cdxml_toolkit/coord_normalizer.py +438 -0
- cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
- cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
- cdxml_toolkit/image/__init__.py +15 -0
- cdxml_toolkit/image/reaction_from_image.py +2103 -0
- cdxml_toolkit/image/structure_from_image.py +1711 -0
- cdxml_toolkit/layout/__init__.py +5 -0
- cdxml_toolkit/layout/alignment.py +1642 -0
- cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
- cdxml_toolkit/layout/scheme_merger.py +2260 -0
- cdxml_toolkit/mcp_server/__init__.py +0 -0
- cdxml_toolkit/mcp_server/__main__.py +5 -0
- cdxml_toolkit/mcp_server/server.py +1567 -0
- cdxml_toolkit/naming/__init__.py +6 -0
- cdxml_toolkit/naming/aligned_namer.py +2342 -0
- cdxml_toolkit/naming/mol_builder.py +3722 -0
- cdxml_toolkit/naming/name_decomposer.py +2843 -0
- cdxml_toolkit/naming/reactions_datamol.json +2414 -0
- cdxml_toolkit/office/__init__.py +5 -0
- cdxml_toolkit/office/doc_from_template.py +722 -0
- cdxml_toolkit/office/ole_embedder.py +808 -0
- cdxml_toolkit/office/ole_extractor.py +272 -0
- cdxml_toolkit/perception/__init__.py +10 -0
- cdxml_toolkit/perception/compound_search.py +229 -0
- cdxml_toolkit/perception/eln_csv_parser.py +240 -0
- cdxml_toolkit/perception/rdf_parser.py +664 -0
- cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
- cdxml_toolkit/perception/reaction_parser.py +2150 -0
- cdxml_toolkit/perception/scheme_reader.py +2948 -0
- cdxml_toolkit/perception/scheme_refine.py +1404 -0
- cdxml_toolkit/perception/scheme_segmenter.py +619 -0
- cdxml_toolkit/perception/spatial_assignment.py +1013 -0
- cdxml_toolkit/rdkit_utils.py +605 -0
- cdxml_toolkit/render/__init__.py +17 -0
- cdxml_toolkit/render/auto_layout.py +229 -0
- cdxml_toolkit/render/compact_parser.py +632 -0
- cdxml_toolkit/render/parser.py +706 -0
- cdxml_toolkit/render/render_scheme.py +267 -0
- cdxml_toolkit/render/renderer.py +2387 -0
- cdxml_toolkit/render/schema.py +90 -0
- cdxml_toolkit/render/scheme_maker.py +1043 -0
- cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
- cdxml_toolkit/resolve/__init__.py +13 -0
- cdxml_toolkit/resolve/cas_resolver.py +430 -0
- cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
- cdxml_toolkit/resolve/condensed_formula.py +493 -0
- cdxml_toolkit/resolve/jre_manager.py +195 -0
- cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
- cdxml_toolkit/resolve/reagent_db.py +285 -0
- cdxml_toolkit/resolve/superatom_data.json +2856 -0
- cdxml_toolkit/resolve/superatom_table.py +146 -0
- cdxml_toolkit/text_formatting.py +298 -0
- cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
- cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
- cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
- cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
- cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,562 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
ChemScript 32-bit subprocess server.
|
|
4
|
+
|
|
5
|
+
This script runs under the 32-bit chemscript32 conda environment and provides
|
|
6
|
+
JSON-based RPC access to the ChemScript .NET DLL. It reads JSON commands from
|
|
7
|
+
stdin (one per line) and writes JSON responses to stdout.
|
|
8
|
+
|
|
9
|
+
NOT intended for direct use — called by chemscript_bridge.py.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import sys
|
|
15
|
+
import traceback
|
|
16
|
+
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
# Bootstrap: load .NET runtime and ChemScript DLL
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
# Accept --dll-dir and --assembly from chemscript_bridge.py to support
|
|
22
|
+
# different ChemDraw versions (e.g. ChemOffice2015 vs ChemOffice2016).
|
|
23
|
+
# Command-line overrides handed over by chemscript_bridge.py.  Every token that
# is not part of a recognised "--flag value" pair is kept and written back to
# sys.argv in its original order.
_dll_dir_arg = None
_assembly_arg = None
_remaining = []
_args = sys.argv[1:]
_i = 0
while _i < len(_args):
    _flag = _args[_i]
    _has_value = _i + 1 < len(_args)
    if _has_value and _flag == "--dll-dir":
        _dll_dir_arg = _args[_i + 1]
        _i += 2
        continue
    if _has_value and _flag == "--assembly":
        _assembly_arg = _args[_i + 1]
        _i += 2
        continue
    # Unrecognised token, or a flag with nothing after it: pass it through.
    _remaining.append(_flag)
    _i += 1
sys.argv = [sys.argv[0]] + _remaining

# Directory holding the ChemScript .NET DLL.  Precedence: --dll-dir argument,
# then the CHEMSCRIPT_DLL_DIR environment variable, then the ChemOffice2016
# install location under the 32-bit Program Files directory.
DLL_DIR = _dll_dir_arg or os.environ.get("CHEMSCRIPT_DLL_DIR")
if not DLL_DIR:
    DLL_DIR = os.path.join(
        os.environ.get("PROGRAMFILES(X86)", r"C:\Program Files (x86)"),
        "PerkinElmerInformatics", "ChemOffice2016", "ChemScript", "Lib", "Net",
    )

# Assembly name.  Precedence: --assembly argument, then CHEMSCRIPT_ASSEMBLY,
# then the ChemScript16 assembly.
ASSEMBLY = _assembly_arg or os.environ.get("CHEMSCRIPT_ASSEMBLY")
if not ASSEMBLY:
    ASSEMBLY = "CambridgeSoft.ChemScript16"
|
|
46
|
+
|
|
47
|
+
# Suppress the ChemScript welcome banner (goes to stderr) while the runtime
# and assembly are loaded.  The redirect is undone in a ``finally`` so that a
# failing import (pythonnet missing, assembly not found, ...) still produces a
# visible traceback instead of dying silently with stderr pointed at devnull.
_real_stderr = sys.stderr
_devnull = open(os.devnull, "w")
sys.stderr = _devnull
try:
    # Add DLL_DIR to Python path for the managed assembly (.NET DLL)
    sys.path.insert(0, DLL_DIR)

    # Also add DLL_DIR (and its parent) to the Windows PATH so the native
    # ChemScript engine DLL (e.g. ChemScript160.dll) can be found at runtime.
    # When DLLs are bundled in a flat directory (portable deployment), both the
    # managed and native DLLs live in the same folder.
    _dll_parent = os.path.dirname(DLL_DIR.rstrip(os.sep))
    _extra_paths = os.pathsep.join(
        p for p in [DLL_DIR, _dll_parent] if os.path.isdir(p)
    )
    # Join only the non-empty parts: when neither directory exists, PATH must
    # not gain a leading empty entry.
    os.environ["PATH"] = os.pathsep.join(
        part for part in (_extra_paths, os.environ.get("PATH", "")) if part
    )

    from pythonnet import load as _load_runtime

    _load_runtime("netfx")
    import clr

    clr.AddReference(ASSEMBLY)
    _cs_module = __import__(ASSEMBLY, fromlist=["StructureData", "ReactionData"])
    StructureData = _cs_module.StructureData
    ReactionData = _cs_module.ReactionData
finally:
    # Restore stderr (also on failure) and release the devnull handle.
    sys.stderr = _real_stderr
    _devnull.close()
|
|
74
|
+
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
# Helpers
|
|
77
|
+
# ---------------------------------------------------------------------------
|
|
78
|
+
|
|
79
|
+
# Extension-to-mimetype mapping for WriteFile when format isn't obvious
|
|
80
|
+
EXT_MIME = {
    ".cdxml": "text/xml",
    ".cdx": "chemical/x-cdx",
    ".mol": "chemical/x-mdl-molfile",
    ".sdf": "chemical/x-mdl-molfile",
    ".rxn": "chemical/x-mdl-rxn",
    ".smi": "chemical/x-smiles",
    ".smiles": "chemical/x-smiles",
    ".inchi": "chemical/x-inchi",
}

# Short alias -> full mimetype
MIME_ALIASES = {
    "cdxml": "text/xml",
    "cdx": "chemical/x-cdx",
    "smiles": "chemical/x-smiles",
    "smi": "chemical/x-smiles",
    "inchi": "chemical/x-inchi",
    "name": "chemical/x-name",
    "mol": "chemical/x-mdl-molfile",
    "molv3": "chemical/x-mdl-molfile-v3000",
    "rxn": "chemical/x-mdl-rxn",
    "rxnv3": "chemical/x-mdl-rxn-v3000",
    "cml": "chemical/x-cml",
}


def resolve_mime(fmt: str) -> str:
    """Resolve a short alias or file extension to a full mimetype.

    Args:
        fmt: A full mimetype (anything containing "/"), a short alias such as
            "cdxml", or an extension such as ".sdf".  Matching is
            case-insensitive and leading dots are ignored.

    Returns:
        ``fmt`` unchanged when it already contains "/"; otherwise the entry
        from MIME_ALIASES, falling back to EXT_MIME so that every extension
        listed there (notably ".sdf", which has no alias) is resolved too.
        Unknown values are returned unchanged.
    """
    if "/" in fmt:
        return fmt
    key = fmt.lower().lstrip(".")
    if key in MIME_ALIASES:
        return MIME_ALIASES[key]
    # Extension-only entries live in EXT_MIME, keyed with a leading dot.
    return EXT_MIME.get("." + key, fmt)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _load_structure(source: str, fmt: str = None) -> StructureData:
    """Build a StructureData from ``source``.

    ``source`` is treated as a file path when it names an existing file and as
    inline structure data otherwise.  For inline data, ``fmt`` (alias,
    extension or mimetype) is resolved with resolve_mime and passed to
    LoadData; without ``fmt`` LoadData is called with the data alone.
    """
    if not os.path.isfile(source):
        # Inline data: pass the mimetype along only when a format was given.
        if fmt:
            mime = resolve_mime(fmt)
            if mime:
                return StructureData.LoadData(source, mime)
        return StructureData.LoadData(source)

    if not fmt:
        return StructureData.LoadFile(source)

    # NOTE(review): with a format hint the file is read via an instance and
    # ReadFile, but ``fmt`` itself is not forwarded -- confirm this is intended.
    structure = StructureData()
    structure.ReadFile(source)
    return structure
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _load_reaction(source: str, fmt: str = None):
    """Build a ReactionData from ``source``.

    An existing file path is loaded with LoadFile (``fmt`` is not used in that
    case).  Anything else is treated as inline data and loaded with LoadData,
    using the resolved ``fmt`` or "chemical/x-smiles" when no format is given.
    """
    if os.path.isfile(source):
        return ReactionData.LoadFile(source)

    if fmt:
        mime = resolve_mime(fmt)
    else:
        mime = "chemical/x-smiles"
    return ReactionData.LoadData(source, mime)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# ---------------------------------------------------------------------------
|
|
139
|
+
# Command handlers
|
|
140
|
+
# ---------------------------------------------------------------------------
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def cmd_convert(args: dict) -> dict:
    """Convert the file ``args["input"]`` and write it to ``args["output"]``.

    The input is tried as a structure first and as a reaction second; the
    first loader that returns an object writes the output file.  The reply
    carries the detected ``type`` and the ``formula``; an error reply is
    returned when neither loader succeeds.
    """
    input_path = args["input"]
    output_path = args["output"]

    # Structure first, then reaction -- same order as the reply "type" values.
    for loader, kind in ((StructureData, "structure"), (ReactionData, "reaction")):
        loaded = loader.LoadFile(input_path)
        if loaded is None:
            continue
        loaded.WriteFile(output_path)
        return {"ok": True, "type": kind, "formula": loaded.Formula()}

    return {"ok": False, "error": f"Could not load: {input_path}"}
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def cmd_name_to_cdxml(args: dict) -> dict:
    """Turn the chemical name in ``args["name"]`` into CDXML.

    The structure is cleaned up before export.  The reply contains the CDXML
    text, the SMILES and the formula; when ``args["output"]`` is set the
    structure is additionally written to that file.
    """
    name = args["name"]
    structure = StructureData.LoadData(name, "chemical/x-name")
    if structure is None:
        return {"ok": False, "error": f"Could not resolve name: {name}"}

    structure.CleanupStructure()
    reply = {
        "ok": True,
        "cdxml": structure.WriteData("text/xml"),
        "smiles": structure.WriteData("chemical/x-smiles"),
        "formula": structure.Formula(),
    }

    destination = args.get("output")
    if destination:
        structure.WriteFile(destination)
    return reply
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def cmd_smiles_to_cdxml(args: dict) -> dict:
    """Turn the SMILES in ``args["smiles"]`` into CDXML.

    The structure is cleaned up before export.  The reply echoes the input
    SMILES and carries the CDXML text and the formula; when ``args["output"]``
    is set the structure is additionally written to that file.
    """
    smi = args["smiles"]
    structure = StructureData.LoadData(smi, "chemical/x-smiles")
    if structure is None:
        return {"ok": False, "error": f"Could not parse SMILES: {smi}"}

    structure.CleanupStructure()
    cdxml_text = structure.WriteData("text/xml")
    formula_text = structure.Formula()

    destination = args.get("output")
    if destination:
        structure.WriteFile(destination)

    # The SMILES in the reply is the caller's input, not a re-export.
    return {"ok": True, "cdxml": cdxml_text, "smiles": smi, "formula": formula_text}
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def cmd_cleanup(args: dict) -> dict:
    """Run CleanupStructure on a structure file and write the result.

    Reads ``args["input"]`` and writes to ``args["output"]``; when no output
    key is supplied the input file is overwritten.  The reply carries the
    formula of the cleaned structure.
    """
    src_path = args["input"]
    dst_path = args.get("output", src_path)

    structure = StructureData.LoadFile(src_path)
    if structure is None:
        return {"ok": False, "error": f"Could not load: {src_path}"}

    structure.CleanupStructure()
    structure.WriteFile(dst_path)
    return {"ok": True, "formula": structure.Formula()}
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def cmd_get_info(args: dict) -> dict:
    """Describe the structure or reaction given in ``args["source"]``.

    The source (file path or data string, optional ``args["format"]``) is
    tried as a structure first.  A structure reply contains formula, SMILES,
    name, InChI and atom/bond counts; name and InChI are ``None`` when their
    lookup raises.  Otherwise the source is tried as a reaction and the reply
    lists SMILES, formula and name of every reactant and product.
    """
    source = args["source"]
    fmt = args.get("format")

    def _name_or_none(entity):
        # Naming is best-effort: any failure is reported as a missing name.
        try:
            return entity.ChemicalName()
        except Exception:
            return None

    def _component(entity):
        summary = {
            "smiles": entity.WriteData("chemical/x-smiles"),
            "formula": entity.Formula(),
        }
        summary["name"] = _name_or_none(entity)
        return summary

    # Try as structure
    structure = _load_structure(source, fmt)
    if structure is not None:
        info = {
            "ok": True,
            "type": "structure",
            "formula": structure.Formula(),
            "smiles": structure.WriteData("chemical/x-smiles"),
        }
        info["name"] = _name_or_none(structure)
        try:
            info["inchi"] = structure.WriteData("chemical/x-inchi")
        except Exception:
            info["inchi"] = None

        # Count atoms and bonds by walking the collections.
        n_atoms = sum(1 for _ in structure.Atoms)
        n_bonds = sum(1 for _ in structure.Bonds)
        info["atom_count"] = n_atoms
        info["bond_count"] = n_bonds
        return info

    # Try as reaction
    reaction = _load_reaction(source, fmt)
    if reaction is None:
        return {"ok": False, "error": f"Could not load: {source}"}

    reactants = [_component(rct) for rct in reaction.Reactants]
    products = [_component(prod) for prod in reaction.Products]
    return {
        "ok": True,
        "type": "reaction",
        "formula": reaction.Formula(),
        "reactants": reactants,
        "products": products,
    }
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def cmd_contains_substructure(args: dict) -> dict:
    """Report whether ``args["target"]`` contains ``args["query"]``.

    Both structures are loaded (with their optional ``*_format`` hints) before
    either is validated; the reply's ``contains`` is the boolean result of
    ContainsSubstructure on the target.
    """
    target = _load_structure(args["target"], args.get("target_format"))
    query = _load_structure(args["query"], args.get("query_format"))

    for label, loaded in (("target", target), ("query", query)):
        if loaded is None:
            return {"ok": False, "error": f"Could not load {label}: {args[label]}"}

    found = target.ContainsSubstructure(query)
    return {"ok": True, "contains": bool(found)}
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def cmd_substructure_search(args: dict) -> dict:
    """Run an atom-by-atom search of the query against the target.

    The reply's ``maps`` holds one dict per match returned by
    AtomByAtomSearch, pairing the ``Name`` of each key atom with the ``Name``
    of the atom it maps to; ``contains`` is true when at least one match
    exists.
    """
    target = _load_structure(args["target"], args.get("target_format"))
    query = _load_structure(args["query"], args.get("query_format"))
    if target is None:
        return {"ok": False, "error": f"Could not load target: {args['target']}"}
    if query is None:
        return {"ok": False, "error": f"Could not load query: {args['query']}"}

    matches = [
        {atom.Name: atom_map[atom].Name for atom in atom_map.Keys}
        for atom_map in query.AtomByAtomSearch(target)
    ]
    return {"ok": True, "contains": bool(matches), "maps": matches}
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def cmd_get_name(args: dict) -> dict:
    """Return the chemical name of the structure in ``args["source"]``.

    Any exception raised by the name lookup is turned into an error reply
    carrying the exception text.
    """
    structure = _load_structure(args["source"], args.get("format"))
    if structure is None:
        return {"ok": False, "error": f"Could not load: {args['source']}"}

    try:
        chemical_name = structure.ChemicalName()
    except Exception as exc:
        return {"ok": False, "error": str(exc)}
    return {"ok": True, "name": chemical_name}
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def cmd_get_formula(args: dict) -> dict:
    """Return the molecular formula of the structure in ``args["source"]``."""
    source = args["source"]
    structure = _load_structure(source, args.get("format"))
    if structure is not None:
        return {"ok": True, "formula": structure.Formula()}
    return {"ok": False, "error": f"Could not load: {source}"}
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def cmd_write_data(args: dict) -> dict:
    """Export the structure in ``args["source"]`` as a string.

    ``args["target_format"]`` (alias, extension or mimetype) selects the
    output format; ``args["source_format"]`` is an optional hint for loading.
    """
    structure = _load_structure(args["source"], args.get("source_format"))
    if structure is None:
        return {"ok": False, "error": f"Could not load: {args['source']}"}

    target_mime = resolve_mime(args["target_format"])
    return {"ok": True, "data": structure.WriteData(target_mime)}
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def cmd_load_reaction(args: dict) -> dict:
    """Load a reaction and describe its reactants and products.

    Every component is reported with SMILES, formula and name (``None`` when
    the name lookup raises).  With a truthy ``args["include_cdxml"]`` each
    component is cleaned up and its CDXML added.  When ``args["output"]`` is
    set the reaction is also written to that file.
    """
    reaction = _load_reaction(args["source"], args.get("format"))
    if reaction is None:
        return {"ok": False, "error": f"Could not load reaction: {args['source']}"}

    def _component(entity):
        entry = {
            "smiles": entity.WriteData("chemical/x-smiles"),
            "formula": entity.Formula(),
        }
        try:
            entry["name"] = entity.ChemicalName()
        except Exception:
            entry["name"] = None
        if args.get("include_cdxml"):
            # Cleanup happens after SMILES/formula/name were collected.
            entity.CleanupStructure()
            entry["cdxml"] = entity.WriteData("text/xml")
        return entry

    reactants = [_component(rct) for rct in reaction.Reactants]
    products = [_component(prod) for prod in reaction.Products]

    reply = {
        "ok": True,
        "formula": reaction.Formula(),
        "reactants": reactants,
        "products": products,
    }

    destination = args.get("output")
    if destination:
        reaction.WriteFile(destination)

    return reply
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def cmd_largest_common_substructure(args: dict) -> dict:
    """Find the largest common substructure between two molecules.

    Args (keys of ``args``):
        mol1, mol2: file path or data string of each molecule.
        mol1_format, mol2_format: optional format hints.

    Returns:
        ``atom_map``: one entry per atom of the common substructure with the
        ``Name`` of that atom and of its counterparts in mol1 and mol2, plus
        ``common_atom_count``.  When no common substructure is computed the
        map is empty and the count is 0.
    """
    # Take the class from the assembly that was actually loaded (selected via
    # --assembly / CHEMSCRIPT_ASSEMBLY) rather than a fixed ChemScript16
    # import, so other ChemScript versions work as well.
    LargestCommonSubstructure = _cs_module.LargestCommonSubstructure

    m1 = _load_structure(args["mol1"], args.get("mol1_format"))
    m2 = _load_structure(args["mol2"], args.get("mol2_format"))
    if m1 is None:
        return {"ok": False, "error": f"Could not load mol1: {args['mol1']}"}
    if m2 is None:
        return {"ok": False, "error": f"Could not load mol2: {args['mol2']}"}

    common = LargestCommonSubstructure.Compute(m1, m2)
    if common is None:
        # Same reply shape as the success path.
        return {"ok": True, "atom_map": [], "common_atom_count": 0}

    atom_map1 = common.AtomMapM1()
    atom_map2 = common.AtomMapM2()
    mapping = [
        {
            "common": atom.Name,
            "mol1": atom_map1[atom].Name,
            "mol2": atom_map2[atom].Name,
        }
        for atom in atom_map1.Keys
    ]
    return {"ok": True, "atom_map": mapping, "common_atom_count": len(mapping)}
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def cmd_overlay(args: dict) -> dict:
    """Overlay (2D-align) a molecule onto a reference molecule.

    Args:
        source: CDXML string or file path of the molecule to align.
        target: CDXML string or file path of the reference molecule.
        source_format: optional format hint for source (default: auto).
        target_format: optional format hint for target (default: auto).

    Returns:
        aligned_cdxml: CDXML string exported from the source molecule after
            the overlay call.
        success: boolean result of the overlay call.
    """
    source, target = args["source"], args["target"]
    src_fmt, tgt_fmt = args.get("source_format"), args.get("target_format")

    moving = _load_structure(source, src_fmt)
    if moving is None:
        return {"ok": False, "error": "Could not load source structure"}

    reference = _load_structure(target, tgt_fmt)
    if reference is None:
        return {"ok": False, "error": "Could not load target structure"}

    overlaid = bool(moving.Overlay(reference))
    # The CDXML is exported whether or not the overlay reported success.
    return {
        "ok": True,
        "aligned_cdxml": moving.WriteData("text/xml"),
        "success": overlaid,
    }
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def cmd_substructure_align(args: dict) -> dict:
    """Export a query and a target so the caller can align them.

    Checks with an atom-by-atom search whether the query (small molecule)
    occurs in the target, then returns both structures as MOL blocks and as
    CDXML so the caller can compute the atom mapping itself (e.g. via RDKit).

    This avoids the ChemScript atom-name-mismatch problem entirely.

    Args:
        query: CDXML string or file path of the small molecule (reagent).
        target: CDXML string or file path of the large molecule (product).
        query_format: optional format hint for query.
        target_format: optional format hint for target.

    Returns:
        ok, contains, query_mol, target_mol, target_cdxml, query_cdxml
    """
    target = _load_structure(args["target"], args.get("target_format"))
    query = _load_structure(args["query"], args.get("query_format"))
    if target is None:
        return {"ok": False, "error": "Could not load target"}
    if query is None:
        return {"ok": False, "error": "Could not load query"}

    # Check if query is a substructure of target
    maps = query.AtomByAtomSearch(target)
    contains = bool(maps and len(maps) > 0)

    # Always return MOL blocks + CDXML (caller may need them for MCS fallback)
    query_mol = query.WriteData("chemical/x-mdl-molfile")
    target_mol = target.WriteData("chemical/x-mdl-molfile")
    target_cdxml = target.WriteData("text/xml")
    query_cdxml = query.WriteData("text/xml")

    return {
        "ok": True,
        "contains": contains,
        "query_mol": query_mol,
        "target_mol": target_mol,
        "target_cdxml": target_cdxml,
        "query_cdxml": query_cdxml,
    }
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def cmd_mimetypes(args: dict) -> dict:
    """Return the mimetypes reported by StructureData.MimeTypes()."""
    supported = [mime for mime in StructureData.MimeTypes()]
    return {"ok": True, "mimetypes": supported}
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def cmd_ping(args: dict) -> dict:
    """Health check: always answers with a fixed ready message."""
    reply = dict(ok=True, message="ChemScript server ready")
    return reply
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
# ---------------------------------------------------------------------------
|
|
492
|
+
# Dispatch table
|
|
493
|
+
# ---------------------------------------------------------------------------
|
|
494
|
+
|
|
495
|
+
# Maps the "cmd" value of a request to its handler.  Every handler is named
# ``cmd_<command>``, so the key is the function name without that prefix.
COMMANDS = {
    handler.__name__[len("cmd_"):]: handler
    for handler in (
        cmd_ping,
        cmd_convert,
        cmd_name_to_cdxml,
        cmd_smiles_to_cdxml,
        cmd_cleanup,
        cmd_get_info,
        cmd_get_name,
        cmd_get_formula,
        cmd_contains_substructure,
        cmd_substructure_search,
        cmd_write_data,
        cmd_load_reaction,
        cmd_largest_common_substructure,
        cmd_overlay,
        cmd_substructure_align,
        cmd_mimetypes,
    )
}
|
|
513
|
+
|
|
514
|
+
# ---------------------------------------------------------------------------
|
|
515
|
+
# Main loop — reads JSON lines from stdin, writes JSON lines to stdout
|
|
516
|
+
# ---------------------------------------------------------------------------
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def main():
    """Serve JSON-line requests from stdin until "quit" or end of input.

    A ``{"ready": true}`` line is written first.  After that every non-blank
    input line produces exactly one JSON reply line on stdout:

    * invalid JSON, a JSON value that is not an object, or an unknown command
      yields ``{"ok": false, "error": ...}``;
    * ``{"cmd": "quit"}`` is acknowledged and ends the loop;
    * any other command is dispatched through COMMANDS with the request's
      ``args`` (default ``{}``); an exception raised by the handler is
      reported with its message and traceback.

    A malformed request never terminates the loop.
    """

    def _send(payload):
        # Serialization failures are reported to the client instead of
        # killing the server with an unanswered request.
        try:
            encoded = json.dumps(payload)
        except (TypeError, ValueError) as e:
            encoded = json.dumps(
                {"ok": False, "error": f"Could not serialize response: {e}"}
            )
        sys.stdout.write(encoded + "\n")
        sys.stdout.flush()

    # Signal readiness
    _send({"ready": True})

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue
        try:
            request = json.loads(line)
        except json.JSONDecodeError as e:
            _send({"ok": False, "error": f"Invalid JSON: {e}"})
            continue

        # json.loads also accepts lists, numbers, strings, ...; only an
        # object can carry "cmd" / "args".
        if not isinstance(request, dict):
            _send({"ok": False, "error": "Invalid request: expected a JSON object"})
            continue

        cmd = request.get("cmd")
        args = request.get("args", {})

        if cmd == "quit":
            _send({"ok": True, "message": "bye"})
            break

        # A non-string cmd (e.g. a list) is unhashable and must not reach the
        # dict lookup; treat it like any other unknown command.
        handler = COMMANDS.get(cmd) if isinstance(cmd, str) else None
        if handler is None:
            response = {"ok": False, "error": f"Unknown command: {cmd}"}
        else:
            try:
                response = handler(args)
            except Exception as e:
                response = {
                    "ok": False,
                    "error": str(e),
                    "traceback": traceback.format_exc(),
                }

        _send(response)


if __name__ == "__main__":
    main()
|