cdxml-toolkit 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cdxml_toolkit/__init__.py +18 -0
- cdxml_toolkit/_jre/__init__.py +2 -0
- cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
- cdxml_toolkit/analysis/__init__.py +35 -0
- cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
- cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
- cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
- cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
- cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
- cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
- cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
- cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
- cdxml_toolkit/analysis/extract_nmr.py +47 -0
- cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
- cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
- cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
- cdxml_toolkit/cdxml_builder.py +920 -0
- cdxml_toolkit/cdxml_utils.py +342 -0
- cdxml_toolkit/chemdraw/__init__.py +5 -0
- cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
- cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
- cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
- cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
- cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
- cdxml_toolkit/constants.py +304 -0
- cdxml_toolkit/coord_normalizer.py +438 -0
- cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
- cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
- cdxml_toolkit/image/__init__.py +15 -0
- cdxml_toolkit/image/reaction_from_image.py +2103 -0
- cdxml_toolkit/image/structure_from_image.py +1711 -0
- cdxml_toolkit/layout/__init__.py +5 -0
- cdxml_toolkit/layout/alignment.py +1642 -0
- cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
- cdxml_toolkit/layout/scheme_merger.py +2260 -0
- cdxml_toolkit/mcp_server/__init__.py +0 -0
- cdxml_toolkit/mcp_server/__main__.py +5 -0
- cdxml_toolkit/mcp_server/server.py +1567 -0
- cdxml_toolkit/naming/__init__.py +6 -0
- cdxml_toolkit/naming/aligned_namer.py +2342 -0
- cdxml_toolkit/naming/mol_builder.py +3722 -0
- cdxml_toolkit/naming/name_decomposer.py +2843 -0
- cdxml_toolkit/naming/reactions_datamol.json +2414 -0
- cdxml_toolkit/office/__init__.py +5 -0
- cdxml_toolkit/office/doc_from_template.py +722 -0
- cdxml_toolkit/office/ole_embedder.py +808 -0
- cdxml_toolkit/office/ole_extractor.py +272 -0
- cdxml_toolkit/perception/__init__.py +10 -0
- cdxml_toolkit/perception/compound_search.py +229 -0
- cdxml_toolkit/perception/eln_csv_parser.py +240 -0
- cdxml_toolkit/perception/rdf_parser.py +664 -0
- cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
- cdxml_toolkit/perception/reaction_parser.py +2150 -0
- cdxml_toolkit/perception/scheme_reader.py +2948 -0
- cdxml_toolkit/perception/scheme_refine.py +1404 -0
- cdxml_toolkit/perception/scheme_segmenter.py +619 -0
- cdxml_toolkit/perception/spatial_assignment.py +1013 -0
- cdxml_toolkit/rdkit_utils.py +605 -0
- cdxml_toolkit/render/__init__.py +17 -0
- cdxml_toolkit/render/auto_layout.py +229 -0
- cdxml_toolkit/render/compact_parser.py +632 -0
- cdxml_toolkit/render/parser.py +706 -0
- cdxml_toolkit/render/render_scheme.py +267 -0
- cdxml_toolkit/render/renderer.py +2387 -0
- cdxml_toolkit/render/schema.py +90 -0
- cdxml_toolkit/render/scheme_maker.py +1043 -0
- cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
- cdxml_toolkit/resolve/__init__.py +13 -0
- cdxml_toolkit/resolve/cas_resolver.py +430 -0
- cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
- cdxml_toolkit/resolve/condensed_formula.py +493 -0
- cdxml_toolkit/resolve/jre_manager.py +195 -0
- cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
- cdxml_toolkit/resolve/reagent_db.py +285 -0
- cdxml_toolkit/resolve/superatom_data.json +2856 -0
- cdxml_toolkit/resolve/superatom_table.py +146 -0
- cdxml_toolkit/text_formatting.py +298 -0
- cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
- cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
- cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
- cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
- cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""
|
|
2
|
+
auto_layout.py — Generate a default SchemeDescriptor from reaction_parser JSON.
|
|
3
|
+
|
|
4
|
+
The "zero-effort" path: reads the JSON, puts SM on left, DP on right,
|
|
5
|
+
atom-contributing species above arrow, conditions and non-contributing
|
|
6
|
+
species as text below arrow.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
from cdxml_toolkit.render.auto_layout import auto_layout
|
|
10
|
+
scheme = auto_layout("reaction.json")
|
|
11
|
+
|
|
12
|
+
# Or from CLI:
|
|
13
|
+
python -m cdxml_toolkit.render.auto_layout reaction.json -o scheme.cdxml
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import json
|
|
19
|
+
import os
|
|
20
|
+
import sys
|
|
21
|
+
from typing import Any, Dict, List, Optional
|
|
22
|
+
|
|
23
|
+
from .schema import (
|
|
24
|
+
ArrowContent,
|
|
25
|
+
SchemeDescriptor,
|
|
26
|
+
StepDescriptor,
|
|
27
|
+
StructureRef,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def auto_layout(
    reaction_json_path: str,
    include_equiv: bool = True,
) -> SchemeDescriptor:
    """
    Generate a default SchemeDescriptor from reaction_parser output.

    Reads the JSON and sorts every entry of its ``species`` list into a
    layout position:

    - ``is_sm`` → substrate (left of arrow)
    - ``is_dp`` → product (right of arrow)
    - role ``atom_contributing`` → above arrow as structures
    - role ``non_contributing`` / ``reagent`` / unrecognised role
      → text below arrow (with equivalents)
    - role ``product`` that is not the DP → currently omitted
    - ``conditions`` from JSON → text below arrow, after the reagent lines

    Parameters
    ----------
    reaction_json_path : str
        Path to reaction_parser JSON file.
    include_equiv : bool
        Whether to include equivalents in text labels (default True).
        Equivalents that are numerically 1 are never shown.

    Returns
    -------
    SchemeDescriptor
        Single-step, linear-layout descriptor whose ``source`` is the
        basename of *reaction_json_path*.

    Raises
    ------
    ValueError
        If no species is flagged ``is_sm`` or none is flagged ``is_dp``.
    """
    with open(reaction_json_path, encoding="utf-8") as f:
        data = json.load(f)

    # ``or []`` also covers an explicit JSON null, which .get(key, [])
    # would pass through and crash on during iteration / extend().
    species_list = data.get("species") or []
    conditions = data.get("conditions") or []
    if isinstance(conditions, str):
        # A lone string would otherwise be extended character by character.
        conditions = [conditions]

    # Classify species by role and position
    sm = None
    dp = None
    above_structures: List[Dict] = []    # drawn above arrow
    below_text_species: List[Dict] = []  # reagents shown as text below

    for sp in species_list:
        role = sp.get("role")
        if sp.get("is_sm"):
            # If several species are flagged, the last one wins.
            sm = sp
        elif sp.get("is_dp"):
            dp = sp
        elif role == "atom_contributing":
            # Non-SM atom-contributing species go above arrow as structures
            above_structures.append(sp)
        elif role == "product":
            # Additional products (not DP) — skip for now
            continue
        else:
            # "non_contributing", "reagent" and unknown roles all become
            # text below the arrow.
            below_text_species.append(sp)

    if sm is None:
        raise ValueError("No starting material (is_sm=true) found in JSON")
    if dp is None:
        raise ValueError("No desired product (is_dp=true) found in JSON")

    # Above arrow: structures + equiv text
    above_arrow = None
    if above_structures:
        above_text = []
        if include_equiv:
            for sp in above_structures:
                equiv = sp.get("csv_equiv")
                if _equiv_is_shown(equiv):
                    above_text.append(f"({equiv} eq)")
        above_arrow = ArrowContent(
            structures=[sp["id"] for sp in above_structures],
            text=above_text,
        )

    # Below arrow: reagent text + conditions
    below_lines: List[str] = []
    for sp in below_text_species:
        # Fall back to csv_name when "name" is missing *or* empty/null.
        name = sp.get("name") or sp.get("csv_name") or ""
        if not name:
            continue
        # Format: "Name (X eq.)" or just "Name"
        equiv = sp.get("csv_equiv")
        if include_equiv and _equiv_is_shown(equiv):
            below_lines.append(f"{name} ({equiv} eq.)")
        else:
            below_lines.append(name)

    # Append conditions from JSON
    below_lines.extend(conditions)

    below_arrow = ArrowContent(text=below_lines) if below_lines else None

    step = StepDescriptor(
        substrates=[sm["id"]],
        products=[dp["id"]],
        above_arrow=above_arrow,
        below_arrow=below_arrow,
    )

    # Store just the basename — the renderer resolves relative to yaml_dir
    return SchemeDescriptor(
        source=os.path.basename(reaction_json_path),
        steps=[step],
        layout="linear",
    )


def _equiv_is_shown(equiv: Any) -> bool:
    """Return True when *equiv* deserves an "(X eq)" label.

    Missing/empty values and anything numerically equal to one equivalent
    ("1", "1.0", "1.00", 1, 1.0) are suppressed; values that do not parse
    as a number are shown verbatim.
    """
    if not equiv:
        return False
    try:
        return float(str(equiv).strip()) != 1.0
    except ValueError:
        return True
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def auto_layout_to_cdxml(
    reaction_json_path: str,
    output_path: Optional[str] = None,
    include_equiv: bool = True,
) -> str:
    """
    Build the default scheme for a reaction_parser JSON and write it as CDXML.

    Parameters
    ----------
    reaction_json_path : str
        Path to reaction_parser JSON file.
    output_path : str, optional
        Destination CDXML path. When None, ``<stem>-scheme.cdxml`` is
        written next to the JSON file.
    include_equiv : bool
        Forwarded to ``auto_layout``; whether equivalents appear in labels.

    Returns
    -------
    str
        Path to the written CDXML file.
    """
    from .renderer import render_to_file

    descriptor = auto_layout(reaction_json_path, include_equiv=include_equiv)

    if output_path is None:
        # Default target sits beside the input: "<dir>/<stem>-scheme.cdxml".
        json_dir, json_name = os.path.split(reaction_json_path)
        stem, _ext = os.path.splitext(json_name)
        output_path = os.path.join(json_dir, f"{stem}-scheme.cdxml")

    # The renderer is given the JSON's absolute directory as yaml_dir.
    json_abs_dir = os.path.dirname(os.path.abspath(reaction_json_path))
    render_to_file(descriptor, output_path, yaml_dir=json_abs_dir)
    return output_path
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def main():
    """CLI entry point for auto_layout.

    Parses the command line, renders the scheme via
    ``auto_layout_to_cdxml`` and prints the written path to stdout.
    Exits with status 1 (message on stderr) when the input path is missing,
    is not a regular file, or the JSON cannot be turned into a scheme
    (``ValueError``, which also covers ``json.JSONDecodeError``).
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Auto-generate a CDXML reaction scheme from reaction_parser JSON.",
    )
    parser.add_argument(
        "input",
        help="reaction_parser JSON file",
    )
    parser.add_argument(
        "-o", "--output",
        default=None,
        help="Output CDXML file (default: {input_stem}-scheme.cdxml)",
    )
    parser.add_argument(
        "--no-equiv",
        action="store_true",
        help="Don't include equivalents in reagent text",
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Print progress to stderr",
    )
    args = parser.parse_args()

    if not os.path.exists(args.input):
        print(f"Error: file not found: {args.input}", file=sys.stderr)
        sys.exit(1)
    if not os.path.isfile(args.input):
        # exists() is also true for directories, which would only fail
        # later inside open() with a traceback.
        print(f"Error: not a file: {args.input}", file=sys.stderr)
        sys.exit(1)

    if args.verbose:
        print(f"Loading {args.input}...", file=sys.stderr)

    try:
        output_path = auto_layout_to_cdxml(
            args.input,
            output_path=args.output,
            include_equiv=not args.no_equiv,
        )
    except ValueError as exc:
        # Malformed JSON or a reaction without SM/DP is a user-input
        # problem: report it instead of dumping a traceback.
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)

    print(f"Written: {output_path}")


if __name__ == "__main__":
    main()
|