cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. cdxml_toolkit/__init__.py +18 -0
  2. cdxml_toolkit/_jre/__init__.py +2 -0
  3. cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
  4. cdxml_toolkit/analysis/__init__.py +35 -0
  5. cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
  6. cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
  7. cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
  8. cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
  9. cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
  10. cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
  11. cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
  12. cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
  13. cdxml_toolkit/analysis/extract_nmr.py +47 -0
  14. cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
  15. cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
  16. cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
  17. cdxml_toolkit/cdxml_builder.py +920 -0
  18. cdxml_toolkit/cdxml_utils.py +342 -0
  19. cdxml_toolkit/chemdraw/__init__.py +5 -0
  20. cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
  21. cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
  22. cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
  23. cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
  24. cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
  25. cdxml_toolkit/constants.py +304 -0
  26. cdxml_toolkit/coord_normalizer.py +438 -0
  27. cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
  28. cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
  29. cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
  30. cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
  31. cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
  32. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
  33. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
  34. cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
  35. cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
  36. cdxml_toolkit/image/__init__.py +15 -0
  37. cdxml_toolkit/image/reaction_from_image.py +2103 -0
  38. cdxml_toolkit/image/structure_from_image.py +1711 -0
  39. cdxml_toolkit/layout/__init__.py +5 -0
  40. cdxml_toolkit/layout/alignment.py +1642 -0
  41. cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
  42. cdxml_toolkit/layout/scheme_merger.py +2260 -0
  43. cdxml_toolkit/mcp_server/__init__.py +0 -0
  44. cdxml_toolkit/mcp_server/__main__.py +5 -0
  45. cdxml_toolkit/mcp_server/server.py +1567 -0
  46. cdxml_toolkit/naming/__init__.py +6 -0
  47. cdxml_toolkit/naming/aligned_namer.py +2342 -0
  48. cdxml_toolkit/naming/mol_builder.py +3722 -0
  49. cdxml_toolkit/naming/name_decomposer.py +2843 -0
  50. cdxml_toolkit/naming/reactions_datamol.json +2414 -0
  51. cdxml_toolkit/office/__init__.py +5 -0
  52. cdxml_toolkit/office/doc_from_template.py +722 -0
  53. cdxml_toolkit/office/ole_embedder.py +808 -0
  54. cdxml_toolkit/office/ole_extractor.py +272 -0
  55. cdxml_toolkit/perception/__init__.py +10 -0
  56. cdxml_toolkit/perception/compound_search.py +229 -0
  57. cdxml_toolkit/perception/eln_csv_parser.py +240 -0
  58. cdxml_toolkit/perception/rdf_parser.py +664 -0
  59. cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
  60. cdxml_toolkit/perception/reaction_parser.py +2150 -0
  61. cdxml_toolkit/perception/scheme_reader.py +2948 -0
  62. cdxml_toolkit/perception/scheme_refine.py +1404 -0
  63. cdxml_toolkit/perception/scheme_segmenter.py +619 -0
  64. cdxml_toolkit/perception/spatial_assignment.py +1013 -0
  65. cdxml_toolkit/rdkit_utils.py +605 -0
  66. cdxml_toolkit/render/__init__.py +17 -0
  67. cdxml_toolkit/render/auto_layout.py +229 -0
  68. cdxml_toolkit/render/compact_parser.py +632 -0
  69. cdxml_toolkit/render/parser.py +706 -0
  70. cdxml_toolkit/render/render_scheme.py +267 -0
  71. cdxml_toolkit/render/renderer.py +2387 -0
  72. cdxml_toolkit/render/schema.py +90 -0
  73. cdxml_toolkit/render/scheme_maker.py +1043 -0
  74. cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
  75. cdxml_toolkit/resolve/__init__.py +13 -0
  76. cdxml_toolkit/resolve/cas_resolver.py +430 -0
  77. cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
  78. cdxml_toolkit/resolve/condensed_formula.py +493 -0
  79. cdxml_toolkit/resolve/jre_manager.py +195 -0
  80. cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
  81. cdxml_toolkit/resolve/reagent_db.py +285 -0
  82. cdxml_toolkit/resolve/superatom_data.json +2856 -0
  83. cdxml_toolkit/resolve/superatom_table.py +146 -0
  84. cdxml_toolkit/text_formatting.py +298 -0
  85. cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
  86. cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
  87. cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
  88. cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
  89. cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
  90. cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
  91. cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,229 @@
1
+ """
2
+ auto_layout.py — Generate a default SchemeDescriptor from reaction_parser JSON.
3
+
4
+ The "zero-effort" path: reads the JSON, puts SM on left, DP on right,
5
+ atom-contributing species above arrow, conditions and non-contributing
6
+ species as text below arrow.
7
+
8
+ Usage:
9
+ from cdxml_toolkit.render.auto_layout import auto_layout
10
+ scheme = auto_layout("reaction.json")
11
+
12
+ # Or from CLI:
13
+ python -m cdxml_toolkit.render.auto_layout reaction.json -o scheme.cdxml
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import os
20
+ import sys
21
+ from typing import Any, Dict, List, Optional
22
+
23
+ from .schema import (
24
+ ArrowContent,
25
+ SchemeDescriptor,
26
+ StepDescriptor,
27
+ StructureRef,
28
+ )
29
+
30
+
31
def auto_layout(
    reaction_json_path: str,
    include_equiv: bool = True,
) -> SchemeDescriptor:
    """
    Generate a default SchemeDescriptor from reaction_parser output.

    Reads the JSON and sorts every entry of ``species`` into a layout slot.
    The first matching rule wins:

    - ``is_sm`` truthy → substrate (left of arrow)
    - ``is_dp`` truthy → product (right of arrow)
    - role ``atom_contributing`` → structure above the arrow
    - role ``product`` (an extra product that is not the DP) → omitted
    - anything else (``non_contributing``, ``reagent``, unknown or missing
      role) → one text line below the arrow

    Entries of ``conditions`` are appended below the arrow after the
    reagent lines. If several species are flagged ``is_sm`` (or ``is_dp``),
    the last one in the list is used.

    Parameters
    ----------
    reaction_json_path : str
        Path to reaction_parser JSON file.
    include_equiv : bool
        Whether to include equivalents in text labels (default True).
        An equivalents value that is empty or exactly ``"1.0"`` is never
        shown.

    Returns
    -------
    SchemeDescriptor
        Single-step, ``"linear"`` layout descriptor whose ``source`` is the
        basename of ``reaction_json_path``.

    Raises
    ------
    ValueError
        If no species is flagged ``is_sm`` or none is flagged ``is_dp``.
    """
    with open(reaction_json_path, encoding="utf-8") as f:
        data = json.load(f)

    # ``or []`` (instead of a .get() default) also tolerates an explicit
    # JSON null for either key, which would otherwise break the loop /
    # the extend() below.
    species_list = data.get("species") or []
    conditions = data.get("conditions") or []

    # Classify species by role and position
    sm = None
    dp = None
    above_structures: List[Dict] = []    # drawn above arrow
    below_text_species: List[Dict] = []  # reagents shown as text below

    for sp in species_list:
        role = sp.get("role")
        if sp.get("is_sm"):
            sm = sp
        elif sp.get("is_dp"):
            dp = sp
        elif role == "atom_contributing":
            # Non-SM atom-contributing species go above arrow as structures
            above_structures.append(sp)
        elif role == "product":
            # Additional products (not DP) — skip for now
            continue
        else:
            # non_contributing / reagent / unknown role — text below arrow
            below_text_species.append(sp)

    if sm is None:
        raise ValueError("No starting material (is_sm=true) found in JSON")
    if dp is None:
        raise ValueError("No desired product (is_dp=true) found in JSON")

    # Above arrow: structures + equiv text
    above_arrow = None
    if above_structures:
        above_text: List[str] = []
        if include_equiv:
            for sp in above_structures:
                equiv = sp.get("csv_equiv")
                if equiv and equiv != "1.0":
                    above_text.append(f"({equiv} eq)")
        above_arrow = ArrowContent(
            structures=[sp["id"] for sp in above_structures],
            text=above_text,
        )

    # Below arrow: reagent text + conditions
    below_lines: List[str] = []
    for sp in below_text_species:
        # Chain with ``or`` so that a present-but-null/empty "name" still
        # falls back to "csv_name"; dict.get's default only applies when
        # the key is missing altogether.
        name = sp.get("name") or sp.get("csv_name") or ""
        if not name:
            continue
        # Format: "Name (X eq.)" or just "Name"
        equiv = sp.get("csv_equiv")
        if include_equiv and equiv and equiv != "1.0":
            below_lines.append(f"{name} ({equiv} eq.)")
        else:
            below_lines.append(name)

    # Append conditions from JSON
    below_lines.extend(conditions)

    below_arrow = ArrowContent(text=below_lines) if below_lines else None

    step = StepDescriptor(
        substrates=[sm["id"]],
        products=[dp["id"]],
        above_arrow=above_arrow,
        below_arrow=below_arrow,
    )

    # Store just the basename — the renderer resolves relative to yaml_dir
    return SchemeDescriptor(
        source=os.path.basename(reaction_json_path),
        steps=[step],
        layout="linear",
    )
144
+
145
+
146
def auto_layout_to_cdxml(
    reaction_json_path: str,
    output_path: Optional[str] = None,
    include_equiv: bool = True,
) -> str:
    """
    Build the default scheme for a reaction_parser JSON and write it as CDXML.

    Parameters
    ----------
    reaction_json_path : str
        Path to reaction_parser JSON file.
    output_path : str, optional
        Destination CDXML path. When omitted, the file is written next to
        the JSON as ``<json stem>-scheme.cdxml``.
    include_equiv : bool
        Forwarded to ``auto_layout``; controls whether equivalents appear
        in text labels.

    Returns
    -------
    str
        Path of the CDXML file that was written.
    """
    from .renderer import render_to_file

    descriptor = auto_layout(reaction_json_path, include_equiv=include_equiv)

    if output_path is None:
        # Derive "<dir>/<stem>-scheme.cdxml" from the input path.
        json_dir, json_file = os.path.split(reaction_json_path)
        base, _ext = os.path.splitext(json_file)
        output_path = os.path.join(json_dir, f"{base}-scheme.cdxml")

    # The descriptor's source is a bare filename, so hand the renderer the
    # absolute directory of the JSON to resolve it against.
    source_dir = os.path.dirname(os.path.abspath(reaction_json_path))
    render_to_file(descriptor, output_path, yaml_dir=source_dir)
    return output_path
182
+
183
+
184
def main():
    """Command-line front end: render a reaction_parser JSON to CDXML.

    Parses ``sys.argv``, exits with status 1 (message on stderr) when the
    input file does not exist, otherwise renders the scheme and prints the
    path of the written file to stdout.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="Auto-generate a CDXML reaction scheme from reaction_parser JSON.",
    )
    cli.add_argument("input", help="reaction_parser JSON file")
    cli.add_argument(
        "-o", "--output",
        default=None,
        help="Output CDXML file (default: {input_stem}-scheme.cdxml)",
    )
    cli.add_argument(
        "--no-equiv",
        action="store_true",
        help="Don't include equivalents in reagent text",
    )
    cli.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Print progress to stderr",
    )
    opts = cli.parse_args()

    json_path = opts.input
    if not os.path.exists(json_path):
        print(f"Error: file not found: {json_path}", file=sys.stderr)
        sys.exit(1)

    if opts.verbose:
        print(f"Loading {json_path}...", file=sys.stderr)

    # --no-equiv is a negative flag; invert it for the include_equiv keyword.
    written = auto_layout_to_cdxml(
        json_path,
        output_path=opts.output,
        include_equiv=not opts.no_equiv,
    )

    print(f"Written: {written}")
226
+
227
+
228
# Script entry point: run the CLI only when this file is executed directly
# (for example via ``python -m``), not when it is imported as a module.
if __name__ == "__main__":
    main()