cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. cdxml_toolkit/__init__.py +18 -0
  2. cdxml_toolkit/_jre/__init__.py +2 -0
  3. cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
  4. cdxml_toolkit/analysis/__init__.py +35 -0
  5. cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
  6. cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
  7. cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
  8. cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
  9. cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
  10. cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
  11. cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
  12. cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
  13. cdxml_toolkit/analysis/extract_nmr.py +47 -0
  14. cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
  15. cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
  16. cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
  17. cdxml_toolkit/cdxml_builder.py +920 -0
  18. cdxml_toolkit/cdxml_utils.py +342 -0
  19. cdxml_toolkit/chemdraw/__init__.py +5 -0
  20. cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
  21. cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
  22. cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
  23. cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
  24. cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
  25. cdxml_toolkit/constants.py +304 -0
  26. cdxml_toolkit/coord_normalizer.py +438 -0
  27. cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
  28. cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
  29. cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
  30. cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
  31. cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
  32. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
  33. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
  34. cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
  35. cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
  36. cdxml_toolkit/image/__init__.py +15 -0
  37. cdxml_toolkit/image/reaction_from_image.py +2103 -0
  38. cdxml_toolkit/image/structure_from_image.py +1711 -0
  39. cdxml_toolkit/layout/__init__.py +5 -0
  40. cdxml_toolkit/layout/alignment.py +1642 -0
  41. cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
  42. cdxml_toolkit/layout/scheme_merger.py +2260 -0
  43. cdxml_toolkit/mcp_server/__init__.py +0 -0
  44. cdxml_toolkit/mcp_server/__main__.py +5 -0
  45. cdxml_toolkit/mcp_server/server.py +1567 -0
  46. cdxml_toolkit/naming/__init__.py +6 -0
  47. cdxml_toolkit/naming/aligned_namer.py +2342 -0
  48. cdxml_toolkit/naming/mol_builder.py +3722 -0
  49. cdxml_toolkit/naming/name_decomposer.py +2843 -0
  50. cdxml_toolkit/naming/reactions_datamol.json +2414 -0
  51. cdxml_toolkit/office/__init__.py +5 -0
  52. cdxml_toolkit/office/doc_from_template.py +722 -0
  53. cdxml_toolkit/office/ole_embedder.py +808 -0
  54. cdxml_toolkit/office/ole_extractor.py +272 -0
  55. cdxml_toolkit/perception/__init__.py +10 -0
  56. cdxml_toolkit/perception/compound_search.py +229 -0
  57. cdxml_toolkit/perception/eln_csv_parser.py +240 -0
  58. cdxml_toolkit/perception/rdf_parser.py +664 -0
  59. cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
  60. cdxml_toolkit/perception/reaction_parser.py +2150 -0
  61. cdxml_toolkit/perception/scheme_reader.py +2948 -0
  62. cdxml_toolkit/perception/scheme_refine.py +1404 -0
  63. cdxml_toolkit/perception/scheme_segmenter.py +619 -0
  64. cdxml_toolkit/perception/spatial_assignment.py +1013 -0
  65. cdxml_toolkit/rdkit_utils.py +605 -0
  66. cdxml_toolkit/render/__init__.py +17 -0
  67. cdxml_toolkit/render/auto_layout.py +229 -0
  68. cdxml_toolkit/render/compact_parser.py +632 -0
  69. cdxml_toolkit/render/parser.py +706 -0
  70. cdxml_toolkit/render/render_scheme.py +267 -0
  71. cdxml_toolkit/render/renderer.py +2387 -0
  72. cdxml_toolkit/render/schema.py +90 -0
  73. cdxml_toolkit/render/scheme_maker.py +1043 -0
  74. cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
  75. cdxml_toolkit/resolve/__init__.py +13 -0
  76. cdxml_toolkit/resolve/cas_resolver.py +430 -0
  77. cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
  78. cdxml_toolkit/resolve/condensed_formula.py +493 -0
  79. cdxml_toolkit/resolve/jre_manager.py +195 -0
  80. cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
  81. cdxml_toolkit/resolve/reagent_db.py +285 -0
  82. cdxml_toolkit/resolve/superatom_data.json +2856 -0
  83. cdxml_toolkit/resolve/superatom_table.py +146 -0
  84. cdxml_toolkit/text_formatting.py +298 -0
  85. cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
  86. cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
  87. cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
  88. cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
  89. cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
  90. cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
  91. cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,267 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ render_scheme.py — CLI entry point for scheme → CDXML rendering.
4
+
5
+ Accepts both YAML and compact syntax input files.
6
+
7
+ Usage:
8
+ python -m cdxml_toolkit.render.render_scheme examples/simple_linear.yaml -o output.cdxml
9
+ python -m cdxml_toolkit.render.render_scheme examples/buchwald.yaml
10
+ python -m cdxml_toolkit.render.render_scheme scheme.txt --format compact
11
+ """
12
+
13
+ import argparse
14
+ import os
15
+ import sys
16
+
17
+ from .parser import SchemeParseError, parse_yaml
18
+ from .compact_parser import parse_compact_file, ParseError
19
+ from .scheme_yaml_writer import write_scheme_yaml
20
+ from .renderer import render, render_to_file
21
+
22
# File extensions that select a parser without looking at the file content.
_COMPACT_EXTENSIONS = {".rxn", ".scheme", ".txt"}
_YAML_EXTENSIONS = {".yaml", ".yml"}

# Arrow tokens that mark the first meaningful line as compact syntax.
_COMPACT_ARROW_TOKENS = ("-->", "..>", "X>", "x>")


def _detect_format(input_path: str) -> str:
    """Guess the input format of *input_path*.

    The (case-insensitive) file extension decides when it is one of the
    known YAML or compact extensions.  Otherwise the file is opened as
    UTF-8 and its first line that is neither blank nor a ``#`` comment is
    inspected: if it contains one of the compact arrow tokens the file is
    treated as compact syntax.  Only that single line is examined.

    Args:
        input_path: Path of the input file.  It does not have to exist
            when the extension alone is conclusive.

    Returns:
        ``"yaml"`` or ``"compact"``.  ``"yaml"`` is the fallback whenever
        the content cannot be read or decoded, or no arrow token is found.
    """
    ext = os.path.splitext(input_path)[1].lower()
    if ext in _YAML_EXTENSIONS:
        return "yaml"
    if ext in _COMPACT_EXTENSIONS:
        return "compact"

    # Unknown extension: sniff the first meaningful line.  This is a
    # best-effort guess, so unreadable *and* undecodable files both fall
    # through to the default instead of raising.
    try:
        with open(input_path, "r", encoding="utf-8") as f:
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith("#"):
                    continue
                if any(token in stripped for token in _COMPACT_ARROW_TOKENS):
                    return "compact"
                break
    except (OSError, UnicodeDecodeError):
        pass

    return "yaml"
52
+
53
+
54
def _build_arg_parser():
    """Build the ``argparse`` parser for the scheme-rendering CLI."""
    parser = argparse.ArgumentParser(
        description="Render a reaction scheme (YAML, compact syntax, or JSON) to CDXML.",
    )
    parser.add_argument(
        "input",
        nargs="?",
        default=None,
        help="Input file (YAML or compact syntax)",
    )
    parser.add_argument(
        "--from-json",
        nargs="+",
        default=None,
        metavar="JSON",
        help="Render from one or more reaction_parser JSON files",
    )
    parser.add_argument(
        "-o", "--output",
        default=None,
        help="Output CDXML file (default: input stem + .cdxml)",
    )
    parser.add_argument(
        "--format",
        choices=["yaml", "compact"],
        default=None,
        help="Input format (auto-detected from extension if omitted)",
    )
    parser.add_argument(
        "--layout",
        default="auto",
        help="Layout override for --from-json (default: auto)",
    )
    parser.add_argument(
        "--no-run-arrows",
        action="store_true",
        help="Suppress run arrows (SM mass → product yield)",
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Print progress to stderr",
    )
    return parser


def _exit_with_error(message):
    """Print *message* to stderr and terminate with exit status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def _count_steps(scheme):
    """Return the number of steps in *scheme*, including section steps.

    Adds ``len(scheme.steps)`` to the ``steps`` of every entry in
    ``scheme.sections``.  A missing or empty ``sections`` attribute
    contributes nothing, so both CLI modes report the same total.
    """
    sections = getattr(scheme, "sections", None) or ()
    return len(scheme.steps) + sum(len(section.steps) for section in sections)


def _write_individual_schemes(json_paths, layout, include_run, verbose):
    """Write a ``<stem>-scheme.yaml``/``.cdxml`` pair next to each JSON.

    This is best-effort: a failure for one file is reported as a warning
    on stderr and the remaining files are still processed.
    """
    for jp in json_paths:
        stem = os.path.splitext(os.path.basename(jp))[0]
        out_dir = os.path.dirname(jp) or "."
        ind_yaml = os.path.join(out_dir, f"{stem}-scheme.yaml")
        ind_cdxml = os.path.join(out_dir, f"{stem}-scheme.cdxml")
        try:
            write_scheme_yaml(jp, ind_yaml, layout=layout,
                              include_run_arrows=include_run)
            ind_scheme = parse_yaml(ind_yaml)
            ind_dir = os.path.dirname(os.path.abspath(ind_yaml))
            render_to_file(ind_scheme, ind_cdxml, yaml_dir=ind_dir)
            if verbose:
                print(f"Individual: {ind_cdxml}", file=sys.stderr)
        except Exception as e:
            print(f"Warning: individual render failed for {jp}: {e}",
                  file=sys.stderr)


def _render_from_json(args):
    """Handle ``--from-json``: JSON file(s) -> scheme YAML -> CDXML.

    With one JSON file, a YAML is written beside the output and rendered.
    With several, each JSON is first rendered individually (best-effort),
    then a merged YAML is written and rendered to the output path.
    Exits with status 1 on a missing JSON file, a YAML writer failure,
    a parse error in the generated YAML, or a render failure.
    """
    json_paths = args.from_json
    for jp in json_paths:
        if not os.path.exists(jp):
            _exit_with_error(f"Error: JSON file not found: {jp}")

    include_run = not args.no_run_arrows
    output_path = args.output

    if len(json_paths) == 1:
        # Single JSON: one YAML, one CDXML.
        json_path = json_paths[0]
        if output_path is None:
            stem = os.path.splitext(os.path.basename(json_path))[0]
            output_path = os.path.join(
                os.path.dirname(json_path) or ".", f"{stem}-scheme.cdxml")

        # The intermediate YAML sits next to the output, same stem.
        yaml_path = os.path.splitext(output_path)[0] + ".yaml"
        if args.verbose:
            print(f"Writing scheme YAML: {json_path} -> {yaml_path}",
                  file=sys.stderr)

        try:
            write_scheme_yaml(
                json_path, yaml_path,
                layout=args.layout,
                include_run_arrows=include_run,
            )
        except Exception as e:
            _exit_with_error(f"YAML writer error: {e}")
    else:
        # Multiple JSONs: produce individual + merged.
        if output_path is None:
            output_path = os.path.join(
                os.path.dirname(json_paths[0]) or ".",
                "merged-scheme.cdxml")

        _write_individual_schemes(
            json_paths, args.layout, include_run, args.verbose)

        yaml_path = os.path.splitext(output_path)[0] + ".yaml"
        if args.verbose:
            print(f"Writing merged YAML: {yaml_path}", file=sys.stderr)

        try:
            # Imported here so an import failure is reported like any
            # other merged-writer error instead of as a traceback.
            from .scheme_yaml_writer import write_merged_scheme_yaml
            write_merged_scheme_yaml(
                json_paths, yaml_path,
                layout=args.layout,
                include_run_arrows=include_run,
            )
        except Exception as e:
            _exit_with_error(f"Merged YAML writer error: {e}")

    # Render YAML → CDXML
    if args.verbose:
        print(f"Rendering: {yaml_path} -> {output_path}", file=sys.stderr)

    try:
        scheme = parse_yaml(yaml_path)
    except SchemeParseError as e:
        _exit_with_error(f"Parse error in generated YAML: {e}")

    if args.verbose:
        print(
            f" {len(scheme.structures)} structures, "
            f"{_count_steps(scheme)} steps, "
            f"layout={scheme.layout}",
            file=sys.stderr,
        )

    try:
        yaml_dir = os.path.dirname(os.path.abspath(yaml_path))
        render_to_file(scheme, output_path, yaml_dir=yaml_dir)
    except Exception as e:
        _exit_with_error(f"Render error: {e}")

    print(f"Written: {output_path}")


def _render_from_file(args):
    """Handle the standard mode: YAML or compact input file -> CDXML.

    Exits with status 1 when no input is given, the input cannot be
    found, parsing raises ``SchemeParseError``/``ParseError``, or
    rendering fails.
    """
    if not args.input:
        _exit_with_error("Error: input file required (or use --from-json)")

    # Resolve input path: a relative path is tried against the CWD first,
    # then against the directory containing this module.
    input_path = args.input
    if not os.path.isabs(input_path) and not os.path.exists(input_path):
        alt = os.path.join(os.path.dirname(__file__), input_path)
        if os.path.exists(alt):
            input_path = alt

    if not os.path.exists(input_path):
        _exit_with_error(f"Error: input file not found: {input_path}")

    # Resolve output path: default is the input stem with .cdxml, placed
    # in the input file's directory.
    output_path = args.output
    if output_path is None:
        stem = os.path.splitext(os.path.basename(input_path))[0]
        output_path = os.path.join(os.path.dirname(input_path), f"{stem}.cdxml")

    # An explicit --format wins over auto-detection.
    fmt = args.format or _detect_format(input_path)

    if args.verbose:
        print(f"Parsing {input_path} (format: {fmt})...", file=sys.stderr)
    try:
        if fmt == "compact":
            scheme = parse_compact_file(input_path)
        else:
            scheme = parse_yaml(input_path)
    except (SchemeParseError, ParseError) as e:
        _exit_with_error(f"Parse error: {e}")

    if args.verbose:
        source_info = f", source={scheme.source}" if scheme.source else ""
        print(
            f" {len(scheme.structures)} structures, "
            f"{_count_steps(scheme)} steps, "
            f"layout={scheme.layout}{source_info}",
            file=sys.stderr,
        )

    if args.verbose:
        print(f"Rendering to {output_path}...", file=sys.stderr)
    try:
        yaml_dir = os.path.dirname(os.path.abspath(input_path))
        render_to_file(scheme, output_path, yaml_dir=yaml_dir)
    except Exception as e:
        _exit_with_error(f"Render error: {e}")

    print(f"Written: {output_path}")


def main(argv=None):
    """Command-line entry point: render a reaction scheme to CDXML.

    Args:
        argv: Optional list of argument strings (without the program
            name).  ``None`` (the default) makes ``argparse`` read
            ``sys.argv[1:]``, so existing callers are unaffected.

    Dispatches to the ``--from-json`` pipeline when that option is given,
    otherwise to the YAML/compact input-file pipeline.  On success the
    output path is printed to stdout; any failure prints a message to
    stderr and exits with status 1.
    """
    args = _build_arg_parser().parse_args(argv)

    if args.from_json:
        _render_from_json(args)
        return

    _render_from_file(args)


if __name__ == "__main__":
    main()