cdxml-toolkit 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cdxml_toolkit/__init__.py +18 -0
- cdxml_toolkit/_jre/__init__.py +2 -0
- cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
- cdxml_toolkit/analysis/__init__.py +35 -0
- cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
- cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
- cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
- cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
- cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
- cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
- cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
- cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
- cdxml_toolkit/analysis/extract_nmr.py +47 -0
- cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
- cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
- cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
- cdxml_toolkit/cdxml_builder.py +920 -0
- cdxml_toolkit/cdxml_utils.py +342 -0
- cdxml_toolkit/chemdraw/__init__.py +5 -0
- cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
- cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
- cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
- cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
- cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
- cdxml_toolkit/constants.py +304 -0
- cdxml_toolkit/coord_normalizer.py +438 -0
- cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
- cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
- cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
- cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
- cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
- cdxml_toolkit/image/__init__.py +15 -0
- cdxml_toolkit/image/reaction_from_image.py +2103 -0
- cdxml_toolkit/image/structure_from_image.py +1711 -0
- cdxml_toolkit/layout/__init__.py +5 -0
- cdxml_toolkit/layout/alignment.py +1642 -0
- cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
- cdxml_toolkit/layout/scheme_merger.py +2260 -0
- cdxml_toolkit/mcp_server/__init__.py +0 -0
- cdxml_toolkit/mcp_server/__main__.py +5 -0
- cdxml_toolkit/mcp_server/server.py +1567 -0
- cdxml_toolkit/naming/__init__.py +6 -0
- cdxml_toolkit/naming/aligned_namer.py +2342 -0
- cdxml_toolkit/naming/mol_builder.py +3722 -0
- cdxml_toolkit/naming/name_decomposer.py +2843 -0
- cdxml_toolkit/naming/reactions_datamol.json +2414 -0
- cdxml_toolkit/office/__init__.py +5 -0
- cdxml_toolkit/office/doc_from_template.py +722 -0
- cdxml_toolkit/office/ole_embedder.py +808 -0
- cdxml_toolkit/office/ole_extractor.py +272 -0
- cdxml_toolkit/perception/__init__.py +10 -0
- cdxml_toolkit/perception/compound_search.py +229 -0
- cdxml_toolkit/perception/eln_csv_parser.py +240 -0
- cdxml_toolkit/perception/rdf_parser.py +664 -0
- cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
- cdxml_toolkit/perception/reaction_parser.py +2150 -0
- cdxml_toolkit/perception/scheme_reader.py +2948 -0
- cdxml_toolkit/perception/scheme_refine.py +1404 -0
- cdxml_toolkit/perception/scheme_segmenter.py +619 -0
- cdxml_toolkit/perception/spatial_assignment.py +1013 -0
- cdxml_toolkit/rdkit_utils.py +605 -0
- cdxml_toolkit/render/__init__.py +17 -0
- cdxml_toolkit/render/auto_layout.py +229 -0
- cdxml_toolkit/render/compact_parser.py +632 -0
- cdxml_toolkit/render/parser.py +706 -0
- cdxml_toolkit/render/render_scheme.py +267 -0
- cdxml_toolkit/render/renderer.py +2387 -0
- cdxml_toolkit/render/schema.py +90 -0
- cdxml_toolkit/render/scheme_maker.py +1043 -0
- cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
- cdxml_toolkit/resolve/__init__.py +13 -0
- cdxml_toolkit/resolve/cas_resolver.py +430 -0
- cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
- cdxml_toolkit/resolve/condensed_formula.py +493 -0
- cdxml_toolkit/resolve/jre_manager.py +195 -0
- cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
- cdxml_toolkit/resolve/reagent_db.py +285 -0
- cdxml_toolkit/resolve/superatom_data.json +2856 -0
- cdxml_toolkit/resolve/superatom_table.py +146 -0
- cdxml_toolkit/text_formatting.py +298 -0
- cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
- cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
- cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
- cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
- cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
- cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""
|
|
2
|
+
schema.py — Dataclasses for the scheme DSL descriptor.
|
|
3
|
+
|
|
4
|
+
Represents the parsed content of a YAML scheme file. The LLM specifies
|
|
5
|
+
chemistry content and topology; the renderer handles all spatial layout.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class StructureRef:
    """Reference to a chemical structure — resolved later by the renderer.

    Only ``id`` is required; every other field defaults to ``None``.
    """

    id: str                           # user-assigned key (e.g. "ArBr")
    smiles: Optional[str] = None      # SMILES string
    name: Optional[str] = None        # compound name (for resolution)
    file: Optional[str] = None        # path to CDXML file
    cdxml_id: Optional[int] = None    # existing fragment ID
    label: Optional[str] = None       # compound number displayed below (e.g. "1")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class ArrowContent:
    """Content placed above or below an arrow.

    Both fields default to a fresh empty list per instance.
    """

    structures: list[str] = field(default_factory=list)  # refs to StructureRef ids
    text: list[str] = field(default_factory=list)        # condition text lines
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class StepDescriptor:
    """A single reaction step.

    ``substrates`` and ``products`` are required; all other fields are
    optional. The dataclass itself performs no validation of its values.
    """

    substrates: list[str]             # refs to StructureRef ids
    products: list[str]               # refs to StructureRef ids
    above_arrow: Optional[ArrowContent] = None
    below_arrow: Optional[ArrowContent] = None
    yield_: Optional[str] = None
    number: Optional[int] = None      # for numbered steps
    id: Optional[str] = None
    arrow_style: str = "solid"        # "solid", "dashed", "failed" (X on arrow)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class RunArrowEntry:
    """A single run (one scale) of a reaction step.

    ``input_label`` and ``output_label`` are required; ``note`` is optional.
    """

    input_label: str                  # e.g. "2.15 g"
    output_label: str                 # e.g. "1.60 g, 72% yield"
    note: Optional[str] = None        # per-run annotation, e.g. "HATU (1.2 eq)"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class StepRunArrows:
    """Run arrows for a specific step (may have multiple scales).

    ``runs`` defaults to a fresh empty list per instance.
    """

    step: int                         # 1-indexed step number
    runs: list[RunArrowEntry] = field(default_factory=list)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Accepted values for the ``layout`` keyword.
VALID_LAYOUTS = frozenset({
    "linear", "sequential", "divergent", "stacked-rows",
    "numbered-parallel", "convergent",
})

# Accepted values for the ``wrap`` keyword.
VALID_WRAPS = frozenset({"repeat", "serpentine", "none"})

# Accepted values for a step's ``arrow_style``.
VALID_ARROW_STYLES = frozenset({"solid", "dashed", "failed"})
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class SectionDescriptor:
    """A section in a stacked-rows layout.

    All fields are optional; ``steps`` defaults to a fresh empty list per
    instance and ``layout`` defaults to ``"linear"``.
    """

    label: Optional[str] = None       # "(i)", "(a)", etc.
    steps: list[StepDescriptor] = field(default_factory=list)
    layout: str = "linear"            # each section's internal layout
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
class SchemeDescriptor:
    """Complete scheme description.

    Every field has a default, so an empty descriptor can be constructed
    with no arguments. Container fields get a fresh empty container per
    instance.
    """

    source: Optional[str] = None      # path to reaction_parser JSON file
    structures: dict[str, StructureRef] = field(default_factory=dict)
    steps: list[StepDescriptor] = field(default_factory=list)
    layout: str = "linear"            # layout pattern keyword
    wrap: str = "repeat"              # "repeat", "serpentine", "none"
    steps_per_row: Optional[int] = None  # auto-computed if omitted
    title: Optional[str] = None
    run_arrows: list[StepRunArrows] = field(default_factory=list)
    condition_key: Optional[dict[str, str]] = None  # letter conditions: {"a": "..."}
    sections: list[SectionDescriptor] = field(default_factory=list)
|