cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. cdxml_toolkit/__init__.py +18 -0
  2. cdxml_toolkit/_jre/__init__.py +2 -0
  3. cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
  4. cdxml_toolkit/analysis/__init__.py +35 -0
  5. cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
  6. cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
  7. cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
  8. cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
  9. cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
  10. cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
  11. cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
  12. cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
  13. cdxml_toolkit/analysis/extract_nmr.py +47 -0
  14. cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
  15. cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
  16. cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
  17. cdxml_toolkit/cdxml_builder.py +920 -0
  18. cdxml_toolkit/cdxml_utils.py +342 -0
  19. cdxml_toolkit/chemdraw/__init__.py +5 -0
  20. cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
  21. cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
  22. cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
  23. cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
  24. cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
  25. cdxml_toolkit/constants.py +304 -0
  26. cdxml_toolkit/coord_normalizer.py +438 -0
  27. cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
  28. cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
  29. cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
  30. cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
  31. cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
  32. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
  33. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
  34. cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
  35. cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
  36. cdxml_toolkit/image/__init__.py +15 -0
  37. cdxml_toolkit/image/reaction_from_image.py +2103 -0
  38. cdxml_toolkit/image/structure_from_image.py +1711 -0
  39. cdxml_toolkit/layout/__init__.py +5 -0
  40. cdxml_toolkit/layout/alignment.py +1642 -0
  41. cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
  42. cdxml_toolkit/layout/scheme_merger.py +2260 -0
  43. cdxml_toolkit/mcp_server/__init__.py +0 -0
  44. cdxml_toolkit/mcp_server/__main__.py +5 -0
  45. cdxml_toolkit/mcp_server/server.py +1567 -0
  46. cdxml_toolkit/naming/__init__.py +6 -0
  47. cdxml_toolkit/naming/aligned_namer.py +2342 -0
  48. cdxml_toolkit/naming/mol_builder.py +3722 -0
  49. cdxml_toolkit/naming/name_decomposer.py +2843 -0
  50. cdxml_toolkit/naming/reactions_datamol.json +2414 -0
  51. cdxml_toolkit/office/__init__.py +5 -0
  52. cdxml_toolkit/office/doc_from_template.py +722 -0
  53. cdxml_toolkit/office/ole_embedder.py +808 -0
  54. cdxml_toolkit/office/ole_extractor.py +272 -0
  55. cdxml_toolkit/perception/__init__.py +10 -0
  56. cdxml_toolkit/perception/compound_search.py +229 -0
  57. cdxml_toolkit/perception/eln_csv_parser.py +240 -0
  58. cdxml_toolkit/perception/rdf_parser.py +664 -0
  59. cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
  60. cdxml_toolkit/perception/reaction_parser.py +2150 -0
  61. cdxml_toolkit/perception/scheme_reader.py +2948 -0
  62. cdxml_toolkit/perception/scheme_refine.py +1404 -0
  63. cdxml_toolkit/perception/scheme_segmenter.py +619 -0
  64. cdxml_toolkit/perception/spatial_assignment.py +1013 -0
  65. cdxml_toolkit/rdkit_utils.py +605 -0
  66. cdxml_toolkit/render/__init__.py +17 -0
  67. cdxml_toolkit/render/auto_layout.py +229 -0
  68. cdxml_toolkit/render/compact_parser.py +632 -0
  69. cdxml_toolkit/render/parser.py +706 -0
  70. cdxml_toolkit/render/render_scheme.py +267 -0
  71. cdxml_toolkit/render/renderer.py +2387 -0
  72. cdxml_toolkit/render/schema.py +90 -0
  73. cdxml_toolkit/render/scheme_maker.py +1043 -0
  74. cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
  75. cdxml_toolkit/resolve/__init__.py +13 -0
  76. cdxml_toolkit/resolve/cas_resolver.py +430 -0
  77. cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
  78. cdxml_toolkit/resolve/condensed_formula.py +493 -0
  79. cdxml_toolkit/resolve/jre_manager.py +195 -0
  80. cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
  81. cdxml_toolkit/resolve/reagent_db.py +285 -0
  82. cdxml_toolkit/resolve/superatom_data.json +2856 -0
  83. cdxml_toolkit/resolve/superatom_table.py +146 -0
  84. cdxml_toolkit/text_formatting.py +298 -0
  85. cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
  86. cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
  87. cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
  88. cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
  89. cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
  90. cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
  91. cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,90 @@
1
+ """
2
+ schema.py — Dataclasses for the scheme DSL descriptor.
3
+
4
+ Represents the parsed content of a YAML scheme file. The LLM specifies
5
+ chemistry content and topology; the renderer handles all spatial layout.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import Optional
12
+
13
+
14
@dataclass
class StructureRef:
    """Reference to a chemical structure, resolved later by the renderer.

    Attributes:
        id: User-assigned key (e.g. "ArBr").
        smiles: SMILES string.
        name: Compound name (for resolution).
        file: Path to a CDXML file.
        cdxml_id: Existing fragment ID.
        label: Compound number displayed below (e.g. "1").
    """

    # Only the key is required; every other field defaults to None.
    id: str

    smiles: Optional[str] = None
    name: Optional[str] = None
    file: Optional[str] = None
    cdxml_id: Optional[int] = None

    label: Optional[str] = None
23
+
24
+
25
@dataclass
class ArrowContent:
    """Content placed above or below an arrow.

    Attributes:
        structures: Refs to StructureRef ids.
        text: Condition text lines.
    """

    # default_factory gives every instance its own fresh list.
    structures: list[str] = field(default_factory=list)
    text: list[str] = field(default_factory=list)
30
+
31
+
32
@dataclass
class StepDescriptor:
    """A single reaction step.

    Attributes:
        substrates: Refs to StructureRef ids.
        products: Refs to StructureRef ids.
        above_arrow: Optional ArrowContent for the space above the arrow.
        below_arrow: Optional ArrowContent for the space below the arrow.
        yield_: Optional yield string.
        number: Step number, for numbered steps.
        id: Optional identifier for the step.
        arrow_style: "solid", "dashed", or "failed" (X on arrow).
    """

    # Required: both sides of the reaction arrow.
    substrates: list[str]
    products: list[str]

    # Optional annotations; each defaults to None.
    above_arrow: Optional[ArrowContent] = None
    below_arrow: Optional[ArrowContent] = None
    yield_: Optional[str] = None
    number: Optional[int] = None
    id: Optional[str] = None

    arrow_style: str = "solid"
43
+
44
+
45
@dataclass
class RunArrowEntry:
    """A single run (one scale) of a reaction step.

    Attributes:
        input_label: Input-side label, e.g. "2.15 g".
        output_label: Output-side label, e.g. "1.60 g, 72% yield".
        note: Per-run annotation, e.g. "HATU (1.2 eq)".
    """

    input_label: str
    output_label: str

    note: Optional[str] = None
51
+
52
+
53
@dataclass
class StepRunArrows:
    """Run arrows for a specific step (may have multiple scales).

    Attributes:
        step: 1-indexed step number.
        runs: Run entries for that step; a fresh empty list by default.
    """

    step: int

    runs: list[RunArrowEntry] = field(default_factory=list)
58
+
59
+
60
# Recognised keyword sets for the scheme DSL. They are immutable so they can
# be shared safely; nothing visible in this module enforces them, so
# validation presumably happens in whatever code parses the YAML.

# Layout pattern keywords.
VALID_LAYOUTS = frozenset((
    "linear",
    "sequential",
    "divergent",
    "stacked-rows",
    "numbered-parallel",
    "convergent",
))

# Row-wrapping keywords.
VALID_WRAPS = frozenset(("repeat", "serpentine", "none"))

# Arrow style keywords.
VALID_ARROW_STYLES = frozenset(("solid", "dashed", "failed"))
68
+
69
+
70
@dataclass
class SectionDescriptor:
    """A section in a stacked-rows layout.

    Attributes:
        label: Section label such as "(i)" or "(a)".
        steps: StepDescriptor entries in this section; a fresh empty list
            by default.
        layout: The section's internal layout; defaults to "linear".
    """

    label: Optional[str] = None
    steps: list[StepDescriptor] = field(default_factory=list)
    layout: str = "linear"
76
+
77
+
78
@dataclass
class SchemeDescriptor:
    """Complete scheme description.

    Attributes:
        source: Path to a reaction_parser JSON file.
        structures: StructureRef objects stored in a dict with string keys.
        steps: StepDescriptor entries of the scheme.
        layout: Layout pattern keyword; defaults to "linear".
        wrap: "repeat", "serpentine", or "none"; defaults to "repeat".
        steps_per_row: Steps per row; auto-computed if omitted.
        title: Optional scheme title.
        run_arrows: StepRunArrows entries.
        condition_key: Letter conditions, e.g. {"a": "..."}.
        sections: SectionDescriptor entries.
    """

    source: Optional[str] = None

    # Container fields use default_factory so instances never share state.
    structures: dict[str, StructureRef] = field(default_factory=dict)
    steps: list[StepDescriptor] = field(default_factory=list)

    layout: str = "linear"
    wrap: str = "repeat"
    steps_per_row: Optional[int] = None
    title: Optional[str] = None

    run_arrows: list[StepRunArrows] = field(default_factory=list)
    condition_key: Optional[dict[str, str]] = None
    sections: list[SectionDescriptor] = field(default_factory=list)