cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. cdxml_toolkit/__init__.py +18 -0
  2. cdxml_toolkit/_jre/__init__.py +2 -0
  3. cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
  4. cdxml_toolkit/analysis/__init__.py +35 -0
  5. cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
  6. cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
  7. cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
  8. cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
  9. cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
  10. cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
  11. cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
  12. cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
  13. cdxml_toolkit/analysis/extract_nmr.py +47 -0
  14. cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
  15. cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
  16. cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
  17. cdxml_toolkit/cdxml_builder.py +920 -0
  18. cdxml_toolkit/cdxml_utils.py +342 -0
  19. cdxml_toolkit/chemdraw/__init__.py +5 -0
  20. cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
  21. cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
  22. cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
  23. cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
  24. cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
  25. cdxml_toolkit/constants.py +304 -0
  26. cdxml_toolkit/coord_normalizer.py +438 -0
  27. cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
  28. cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
  29. cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
  30. cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
  31. cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
  32. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
  33. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
  34. cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
  35. cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
  36. cdxml_toolkit/image/__init__.py +15 -0
  37. cdxml_toolkit/image/reaction_from_image.py +2103 -0
  38. cdxml_toolkit/image/structure_from_image.py +1711 -0
  39. cdxml_toolkit/layout/__init__.py +5 -0
  40. cdxml_toolkit/layout/alignment.py +1642 -0
  41. cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
  42. cdxml_toolkit/layout/scheme_merger.py +2260 -0
  43. cdxml_toolkit/mcp_server/__init__.py +0 -0
  44. cdxml_toolkit/mcp_server/__main__.py +5 -0
  45. cdxml_toolkit/mcp_server/server.py +1567 -0
  46. cdxml_toolkit/naming/__init__.py +6 -0
  47. cdxml_toolkit/naming/aligned_namer.py +2342 -0
  48. cdxml_toolkit/naming/mol_builder.py +3722 -0
  49. cdxml_toolkit/naming/name_decomposer.py +2843 -0
  50. cdxml_toolkit/naming/reactions_datamol.json +2414 -0
  51. cdxml_toolkit/office/__init__.py +5 -0
  52. cdxml_toolkit/office/doc_from_template.py +722 -0
  53. cdxml_toolkit/office/ole_embedder.py +808 -0
  54. cdxml_toolkit/office/ole_extractor.py +272 -0
  55. cdxml_toolkit/perception/__init__.py +10 -0
  56. cdxml_toolkit/perception/compound_search.py +229 -0
  57. cdxml_toolkit/perception/eln_csv_parser.py +240 -0
  58. cdxml_toolkit/perception/rdf_parser.py +664 -0
  59. cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
  60. cdxml_toolkit/perception/reaction_parser.py +2150 -0
  61. cdxml_toolkit/perception/scheme_reader.py +2948 -0
  62. cdxml_toolkit/perception/scheme_refine.py +1404 -0
  63. cdxml_toolkit/perception/scheme_segmenter.py +619 -0
  64. cdxml_toolkit/perception/spatial_assignment.py +1013 -0
  65. cdxml_toolkit/rdkit_utils.py +605 -0
  66. cdxml_toolkit/render/__init__.py +17 -0
  67. cdxml_toolkit/render/auto_layout.py +229 -0
  68. cdxml_toolkit/render/compact_parser.py +632 -0
  69. cdxml_toolkit/render/parser.py +706 -0
  70. cdxml_toolkit/render/render_scheme.py +267 -0
  71. cdxml_toolkit/render/renderer.py +2387 -0
  72. cdxml_toolkit/render/schema.py +90 -0
  73. cdxml_toolkit/render/scheme_maker.py +1043 -0
  74. cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
  75. cdxml_toolkit/resolve/__init__.py +13 -0
  76. cdxml_toolkit/resolve/cas_resolver.py +430 -0
  77. cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
  78. cdxml_toolkit/resolve/condensed_formula.py +493 -0
  79. cdxml_toolkit/resolve/jre_manager.py +195 -0
  80. cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
  81. cdxml_toolkit/resolve/reagent_db.py +285 -0
  82. cdxml_toolkit/resolve/superatom_data.json +2856 -0
  83. cdxml_toolkit/resolve/superatom_table.py +146 -0
  84. cdxml_toolkit/text_formatting.py +298 -0
  85. cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
  86. cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
  87. cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
  88. cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
  89. cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
  90. cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
  91. cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,304 @@
1
+ """
2
+ constants.py -- Centralized constants for chem-tools v0.3.
3
+
4
+ All hardcoded magic numbers that were previously scattered across individual
5
+ tool scripts are collected here. Each constant has a comment noting its
6
+ purpose and original source file(s).
7
+
8
+ Sections:
9
+ 1. ACS Document 1996 style constants
10
+ 2. ACS_STYLE dict and CDXML_HEADER template string
11
+ 3. LCMS analysis constants
12
+ 4. Mass matching constants
13
+ 5. Layout constants (reaction_cleanup gaps)
14
+ 6. Image / structure constants
15
+ """
16
+
17
+ # ============================================================================
18
+ # 1. ACS Document 1996 style constants
19
+ # ============================================================================
20
+
21
+ # Bond length -- float for geometry calculations
22
+ # Originally in: alignment.py, scheme_polisher_v2.py, coord_normalizer.py,
23
+ # reaction_from_image.py, reaction_cleanup.py (~line 772), scheme_aligner.py (~line 146)
24
+ ACS_BOND_LENGTH = 14.40
25
+
26
+ # Bond length -- string for CDXML XML attributes
27
+ # Originally in: cdxml_builder.py, chemscript_bridge.py, eln_enrichment.py,
28
+ # reactant_heuristic.py, reaction_from_image.py (CDXML header)
29
+ ACS_BOND_LENGTH_STR = "14.40"
30
+
31
+ # Chain angle in degrees -- numeric (int) value for calculations
32
+ # Originally in: cdxml_builder.py, chemscript_bridge.py
33
+ ACS_CHAIN_ANGLE = 120
34
+
35
+ # Chain angle -- string for CDXML XML attributes
36
+ # Originally in: cdxml_builder.py, chemscript_bridge.py
37
+ ACS_CHAIN_ANGLE_STR = "120"
38
+
39
+ # Font table ID for Arial (ChemDraw font table index)
40
+ # Originally in: cdxml_builder.py, scheme_polisher_v2.py
41
+ ACS_LABEL_FONT = "3"
42
+
43
+ # Label size in points (atom labels)
44
+ # Originally in: cdxml_builder.py, scheme_polisher_v2.py
45
+ ACS_LABEL_SIZE = "10"
46
+
47
+ # Label face: 96 = formula style (ChemDraw face bitmask: subscript 32 + superscript 64; bold would be 1)
48
+ # Originally in: cdxml_builder.py, scheme_polisher_v2.py
49
+ ACS_LABEL_FACE = "96"
50
+
51
+ # Caption size in points (reaction conditions text)
52
+ # Originally in: cdxml_builder.py
53
+ ACS_CAPTION_SIZE = "10"
54
+
55
+ # Caption face: 0 = plain
56
+ # Originally in: cdxml_builder.py, chemscript_bridge.py
57
+ ACS_CAPTION_FACE = "0"
58
+
59
+ # Line width in points
60
+ # Originally in: cdxml_builder.py, chemscript_bridge.py
61
+ ACS_LINE_WIDTH = "0.60"
62
+
63
+ # Bold bond width in points
64
+ # Originally in: cdxml_builder.py, chemscript_bridge.py, scheme_polisher_v2.py
65
+ ACS_BOLD_WIDTH = "2"
66
+
67
+ # Bond spacing (gap between the lines of a multiple bond, as a percentage of bond length)
68
+ # Originally in: cdxml_builder.py, scheme_polisher_v2.py
69
+ ACS_BOND_SPACING = "18"
70
+
71
+ # Hash spacing (dashed bond dash gap) in points
72
+ # Originally in: cdxml_builder.py, chemscript_bridge.py, scheme_polisher_v2.py
73
+ ACS_HASH_SPACING = "2.50"
74
+
75
+ # Margin width in points
76
+ # Originally in: cdxml_builder.py, chemscript_bridge.py, scheme_polisher_v2.py
77
+ ACS_MARGIN_WIDTH = "1.60"
78
+
79
+
80
+ # ============================================================================
81
+ # 2. ACS_STYLE dict and CDXML_HEADER template
82
+ # ============================================================================
83
+
84
+ # Complete ACS Document 1996 style dict -- all values are strings for direct
85
+ # use as XML attributes. Superset of chemscript_bridge.ACS_STYLE_ATTRS and
86
+ # scheme_polisher_v2.ACS_SETTINGS.
87
+ # Originally in: chemscript_bridge.py (ACS_STYLE_ATTRS), scheme_polisher_v2.py (ACS_SETTINGS)
88
+ ACS_STYLE = {
89
+ "BondLength": ACS_BOND_LENGTH_STR,
90
+ "ChainAngle": ACS_CHAIN_ANGLE_STR,
91
+ "BoldWidth": ACS_BOLD_WIDTH,
92
+ "LineWidth": ACS_LINE_WIDTH,
93
+ "MarginWidth": ACS_MARGIN_WIDTH,
94
+ "HashSpacing": ACS_HASH_SPACING,
95
+ "BondSpacing": ACS_BOND_SPACING,
96
+ "LabelFont": ACS_LABEL_FONT,
97
+ "LabelSize": ACS_LABEL_SIZE,
98
+ "LabelFace": ACS_LABEL_FACE,
99
+ "CaptionFont": ACS_LABEL_FONT,
100
+ "CaptionSize": ACS_CAPTION_SIZE,
101
+ "CaptionFace": ACS_CAPTION_FACE,
102
+ }
103
+
104
+ # Full CDXML document header template with ACS Document 1996 style.
105
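+ # Contains a {bbox} placeholder for the document bounding box plus style placeholders ({label_font}, {label_size}, {label_face}, {caption_size}, {hash_spacing}, {margin_width}, {line_width}, {bold_width}, {bond_length}, {bond_spacing}, {chain_angle}); all must be filled in before use.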
106
+ # Originally in: cdxml_builder.py (_CDXML_HEADER), reaction_from_image.py (_CDXML_HEADER)
107
+ CDXML_HEADER = """\
108
+ <?xml version="1.0" encoding="UTF-8" ?>
109
+ <!DOCTYPE CDXML SYSTEM "http://www.cambridgesoft.com/xml/cdxml.dtd" >
110
+ <CDXML
111
+ CreationProgram="ChemDraw 16.0.0.82"
112
+ BoundingBox="{bbox}"
113
+ WindowPosition="-2147483648 -2147483648"
114
+ WindowSize="-2147483648 -2147483648"
115
+ FractionalWidths="yes"
116
+ InterpretChemically="yes"
117
+ ShowAtomQuery="yes"
118
+ ShowAtomStereo="no"
119
+ ShowAtomEnhancedStereo="yes"
120
+ ShowAtomNumber="no"
121
+ ShowResidueID="no"
122
+ ShowBondQuery="yes"
123
+ ShowBondRxn="yes"
124
+ ShowBondStereo="no"
125
+ ShowTerminalCarbonLabels="no"
126
+ ShowNonTerminalCarbonLabels="no"
127
+ HideImplicitHydrogens="no"
128
+ LabelFont="{label_font}"
129
+ LabelSize="{label_size}"
130
+ LabelFace="{label_face}"
131
+ CaptionFont="{label_font}"
132
+ CaptionSize="{caption_size}"
133
+ HashSpacing="{hash_spacing}"
134
+ MarginWidth="{margin_width}"
135
+ LineWidth="{line_width}"
136
+ BoldWidth="{bold_width}"
137
+ BondLength="{bond_length}"
138
+ BondSpacing="{bond_spacing}"
139
+ ChainAngle="{chain_angle}"
140
+ LabelJustification="Auto"
141
+ CaptionJustification="Left"
142
+ AminoAcidTermini="HOH"
143
+ ShowSequenceTermini="yes"
144
+ ShowSequenceBonds="yes"
145
+ ResidueWrapCount="40"
146
+ ResidueBlockCount="10"
147
+ ResidueZigZag="yes"
148
+ NumberResidueBlocks="no"
149
+ PrintMargins="36 36 36 36"
150
+ ChemPropName=""
151
+ ChemPropFormula="Chemical Formula: "
152
+ ChemPropExactMass="Exact Mass: "
153
+ ChemPropMolWt="Molecular Weight: "
154
+ ChemPropMOverZ="m/z: "
155
+ ChemPropAnalysis="Elemental Analysis: "
156
+ ChemPropBoilingPt="Boiling Point: "
157
+ ChemPropMeltingPt="Melting Point: "
158
+ ChemPropCritTemp="Critical Temp: "
159
+ ChemPropCritPres="Critical Pres: "
160
+ ChemPropCritVol="Critical Vol: "
161
+ ChemPropGibbs="Gibbs Energy: "
162
+ ChemPropLogP="Log P: "
163
+ ChemPropMR="MR: "
164
+ ChemPropHenry="Henry&apos;s Law: "
165
+ ChemPropEForm="Heat of Form: "
166
+ ChemProptPSA="tPSA: "
167
+ ChemPropCLogP="CLogP: "
168
+ ChemPropCMR="CMR: "
169
+ ChemPropLogS="LogS: "
170
+ ChemPropPKa="pKa: "
171
+ ChemPropID=""
172
+ color="0"
173
+ bgcolor="1"
174
+ RxnAutonumberStart="1"
175
+ RxnAutonumberConditions="no"
176
+ RxnAutonumberStyle="Roman"
177
+ RxnAutonumberFormat="(#)"
178
+ ><colortable>
179
+ <color r="1" g="1" b="1"/>
180
+ <color r="0" g="0" b="0"/>
181
+ <color r="1" g="0" b="0"/>
182
+ <color r="1" g="1" b="0"/>
183
+ <color r="0" g="1" b="0"/>
184
+ <color r="0" g="1" b="1"/>
185
+ <color r="0" g="0" b="1"/>
186
+ <color r="1" g="0" b="1"/>
187
+ </colortable><fonttable>
188
+ <font id="{label_font}" charset="iso-8859-1" name="Arial"/>
189
+ </fonttable>"""
190
+
191
+ # Minimal CDXML wrapper for single-fragment operations (ChemScript, etc.)
192
+ # Originally in: alignment.py (sp_fragment_to_cdxml), eln_enrichment.py, reactant_heuristic.py
193
+ CDXML_MINIMAL_HEADER = (
194
+ '<?xml version="1.0" encoding="UTF-8" ?>\n'
195
+ '<!DOCTYPE CDXML SYSTEM "http://www.cambridgesoft.com/xml/cdxml.dtd" >\n'
196
+ '<CDXML BondLength="' + ACS_BOND_LENGTH_STR + '">'
197
+ )
198
+
199
+ # CDXML closing tag
200
+ # Originally in: cdxml_builder.py, reaction_from_image.py
201
+ CDXML_FOOTER = "</CDXML>"
202
+
203
+
204
+ # ============================================================================
205
+ # 3. LCMS analysis constants
206
+ # ============================================================================
207
+
208
+ # Default RT matching tolerance in minutes for cross-file peak matching
209
+ # Originally in: multi_lcms_analyzer.py (--rt-tolerance default)
210
+ LCMS_RT_TOLERANCE = 0.02
211
+
212
+ # Default m/z clustering tolerance in Da for ion merging
213
+ # Originally in: multi_lcms_analyzer.py (--mz-tolerance default)
214
+ LCMS_MZ_TOLERANCE = 0.5
215
+
216
+ # Fraction change threshold for increasing/decreasing trend classification
217
+ # Originally in: multi_lcms_analyzer.py (--trend-threshold default)
218
+ LCMS_TREND_THRESHOLD = 0.2
219
+
220
+ # Default minimum area% for a compound to appear in the reaction summary
221
+ # Originally in: multi_lcms_analyzer.py (--min-summary-area default)
222
+ LCMS_MIN_SUMMARY_AREA = 2.0
223
+
224
+ # Column boundary x-coordinate (half of 612pt letter page width) for
225
+ # two-column MS/UV panel parsing in MassLynx PDF reports.
226
+ # Originally in: lcms_analyzer.py (computed as page_width / 2.0)
227
+ LCMS_COLUMN_BOUNDARY = 306.0
228
+
229
+ # MS axis tick values to exclude when extracting m/z labels
230
+ # Originally in: lcms_analyzer.py (_MS_AXIS_TICKS)
231
+ LCMS_MS_AXIS_TICKS = {500.0, 1000.0}
232
+
233
+ # UV axis tick values to exclude when extracting wavelength labels
234
+ # Originally in: lcms_analyzer.py (_UV_AXIS_TICKS)
235
+ LCMS_UV_AXIS_TICKS = {150.0, 200.0, 250.0, 300.0, 350.0, 400.0}
236
+
237
+ # UV wavelength valid range (nm) for lambda-max extraction
238
+ # Originally in: lcms_analyzer.py (_parse_uv_from_words, line ~408)
239
+ LCMS_UV_WAVELENGTH_MIN = 150.0
240
+ LCMS_UV_WAVELENGTH_MAX = 400.0
241
+
242
+
243
+ # ============================================================================
244
+ # 4. Mass matching constants
245
+ # ============================================================================
246
+
247
+ # MW tolerance in Da for matching CSV reagents to scheme fragments
248
+ # Originally in: eln_enrichment.py (best_delta threshold, lines ~306 and ~347)
249
+ MW_MATCH_TOLERANCE = 2.0
250
+
251
+ # Loose MW tolerance in Da for matching substrate fragment to CSV row
252
+ # Originally in: eln_enrichment.py (substrate MW match, line ~689)
253
+ MW_MATCH_TOLERANCE_LOOSE = 5.0
254
+
255
+ # MW tolerance in Da for matching species to CSV rows in procedure_writer
256
+ # Originally in: procedure_writer.py (MASS_TOLERANCE, line ~69)
257
+ MASS_TOLERANCE = 1.5
258
+
259
+ # Minimum area% for a peak to be reported in LCMS characterization/notes
260
+ # Originally in: procedure_writer.py (MIN_REPORT_AREA_PCT, line ~70)
261
+ MIN_REPORT_AREA_PCT = 20.0
262
+
263
+ # Minimum area% for an unidentified compound to be counted as "significant"
264
+ # Originally in: procedure_writer.py (hardcoded 2.0 in tracking summary, line ~1990)
265
+ MIN_SIGNIFICANT_AREA = 2.0
266
+
267
+
268
+ # ============================================================================
269
+ # 5. Layout constants (from reaction_cleanup.py)
270
+ # ============================================================================
271
+
272
+ # Gap in points from arrow to bottom of above-arrow objects (base case)
273
+ # Originally in: reaction_cleanup.py (ABOVE_GAP in most approaches, line ~457)
274
+ LAYOUT_ABOVE_GAP = 8.0
275
+
276
+ # Gap in points from arrow to top of below-arrow objects
277
+ # Originally in: reaction_cleanup.py (BELOW_GAP in all approaches, line ~458)
278
+ LAYOUT_BELOW_GAP = 4.0
279
+
280
+ # Extra gap for fragments with hanging NH/PH labels (N or P at bottom with <=2 bonds)
281
+ # Originally in: reaction_cleanup.py (HANGING_GAP, line ~899)
282
+ LAYOUT_HANGING_LABEL_GAP = 16.0
283
+
284
+ # Gap in points between multiple fragments on the same side (arrow_driven approach)
285
+ # Originally in: reaction_cleanup.py (INTER_GAP, line ~523)
286
+ LAYOUT_INTER_FRAGMENT_GAP = 8.0
287
+
288
+ # Gap between molecule edge and arrow tip, in multiples of bond length (chemdraw_mimic)
289
+ # Originally in: reaction_cleanup.py (FRAG_GAP_BONDS, line ~774)
290
+ LAYOUT_FRAG_GAP_BONDS = 1.0
291
+
292
+ # Gap between multiple reactants, in multiples of bond length (chemdraw_mimic)
293
+ # Originally in: reaction_cleanup.py (INTER_GAP_BONDS, line ~775)
294
+ LAYOUT_INTER_GAP_BONDS = 0.8
295
+
296
+
297
+ # ============================================================================
298
+ # 6. Image / structure constants
299
+ # ============================================================================
300
+
301
+ # Reduced bond length for condition structures rendered above/below the arrow
302
+ # (smaller than ACS_BOND_LENGTH so they don't overwhelm the scheme)
303
+ # Originally in: reaction_from_image.py (EXPAND_SCALE_BOND, line ~420)
304
+ EXPAND_SCALE_BOND = 10.0