cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. cdxml_toolkit/__init__.py +18 -0
  2. cdxml_toolkit/_jre/__init__.py +2 -0
  3. cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
  4. cdxml_toolkit/analysis/__init__.py +35 -0
  5. cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
  6. cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
  7. cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
  8. cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
  9. cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
  10. cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
  11. cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
  12. cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
  13. cdxml_toolkit/analysis/extract_nmr.py +47 -0
  14. cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
  15. cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
  16. cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
  17. cdxml_toolkit/cdxml_builder.py +920 -0
  18. cdxml_toolkit/cdxml_utils.py +342 -0
  19. cdxml_toolkit/chemdraw/__init__.py +5 -0
  20. cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
  21. cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
  22. cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
  23. cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
  24. cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
  25. cdxml_toolkit/constants.py +304 -0
  26. cdxml_toolkit/coord_normalizer.py +438 -0
  27. cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
  28. cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
  29. cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
  30. cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
  31. cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
  32. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
  33. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
  34. cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
  35. cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
  36. cdxml_toolkit/image/__init__.py +15 -0
  37. cdxml_toolkit/image/reaction_from_image.py +2103 -0
  38. cdxml_toolkit/image/structure_from_image.py +1711 -0
  39. cdxml_toolkit/layout/__init__.py +5 -0
  40. cdxml_toolkit/layout/alignment.py +1642 -0
  41. cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
  42. cdxml_toolkit/layout/scheme_merger.py +2260 -0
  43. cdxml_toolkit/mcp_server/__init__.py +0 -0
  44. cdxml_toolkit/mcp_server/__main__.py +5 -0
  45. cdxml_toolkit/mcp_server/server.py +1567 -0
  46. cdxml_toolkit/naming/__init__.py +6 -0
  47. cdxml_toolkit/naming/aligned_namer.py +2342 -0
  48. cdxml_toolkit/naming/mol_builder.py +3722 -0
  49. cdxml_toolkit/naming/name_decomposer.py +2843 -0
  50. cdxml_toolkit/naming/reactions_datamol.json +2414 -0
  51. cdxml_toolkit/office/__init__.py +5 -0
  52. cdxml_toolkit/office/doc_from_template.py +722 -0
  53. cdxml_toolkit/office/ole_embedder.py +808 -0
  54. cdxml_toolkit/office/ole_extractor.py +272 -0
  55. cdxml_toolkit/perception/__init__.py +10 -0
  56. cdxml_toolkit/perception/compound_search.py +229 -0
  57. cdxml_toolkit/perception/eln_csv_parser.py +240 -0
  58. cdxml_toolkit/perception/rdf_parser.py +664 -0
  59. cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
  60. cdxml_toolkit/perception/reaction_parser.py +2150 -0
  61. cdxml_toolkit/perception/scheme_reader.py +2948 -0
  62. cdxml_toolkit/perception/scheme_refine.py +1404 -0
  63. cdxml_toolkit/perception/scheme_segmenter.py +619 -0
  64. cdxml_toolkit/perception/spatial_assignment.py +1013 -0
  65. cdxml_toolkit/rdkit_utils.py +605 -0
  66. cdxml_toolkit/render/__init__.py +17 -0
  67. cdxml_toolkit/render/auto_layout.py +229 -0
  68. cdxml_toolkit/render/compact_parser.py +632 -0
  69. cdxml_toolkit/render/parser.py +706 -0
  70. cdxml_toolkit/render/render_scheme.py +267 -0
  71. cdxml_toolkit/render/renderer.py +2387 -0
  72. cdxml_toolkit/render/schema.py +90 -0
  73. cdxml_toolkit/render/scheme_maker.py +1043 -0
  74. cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
  75. cdxml_toolkit/resolve/__init__.py +13 -0
  76. cdxml_toolkit/resolve/cas_resolver.py +430 -0
  77. cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
  78. cdxml_toolkit/resolve/condensed_formula.py +493 -0
  79. cdxml_toolkit/resolve/jre_manager.py +195 -0
  80. cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
  81. cdxml_toolkit/resolve/reagent_db.py +285 -0
  82. cdxml_toolkit/resolve/superatom_data.json +2856 -0
  83. cdxml_toolkit/resolve/superatom_table.py +146 -0
  84. cdxml_toolkit/text_formatting.py +298 -0
  85. cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
  86. cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
  87. cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
  88. cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
  89. cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
  90. cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
  91. cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,318 @@
1
+ Metadata-Version: 2.4
2
+ Name: cdxml-toolkit
3
+ Version: 0.5.0
4
+ Summary: Python toolkit for ChemDraw CDXML reaction scheme processing, layout, and rendering.
5
+ Author: Hiu Fung Kevin Lee
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/leehiufung911/cdxml-toolkit
8
+ Project-URL: Repository, https://github.com/leehiufung911/cdxml-toolkit
9
+ Project-URL: Issues, https://github.com/leehiufung911/cdxml-toolkit/issues
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Operating System :: Microsoft :: Windows
20
+ Requires-Python: >=3.9
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ License-File: NOTICE.md
24
+ Requires-Dist: lxml>=4.6
25
+ Provides-Extra: rdkit
26
+ Requires-Dist: rdkit>=2023.03; extra == "rdkit"
27
+ Provides-Extra: chemdraw
28
+ Requires-Dist: pywin32>=300; extra == "chemdraw"
29
+ Provides-Extra: image
30
+ Requires-Dist: opencv-python>=4.5; extra == "image"
31
+ Requires-Dist: Pillow>=9.0; extra == "image"
32
+ Provides-Extra: office
33
+ Requires-Dist: python-pptx>=0.6; extra == "office"
34
+ Requires-Dist: python-docx>=0.8; extra == "office"
35
+ Requires-Dist: olefile>=0.46; extra == "office"
36
+ Provides-Extra: yaml
37
+ Requires-Dist: pyyaml>=5.4; extra == "yaml"
38
+ Provides-Extra: mcp
39
+ Requires-Dist: mcp>=1.0; extra == "mcp"
40
+ Requires-Dist: pyyaml>=5.4; extra == "mcp"
41
+ Provides-Extra: analysis
42
+ Requires-Dist: pdfplumber>=0.7; extra == "analysis"
43
+ Provides-Extra: chemscript
44
+ Requires-Dist: pythonnet>=3.0; extra == "chemscript"
45
+ Provides-Extra: decimer
46
+ Requires-Dist: tensorflow>=2.16; extra == "decimer"
47
+ Requires-Dist: decimer>=2.6; extra == "decimer"
48
+ Requires-Dist: pymupdf>=1.20; extra == "decimer"
49
+ Provides-Extra: ocr
50
+ Requires-Dist: pytesseract>=0.3; extra == "ocr"
51
+ Provides-Extra: opsin
52
+ Requires-Dist: py2opsin>=1.0; extra == "opsin"
53
+ Provides-Extra: all
54
+ Requires-Dist: cdxml-toolkit[analysis,chemdraw,chemscript,image,mcp,office,opsin,rdkit,yaml]; extra == "all"
55
+ Provides-Extra: full
56
+ Requires-Dist: cdxml-toolkit[all,decimer,ocr]; extra == "full"
57
+ Provides-Extra: dev
58
+ Requires-Dist: cdxml-toolkit[all]; extra == "dev"
59
+ Requires-Dist: pytest>=7.0; extra == "dev"
60
+ Dynamic: license-file
61
+
62
+ # cdxml-toolkit
63
+
64
+ Chemistry office automation toolkit with an MCP (Model Context Protocol) server. It lets LLM agents draw reaction schemes, parse ELN exports, analyze LCMS data, and produce publication-ready ChemDraw (CDXML) output.
65
+
66
+ The goal: any chemist with a consumer GPU can run a local LLM agent that helps with routine chemistry office tasks. The toolkit provides 15 grounded, validated chemistry tools that LLMs call via MCP — the agent reasons about chemistry while the tools handle SMILES resolution, 2D coordinate generation, and CDXML layout.
67
+
68
+ > Built and tested with Claude Code (Opus 4.6). I directed the design and architecture; Claude did the implementation. I'm a PhD organic chemist, not a programmer — this project wouldn't exist without Claude Code, and I thank Anthropic.
69
+
70
+ ## Quick start: MCP server
71
+
72
+ The primary interface is the MCP server. Connect it to any MCP-compatible agent (Claude Desktop, opencode, qwen-agent, etc.) and just chat naturally: "Draw deucravacitinib", "Help me complete my lab book", "Extract structures from this image".
73
+
74
+ ### Claude Desktop
75
+
76
+ Edit `%APPDATA%\Claude\claude_desktop_config.json` (Windows) or `~/Library/Application Support/Claude/claude_desktop_config.json` (Mac):
77
+
78
+ ```json
79
+ {
80
+ "mcpServers": {
81
+ "cdxml-toolkit": {
82
+ "command": "python",
83
+ "args": ["-m", "cdxml_toolkit.mcp_server"]
84
+ }
85
+ }
86
+ }
87
+ ```
88
+
89
+ ### opencode (for OpenRouter / local models)
90
+
91
+ Create `opencode.json`:
92
+
93
+ ```json
94
+ {
95
+ "provider": {
96
+ "openrouter": {
97
+ "models": { "qwen/qwen3.5-27b": {} }
98
+ }
99
+ },
100
+ "mcp": {
101
+ "cdxml-toolkit": {
102
+ "type": "local",
103
+ "command": ["python", "-m", "cdxml_toolkit.mcp_server"],
104
+ "enabled": true,
105
+ "timeout": 120000
106
+ }
107
+ }
108
+ }
109
+ ```
110
+
111
+ ### Verify it works
112
+
113
+ ```
114
+ > Use cdxml-toolkit. Resolve "aspirin", then draw it.
115
+ ```
116
+
117
+ Expected: two tool calls (`resolve_name`, then `draw_molecule`) that produce an aspirin CDXML file.
118
+
119
+ ## MCP tools (15)
120
+
121
+ ### Chemistry resolution
122
+ | Tool | Description |
123
+ |------|-------------|
124
+ | `resolve_name` | Name/abbreviation/CAS/formula to rich molecule JSON (5-tier: reagent DB, condensed formula, ChemScript, OPSIN, PubChem) |
125
+ | `modify_molecule` | 6 operations: analyze, name_surgery, smarts, set_smiles, set_name, reaction. 162 named reaction templates. Returns MCS-based structural diffs. |
126
+
127
+ ### Structure rendering
128
+ | Tool | Description |
129
+ |------|-------------|
130
+ | `draw_molecule` | Single molecule to CDXML |
131
+ | `render_scheme` | YAML/compact text/reaction JSON to publication-ready CDXML. Forgiving parser handles common LLM YAML mistakes. |
132
+
133
+ ### Perception (reading existing chemistry)
134
+ | Tool | Description |
135
+ |------|-------------|
136
+ | `parse_reaction` | ELN exports (CDXML/CDX/CSV/RXN) to semantic JSON with species, roles, SMILES, equivalents |
137
+ | `summarize_reaction` | Context-efficient view of reaction JSON (select only the fields you need) |
138
+ | `extract_structures_from_image` | Image to SMILES + confidence scores via DECIMER neural network |
139
+ | `parse_scheme` | CDXML scheme to structured species/steps/topology JSON |
140
+
141
+ ### Analysis
142
+ | Tool | Description |
143
+ |------|-------------|
144
+ | `parse_analysis_file` | LCMS (Waters/manual) or NMR (MestReNova) PDF to structured peak data |
145
+ | `format_lab_entry` | Structured entry dicts to formatted lab book text. Re-reads LCMS PDFs for exact numbers. |
146
+
147
+ ### Office integration
148
+ | Tool | Description |
149
+ |------|-------------|
150
+ | `extract_cdxml_from_office` | Pull embedded ChemDraw OLE objects from PPTX/DOCX |
151
+ | `embed_cdxml_in_office` | Inject CDXML as editable ChemDraw OLE into PPTX/DOCX |
152
+ | `convert_cdx_cdxml` | Bidirectional CDX/CDXML conversion |
153
+ | `search_compound` | Find a molecule across experiment directories by SMILES similarity |
154
+ | `render_to_png` | CDXML to PNG via ChemDraw COM |
155
+
156
+ ## Design principles
157
+
158
+ **Never trust LLM-generated SMILES.** The agent always goes through `resolve_name` to get grounded SMILES from databases. Direct SMILES generation is the #1 source of chemistry hallucination.
159
+
160
+ **Verify every transformation.** `modify_molecule` returns aligned IUPAC name diffs and MCS-based molecular diffs after every edit. The agent can confirm the transformation is correct.
161
+
162
+ **Never flood the agent.** Large outputs (CDXML, JSON) always write to files and return `{ok: true, output_path: "...", size: 23456}`. The agent never gets 30KB of XML in its context window.
163
+
164
+ **Forgiving inputs.** The YAML parser accepts 9+ common LLM mistakes (inline structures, `substrates` as alias for `structures`, text as string not list, bare SMILES, `above_arrow` as list/string). Input parameters accept bare SMILES strings, stringified JSON arrays, and fuzzy operation names.
165
+
166
+ **Actionable errors.** Every error tells the agent what to do instead: "Did you mean: BOC_deprotection?", not "KeyError".
167
+
168
+ **Progressive discovery.** Call any tool with no arguments to get usage examples and schema reference.
169
+
170
+ ## Installation
171
+
172
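+ **Prerequisites:** Windows with ChemDraw (ChemOffice 2015+) installed. ChemScript, which ships with ChemOffice, is recommended but optional; without it, name resolution falls back to OPSIN and PubChem.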
173
+
174
+ ```bash
175
+ # From GitHub — recommended install (includes RDKit, MCP server, ChemDraw COM,
176
+ # Office support, PDF parsing, image processing, and ChemScript bridge)
177
+ pip install "cdxml-toolkit[all] @ git+https://github.com/leehiufung911/cdxml-toolkit.git@main"
178
+
179
+ # With DECIMER neural image extraction (extract_structures_from_image)
180
+ pip install "cdxml-toolkit[all,decimer] @ git+https://github.com/leehiufung911/cdxml-toolkit.git@main"
181
+
182
+ # Development (editable install)
183
+ git clone https://github.com/leehiufung911/cdxml-toolkit.git
184
+ cd cdxml-toolkit
185
+ pip install -e ".[dev]"
186
+ ```
187
+
188
+ **Required:** `lxml>=4.6`. **Recommended:** `rdkit>=2023.03` (needed for scheme rendering).
189
+
190
+ ### Extras
191
+
192
+ | Extra | What it includes | Notes |
193
+ |-------|-----------------|-------|
194
+ | `[all]` | RDKit, pywin32, image, Office, YAML, PDF analysis, MCP server, pythonnet, py2opsin | **Use this.** Everything most users need. |
195
+ | `[decimer]` | TensorFlow, DECIMER, PyMuPDF | Neural image-to-SMILES. Adds ~1 GB. |
196
+ | `[full]` | `[all]` + `[decimer]` + `[ocr]` | Everything pip-installable. |
197
+ | `[rdkit]` | RDKit only | Minimal install for scripting. |
198
+ | `[mcp]` | MCP server + PyYAML | MCP server only (no RDKit/Office). |
199
+ | `[dev]` | `[all]` + pytest | For running the test suite. |
200
+
201
+ ### Name resolution tiers
202
+
203
+ `resolve_name` tries 5 tiers in order. The first tier to return a valid SMILES wins:
204
+
205
+ | Tier | Source | Deps | Coverage |
206
+ |------|--------|------|----------|
207
+ | 1 | Curated reagent DB (186 entries) | None | Common reagents, catalysts, solvents |
208
+ | 2 | Condensed formula parser | RDKit | Shorthand like PhB(OH)2, Et3N, CF3 |
209
+ | 3 | **ChemScript** (preferred) | ChemDraw + 32-bit Python | Full IUPAC names, any drawable structure |
210
+ | 4 | **OPSIN** (bundled fallback) | py2opsin + bundled JRE | Systematic IUPAC names, offline |
211
+ | 5 | PubChem | Network | CAS numbers, trade names, everything else |
212
+
213
+ ChemScript (Tier 3) is preferred because it handles the widest range of names and integrates with ChemDraw's structure engine. OPSIN (Tier 4) is a fully offline fallback: a JRE is bundled with the package, so no separate Java install is needed once `py2opsin` is present (the `[opsin]` extra, also included in `[all]`). If neither is available, PubChem provides a network-based last resort.
214
+
215
+ ### System dependencies (not pip-installable)
216
+
217
+ | Dependency | Required for | Setup |
218
+ |-----------|-------------|-------|
219
+ | **ChemDraw** (ChemOffice 2015+) | CDX conversion, PNG rendering | Must be **closed** before running COM tools. |
220
+ | **ChemScript .NET** | Name resolution Tier 3 (preferred, not required) | Comes with ChemOffice. See setup below. |
221
+ | **Microsoft Office** | OLE embedding into PPTX/DOCX | Optional. Only needed for `embed_cdxml_in_office`. |
222
+
223
+ ### ChemScript setup (optional but recommended)
224
+
225
+ ChemScript gives the best IUPAC name resolution but requires a 32-bit Python environment because the ChemScript .NET DLL is 32-bit. If you skip this, OPSIN handles IUPAC names as a fallback.
226
+
227
+ ```bash
228
+ # 1. Create 32-bit Python env
229
+ set CONDA_SUBDIR=win-32 && conda create -n chemscript32 python=3.10 -y
230
+
231
+ # 2. Install pythonnet in the 32-bit env
232
+ C:\Users\%USERNAME%\miniconda3\envs\chemscript32\python.exe -m pip install pythonnet
233
+
234
+ # 3. Auto-detect ChemDraw and save config
235
+ cdxml-convert --configure
236
+ ```
237
+
238
+ Step 3 scans for ChemDraw (2015/2016/PerkinElmer paths) and writes `~/.chemscript_config.json`. If your ChemDraw is in a non-standard location, edit the config manually:
239
+
240
+ ```json
241
+ {
242
+ "python32": "C:\\Users\\YOU\\miniconda3\\envs\\chemscript32\\python.exe",
243
+ "dll_dir": "C:\\Program Files (x86)\\PerkinElmerInformatics\\ChemOffice2016\\ChemScript\\Lib\\Net",
244
+ "assembly": "CambridgeSoft.ChemScript16"
245
+ }
246
+ ```
247
+
248
+ ## CLI tools
249
+
250
+ All tools are also available as command-line scripts:
251
+
252
+ | Command | Description |
253
+ |---------|-------------|
254
+ | `cdxml-mcp` | MCP server (primary interface) |
255
+ | `cdxml-parse` | Parse reaction files to JSON |
256
+ | `cdxml-render` | Render JSON/YAML/compact text to CDXML |
257
+ | `cdxml-convert` | CDX/CDXML bidirectional conversion |
258
+ | `cdxml-image` | CDXML to PNG/SVG (ChemDraw COM) |
259
+ | `cdxml-merge` | Merge multiple reaction schemes |
260
+ | `cdxml-layout` | Clean up reaction layout (pure Python) |
261
+ | `cdxml-ole` | Embed CDXML as editable OLE in PPTX/DOCX |
262
+ | `cdxml-lcms` | Parse LCMS PDF reports |
263
+ | `cdxml-nmr` | Extract NMR data from MestReNova PDFs |
264
+ | `cdxml-format-entry` | Format lab book entries |
265
+ | `cdxml-discover` | Discover experiment files in a directory |
266
+
267
+ ## Scheme DSL
268
+
269
+ The renderer accepts three input formats:
270
+
271
+ **YAML** (what agents typically write):
272
+ ```yaml
273
+ layout: sequential
274
+ structures:
275
+ SM:
276
+ smiles: "Brc1ncnc2sccc12"
277
+ Product:
278
+ smiles: "c1nc(N2CCOCC2)c2ccsc2n1"
279
+ steps:
280
+ - substrates: [SM]
281
+ products: [Product]
282
+ above_arrow:
283
+ structures: [Morph]
284
+ below_arrow:
285
+ text: ["Pd2(dba)3", "BINAP", "Cs2CO3", "Dioxane, 105 C"]
286
+ ```
287
+
288
+ **Compact text** ("Mermaid for reactions"):
289
+ ```
290
+ SM: {Brc1ncnc2sccc12}
291
+ SM --> Product{c1nc(N2CCOCC2)c2ccsc2n1}
292
+ above: Morph{C1COCCN1}
293
+ below: "Pd2(dba)3", "BINAP", "Cs2CO3"
294
+ ```
295
+
296
+ **Reaction JSON** (from parse_reaction):
297
+ ```bash
298
+ cdxml-render --from-json reaction.json -o scheme.cdxml
299
+ ```
300
+
301
+ ## Running tests
302
+
303
+ ```bash
304
+ pip install -e ".[dev]"
305
+ pytest tests/ -v
306
+ ```
307
+
308
+ ## License
309
+
310
+ [MIT](LICENSE)
311
+
312
+ ## Attribution
313
+
314
+ See [NOTICE.md](NOTICE.md) for third-party data attribution (ChemScanner, RDKit).
315
+
316
+ ## Author
317
+
318
+ Hiu Fung Kevin Lee ([@leehiufung911](https://github.com/leehiufung911))
@@ -0,0 +1,91 @@
1
+ cdxml_toolkit/__init__.py,sha256=yIpnWL8W-QBASS3K-muszOEEtaZQyqTeQB-c5aTSPhc,829
2
+ cdxml_toolkit/cdxml_builder.py,sha256=IRaLeOGZEzFKODEbBSO_2bQXw2hrgggJDILB1iVZdhY,29848
3
+ cdxml_toolkit/cdxml_utils.py,sha256=PEM8pBWFxlLAj9tOdfzVnoxCjPgmZCYlzwr6u2nLypQ,11764
4
+ cdxml_toolkit/constants.py,sha256=Bjta4bQtRPY8YTS42HCF7mrxkrTtDZV2DpXjfNiRieQ,11242
5
+ cdxml_toolkit/coord_normalizer.py,sha256=rGpDdQWO4d966XVdqzKHscq-cGAvX3dKs4-MjwcEh0g,14288
6
+ cdxml_toolkit/rdkit_utils.py,sha256=hr_7fvQeSkrxdazdovezdTnCPy8jevIH9Ly9WER4P-c,22625
7
+ cdxml_toolkit/text_formatting.py,sha256=7G-WVo2hx12nURrorKkUQIryDAvitxsG2Xovs4A56rU,11227
8
+ cdxml_toolkit/_jre/__init__.py,sha256=T1p2AHctzwbYsiJjrqz_shKeNENTlP-XZWqSKLiUtMI,120
9
+ cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip,sha256=xnjqBE96TXofiifUh22Ui7U7dhkBXxte_AQFGUR6Im8,48804778
10
+ cdxml_toolkit/analysis/__init__.py,sha256=Zpl8qHhbH0ncHBNVM8Sc5DxC9mVhlcULtOdIQiZt5CE,1558
11
+ cdxml_toolkit/analysis/extract_nmr.py,sha256=NVOttAdwoM_dTgjgjSIbYXewPH5HY2LhzAEsJFzSmPw,1321
12
+ cdxml_toolkit/analysis/format_procedure_entry.py,sha256=XFGDayCJMisZQyUTNiFLvzP60kK4xnocUqfbu2yD2hY,17449
13
+ cdxml_toolkit/analysis/lcms_analyzer.py,sha256=PZA9rDEM12FBFLm4Kps0p5oKS_6C2D1NN4hf2e8mfbE,49925
14
+ cdxml_toolkit/analysis/parse_analysis_file.py,sha256=iGObc5BjjTCrra-mttAlLBe-wiC0N1Trh03AQJGzMfo,4485
15
+ cdxml_toolkit/analysis/deterministic/__init__.py,sha256=1U8rUytuEHpSvF3FABTc8bGbsa9FuzxVE0WVeP1lnpw,657
16
+ cdxml_toolkit/analysis/deterministic/discover_experiment_files.py,sha256=X7K5gI2VgdWwEDrQdWRi4-QX8PeBWzTWzxyk6aZsVI8,15448
17
+ cdxml_toolkit/analysis/deterministic/lab_book_formatter.py,sha256=H66GvCXptxliOoAUCKbta0RGPOr3_yCugnmkcrSygEk,26249
18
+ cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py,sha256=v_AwLzgZ3Rv3Y8XBx_elyKRirRAcSJoJfUfp9vivBd4,38319
19
+ cdxml_toolkit/analysis/deterministic/lcms_identifier.py,sha256=5XnB5Pvq7o8ipMRyhaamVWNDQp6jt6LKnj_J4TnFlM8,22044
20
+ cdxml_toolkit/analysis/deterministic/mass_resolver.py,sha256=gUYyQe7-OM97h3oPgC9d1AQkxvRhQor23zJNC5Soybk,23804
21
+ cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py,sha256=Wyq1KOTPGi4EkRQYZfYKTZe-YPGTV4qfKFKTQkZy9oc,55611
22
+ cdxml_toolkit/analysis/deterministic/procedure_writer.py,sha256=1jwKBkjuVovlLFfxAas0gZtugmPMY_K6COhX890QBKA,18500
23
+ cdxml_toolkit/chemdraw/__init__.py,sha256=2pqdmyYJ3kwe2Os86YbzF8-O5SQUTxDfeoOTEEKGQf0,207
24
+ cdxml_toolkit/chemdraw/_chemscript_server.py,sha256=r768MNzwbCU1InmvWDVMe90CIpU-leO4v45Bu7ncdc8,19254
25
+ cdxml_toolkit/chemdraw/cdx_converter.py,sha256=0LFpTgVxu7sYTJI_oDzKA19IunPr7VW_T5zRX_Fo7gU,19184
26
+ cdxml_toolkit/chemdraw/cdxml_to_image.py,sha256=Sojqh3tM-dDE4Od3aFypD0KPOuI_fmPpM_Z2j0_gsaM,8283
27
+ cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py,sha256=wA4tfNN1Z0AuJ0m_7oUkiFctRvWFVqpp00zjIj9kIGA,10101
28
+ cdxml_toolkit/chemdraw/chemscript_bridge.py,sha256=hF-yw6MGMWW8t1bYJNiKkGoEdI5Sfs6qophDKoKOyGI,35721
29
+ cdxml_toolkit/deterministic_pipeline/__init__.py,sha256=QKwmV0fLbs7TpMRtElB-s0e8WoE7rxAFfQtHubGUMuc,252
30
+ cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py,sha256=eG9cwi8iwu-AXC-G9kv4-kCsskUaUQ3YGFpKaCDObqE,36762
31
+ cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py,sha256=mqNwefT3ponBm4168bQTfckz4MK2yt3ETIvYLYfhZns,47199
32
+ cdxml_toolkit/deterministic_pipeline/legacy/__init__.py,sha256=nIah54-YWMwkkLMw-XiNlFpWCy_64Ov0774Crbm1JL0,227
33
+ cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py,sha256=UVj6k46PogCSvlQsZ31tEVJuce2d6tfN255CvbXIHzY,17879
34
+ cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py,sha256=4fpWLZZiNarO_HiauwGIirTKu7PdIBdbArNR1l0Y73A,49830
35
+ cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py,sha256=yLC8GOz96dwd2to4Ch56u0df9BuQxD5nZAWQKAvhVeU,14922
36
+ cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py,sha256=L88WU9uhx8maaNrZ0x4pRxomGJYHyYS8QJ9ZwYCmr04,49678
37
+ cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py,sha256=Jrvlvmn9SBD-cUl-lSytYesDEVVd1l2dn3Rw1B_A0b0,52497
38
+ cdxml_toolkit/image/__init__.py,sha256=O3W3BCbFvuDvU7K8rBe2tRlkSeOnwqpdzthj8Hk2Lng,415
39
+ cdxml_toolkit/image/reaction_from_image.py,sha256=MmcuUlJDlzjPp7kBArEj3K7y59iUDRS5ecWrl6KYAXs,76196
40
+ cdxml_toolkit/image/structure_from_image.py,sha256=gCI-LAI_KZBaePHz7FHnWlF_vO0HhIKlglTlUDPQy38,63630
41
+ cdxml_toolkit/layout/__init__.py,sha256=JW14mVFAwagANVemMD0tprEvYtwE7mPM7bW9RGSb-_0,198
42
+ cdxml_toolkit/layout/alignment.py,sha256=nWTWKGKXO25sqUnrUm7XsXzY4hoRFbI60CUwiiQFoxU,59773
43
+ cdxml_toolkit/layout/reaction_cleanup.py,sha256=C24GAPJ3bloi2D5TAUTPW6B3kMyk010BV_3RvIFH0eg,38666
44
+ cdxml_toolkit/layout/scheme_merger.py,sha256=A1-OR21-clvGF_HBzS6D31Ja57IGLMuw2nzjJ1mydW4,82960
45
+ cdxml_toolkit/mcp_server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
+ cdxml_toolkit/mcp_server/__main__.py,sha256=hkkYSQyJZe-8VsC5Rk19W_nOIqSgvUO2oZoxZ2QsCDA,102
47
+ cdxml_toolkit/mcp_server/server.py,sha256=VfllYTMMH9TNZtARxXdCDn1iOGmae8lhjtLC2USGsxQ,65980
48
+ cdxml_toolkit/naming/__init__.py,sha256=FpmKxhspHTYFXYunj35TG4wOphjfc1RGV6LUoV3Hkbg,275
49
+ cdxml_toolkit/naming/aligned_namer.py,sha256=h6LLe0DW7soN6yJ8UCJeOKsfD0iGkQct1_xxBzBoSnE,91479
50
+ cdxml_toolkit/naming/mol_builder.py,sha256=7K5au00gMT0N1KcSwQvtCktbfwU65xOol5_F2I7XZSE,149780
51
+ cdxml_toolkit/naming/name_decomposer.py,sha256=RqFjzU0khN4xJGzdN2V_E5LUjs8PgraDcZTMO6VhwrM,113629
52
+ cdxml_toolkit/naming/reactions_datamol.json,sha256=U5ACgbi5WoW6akCc5lYNWzu5qCtH5_H_gDColr0ZJgM,124841
53
+ cdxml_toolkit/office/__init__.py,sha256=Q7njZn1_dtxbd1CiI5N7la930odMZrF6RVr6K0-iLeE,201
54
+ cdxml_toolkit/office/doc_from_template.py,sha256=0Y9E-pci2thKd0Yx3lDnhfXgntkeUd0RzJNd59OAUlM,25930
55
+ cdxml_toolkit/office/ole_embedder.py,sha256=TdXf6o7OY041kViwKBdmJuZd1GgXLoS11J7pGmXvwMc,28977
56
+ cdxml_toolkit/office/ole_extractor.py,sha256=LA9r0FjoX2Tpejr1xya1FSTxlBVOq_ZHDddpfIZxlgU,9326
57
+ cdxml_toolkit/perception/__init__.py,sha256=BCtpUr7G0UXs-1SKEoG0HqHEVoMyFBLvRKRqJPE514o,455
58
+ cdxml_toolkit/perception/compound_search.py,sha256=Ntl8RX-T-OWynoibTsiwmTsRPsJxwECUOlMrWC6u3WQ,8250
59
+ cdxml_toolkit/perception/eln_csv_parser.py,sha256=4IpWs83zG4dlBhR04ABYZnXMNjimNbwH_5inxfxsvV4,7663
60
+ cdxml_toolkit/perception/rdf_parser.py,sha256=ro5XvOvsXoHqu15ggJvzyiFKa-6xauyZ559OmMcYItY,22568
61
+ cdxml_toolkit/perception/reactant_heuristic.py,sha256=N2qrOqBZqq7qBrMMwS_Pi-xqU1z4lFH6kdUtN4JsKKU,37245
62
+ cdxml_toolkit/perception/reaction_parser.py,sha256=3ZqQN6e1AkpSFV6lqRFoeSm2UFM8YaSVoQG2TY--Lkk,78337
63
+ cdxml_toolkit/perception/scheme_reader.py,sha256=iYhNe3mp0nhwI6vdfYfA_y2WN2R8h3d-i2BomhcneVg,113360
64
+ cdxml_toolkit/perception/scheme_refine.py,sha256=LF0DR818dpfqZ6eWTxjBTfUatWQp8c4fP2kn0DrVWIQ,53933
65
+ cdxml_toolkit/perception/scheme_segmenter.py,sha256=jMfayzoXdJec7gkzUsuE_8UsZLD2YfmEOQeV6oo9t3M,22346
66
+ cdxml_toolkit/perception/spatial_assignment.py,sha256=0Penwj8cjkAUG3LHME2KNqMIu4TtgZ8mqO9ixUulUO4,36688
67
+ cdxml_toolkit/render/__init__.py,sha256=sj71jqiB5ZHdMjMCOS8Clwe5NDYfbnOlPDiR1j1Z6nQ,726
68
+ cdxml_toolkit/render/auto_layout.py,sha256=sZJbDgrcZkzTUsNk434buGS9vDk4PAtSrT1WJm2hjiw,6736
69
+ cdxml_toolkit/render/compact_parser.py,sha256=oFH88lrdrOr4pm2HS4MDQ1c0HcurEjVLJOc85PNYcnk,24106
70
+ cdxml_toolkit/render/parser.py,sha256=ZV1yGtnmi_EUNiKKeV9U5Ddah827yj3v6LzRk9-09FA,27660
71
+ cdxml_toolkit/render/render_scheme.py,sha256=Y7IW1nGKQG4g6WoCChoqGpToRZkIZLeQE_19ZU-m5Xw,9315
72
+ cdxml_toolkit/render/renderer.py,sha256=skvjyQ2qhY88z1sWhrTpdbhxI2Np5p53Bd6gppBpI9s,88041
73
+ cdxml_toolkit/render/schema.py,sha256=2odwCAb5A8W2CoPoP8g30wfci-q30jO7RJNM2GMQ1Ks,3398
74
+ cdxml_toolkit/render/scheme_maker.py,sha256=mwM6JfFmxwez6jFtqcwzsMPeoiYi-64OGl1p-bkEjV8,38978
75
+ cdxml_toolkit/render/scheme_yaml_writer.py,sha256=ninptjQn6Yz5E42gYtJxU172eEuv9wDLChJOPg2tUiQ,54796
76
+ cdxml_toolkit/resolve/__init__.py,sha256=v7-wPFs_iYjqLb0t2I9cbyDlTE4nxclPUr-wqhK-MSo,561
77
+ cdxml_toolkit/resolve/cas_resolver.py,sha256=hxlW8-kM9i9eH5GhQw-rOL8mYhRb-nQpsFvd260edwU,14476
78
+ cdxml_toolkit/resolve/chemscanner_abbreviations.json,sha256=kBGq8lmAqbyn7IMXYfW5vtK1_ozpvKKCXFxg3NgwB9w,760394
79
+ cdxml_toolkit/resolve/condensed_formula.py,sha256=HKvV8MHG4m78Z6Pgmt1M6iJzEJT-N_EZURlarmWSVSU,17877
80
+ cdxml_toolkit/resolve/jre_manager.py,sha256=IUh1lydFnUjGiLk5NFwZD2FHOjW6zrvpNJKdx4juir8,5790
81
+ cdxml_toolkit/resolve/reagent_abbreviations.json,sha256=YDr5x0axlj3f1qsjYfkl1MYPA8irICmh2Kop6dIY0I0,28040
82
+ cdxml_toolkit/resolve/reagent_db.py,sha256=bXLVWED4_OD131eaRMKFlbuEt3wCPewfYnMuY1STNWM,10545
83
+ cdxml_toolkit/resolve/superatom_data.json,sha256=3jriTXXgP2t1uPyes9hnQwd0qio-JhJGWhCmsUlhm3A,116448
84
+ cdxml_toolkit/resolve/superatom_table.py,sha256=h7qIhnlvudutiHuHingLZpnyccAz6xM_cdNgOOgaMrE,5247
85
+ cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE,sha256=wjZbp_d7o9SBUVZk6Kqq_MnYq9vRiOivigzQtPD4SC0,1096
86
+ cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md,sha256=i4Q_4l-UVyC72WgR94PW45c9sm96RKxfRqgLX28n0JM,1697
87
+ cdxml_toolkit-0.5.0.dist-info/METADATA,sha256=m9fQPXlMNAQyRUzrugivFLGURwqC-zdlU6cwFEz4alo,13122
88
+ cdxml_toolkit-0.5.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
89
+ cdxml_toolkit-0.5.0.dist-info/entry_points.txt,sha256=Jqgw9-2s_o6ROfoK4FXSVdyLrlIJD3R58KaGJJQeflk,1012
90
+ cdxml_toolkit-0.5.0.dist-info/top_level.txt,sha256=bVY6UU47VwwBedbBzi9iapcGV5KymUOya2nWl9kgi7Y,14
91
+ cdxml_toolkit-0.5.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,17 @@
1
+ [console_scripts]
2
+ cdxml-build = cdxml_toolkit.cdxml_builder:main
3
+ cdxml-convert = cdxml_toolkit.chemdraw.cdx_converter:main
4
+ cdxml-discover = cdxml_toolkit.analysis.deterministic.discover_experiment_files:main
5
+ cdxml-format-entry = cdxml_toolkit.analysis.format_procedure_entry:main
6
+ cdxml-image = cdxml_toolkit.chemdraw.cdxml_to_image:main
7
+ cdxml-layout = cdxml_toolkit.layout.reaction_cleanup:main
8
+ cdxml-lcms = cdxml_toolkit.analysis.lcms_analyzer:main
9
+ cdxml-mcp = cdxml_toolkit.mcp_server.server:main
10
+ cdxml-merge = cdxml_toolkit.layout.scheme_merger:main
11
+ cdxml-multi-lcms = cdxml_toolkit.analysis.deterministic.multi_lcms_analyzer:main
12
+ cdxml-nmr = cdxml_toolkit.analysis.extract_nmr:main
13
+ cdxml-ole = cdxml_toolkit.office.ole_embedder:main
14
+ cdxml-parse = cdxml_toolkit.perception.reaction_parser:main
15
+ cdxml-polish = cdxml_toolkit.deterministic_pipeline.legacy.scheme_polisher_v2:main
16
+ cdxml-procedure = cdxml_toolkit.analysis.deterministic.procedure_writer:main
17
+ cdxml-render = cdxml_toolkit.render.render_scheme:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Hiu Fung Kevin Lee
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,37 @@
1
+ # Third-Party Notices
2
+
3
+ This project includes data and code derived from the following sources.
4
+
5
+ ## ChemScanner (data source)
6
+
7
+ The files `chemscanner_abbreviations.json` and `superatom_data.json` contain
8
+ chemical abbreviation-to-SMILES mappings derived from data files in the
9
+ [ChemScanner](https://github.com/ComPlat/chem_scanner) project, which is
10
+ licensed under AGPL-3.0.
11
+
12
+ The original data was extracted from ChemScanner's `abbreviations.yaml`,
13
+ `solvents.yaml`, and `superatom.txt` configuration files. The extraction
14
+ process involved:
15
+ - Parsing the original YAML/TXT files
16
+ - Validating all SMILES strings with RDKit
17
+ - Canonicalizing SMILES to a standard form
18
+ - Deduplicating entries by canonical SMILES
19
+ - Reorganizing into a different JSON schema with alias support
20
+
21
+ The resulting JSON files contain factual chemical information (abbreviation
22
+ labels mapped to their corresponding SMILES representations). Chemical
23
+ abbreviation-to-structure mappings are scientific facts — "OTs" represents
24
+ a tosylate group regardless of which source documents it.
25
+
26
+ ## RDKit Built-in Abbreviations (runtime data source)
27
+
28
+ At runtime, `superatom_table.py` supplements its JSON-backed lookup table
29
+ with abbreviation data from RDKit's `rdAbbreviations.GetDefaultAbbreviations()`
30
+ (approximately 40 entries). RDKit is licensed under the
31
+ [BSD 3-Clause License](https://github.com/rdkit/rdkit/blob/master/license.txt).
32
+
33
+ ## Reagent Database
34
+
35
+ The file `reagent_abbreviations.json` is an original curated database of
36
+ approximately 172 reagent entries commonly used in medicinal chemistry,
37
+ compiled by the project author. It is licensed under MIT as part of this project.
@@ -0,0 +1 @@
1
+ cdxml_toolkit