molalchemy 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- molalchemy-0.0.1/PKG-INFO +288 -0
- molalchemy-0.0.1/README.md +259 -0
- molalchemy-0.0.1/pyproject.toml +55 -0
- molalchemy-0.0.1/src/molalchemy/__init__.py +0 -0
- molalchemy-0.0.1/src/molalchemy/base.py +0 -0
- molalchemy-0.0.1/src/molalchemy/bingo/__init__.py +23 -0
- molalchemy-0.0.1/src/molalchemy/bingo/comparators.py +157 -0
- molalchemy-0.0.1/src/molalchemy/bingo/functions/__init__.py +3 -0
- molalchemy-0.0.1/src/molalchemy/bingo/functions/mol.py +360 -0
- molalchemy-0.0.1/src/molalchemy/bingo/functions/rxn.py +237 -0
- molalchemy-0.0.1/src/molalchemy/bingo/index.py +232 -0
- molalchemy-0.0.1/src/molalchemy/bingo/proxy.py +160 -0
- molalchemy-0.0.1/src/molalchemy/bingo/types.py +286 -0
- molalchemy-0.0.1/src/molalchemy/helpers.py +88 -0
- molalchemy-0.0.1/src/molalchemy/py.typed +0 -0
- molalchemy-0.0.1/src/molalchemy/rdkit/__init__.py +8 -0
- molalchemy-0.0.1/src/molalchemy/rdkit/comparators.py +26 -0
- molalchemy-0.0.1/src/molalchemy/rdkit/functions/__init__.py +3 -0
- molalchemy-0.0.1/src/molalchemy/rdkit/functions/fp.py +7 -0
- molalchemy-0.0.1/src/molalchemy/rdkit/functions/mol.py +58 -0
- molalchemy-0.0.1/src/molalchemy/rdkit/functions/rxn.py +28 -0
- molalchemy-0.0.1/src/molalchemy/rdkit/index.py +23 -0
- molalchemy-0.0.1/src/molalchemy/rdkit/types.py +188 -0
- molalchemy-0.0.1/src/molalchemy/types.py +11 -0
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: molalchemy
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Extensions for SQLAlchemy to work with chemical cartridges
|
|
5
|
+
Keywords: chemistry,cheminformatics,molecules,reactions,databases
|
|
6
|
+
Author: Anton Siomchen
|
|
7
|
+
Author-email: Anton Siomchen <anton.siomchen+molalchemy@gmail.com>
|
|
8
|
+
License: Apache-2.0
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering
|
|
13
|
+
Classifier: Topic :: Utilities
|
|
14
|
+
Classifier: Topic :: Database
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
|
+
Requires-Dist: loguru>=0.7.3
|
|
21
|
+
Requires-Dist: psycopg[binary]>=3.2.9
|
|
22
|
+
Requires-Dist: rdkit>=2024.3.1
|
|
23
|
+
Requires-Dist: sqlalchemy>=2.0.43
|
|
24
|
+
Requires-Python: >=3.10, <3.14
|
|
25
|
+
Project-URL: documentation, https://molalchemy.readthedocs.io
|
|
26
|
+
Project-URL: homepage, https://github.com/asiomchen/molalchemy
|
|
27
|
+
Project-URL: repository, https://github.com/asiomchen/molalchemy
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
<p align="center">
|
|
31
|
+
<a href="https://molalchemy.readthedocs.io"><img src="https://raw.githubusercontent.com/asiomchen/molalchemy/refs/heads/main/docs/img/logo-full.svg" alt="MolAlchemy"></a>
|
|
32
|
+
</p>
|
|
33
|
+
<p align="center">
|
|
34
|
+
<em>molalchemy - Making chemical databases as easy as regular databases! 🧪✨</em>
|
|
35
|
+
</p>
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
[](https://pypi.org/project/molalchemy/)
|
|
39
|
+

|
|
40
|
+
[](https://pepy.tech/projects/molalchemy)
|
|
41
|
+
[](LICENSE)
|
|
42
|
+
[]()
|
|
43
|
+

|
|
44
|
+
[](https://www.rdkit.org/)
|
|
45
|
+
[](https://www.sqlalchemy.org/)
|
|
46
|
+
[](https://github.com/astral-sh/ruff)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
**Extensions for SQLAlchemy to work with chemical cartridges**
|
|
50
|
+
|
|
51
|
+
molalchemy provides seamless integration between Python and chemical databases, enabling powerful chemical structure storage, indexing, and querying capabilities. The library supports popular chemical cartridges (Bingo PostgreSQL & RDKit PostgreSQL) and provides a unified API for chemical database operations.
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
**This project was originally supposed to be a part of RDKit UGM 2025 hackathon, but COVID had other plans for me. Currently it is in alpha stage as a proof of concept. Contributions are welcome!**
|
|
55
|
+
|
|
56
|
+
**To give it a hackathon vibe, I built this PoC in a couple of hours, so expect some rough edges and missing features.**
|
|
57
|
+
|
|
58
|
+
## ๐ Features
|
|
59
|
+
|
|
60
|
+
- **Chemical Data Types**: Custom SQLAlchemy types for molecules, reactions and fingerprints
|
|
61
|
+
- **Chemical Cartridge Integration**: Support for Bingo and RDKit PostgreSQL cartridges
|
|
62
|
+
- **Substructure Search**: Efficient substructure and similarity searching
|
|
63
|
+
- **Chemical Indexing**: High-performance chemical structure indexing
|
|
64
|
+
- **Typing**: As many type hints as possible - no need to remember yet another abstract function name
|
|
65
|
+
- **Easy Integration**: Drop-in replacement for standard SQLAlchemy types
|
|
66
|
+
|
|
67
|
+
## 📦 Installation
|
|
68
|
+
|
|
69
|
+
### Using pip
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install molalchemy
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### From source
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
pip install git+https://github.com/asiomchen/molalchemy.git
|
|
79
|
+
|
|
80
|
+
# or clone the repo and install
|
|
81
|
+
git clone https://github.com/asiomchen/molalchemy.git
|
|
82
|
+
cd molalchemy
|
|
83
|
+
pip install .
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
### Prerequisites
|
|
88
|
+
|
|
89
|
+
- Python 3.10+
|
|
90
|
+
|
|
91
|
+
- SQLAlchemy 2.0+
|
|
92
|
+
|
|
93
|
+
- rdkit 2024.3.1+
|
|
94
|
+
|
|
95
|
+
- Running PostgreSQL with chemical cartridge (Bingo or RDKit) (see [`docker-compose.yaml`](docker-compose.yaml) for a ready-to-use setup)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
## ๐ง Quick Start
|
|
99
|
+
|
|
100
|
+
To learn how to use molalchemy, check out the [Quick Start - RDKit](https://molalchemy.readthedocs.io/en/latest/tutorials/01_Getting_Started_rdkit_ORM/) and [Quick Start - Bingo](https://molalchemy.readthedocs.io/en/latest/tutorials/01_Getting_Started_bingo_ORM/) tutorials in the documentation.
|
|
101
|
+
|
|
102
|
+
## ๐๏ธ Supported Cartridges
|
|
103
|
+
|
|
104
|
+
### Bingo Cartridge
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from molalchemy.bingo.types import (
|
|
108
|
+
BingoMol, # Text-based molecule storage (SMILES/Molfile)
|
|
109
|
+
BingoBinaryMol, # Binary molecule storage with format conversion
|
|
110
|
+
BingoReaction, # Reaction storage (reaction SMILES/Rxnfile)
|
|
111
|
+
BingoBinaryReaction # Binary reaction storage
|
|
112
|
+
)
|
|
113
|
+
from molalchemy.bingo.index import (
|
|
114
|
+
BingoMolIndex, # Molecule indexing
|
|
115
|
+
BingoBinaryMolIndex, # Binary molecule indexing
|
|
116
|
+
BingoRxnIndex, # Reaction indexing
|
|
117
|
+
BingoBinaryRxnIndex # Binary reaction indexing
|
|
118
|
+
)
|
|
119
|
+
from molalchemy.bingo.functions import (
|
|
120
|
+
mol, # Molecule functions
|
|
121
|
+
rxn # Reaction functions
|
|
122
|
+
)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### RDKit Cartridge
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
from molalchemy.rdkit.types import (
|
|
129
|
+
RDKitMol, # RDKit molecule type
|
|
130
|
+
# Additional types available...
|
|
131
|
+
)
|
|
132
|
+
from molalchemy.rdkit.index import (
|
|
133
|
+
RDKitIndex, # RDKit molecule indexing (just GIST index)
|
|
134
|
+
)
|
|
135
|
+
from molalchemy.rdkit.functions import (
|
|
136
|
+
mol, # RDKit molecule functions
|
|
137
|
+
fp, # Fingerprint functions
|
|
138
|
+
rxn # Reaction functions
|
|
139
|
+
)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## 🎯 Advanced Features
|
|
143
|
+
|
|
144
|
+
### Chemical Indexing
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from molalchemy.bingo.index import BingoMolIndex
|
|
148
|
+
from molalchemy.bingo.types import BingoMol
|
|
149
|
+
|
|
150
|
+
class Molecule(Base):
|
|
151
|
+
__tablename__ = 'molecules'
|
|
152
|
+
|
|
153
|
+
id: Mapped[int] = mapped_column(Integer, primary_key=True)
|
|
154
|
+
structure: Mapped[str] = mapped_column(BingoMol)
|
|
155
|
+
name: Mapped[str] = mapped_column(String(100))
|
|
156
|
+
|
|
157
|
+
# Add chemical index for faster searching
|
|
158
|
+
__table_args__ = (
|
|
159
|
+
BingoMolIndex('mol_idx', 'structure'),
|
|
160
|
+
)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Binary Storage with Format Conversion
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
from molalchemy.bingo.types import BingoBinaryMol
|
|
167
|
+
|
|
168
|
+
class OptimizedMolecule(Base):
|
|
169
|
+
__tablename__ = 'optimized_molecules'
|
|
170
|
+
|
|
171
|
+
id: Mapped[int] = mapped_column(Integer, primary_key=True)
|
|
172
|
+
# Store as binary, return as SMILES
|
|
173
|
+
structure: Mapped[bytes] = mapped_column(
|
|
174
|
+
BingoBinaryMol(preserve_pos=False, return_type="smiles")
|
|
175
|
+
)
|
|
176
|
+
# Store as binary, return as Molfile (with coordinates)
|
|
177
|
+
structure_3d: Mapped[bytes] = mapped_column(
|
|
178
|
+
BingoBinaryMol(preserve_pos=True, return_type="molfile")
|
|
179
|
+
)
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Using Chemical Functions
|
|
183
|
+
|
|
184
|
+
`mol` provides all static methods for functional-style queries. Under the hood it uses SQLAlchemy's `func` to call the corresponding database functions, but provides type hints and syntax highlighting in IDEs.
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
from molalchemy.bingo.functions import mol
|
|
188
|
+
|
|
189
|
+
# Calculate molecular properties
|
|
190
|
+
results = session.query(
|
|
191
|
+
Molecule.name,
|
|
192
|
+
mol.get_weight(Molecule.structure).label('molecular_weight'),
|
|
193
|
+
mol.gross_formula(Molecule.structure).label('formula'),
|
|
194
|
+
mol.to_canonical(Molecule.structure).label('canonical_smiles')
|
|
195
|
+
).all()
|
|
196
|
+
|
|
197
|
+
# Validate molecular structures
|
|
198
|
+
invalid_molecules = session.query(Molecule).filter(
|
|
199
|
+
mol.check_molecule(Molecule.structure).isnot(None)
|
|
200
|
+
).all()
|
|
201
|
+
|
|
202
|
+
# Format conversions
|
|
203
|
+
inchi_keys = session.query(
|
|
204
|
+
Molecule.id,
|
|
205
|
+
mol.to_inchikey(Molecule.structure).label('inchikey')
|
|
206
|
+
).all()
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
## 🧪 Development
|
|
211
|
+
|
|
212
|
+
### Setting Up Development Environment
|
|
213
|
+
|
|
214
|
+
1. Clone the repository:
|
|
215
|
+
```bash
|
|
216
|
+
git clone https://github.com/asiomchen/molalchemy.git
|
|
217
|
+
cd molalchemy
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
2. Install dependencies:
|
|
221
|
+
```bash
|
|
222
|
+
uv sync
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
3. Activate the virtual environment:
|
|
226
|
+
```bash
|
|
227
|
+
source .venv/bin/activate
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### Running Tests
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
# Run all tests
|
|
234
|
+
uv run pytest
|
|
235
|
+
|
|
236
|
+
# Run specific test module
|
|
237
|
+
uv run pytest tests/bingo/
|
|
238
|
+
|
|
239
|
+
# Run with coverage
|
|
240
|
+
uv run pytest --cov=molalchemy
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
### Code Quality
|
|
244
|
+
|
|
245
|
+
This project uses modern Python development tools:
|
|
246
|
+
- **uv**: For virtual environment and dependency management
|
|
247
|
+
- **Ruff**: For linting and formatting
|
|
248
|
+
- **pytest**: For testing
|
|
249
|
+
|
|
250
|
+
## ๐ Documentation
|
|
251
|
+
|
|
252
|
+
- **[๐ Project Roadmap](ROADMAP.md)** - Development phases, timeline, and contribution opportunities
|
|
253
|
+
- **[🤝 Contributing Guide](CONTRIBUTING.md)** - How to contribute to the project
|
|
254
|
+
- **[๐ง API Reference](https://molalchemy.readthedocs.io/api)** - Complete API documentation
|
|
255
|
+
- **[๐ณ Bingo Manual](https://lifescience.opensource.epam.com/bingo/user-manual-postgres.html)** - Bingo PostgreSQL cartridge guide
|
|
256
|
+
- **[โ๏ธ RDKit Manual](https://www.rdkit.org/docs/Cartridge.html)** - RDKit PostgreSQL cartridge guide
|
|
257
|
+
|
|
258
|
+
## 🤝 Contributing
|
|
259
|
+
|
|
260
|
+
We welcome contributions! molalchemy offers many opportunities for developers interested in chemical informatics:
|
|
261
|
+
|
|
262
|
+
- **๐ฐ New to the project?** Check out [good first issues](https://github.com/asiomchen/molalchemy/labels/good%20first%20issue)
|
|
263
|
+
- **๏ฟฝ Chemical expertise?** Help complete RDKit integration or add ChemAxon support
|
|
264
|
+
- **๐ณ DevOps skills?** Optimize our Docker containers and CI/CD pipeline
|
|
265
|
+
- **๐ Love documentation?** Create tutorials and improve API docs
|
|
266
|
+
|
|
267
|
+
Read our **[Contributing Guide](CONTRIBUTING.md)** for detailed instructions on getting started.
|
|
268
|
+
|
|
269
|
+
## ๐ License
|
|
270
|
+
|
|
271
|
+
This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
|
|
272
|
+
|
|
273
|
+
## ๐ Acknowledgments
|
|
274
|
+
|
|
275
|
+
- [RDKit](https://www.rdkit.org/) - Open-source cheminformatics toolkit
|
|
276
|
+
- [Bingo](https://lifescience.opensource.epam.com/bingo/) - Chemical database cartridge
|
|
277
|
+
- [SQLAlchemy](https://sqlalchemy.org/) - Python SQL toolkit
|
|
278
|
+
|
|
279
|
+
## 📧 Contact
|
|
280
|
+
|
|
281
|
+
- **Author**: Anton Siomchen
|
|
282
|
+
- **Email**: anton.siomchen+molalchemy@gmail.com
|
|
283
|
+
- **GitHub**: [@asiomchen](https://github.com/asiomchen)
|
|
284
|
+
- **LinkedIn**: [Anton Siomchen](https://www.linkedin.com/in/anton-siomchen/)
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
**molalchemy** - Making chemical databases as easy as regular databases! 🧪✨
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<a href="https://molalchemy.readthedocs.io"><img src="https://raw.githubusercontent.com/asiomchen/molalchemy/refs/heads/main/docs/img/logo-full.svg" alt="MolAlchemy"></a>
|
|
3
|
+
</p>
|
|
4
|
+
<p align="center">
|
|
5
|
+
<em>molalchemy - Making chemical databases as easy as regular databases! 🧪✨</em>
|
|
6
|
+
</p>
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
[](https://pypi.org/project/molalchemy/)
|
|
10
|
+

|
|
11
|
+
[](https://pepy.tech/projects/molalchemy)
|
|
12
|
+
[](LICENSE)
|
|
13
|
+
[]()
|
|
14
|
+

|
|
15
|
+
[](https://www.rdkit.org/)
|
|
16
|
+
[](https://www.sqlalchemy.org/)
|
|
17
|
+
[](https://github.com/astral-sh/ruff)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
**Extensions for SQLAlchemy to work with chemical cartridges**
|
|
21
|
+
|
|
22
|
+
molalchemy provides seamless integration between Python and chemical databases, enabling powerful chemical structure storage, indexing, and querying capabilities. The library supports popular chemical cartridges (Bingo PostgreSQL & RDKit PostgreSQL) and provides a unified API for chemical database operations.
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
**This project was originally supposed to be a part of RDKit UGM 2025 hackathon, but COVID had other plans for me. Currently it is in alpha stage as a proof of concept. Contributions are welcome!**
|
|
26
|
+
|
|
27
|
+
**To give it a hackathon vibe, I built this PoC in a couple of hours, so expect some rough edges and missing features.**
|
|
28
|
+
|
|
29
|
+
## ๐ Features
|
|
30
|
+
|
|
31
|
+
- **Chemical Data Types**: Custom SQLAlchemy types for molecules, reactions and fingerprints
|
|
32
|
+
- **Chemical Cartridge Integration**: Support for Bingo and RDKit PostgreSQL cartridges
|
|
33
|
+
- **Substructure Search**: Efficient substructure and similarity searching
|
|
34
|
+
- **Chemical Indexing**: High-performance chemical structure indexing
|
|
35
|
+
- **Typing**: As many type hints as possible - no need to remember yet another abstract function name
|
|
36
|
+
- **Easy Integration**: Drop-in replacement for standard SQLAlchemy types
|
|
37
|
+
|
|
38
|
+
## 📦 Installation
|
|
39
|
+
|
|
40
|
+
### Using pip
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install molalchemy
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### From source
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install git+https://github.com/asiomchen/molalchemy.git
|
|
50
|
+
|
|
51
|
+
# or clone the repo and install
|
|
52
|
+
git clone https://github.com/asiomchen/molalchemy.git
|
|
53
|
+
cd molalchemy
|
|
54
|
+
pip install .
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
### Prerequisites
|
|
59
|
+
|
|
60
|
+
- Python 3.10+
|
|
61
|
+
|
|
62
|
+
- SQLAlchemy 2.0+
|
|
63
|
+
|
|
64
|
+
- rdkit 2024.3.1+
|
|
65
|
+
|
|
66
|
+
- Running PostgreSQL with chemical cartridge (Bingo or RDKit) (see [`docker-compose.yaml`](docker-compose.yaml) for a ready-to-use setup)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
## ๐ง Quick Start
|
|
70
|
+
|
|
71
|
+
To learn how to use molalchemy, check out the [Quick Start - RDKit](https://molalchemy.readthedocs.io/en/latest/tutorials/01_Getting_Started_rdkit_ORM/) and [Quick Start - Bingo](https://molalchemy.readthedocs.io/en/latest/tutorials/01_Getting_Started_bingo_ORM/) tutorials in the documentation.
|
|
72
|
+
|
|
73
|
+
## ๐๏ธ Supported Cartridges
|
|
74
|
+
|
|
75
|
+
### Bingo Cartridge
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from molalchemy.bingo.types import (
|
|
79
|
+
BingoMol, # Text-based molecule storage (SMILES/Molfile)
|
|
80
|
+
BingoBinaryMol, # Binary molecule storage with format conversion
|
|
81
|
+
BingoReaction, # Reaction storage (reaction SMILES/Rxnfile)
|
|
82
|
+
BingoBinaryReaction # Binary reaction storage
|
|
83
|
+
)
|
|
84
|
+
from molalchemy.bingo.index import (
|
|
85
|
+
BingoMolIndex, # Molecule indexing
|
|
86
|
+
BingoBinaryMolIndex, # Binary molecule indexing
|
|
87
|
+
BingoRxnIndex, # Reaction indexing
|
|
88
|
+
BingoBinaryRxnIndex # Binary reaction indexing
|
|
89
|
+
)
|
|
90
|
+
from molalchemy.bingo.functions import (
|
|
91
|
+
mol, # Molecule functions
|
|
92
|
+
rxn # Reaction functions
|
|
93
|
+
)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### RDKit Cartridge
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from molalchemy.rdkit.types import (
|
|
100
|
+
RDKitMol, # RDKit molecule type
|
|
101
|
+
# Additional types available...
|
|
102
|
+
)
|
|
103
|
+
from molalchemy.rdkit.index import (
|
|
104
|
+
RDKitIndex, # RDKit molecule indexing (just GIST index)
|
|
105
|
+
)
|
|
106
|
+
from molalchemy.rdkit.functions import (
|
|
107
|
+
mol, # RDKit molecule functions
|
|
108
|
+
fp, # Fingerprint functions
|
|
109
|
+
rxn # Reaction functions
|
|
110
|
+
)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## 🎯 Advanced Features
|
|
114
|
+
|
|
115
|
+
### Chemical Indexing
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from molalchemy.bingo.index import BingoMolIndex
|
|
119
|
+
from molalchemy.bingo.types import BingoMol
|
|
120
|
+
|
|
121
|
+
class Molecule(Base):
|
|
122
|
+
__tablename__ = 'molecules'
|
|
123
|
+
|
|
124
|
+
id: Mapped[int] = mapped_column(Integer, primary_key=True)
|
|
125
|
+
structure: Mapped[str] = mapped_column(BingoMol)
|
|
126
|
+
name: Mapped[str] = mapped_column(String(100))
|
|
127
|
+
|
|
128
|
+
# Add chemical index for faster searching
|
|
129
|
+
__table_args__ = (
|
|
130
|
+
BingoMolIndex('mol_idx', 'structure'),
|
|
131
|
+
)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Binary Storage with Format Conversion
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
from molalchemy.bingo.types import BingoBinaryMol
|
|
138
|
+
|
|
139
|
+
class OptimizedMolecule(Base):
|
|
140
|
+
__tablename__ = 'optimized_molecules'
|
|
141
|
+
|
|
142
|
+
id: Mapped[int] = mapped_column(Integer, primary_key=True)
|
|
143
|
+
# Store as binary, return as SMILES
|
|
144
|
+
structure: Mapped[bytes] = mapped_column(
|
|
145
|
+
BingoBinaryMol(preserve_pos=False, return_type="smiles")
|
|
146
|
+
)
|
|
147
|
+
# Store as binary, return as Molfile (with coordinates)
|
|
148
|
+
structure_3d: Mapped[bytes] = mapped_column(
|
|
149
|
+
BingoBinaryMol(preserve_pos=True, return_type="molfile")
|
|
150
|
+
)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Using Chemical Functions
|
|
154
|
+
|
|
155
|
+
`mol` provides all static methods for functional-style queries. Under the hood it uses SQLAlchemy's `func` to call the corresponding database functions, but provides type hints and syntax highlighting in IDEs.
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from molalchemy.bingo.functions import mol
|
|
159
|
+
|
|
160
|
+
# Calculate molecular properties
|
|
161
|
+
results = session.query(
|
|
162
|
+
Molecule.name,
|
|
163
|
+
mol.get_weight(Molecule.structure).label('molecular_weight'),
|
|
164
|
+
mol.gross_formula(Molecule.structure).label('formula'),
|
|
165
|
+
mol.to_canonical(Molecule.structure).label('canonical_smiles')
|
|
166
|
+
).all()
|
|
167
|
+
|
|
168
|
+
# Validate molecular structures
|
|
169
|
+
invalid_molecules = session.query(Molecule).filter(
|
|
170
|
+
mol.check_molecule(Molecule.structure).isnot(None)
|
|
171
|
+
).all()
|
|
172
|
+
|
|
173
|
+
# Format conversions
|
|
174
|
+
inchi_keys = session.query(
|
|
175
|
+
Molecule.id,
|
|
176
|
+
mol.to_inchikey(Molecule.structure).label('inchikey')
|
|
177
|
+
).all()
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
## 🧪 Development
|
|
182
|
+
|
|
183
|
+
### Setting Up Development Environment
|
|
184
|
+
|
|
185
|
+
1. Clone the repository:
|
|
186
|
+
```bash
|
|
187
|
+
git clone https://github.com/asiomchen/molalchemy.git
|
|
188
|
+
cd molalchemy
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
2. Install dependencies:
|
|
192
|
+
```bash
|
|
193
|
+
uv sync
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
3. Activate the virtual environment:
|
|
197
|
+
```bash
|
|
198
|
+
source .venv/bin/activate
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### Running Tests
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
# Run all tests
|
|
205
|
+
uv run pytest
|
|
206
|
+
|
|
207
|
+
# Run specific test module
|
|
208
|
+
uv run pytest tests/bingo/
|
|
209
|
+
|
|
210
|
+
# Run with coverage
|
|
211
|
+
uv run pytest --cov=molalchemy
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Code Quality
|
|
215
|
+
|
|
216
|
+
This project uses modern Python development tools:
|
|
217
|
+
- **uv**: For virtual environment and dependency management
|
|
218
|
+
- **Ruff**: For linting and formatting
|
|
219
|
+
- **pytest**: For testing
|
|
220
|
+
|
|
221
|
+
## ๐ Documentation
|
|
222
|
+
|
|
223
|
+
- **[๐ Project Roadmap](ROADMAP.md)** - Development phases, timeline, and contribution opportunities
|
|
224
|
+
- **[🤝 Contributing Guide](CONTRIBUTING.md)** - How to contribute to the project
|
|
225
|
+
- **[๐ง API Reference](https://molalchemy.readthedocs.io/api)** - Complete API documentation
|
|
226
|
+
- **[๐ณ Bingo Manual](https://lifescience.opensource.epam.com/bingo/user-manual-postgres.html)** - Bingo PostgreSQL cartridge guide
|
|
227
|
+
- **[โ๏ธ RDKit Manual](https://www.rdkit.org/docs/Cartridge.html)** - RDKit PostgreSQL cartridge guide
|
|
228
|
+
|
|
229
|
+
## 🤝 Contributing
|
|
230
|
+
|
|
231
|
+
We welcome contributions! molalchemy offers many opportunities for developers interested in chemical informatics:
|
|
232
|
+
|
|
233
|
+
- **๐ฐ New to the project?** Check out [good first issues](https://github.com/asiomchen/molalchemy/labels/good%20first%20issue)
|
|
234
|
+
- **๏ฟฝ Chemical expertise?** Help complete RDKit integration or add ChemAxon support
|
|
235
|
+
- **๐ณ DevOps skills?** Optimize our Docker containers and CI/CD pipeline
|
|
236
|
+
- **๐ Love documentation?** Create tutorials and improve API docs
|
|
237
|
+
|
|
238
|
+
Read our **[Contributing Guide](CONTRIBUTING.md)** for detailed instructions on getting started.
|
|
239
|
+
|
|
240
|
+
## ๐ License
|
|
241
|
+
|
|
242
|
+
This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
|
|
243
|
+
|
|
244
|
+
## ๐ Acknowledgments
|
|
245
|
+
|
|
246
|
+
- [RDKit](https://www.rdkit.org/) - Open-source cheminformatics toolkit
|
|
247
|
+
- [Bingo](https://lifescience.opensource.epam.com/bingo/) - Chemical database cartridge
|
|
248
|
+
- [SQLAlchemy](https://sqlalchemy.org/) - Python SQL toolkit
|
|
249
|
+
|
|
250
|
+
## 📧 Contact
|
|
251
|
+
|
|
252
|
+
- **Author**: Anton Siomchen
|
|
253
|
+
- **Email**: anton.siomchen+molalchemy@gmail.com
|
|
254
|
+
- **GitHub**: [@asiomchen](https://github.com/asiomchen)
|
|
255
|
+
- **LinkedIn**: [Anton Siomchen](https://www.linkedin.com/in/anton-siomchen/)
|
|
256
|
+
|
|
257
|
+
---
|
|
258
|
+
|
|
259
|
+
**molalchemy** - Making chemical databases as easy as regular databases! 🧪✨
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "molalchemy"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "Extensions for SQLAlchemy to work with chemical cartridges"
|
|
5
|
+
license = {text = "Apache-2.0"}
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Anton Siomchen", email = "anton.siomchen+molalchemy@gmail.com" }
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
requires-python = ">=3.10,<3.14"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"loguru>=0.7.3",
|
|
14
|
+
"psycopg[binary]>=3.2.9",
|
|
15
|
+
"rdkit>=2024.3.1",
|
|
16
|
+
"sqlalchemy>=2.0.43",
|
|
17
|
+
]
|
|
18
|
+
keywords = ["chemistry", "cheminformatics", "molecules", "reactions", "databases"]
|
|
19
|
+
classifiers = [
|
|
20
|
+
"Development Status :: 3 - Alpha",
|
|
21
|
+
"Intended Audience :: Science/Research",
|
|
22
|
+
"License :: OSI Approved :: Apache Software License",
|
|
23
|
+
"Topic :: Scientific/Engineering",
|
|
24
|
+
"Topic :: Utilities",
|
|
25
|
+
"Topic :: Database",
|
|
26
|
+
"Programming Language :: Python :: 3.10",
|
|
27
|
+
"Programming Language :: Python :: 3.11",
|
|
28
|
+
"Programming Language :: Python :: 3.12",
|
|
29
|
+
"Programming Language :: Python :: 3.13",
|
|
30
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
homepage = "https://github.com/asiomchen/molalchemy"
|
|
35
|
+
repository = "https://github.com/asiomchen/molalchemy"
|
|
36
|
+
documentation = "https://molalchemy.readthedocs.io"
|
|
37
|
+
|
|
38
|
+
[build-system]
|
|
39
|
+
requires = ["uv_build>=0.8.13,<0.9.0"]
|
|
40
|
+
build-backend = "uv_build"
|
|
41
|
+
|
|
42
|
+
[dependency-groups]
|
|
43
|
+
dev = [
|
|
44
|
+
"ipykernel>=6.30.1",
|
|
45
|
+
"pytest>=8.4.2",
|
|
46
|
+
"pytest-cov>=7.0.0",
|
|
47
|
+
]
|
|
48
|
+
docs = [
|
|
49
|
+
"griffe-typingdoc>=0.2.9",
|
|
50
|
+
"mkdocs>=1.6.1",
|
|
51
|
+
"mkdocs-jupyter>=0.25.1",
|
|
52
|
+
"mkdocs-material[imaging]>=9.6.2",
|
|
53
|
+
"mkdocstrings[python]>=0.28.0",
|
|
54
|
+
"pymdown-extensions>=10.16.1",
|
|
55
|
+
]
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from .index import (
|
|
2
|
+
BingoBinaryMolIndex,
|
|
3
|
+
BingoBinaryRxnIndex,
|
|
4
|
+
BingoMolIndex,
|
|
5
|
+
BingoRxnIndex,
|
|
6
|
+
)
|
|
7
|
+
from .proxy import BingoMolProxy, BingoRxnProxy
|
|
8
|
+
from .types import BingoBinaryMol, BingoBinaryReaction, BingoMol, BingoReaction
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"BingoBinaryMol",
|
|
12
|
+
"BingoBinaryMolIndex",
|
|
13
|
+
"BingoBinaryReaction",
|
|
14
|
+
"BingoBinaryRxnIndex",
|
|
15
|
+
"BingoMol",
|
|
16
|
+
"BingoMolIndex",
|
|
17
|
+
"BingoMolProxy",
|
|
18
|
+
"BingoReaction",
|
|
19
|
+
"BingoRxnIndex",
|
|
20
|
+
"BingoRxnProxy",
|
|
21
|
+
"bingo_func",
|
|
22
|
+
"bingo_rxn_func",
|
|
23
|
+
]
|