chemrecon 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemrecon-0.1.1/PKG-INFO +143 -0
- chemrecon-0.1.1/README.md +111 -0
- chemrecon-0.1.1/pyproject.toml +72 -0
- chemrecon-0.1.1/src/chemrecon/__init__.py +73 -0
- chemrecon-0.1.1/src/chemrecon/chem/__init__.py +0 -0
- chemrecon-0.1.1/src/chemrecon/chem/chemreaction.py +223 -0
- chemrecon-0.1.1/src/chemrecon/chem/constant_compounds.py +3 -0
- chemrecon-0.1.1/src/chemrecon/chem/create_mol.py +91 -0
- chemrecon-0.1.1/src/chemrecon/chem/elements.py +141 -0
- chemrecon-0.1.1/src/chemrecon/chem/gml/__init__.py +0 -0
- chemrecon-0.1.1/src/chemrecon/chem/gml/gml.py +324 -0
- chemrecon-0.1.1/src/chemrecon/chem/gml/gml_reactant_matching.py +130 -0
- chemrecon-0.1.1/src/chemrecon/chem/gml/gml_to_rdk.py +217 -0
- chemrecon-0.1.1/src/chemrecon/chem/mol.py +483 -0
- chemrecon-0.1.1/src/chemrecon/chem/sumformula.py +120 -0
- chemrecon-0.1.1/src/chemrecon/connection.py +97 -0
- chemrecon-0.1.1/src/chemrecon/core/__init__.py +0 -0
- chemrecon-0.1.1/src/chemrecon/core/id_types.py +687 -0
- chemrecon-0.1.1/src/chemrecon/core/ontology.py +209 -0
- chemrecon-0.1.1/src/chemrecon/core/populate_query_handler.py +336 -0
- chemrecon-0.1.1/src/chemrecon/core/query_handler.py +587 -0
- chemrecon-0.1.1/src/chemrecon/database/__init__.py +1 -0
- chemrecon-0.1.1/src/chemrecon/database/connect.py +63 -0
- chemrecon-0.1.1/src/chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
- chemrecon-0.1.1/src/chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
- chemrecon-0.1.1/src/chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
- chemrecon-0.1.1/src/chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
- chemrecon-0.1.1/src/chemrecon/database/params.py +88 -0
- chemrecon-0.1.1/src/chemrecon/entrygraph/draw.py +119 -0
- chemrecon-0.1.1/src/chemrecon/entrygraph/entrygraph.py +301 -0
- chemrecon-0.1.1/src/chemrecon/entrygraph/explorationprotocol.py +199 -0
- chemrecon-0.1.1/src/chemrecon/entrygraph/explore.py +421 -0
- chemrecon-0.1.1/src/chemrecon/entrygraph/explore_procedure.py +183 -0
- chemrecon-0.1.1/src/chemrecon/entrygraph/filter.py +88 -0
- chemrecon-0.1.1/src/chemrecon/entrygraph/scoring.py +141 -0
- chemrecon-0.1.1/src/chemrecon/query/__init__.py +26 -0
- chemrecon-0.1.1/src/chemrecon/query/create_entry.py +86 -0
- chemrecon-0.1.1/src/chemrecon/query/default_protocols.py +57 -0
- chemrecon-0.1.1/src/chemrecon/query/find_entry.py +84 -0
- chemrecon-0.1.1/src/chemrecon/query/get_relations.py +143 -0
- chemrecon-0.1.1/src/chemrecon/query/get_structures_from_compound.py +65 -0
- chemrecon-0.1.1/src/chemrecon/schema/__init__.py +86 -0
- chemrecon-0.1.1/src/chemrecon/schema/db_object.py +363 -0
- chemrecon-0.1.1/src/chemrecon/schema/direction.py +10 -0
- chemrecon-0.1.1/src/chemrecon/schema/entry_types/__init__.py +0 -0
- chemrecon-0.1.1/src/chemrecon/schema/entry_types/aam.py +34 -0
- chemrecon-0.1.1/src/chemrecon/schema/entry_types/aam_repr.py +37 -0
- chemrecon-0.1.1/src/chemrecon/schema/entry_types/compound.py +52 -0
- chemrecon-0.1.1/src/chemrecon/schema/entry_types/enzyme.py +49 -0
- chemrecon-0.1.1/src/chemrecon/schema/entry_types/molstructure.py +64 -0
- chemrecon-0.1.1/src/chemrecon/schema/entry_types/molstructure_repr.py +41 -0
- chemrecon-0.1.1/src/chemrecon/schema/entry_types/reaction.py +57 -0
- chemrecon-0.1.1/src/chemrecon/schema/enums.py +154 -0
- chemrecon-0.1.1/src/chemrecon/schema/procedural_relation_entrygraph.py +66 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_composed/__init__.py +0 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/__init__.py +0 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/__init__.py +0 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
- chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
- chemrecon-0.1.1/src/chemrecon/scripts/initialize_database.py +494 -0
- chemrecon-0.1.1/src/chemrecon/utils/copy_signature.py +10 -0
- chemrecon-0.1.1/src/chemrecon/utils/encodeable_list.py +11 -0
- chemrecon-0.1.1/src/chemrecon/utils/get_id_type.py +70 -0
- chemrecon-0.1.1/src/chemrecon/utils/hungarian.py +31 -0
- chemrecon-0.1.1/src/chemrecon/utils/reactant_matching.py +168 -0
- chemrecon-0.1.1/src/chemrecon/utils/rxnutils.py +44 -0
- chemrecon-0.1.1/src/chemrecon/utils/set_cwd.py +12 -0
chemrecon-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chemrecon
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: The ChemRecon library for integration and exploration of interconnected biochemical databases.
|
|
5
|
+
Keywords: bioinformatics
|
|
6
|
+
Author: Casper Asbjørn Eriksen
|
|
7
|
+
Author-email: Casper Asbjørn Eriksen <casbjorn@imada.sdu.dk>
|
|
8
|
+
License-Expression: GPL-3.0-only
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
10
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
13
|
+
Requires-Dist: psycopg[binary]~=3.3.2
|
|
14
|
+
Requires-Dist: rustworkx~=0.17.1
|
|
15
|
+
Requires-Dist: networkx~=3.6.1
|
|
16
|
+
Requires-Dist: matplotlib~=3.10
|
|
17
|
+
Requires-Dist: rdkit
|
|
18
|
+
Requires-Dist: sphinx==8.3.0 ; extra == 'docs'
|
|
19
|
+
Requires-Dist: myst-parser ; extra == 'docs'
|
|
20
|
+
Requires-Dist: sphinx-autobuild ; extra == 'docs'
|
|
21
|
+
Requires-Dist: enum-tools[sphinx]==0.12.0 ; extra == 'docs'
|
|
22
|
+
Requires-Dist: sphinx-toolbox ; extra == 'docs'
|
|
23
|
+
Requires-Dist: nbsphinx ; extra == 'docs'
|
|
24
|
+
Requires-Dist: ipykernel>=7.1.0 ; extra == 'docs'
|
|
25
|
+
Requires-Dist: furo ; extra == 'docs'
|
|
26
|
+
Requires-Dist: sphinxext-opengraph ; extra == 'docs'
|
|
27
|
+
Maintainer: Casper Asbjørn Eriksen
|
|
28
|
+
Maintainer-email: Casper Asbjørn Eriksen <casbjorn@imada.sdu.dk>
|
|
29
|
+
Requires-Python: >=3.12
|
|
30
|
+
Provides-Extra: docs
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
# ChemRecon
|
|
34
|
+
*v. 0.1.1*
|
|
35
|
+
|
|
36
|
+
ChemRecon is a Python library and consolidated meta-database designed to simplify the integration and exploration of
|
|
37
|
+
biochemical data from a range of sources.
|
|
38
|
+
It is built from full-database downloads of compounds, reactions, enzymes, molecular structures, and atom-to-atom maps
|
|
39
|
+
from the following source databases: BiGG, BRENDA, ChEBI, ECMDB, M-CSA, MetaMDB, MetaNetX, and PubChem.
|
|
40
|
+
|
|
41
|
+
Heterogeneous data formats were standardized, and relationships within and between these databases were reconstructed in
|
|
42
|
+
a consistent format.
|
|
43
|
+
The resulting meta-database is freely accessible online and is complemented by a Python library which allows for easy
|
|
44
|
+
integration into existing workflows.
|
|
45
|
+
This enables unified querying of entries from all the source databases, and discovery and visualization of
|
|
46
|
+
relationships between these entries.
|
|
47
|
+
|
|
48
|
+

|
|
49
|
+
|
|
50
|
+
ChemRecon was developed at the
|
|
51
|
+
[Algorithmic Cheminformatics Group](https://cheminf.imada.sdu.dk/),
|
|
52
|
+
[Department of Mathematics and Computer Science](https://cheminf.imada.sdu.dk/),
|
|
53
|
+
[University of Southern Denmark](https://sdu.dk).
|
|
54
|
+
|
|
55
|
+
## Paper
|
|
56
|
+
If ChemRecon proves useful to your research, you may want to cite the following paper.
|
|
57
|
+
* **Title**
|
|
58
|
+
|
|
59
|
+
C. A. Eriksen, J. L. Andersen, R. Fagerberg, D. Merkle
|
|
60
|
+
|
|
61
|
+
Arxiv preprint, submitted to Bioinformatics.
|
|
62
|
+
|
|
63
|
+
TODO more
|
|
64
|
+
|
|
65
|
+
## Availability and Installation
|
|
66
|
+
ChemRecon is available via your Python package manager from the Python Package Index (PyPI):
|
|
67
|
+
[chemrecon](https://pypi.org/project/chemrecon/)
|
|
68
|
+
It can be installed using pip:
|
|
69
|
+
|
|
70
|
+
`pip install chemrecon`
|
|
71
|
+
|
|
72
|
+
Visualizing entry graphs requires [GraphViz](https://www.graphviz.org/) to be installed, and for the `dot` executable,
|
|
73
|
+
which renders the graphs, to be available on your system's `PATH`.
|
|
74
|
+
See the [GraphViz Python package](https://pypi.org/project/graphviz/) for instructions.
|
|
75
|
+
|
|
76
|
+
***
|
|
77
|
+
|
|
78
|
+
## Documentation
|
|
79
|
+
The documentation, including instructions on usage, tutorials, and a complete description covering the types of entries
|
|
80
|
+
and relations supported, is available on the [ChemRecon homepage](https://www.cheminf.imada.sdu.dk/chemrecon).
|
|
81
|
+
|
|
82
|
+
## Usage
|
|
83
|
+
The following is an example of a typical ChemRecon workflow, producing the graph seen above.
|
|
84
|
+
For more detailed examples, see the tutorial section of the documentation.
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from chemrecon import *
|
|
88
|
+
|
|
89
|
+
connect_public()
|
|
90
|
+
|
|
91
|
+
# Perform a database query to find the 'citrate' entry in BiGG.
|
|
92
|
+
citrate_entry = find_entry(id_type = C_BIGG, source_id = 'M_cit')
|
|
93
|
+
|
|
94
|
+
# Define a protocol to find related entries and molecular structures (protocols like this are included)
|
|
95
|
+
compound_structure_protocol = ExplorationProtocol(
|
|
96
|
+
relation_types = {CompoundReference, CompoundHasMolStructure, MolStructureStandardization}
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
# Create and expand an entry graph, according to this protocol, by traversing the database.
|
|
100
|
+
eg = EntryGraph(initial_entries = {citrate_entry})
|
|
101
|
+
explore(eg, compound_structure_protocol, steps = 5)
|
|
102
|
+
|
|
103
|
+
# Score the molecular structures in the graph according to their 'connectedness'
|
|
104
|
+
scorer = Scorer(score_entry_type = MolStructure)
|
|
105
|
+
scores = scorer(citrate_entry) # Result is an OrderedDict
|
|
106
|
+
|
|
107
|
+
# Draw the graph with these scores, producing the image seen on this page
|
|
108
|
+
eg.show(scores = scores)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
***
|
|
112
|
+
|
|
113
|
+
## Database
|
|
114
|
+
ChemRecon needs to be connected to a database to function.
|
|
115
|
+
The easiest option is to connect to the public database, hosted by [SDU](https://sdu.dk):
|
|
116
|
+
```
|
|
117
|
+
connect_public()
|
|
118
|
+
```
|
|
119
|
+
Alternatively, a local instance of the database can be hosted via Docker.
|
|
120
|
+
Instructions are given in the [documentation](https://chemrecon.org).
|
|
121
|
+
This has the advantage of lower latency, making queries and entry graph construction faster, and allows adding
|
|
122
|
+
custom data sources.
|
|
123
|
+
|
|
124
|
+
## Source Databases
|
|
125
|
+
ChemRecon contains compound, molecular structure, reaction, atom-to-atom map, and enzyme entries from the following
|
|
126
|
+
databases.
|
|
127
|
+
|
|
128
|
+
| Source | Compound | Structure | Reaction | AAM | Enzyme | Version |
|
|
129
|
+
|------------|--------------|---------------|------------|---------|----------|---------|
|
|
130
|
+
| BiGG | 20428 | - | 33942 | - | 5705 | 1.6 |
|
|
131
|
+
| BRENDA | - | - | 61129 | - | 8697 | 2025_1 |
|
|
132
|
+
| ChEBI | 224485 | 330207 | - | - | - | 2024-05 |
|
|
133
|
+
| ECMDB | 3760 | 7517 | - | - | - | 2.0 |
|
|
134
|
+
| M-CSA | - | - | 1003 | 342 | 1003 | 2024-11 |
|
|
135
|
+
| MetaMDB | 80815 | 4392 | 74520 | 1003 | - | 2025-02 |
|
|
136
|
+
| MetaNetX | 2601834 | 2297518 | 143880 | - | 48175 | 4.4 |
|
|
137
|
+
| PubChem | 9031498 | 5000000 | - | - | - | 2024-09 |
|
|
138
|
+
|
|
139
|
+
In addition to the source databases, ChemRecon can make use of a greater number of *auxiliary* databases, including
|
|
140
|
+
MetaCyc and KEGG. Data from these sources is not directly included due to being proprietary or difficult to access.
|
|
141
|
+
However, the source databases contain references to the auxiliary databases, so entries are created which contain only
|
|
142
|
+
the identifier and no additional information. This allows users to use ChemRecon workflows based on identifiers from a
|
|
143
|
+
great number of databases, not just the source databases.
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# ChemRecon
|
|
2
|
+
*v. 0.1.1*
|
|
3
|
+
|
|
4
|
+
ChemRecon is a Python library and consolidated meta-database designed to simplify the integration and exploration of
|
|
5
|
+
biochemical data from a range of sources.
|
|
6
|
+
It is built from full-database downloads of compounds, reactions, enzymes, molecular structures, and atom-to-atom maps
|
|
7
|
+
from the following source databases: BiGG, BRENDA, ChEBI, ECMDB, M-CSA, MetaMDB, MetaNetX, and PubChem.
|
|
8
|
+
|
|
9
|
+
Heterogeneous data formats were standardized, and relationships within and between these databases were reconstructed in
|
|
10
|
+
a consistent format.
|
|
11
|
+
The resulting meta-database is freely accessible online and is complemented by a Python library which allows for easy
|
|
12
|
+
integration into existing workflows.
|
|
13
|
+
This enables unified querying of entries from all the source databases, and discovery and visualization of
|
|
14
|
+
relationships between these entries.
|
|
15
|
+
|
|
16
|
+

|
|
17
|
+
|
|
18
|
+
ChemRecon was developed at the
|
|
19
|
+
[Algorithmic Cheminformatics Group](https://cheminf.imada.sdu.dk/),
|
|
20
|
+
[Department of Mathematics and Computer Science](https://cheminf.imada.sdu.dk/),
|
|
21
|
+
[University of Southern Denmark](https://sdu.dk).
|
|
22
|
+
|
|
23
|
+
## Paper
|
|
24
|
+
If ChemRecon proves useful to your research, you may want to cite the following paper.
|
|
25
|
+
* **Title**
|
|
26
|
+
|
|
27
|
+
C. A. Eriksen, J. L. Andersen, R. Fagerberg, D. Merkle
|
|
28
|
+
|
|
29
|
+
Arxiv preprint, submitted to Bioinformatics.
|
|
30
|
+
|
|
31
|
+
TODO more
|
|
32
|
+
|
|
33
|
+
## Availability and Installation
|
|
34
|
+
ChemRecon is available via your Python package manager from the Python Package Index (PyPI):
|
|
35
|
+
[chemrecon](https://pypi.org/project/chemrecon/)
|
|
36
|
+
It can be installed using pip:
|
|
37
|
+
|
|
38
|
+
`pip install chemrecon`
|
|
39
|
+
|
|
40
|
+
Visualizing entry graphs requires [GraphViz](https://www.graphviz.org/) to be installed, and for the `dot` executable,
|
|
41
|
+
which renders the graphs, to be available on your system's `PATH`.
|
|
42
|
+
See the [GraphViz Python package](https://pypi.org/project/graphviz/) for instructions.
|
|
43
|
+
|
|
44
|
+
***
|
|
45
|
+
|
|
46
|
+
## Documentation
|
|
47
|
+
The documentation, including instructions on usage, tutorials, and a complete description covering the types of entries
|
|
48
|
+
and relations supported, is available on the [ChemRecon homepage](https://www.cheminf.imada.sdu.dk/chemrecon).
|
|
49
|
+
|
|
50
|
+
## Usage
|
|
51
|
+
The following is an example of a typical ChemRecon workflow, producing the graph seen above.
|
|
52
|
+
For more detailed examples, see the tutorial section of the documentation.
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from chemrecon import *
|
|
56
|
+
|
|
57
|
+
connect_public()
|
|
58
|
+
|
|
59
|
+
# Perform a database query to find the 'citrate' entry in BiGG.
|
|
60
|
+
citrate_entry = find_entry(id_type = C_BIGG, source_id = 'M_cit')
|
|
61
|
+
|
|
62
|
+
# Define a protocol to find related entries and molecular structures (protocols like this are included)
|
|
63
|
+
compound_structure_protocol = ExplorationProtocol(
|
|
64
|
+
relation_types = {CompoundReference, CompoundHasMolStructure, MolStructureStandardization}
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# Create and expand an entry graph, according to this protocol, by traversing the database.
|
|
68
|
+
eg = EntryGraph(initial_entries = {citrate_entry})
|
|
69
|
+
explore(eg, compound_structure_protocol, steps = 5)
|
|
70
|
+
|
|
71
|
+
# Score the molecular structures in the graph according to their 'connectedness'
|
|
72
|
+
scorer = Scorer(score_entry_type = MolStructure)
|
|
73
|
+
scores = scorer(citrate_entry) # Result is an OrderedDict
|
|
74
|
+
|
|
75
|
+
# Draw the graph with these scores, producing the image seen on this page
|
|
76
|
+
eg.show(scores = scores)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
***
|
|
80
|
+
|
|
81
|
+
## Database
|
|
82
|
+
ChemRecon needs to be connected to a database to function.
|
|
83
|
+
The easiest option is to connect to the public database, hosted by [SDU](https://sdu.dk):
|
|
84
|
+
```
|
|
85
|
+
connect_public()
|
|
86
|
+
```
|
|
87
|
+
Alternatively, a local instance of the database can be hosted via Docker.
|
|
88
|
+
Instructions are given in the [documentation](https://chemrecon.org).
|
|
89
|
+
This has the advantage of lower latency, making queries and entry graph construction faster, and allows adding
|
|
90
|
+
custom data sources.
|
|
91
|
+
|
|
92
|
+
## Source Databases
|
|
93
|
+
ChemRecon contains compound, molecular structure, reaction, atom-to-atom map, and enzyme entries from the following
|
|
94
|
+
databases.
|
|
95
|
+
|
|
96
|
+
| Source | Compound | Structure | Reaction | AAM | Enzyme | Version |
|
|
97
|
+
|------------|--------------|---------------|------------|---------|----------|---------|
|
|
98
|
+
| BiGG | 20428 | - | 33942 | - | 5705 | 1.6 |
|
|
99
|
+
| BRENDA | - | - | 61129 | - | 8697 | 2025_1 |
|
|
100
|
+
| ChEBI | 224485 | 330207 | - | - | - | 2024-05 |
|
|
101
|
+
| ECMDB | 3760 | 7517 | - | - | - | 2.0 |
|
|
102
|
+
| M-CSA | - | - | 1003 | 342 | 1003 | 2024-11 |
|
|
103
|
+
| MetaMDB | 80815 | 4392 | 74520 | 1003 | - | 2025-02 |
|
|
104
|
+
| MetaNetX | 2601834 | 2297518 | 143880 | - | 48175 | 4.4 |
|
|
105
|
+
| PubChem | 9031498 | 5000000 | - | - | - | 2024-09 |
|
|
106
|
+
|
|
107
|
+
In addition to the source databases, ChemRecon can make use of a greater number of *auxiliary* databases, including
|
|
108
|
+
MetaCyc and KEGG. Data from these sources is not directly included due to being proprietary or difficult to access.
|
|
109
|
+
However, the source databases contain references to the auxiliary databases, so entries are created which contain only
|
|
110
|
+
the identifier and no additional information. This allows users to use ChemRecon workflows based on identifiers from a
|
|
111
|
+
great number of databases, not just the source databases.
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = 'chemrecon'
|
|
3
|
+
version = "0.1.1"
|
|
4
|
+
description = 'The ChemRecon library for integration and exploration of interconnected biochemical databases.'
|
|
5
|
+
authors = [
|
|
6
|
+
{name = 'Casper Asbjørn Eriksen', email = 'casbjorn@imada.sdu.dk'}
|
|
7
|
+
]
|
|
8
|
+
maintainers = [
|
|
9
|
+
{name = 'Casper Asbjørn Eriksen', email = 'casbjorn@imada.sdu.dk'}
|
|
10
|
+
]
|
|
11
|
+
readme = {file = "README.md", content-type = "text/markdown"}
|
|
12
|
+
license = 'GPL-3.0-only'
|
|
13
|
+
keywords = [
|
|
14
|
+
'bioinformatics',
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
# Requirements
|
|
18
|
+
requires-python = '>=3.12'
|
|
19
|
+
dependencies = [
|
|
20
|
+
"psycopg[binary] ~= 3.3.2", # Binary also adds Postgres client binaries
|
|
21
|
+
"rustworkx ~= 0.17.1",
|
|
22
|
+
"networkx ~= 3.6.1",
|
|
23
|
+
"matplotlib ~= 3.10",
|
|
24
|
+
"rdkit",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
classifiers = [
|
|
28
|
+
'Programming Language :: Python :: 3.12',
|
|
29
|
+
'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
|
|
30
|
+
'Topic :: Scientific/Engineering :: Bio-Informatics',
|
|
31
|
+
'Topic :: Scientific/Engineering :: Chemistry'
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
#[project.urls]
|
|
35
|
+
Homepage = 'https://chemrecon.org'
|
|
36
|
+
Documentation = 'https://docs.chemrecon.org'
|
|
37
|
+
Repository = 'https://gitlab.com/casbjorn/chemrecon'
|
|
38
|
+
#Changelog = ''
|
|
39
|
+
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
docs = [
|
|
42
|
+
'sphinx==8.3.0', # Downgrade needed for enumtools, see https://github.com/domdfcoding/enum_tools/issues/118
|
|
43
|
+
'myst-parser',
|
|
44
|
+
'sphinx-autobuild',
|
|
45
|
+
'enum-tools[sphinx] == 0.12.0',
|
|
46
|
+
'sphinx-toolbox',
|
|
47
|
+
'nbsphinx',
|
|
48
|
+
"ipykernel>=7.1.0",
|
|
49
|
+
'furo',
|
|
50
|
+
'sphinxext-opengraph'
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[dependency-groups]
|
|
54
|
+
dev = [
|
|
55
|
+
'pytest >=8.1.1,<9'
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
# uv
|
|
59
|
+
[build-system]
|
|
60
|
+
requires = [
|
|
61
|
+
'uv_build ~= 0.9.27'
|
|
62
|
+
]
|
|
63
|
+
build-backend = 'uv_build'
|
|
64
|
+
|
|
65
|
+
[tool.uv]
|
|
66
|
+
package = true
|
|
67
|
+
|
|
68
|
+
[[tool.uv.index]]
|
|
69
|
+
name = "testpypi"
|
|
70
|
+
url = "https://test.pypi.org/simple/"
|
|
71
|
+
publish-url = "https://test.pypi.org/legacy/"
|
|
72
|
+
explicit = true
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
""" Defines metadata and the most general exports in the chemrecon.* namespace.
|
|
2
|
+
"""
|
|
3
|
+
# Metadata
|
|
4
|
+
__version__ = '0.1.1' # Library version
|
|
5
|
+
__db_version__: list[str] = ['0.1.1'] # Compatible database versions
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Imports
|
|
9
|
+
import psycopg
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Connection and handler
|
|
13
|
+
import chemrecon.connection
|
|
14
|
+
from chemrecon.connection import (
|
|
15
|
+
connect, connect_public, connect_local_docker, connect_local_docker_dev, disconnect,
|
|
16
|
+
get_query_handler
|
|
17
|
+
)
|
|
18
|
+
from chemrecon.database.params import Params
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Project initialization
|
|
22
|
+
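# Call disconnect() on import; automatically connecting to the public database is currently disabled (see the commented-out connect_public() call below)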
|
|
23
|
+
try:
|
|
24
|
+
disconnect()
|
|
25
|
+
# connect_public()
|
|
26
|
+
except psycopg.OperationalError as e:
|
|
27
|
+
print(f'Cannot connect to default database: {e}')
|
|
28
|
+
|
|
29
|
+
# Import from schema
|
|
30
|
+
from chemrecon.schema import *
|
|
31
|
+
|
|
32
|
+
from chemrecon.core.query_handler import QueryHandler
|
|
33
|
+
from chemrecon.core.populate_query_handler import PopulateQueryHandler
|
|
34
|
+
|
|
35
|
+
# Export identifier types
|
|
36
|
+
from chemrecon.core.id_types import *
|
|
37
|
+
|
|
38
|
+
# Entry creators
|
|
39
|
+
# from chemrecon.query.create_entry import (
|
|
40
|
+
# entry,
|
|
41
|
+
# compound_entry, reaction_entry, enzyme_entry,
|
|
42
|
+
# aam_representation_entry, structure_representation_entry,
|
|
43
|
+
# structure_entry, aam_entry,
|
|
44
|
+
# enzyme_from_ec_number,
|
|
45
|
+
# entry_from_identifiers_org
|
|
46
|
+
# )
|
|
47
|
+
|
|
48
|
+
from chemrecon.query.find_entry import (
|
|
49
|
+
find_entry,
|
|
50
|
+
find_compound_entry, find_reaction_entry, find_enzyme_entry,
|
|
51
|
+
find_structure_representation_entry, find_aam_representation_entry,
|
|
52
|
+
find_structure_entry, find_aam_entry
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# Relation getters
|
|
56
|
+
from chemrecon.query.get_relations import *
|
|
57
|
+
|
|
58
|
+
# EntryGraphs
|
|
59
|
+
from chemrecon.entrygraph.entrygraph import EntryGraph, Vertex, Edge
|
|
60
|
+
from chemrecon.schema.direction import Direction
|
|
61
|
+
from chemrecon.entrygraph.filter import (
|
|
62
|
+
EntryFilter, EntryFilterProcedure,
|
|
63
|
+
RelationFilter, RelationFilterProcedure
|
|
64
|
+
)
|
|
65
|
+
from chemrecon.entrygraph.explorationprotocol import ExplorationProtocol
|
|
66
|
+
from chemrecon.entrygraph.explore import explore
|
|
67
|
+
from chemrecon.entrygraph.scoring import Scorer
|
|
68
|
+
|
|
69
|
+
# Pre-defined entrygraph types
|
|
70
|
+
from chemrecon.query.default_protocols import *
|
|
71
|
+
|
|
72
|
+
# Chemistry - Molecules
|
|
73
|
+
from chemrecon.chem.mol import Mol
|
|
File without changes
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
""" Implements a wrapper for the RDKit reaction type."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import rdkit.Chem.rdChemReactions as rdk_r
|
|
8
|
+
from rdkit.Chem.rdChemReactions import ChemicalReaction
|
|
9
|
+
from rdkit.Chem import Draw as rdk_draw
|
|
10
|
+
|
|
11
|
+
from chemrecon.chem.mol import MolTemplate, MolInstance
|
|
12
|
+
from chemrecon.chem.sumformula import SumFormula
|
|
13
|
+
from chemrecon.schema.entry_types.aam import AAM
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Side(Enum):
    """ Identifies one of the two sides of a reaction.

    The member values are signed integers (-1 and 1); these are the same signs that ChemReaction uses when counting
    templates on the left-hand and right-hand side, respectively.
    """
    L = -1  # Left-hand side
    R = 1   # Right-hand side
|
|
19
|
+
|
|
20
|
+
type stoich = int
|
|
21
|
+
|
|
22
|
+
class ChemReaction:
    """ Wrapper around an RDKit ``ChemicalReaction``.

    On construction, every reactant and product of the RDKit reaction is wrapped in a MolInstance, the atom-to-atom
    map between the two sides is built from the map numbers reported by each instance, and a MolTemplate is computed
    for every instance.
    """
    reaction: ChemicalReaction
    _reaction_smiles: Optional[str]     # Cache for to_reaction_smiles(); None until first requested

    # Templates represent the structure of each compound. (There can be duplicate templates)
    # The values are signed counts: every LHS instance contributes -1, every RHS instance contributes +1.
    lhs_templates: dict[MolTemplate, stoich]
    rhs_templates: dict[MolTemplate, stoich]

    # Instances represent specific structures with maps as they participate in the reaction.
    lhs_instances: list[MolInstance]
    rhs_instances: list[MolInstance]

    # Mapping between each instance and its corresponding template
    # (declared as an annotation; the actual dict is created per object in __init__)
    instance_template_dict: dict[MolInstance, MolTemplate]

    # Properties of the given instances, as read through the input files if possible
    instance_properties: dict[MolInstance, dict[str, str]]
    template_ids: dict[MolTemplate, str]
    template_names: dict[MolTemplate, str]

    # Atom-to-atom map of this reaction
    map: dict[tuple[MolInstance, int], tuple[MolInstance, int]]
    lhs_index: dict[int, tuple[MolInstance, int]]  # The global map number to (molecule, index) tuple
    rhs_index: dict[int, tuple[MolInstance, int]]  # The global map number to (molecule, index) tuple

    def __init__(self, rdk_reaction: ChemicalReaction):
        """ Wrap the given RDKit reaction and derive instances, atom map and templates from it.

        :param rdk_reaction: The RDKit reaction whose reactants and products are wrapped.
        """
        self.reaction = rdk_reaction
        self._reaction_smiles = None

        self.lhs_templates = dict()
        self.rhs_templates = dict()
        self.instance_template_dict = dict()

        # These are only initialised here; nothing in the constructor fills them.
        self.instance_properties = dict()
        self.template_ids = dict()
        self.template_names = dict()

        # Set reactant and product MolInstances
        self.lhs_instances = [
            MolInstance(rdk_mol, provenance = 'implicit') for rdk_mol in self.reaction.GetReactants()
        ]
        self.rhs_instances = [
            MolInstance(rdk_mol, provenance = 'implicit') for rdk_mol in self.reaction.GetProducts()
        ]

        # Populate the per-side indices
        # Global map number -> molecule, index (referring to the order of atoms in the SMILES string)
        self.lhs_index = dict()
        self.rhs_index = dict()
        for side_index, side_instances in (
            (self.lhs_index, self.lhs_instances),
            (self.rhs_index, self.rhs_instances)
        ):
            for instance in side_instances:
                for local_index, global_index in enumerate(instance.get_atom_map_in_native_order()):
                    side_index[global_index] = (instance, local_index)

        # Populate the map: pair up the LHS and RHS positions which share a global map number.
        self.map = dict()
        for global_index, lhs_position in self.lhs_index.items():
            rhs_position = self.rhs_index.get(global_index)
            if rhs_position is None:
                # Missing map
                # TODO do something if this is not a hydrogen?
                continue
            self.map[lhs_position] = rhs_position

        # Compute templates and add
        for sign, instance_list, template_counts in [
            (-1, self.lhs_instances, self.lhs_templates),
            (1, self.rhs_instances, self.rhs_templates)
        ]:
            mol_instance: MolInstance
            for mol_instance in instance_list:
                template = mol_instance.to_mol_template()

                # Add to instance-template dict
                self.instance_template_dict[mol_instance] = template

                # Add to the signed template count of this side
                template_counts[template] = template_counts.get(template, 0) + sign

    # Getters
    # ------------------------------------------------------------------------------------------------------------------
    def get_lhs_templates(self) -> list[MolTemplate]:
        """ Return the distinct templates of the left-hand side as a list. """
        return list(self.lhs_templates.keys())

    def get_rhs_templates(self) -> list[MolTemplate]:
        """ Return the distinct templates of the right-hand side as a list. """
        return list(self.rhs_templates.keys())

    # Balance
    # ------------------------------------------------------------------------------------------------------------------
    def get_balance_difference(self) -> SumFormula:
        """ Get the difference between the LHS and RHS as a (possibly negative) SumFormula.
        Positive counts indicate a surplus on the LHS, negative counts indicate a surplus on the RHS.

        :raises NotImplementedError: Always; the computation is not implemented yet.
        """
        # TODO intended implementation: sum the formulae of the instances on each side and return LHS - RHS.
        raise NotImplementedError()

    def is_balanced(self) -> bool:
        """ Returns true if balanced in both atomic composition and charge.
        If not balanced, use .get_balance_difference() to inspect the difference between LHS and RHS.

        :raises NotImplementedError: Currently always, since this relies on get_balance_difference().
        """
        return self.get_balance_difference().is_zero()

    # Representations
    # ------------------------------------------------------------------------------------------------------------------
    def to_reaction_smiles(self) -> str:
        """ Return the reaction SMILES of the wrapped reaction; computed by RDKit on first use, then cached. """
        if self._reaction_smiles is None:
            self._reaction_smiles = rdk_r.ReactionToSmiles(self.reaction)

        return self._reaction_smiles

    # Serialise
    def serialize(self) -> dict:
        """ Return a dict with the reaction SMILES and the serialized instances of both sides.

        The 'balanced' and 'balance_difference' entries are placeholder strings for now.
        """
        return {
            'reaction_smiles': self.to_reaction_smiles(),
            'lhs': [
                mol.serialize() for mol in self.lhs_instances
            ],
            'rhs': [
                mol.serialize() for mol in self.rhs_instances
            ],
            'balanced': 'TODO',  # TODO
            'balance_difference': 'TODO',  # TODO compute difference in sum formulae for LHS and RHS
        }

    # Misc
    # ------------------------------------------------------------------------------------------------------------------
    def __hash__(self):
        """ Hash of the reaction SMILES string. """
        return self.to_reaction_smiles().__hash__()

    def sanity_check(self):
        """ Raises an exception if any instance has no SMILES, or if the AAM maps atoms of different elements.

        :raises AssertionError: If an instance's ``smiles`` is None, or if two mapped atoms differ in atomic number.
        """
        # Check all instances have smiles
        for inst in [*self.lhs_instances, *self.rhs_instances]:
            if inst.smiles is None:
                raise AssertionError('Instance invalid.')

        # Check mapping
        for global_index, (molinst_l, local_index_l) in self.lhs_index.items():
            if global_index not in self.rhs_index:
                # Atom 'disappears'
                continue
            molinst_r, local_index_r = self.rhs_index[global_index]

            l_atom = molinst_l.mol.GetAtomWithIdx(local_index_l)
            r_atom = molinst_r.mol.GetAtomWithIdx(local_index_r)
            if l_atom.GetAtomicNum() != r_atom.GetAtomicNum():
                raise AssertionError(f'Element mismatch: L: {local_index_l}, R: {local_index_r}')

    # Visualisation
    # ------------------------------------------------------------------------------------------------------------------
    def show(self):
        """ Render the reaction to an image with RDKit and display it. """
        img = rdk_draw.ReactionToImage(rxn = self.reaction, subImgSize = (800, 800))
        img.show()
|
|
199
|
+
|
|
200
|
+
# Creators
|
|
201
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
202
|
+
def chem_reaction_from_aam_entry(
        entry: AAM
) -> ChemReaction:
    """ Build a ChemReaction from the reaction SMILES stored on an AAM entry.

    :param entry: The AAM entry; only its ``reaction_smiles`` attribute is read.
    :return: The ChemReaction produced by chem_reaction_from_reactionsmiles() for that string.
    """
    reaction_smiles = entry.reaction_smiles
    return chem_reaction_from_reactionsmiles(reaction_smiles)
|
|
210
|
+
|
|
211
|
+
def chem_reaction_from_reactionsmiles(
        reactionsmiles: str
) -> ChemReaction:
    """ Parse a reaction SMILES string with RDKit and wrap the result in a ChemReaction.

    :param reactionsmiles: The reaction string handed to RDKit.
    :return: A ChemReaction wrapping the parsed RDKit reaction.
    """
    # NOTE(review): the string is parsed via ReactionFromSmarts with default arguments - confirm that this is
    # intended for reaction SMILES input.
    return ChemReaction(rdk_r.ReactionFromSmarts(reactionsmiles))
|
|
216
|
+
|
|
217
|
+
def chem_reaction_from_rxn(
        rxn: str,
        provenance: Optional[str] = None,
        safe: bool = False
) -> ChemReaction:
    """ Parse an RXN block with RDKit and wrap the result in a ChemReaction.

    :param rxn: The RXN block text handed to RDKit.
    :param provenance: Accepted, but not used by this function.
    :param safe: If True, RDKit is called with both ``sanitize`` and ``removeHs`` switched off; if False (the
        default), both are switched on.
    :return: A ChemReaction wrapping the parsed RDKit reaction.
    """
    # Both RDKit post-processing flags are the negation of `safe`.
    postprocess = not safe
    parsed = rdk_r.ReactionFromRxnBlock(rxn, sanitize = postprocess, removeHs = postprocess)
    return ChemReaction(parsed)
|