chemrecon 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. chemrecon-0.1.1/PKG-INFO +143 -0
  2. chemrecon-0.1.1/README.md +111 -0
  3. chemrecon-0.1.1/pyproject.toml +72 -0
  4. chemrecon-0.1.1/src/chemrecon/__init__.py +73 -0
  5. chemrecon-0.1.1/src/chemrecon/chem/__init__.py +0 -0
  6. chemrecon-0.1.1/src/chemrecon/chem/chemreaction.py +223 -0
  7. chemrecon-0.1.1/src/chemrecon/chem/constant_compounds.py +3 -0
  8. chemrecon-0.1.1/src/chemrecon/chem/create_mol.py +91 -0
  9. chemrecon-0.1.1/src/chemrecon/chem/elements.py +141 -0
  10. chemrecon-0.1.1/src/chemrecon/chem/gml/__init__.py +0 -0
  11. chemrecon-0.1.1/src/chemrecon/chem/gml/gml.py +324 -0
  12. chemrecon-0.1.1/src/chemrecon/chem/gml/gml_reactant_matching.py +130 -0
  13. chemrecon-0.1.1/src/chemrecon/chem/gml/gml_to_rdk.py +217 -0
  14. chemrecon-0.1.1/src/chemrecon/chem/mol.py +483 -0
  15. chemrecon-0.1.1/src/chemrecon/chem/sumformula.py +120 -0
  16. chemrecon-0.1.1/src/chemrecon/connection.py +97 -0
  17. chemrecon-0.1.1/src/chemrecon/core/__init__.py +0 -0
  18. chemrecon-0.1.1/src/chemrecon/core/id_types.py +687 -0
  19. chemrecon-0.1.1/src/chemrecon/core/ontology.py +209 -0
  20. chemrecon-0.1.1/src/chemrecon/core/populate_query_handler.py +336 -0
  21. chemrecon-0.1.1/src/chemrecon/core/query_handler.py +587 -0
  22. chemrecon-0.1.1/src/chemrecon/database/__init__.py +1 -0
  23. chemrecon-0.1.1/src/chemrecon/database/connect.py +63 -0
  24. chemrecon-0.1.1/src/chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
  25. chemrecon-0.1.1/src/chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
  26. chemrecon-0.1.1/src/chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
  27. chemrecon-0.1.1/src/chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
  28. chemrecon-0.1.1/src/chemrecon/database/params.py +88 -0
  29. chemrecon-0.1.1/src/chemrecon/entrygraph/draw.py +119 -0
  30. chemrecon-0.1.1/src/chemrecon/entrygraph/entrygraph.py +301 -0
  31. chemrecon-0.1.1/src/chemrecon/entrygraph/explorationprotocol.py +199 -0
  32. chemrecon-0.1.1/src/chemrecon/entrygraph/explore.py +421 -0
  33. chemrecon-0.1.1/src/chemrecon/entrygraph/explore_procedure.py +183 -0
  34. chemrecon-0.1.1/src/chemrecon/entrygraph/filter.py +88 -0
  35. chemrecon-0.1.1/src/chemrecon/entrygraph/scoring.py +141 -0
  36. chemrecon-0.1.1/src/chemrecon/query/__init__.py +26 -0
  37. chemrecon-0.1.1/src/chemrecon/query/create_entry.py +86 -0
  38. chemrecon-0.1.1/src/chemrecon/query/default_protocols.py +57 -0
  39. chemrecon-0.1.1/src/chemrecon/query/find_entry.py +84 -0
  40. chemrecon-0.1.1/src/chemrecon/query/get_relations.py +143 -0
  41. chemrecon-0.1.1/src/chemrecon/query/get_structures_from_compound.py +65 -0
  42. chemrecon-0.1.1/src/chemrecon/schema/__init__.py +86 -0
  43. chemrecon-0.1.1/src/chemrecon/schema/db_object.py +363 -0
  44. chemrecon-0.1.1/src/chemrecon/schema/direction.py +10 -0
  45. chemrecon-0.1.1/src/chemrecon/schema/entry_types/__init__.py +0 -0
  46. chemrecon-0.1.1/src/chemrecon/schema/entry_types/aam.py +34 -0
  47. chemrecon-0.1.1/src/chemrecon/schema/entry_types/aam_repr.py +37 -0
  48. chemrecon-0.1.1/src/chemrecon/schema/entry_types/compound.py +52 -0
  49. chemrecon-0.1.1/src/chemrecon/schema/entry_types/enzyme.py +49 -0
  50. chemrecon-0.1.1/src/chemrecon/schema/entry_types/molstructure.py +64 -0
  51. chemrecon-0.1.1/src/chemrecon/schema/entry_types/molstructure_repr.py +41 -0
  52. chemrecon-0.1.1/src/chemrecon/schema/entry_types/reaction.py +57 -0
  53. chemrecon-0.1.1/src/chemrecon/schema/enums.py +154 -0
  54. chemrecon-0.1.1/src/chemrecon/schema/procedural_relation_entrygraph.py +66 -0
  55. chemrecon-0.1.1/src/chemrecon/schema/relation_types_composed/__init__.py +0 -0
  56. chemrecon-0.1.1/src/chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
  57. chemrecon-0.1.1/src/chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
  58. chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/__init__.py +0 -0
  59. chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
  60. chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
  61. chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
  62. chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
  63. chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
  64. chemrecon-0.1.1/src/chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
  65. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/__init__.py +0 -0
  66. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
  67. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
  68. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
  69. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
  70. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
  71. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
  72. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
  73. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
  74. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
  75. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
  76. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
  77. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
  78. chemrecon-0.1.1/src/chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
  79. chemrecon-0.1.1/src/chemrecon/scripts/initialize_database.py +494 -0
  80. chemrecon-0.1.1/src/chemrecon/utils/copy_signature.py +10 -0
  81. chemrecon-0.1.1/src/chemrecon/utils/encodeable_list.py +11 -0
  82. chemrecon-0.1.1/src/chemrecon/utils/get_id_type.py +70 -0
  83. chemrecon-0.1.1/src/chemrecon/utils/hungarian.py +31 -0
  84. chemrecon-0.1.1/src/chemrecon/utils/reactant_matching.py +168 -0
  85. chemrecon-0.1.1/src/chemrecon/utils/rxnutils.py +44 -0
  86. chemrecon-0.1.1/src/chemrecon/utils/set_cwd.py +12 -0
@@ -0,0 +1,143 @@
1
+ Metadata-Version: 2.4
2
+ Name: chemrecon
3
+ Version: 0.1.1
4
+ Summary: The ChemRecon library for integration and exploration of interconnected biochemical databases.
5
+ Keywords: bioinformatics
6
+ Author: Casper Asbjørn Eriksen
7
+ Author-email: Casper Asbjørn Eriksen <casbjorn@imada.sdu.dk>
8
+ License-Expression: GPL-3.0-only
9
+ Classifier: Programming Language :: Python :: 3.12
10
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
11
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
12
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
13
+ Requires-Dist: psycopg[binary]~=3.3.2
14
+ Requires-Dist: rustworkx~=0.17.1
15
+ Requires-Dist: networkx~=3.6.1
16
+ Requires-Dist: matplotlib~=3.10
17
+ Requires-Dist: rdkit
18
+ Requires-Dist: sphinx==8.3.0 ; extra == 'docs'
19
+ Requires-Dist: myst-parser ; extra == 'docs'
20
+ Requires-Dist: sphinx-autobuild ; extra == 'docs'
21
+ Requires-Dist: enum-tools[sphinx]==0.12.0 ; extra == 'docs'
22
+ Requires-Dist: sphinx-toolbox ; extra == 'docs'
23
+ Requires-Dist: nbsphinx ; extra == 'docs'
24
+ Requires-Dist: ipykernel>=7.1.0 ; extra == 'docs'
25
+ Requires-Dist: furo ; extra == 'docs'
26
+ Requires-Dist: sphinxext-opengraph ; extra == 'docs'
27
+ Maintainer: Casper Asbjørn Eriksen
28
+ Maintainer-email: Casper Asbjørn Eriksen <casbjorn@imada.sdu.dk>
29
+ Requires-Python: >=3.12
30
+ Provides-Extra: docs
31
+ Description-Content-Type: text/markdown
32
+
33
+ # ChemRecon
34
+ *v. 0.1.1*
35
+
36
+ ChemRecon is a Python library and consolidated meta-database designed to simplify the integration and exploration of
37
+ biochemical data from a range of sources.
38
+ It is built from full-database downloads of compounds, reactions, enzymes, molecular structures, and atom-to-atom maps
39
+ from the following source databases: BiGG, BRENDA, ChEBI, ECMDB, M-CSA, MetaMDB, MetaNetX, and PubChem.
40
+
41
+ Heterogeneous data formats were standardized, and relationships within and between these databases were reconstructed in
42
+ a consistent format.
43
+ The resulting meta-database is freely accessible online and is complemented by a Python library which allows for easy
44
+ integration into existing workflows.
45
+ This enables unified querying of entries from all the source databases, and discovery and visualization of
46
+ relationships between these entries.
47
+
48
+ ![entrygraph](docs/source/resources/eg.svg)
49
+
50
+ ChemRecon was developed at the
51
+ [Algorithmic Cheminformatics Group](https://cheminf.imada.sdu.dk/),
52
+ [Department of Mathematics and Computer Science](https://cheminf.imada.sdu.dk/),
53
+ [University of Southern Denmark](https://sdu.dk).
54
+
55
+ ## Paper
56
+ If ChemRecon proves useful to your research, you may want to cite the following paper.
57
+ * **Title**
58
+
59
+ C. A. Eriksen, J. L. Andersen, R. Fagerberg, D. Merkle
60
+
61
+ Arxiv preprint, submitted to Bioinformatics.
62
+
63
+ TODO more
64
+
65
+ ## Availability and Installation
66
+ ChemRecon is available via your Python package manager from the Python Package Index (PyPI):
67
+ [chemrecon](https://pypi.org/project/chemrecon/)
68
+ It can be installed using pip:
69
+
70
+ `pip install chemrecon`
71
+
72
+ Visualizing entry graphs requires [GraphViz](https://www.graphviz.org/) to be installed, and for the `dot` executable,
73
+ which renders the graphs, to be available on your system's `PATH`.
74
+ See the [GraphViz Python package](https://pypi.org/project/graphviz/) for instructions.
75
+
76
+ ***
77
+
78
+ ## Documentation
79
+ The documentation, including instructions on usage, tutorials, and a complete description covering the types of entries
80
+ and relations supported, is available on the [ChemRecon homepage](https://www.cheminf.imada.sdu.dk/chemrecon).
81
+
82
+ ## Usage
83
+ The following is an example of a typical ChemRecon workflow, producing the graph seen above.
84
+ For more detailed examples, see the tutorial section of the documentation.
85
+
86
+ ```python
87
+ from chemrecon import *
88
+
89
+ connect_public()
90
+
91
+ # Perform a database query to find the 'citrate' entry in BiGG.
92
+ citrate_entry = find_entry(id_type = C_BIGG, source_id = 'M_cit')
93
+
94
+ # Define a protocol to find related entries and molecular structures (protocols like this are included)
95
+ compound_structure_protocol = ExplorationProtocol(
96
+ relation_types = {CompoundReference, CompoundHasMolStructure, MolStructureStandardization}
97
+ )
98
+
99
+ # Create and expand an entry graph, according to this protocol, by traversing the database.
100
+ eg = EntryGraph(initial_entries = {citrate_entry})
101
+ explore(eg, compound_structure_protocol, steps = 5)
102
+
103
+ # Score the molecular structures in the graph according to their 'connectedness'
104
+ scorer = Scorer(score_entry_type = MolStructure)
105
+ scores = scorer(citrate_entry) # Result is an OrderedDict
106
+
107
+ # Draw the graph with these scores, producing the image seen on this page
108
+ eg.show(scores = scores)
109
+ ```
110
+
111
+ ***
112
+
113
+ ## Database
114
+ ChemRecon needs to be connected to a database to function.
115
+ The easiest is to connect to the public database, hosted by [SDU](https://sdu.dk):
116
+ ```
117
+ connect_public()
118
+ ```
119
+ Alternatively, a local instance of the database can be hosted via Docker.
120
+ Instructions are given in the [documentation](https://chemrecon.org).
121
+ This has the advantage of lower latency, making queries and entry graph construction faster, and allows adding
122
+ custom data sources.
123
+
124
+ ## Source Databases
125
+ ChemRecon contains compound, molecular structure, reaction, atom-to-atom map, and enzyme entries from the following
126
+ databases.
127
+
128
+ | Source | Compound | Structure | Reaction | AAM | Enzyme | Version |
129
+ |------------|--------------|---------------|------------|---------|----------|---------|
130
+ | BiGG | 20428 | - | 33942 | - | 5705 | 1.6 |
131
+ | BRENDA | - | - | 61129 | - | 8697 | 2025_1 |
132
+ | ChEBI | 224485 | 330207 | - | - | - | 2024-05 |
133
+ | ECMDB | 3760 | 7517 | - | - | - | 2.0 |
134
+ | M-CSA | - | - | 1003 | 342 | 1003 | 2024-11 |
135
+ | MetaMDB | 80815 | 4392 | 74520 | 1003 | - | 2025-02 |
136
+ | MetaNetX | 2601834 | 2297518 | 143880 | - | 48175 | 4.4 |
137
+ | PubChem | 9031498 | 5000000 | - | - | - | 2024-09 |
138
+
139
+ In addition to the source databases, ChemRecon can make use of a greater number of *auxiliary* databases, including
140
+ MetaCyc and KEGG. Data from these sources is not directly included due to being proprietary or difficult to access.
141
+ However, the source databases contain references to the auxiliary databases, so entries are created which contain only
142
+ the identifier and no additional information. This allows users to use ChemRecon workflows based on identifiers from a
143
+ great number of databases, not just the source databases.
@@ -0,0 +1,111 @@
1
+ # ChemRecon
2
+ *v. 0.1.1*
3
+
4
+ ChemRecon is a Python library and consolidated meta-database designed to simplify the integration and exploration of
5
+ biochemical data from a range of sources.
6
+ It is built from full-database downloads of compounds, reactions, enzymes, molecular structures, and atom-to-atom maps
7
+ from the following source databases: BiGG, BRENDA, ChEBI, ECMDB, M-CSA, MetaMDB, MetaNetX, and PubChem.
8
+
9
+ Heterogeneous data formats were standardized, and relationships within and between these databases were reconstructed in
10
+ a consistent format.
11
+ The resulting meta-database is freely accessible online and is complemented by a Python library which allows for easy
12
+ integration into existing workflows.
13
+ This enables unified querying of entries from all the source databases, and discovery and visualization of
14
+ relationships between these entries.
15
+
16
+ ![entrygraph](docs/source/resources/eg.svg)
17
+
18
+ ChemRecon was developed at the
19
+ [Algorithmic Cheminformatics Group](https://cheminf.imada.sdu.dk/),
20
+ [Department of Mathematics and Computer Science](https://cheminf.imada.sdu.dk/),
21
+ [University of Southern Denmark](https://sdu.dk).
22
+
23
+ ## Paper
24
+ If ChemRecon proves useful to your research, you may want to cite the following paper.
25
+ * **Title**
26
+
27
+ C. A. Eriksen, J. L. Andersen, R. Fagerberg, D. Merkle
28
+
29
+ Arxiv preprint, submitted to Bioinformatics.
30
+
31
+ TODO more
32
+
33
+ ## Availability and Installation
34
+ ChemRecon is available via your Python package manager from the Python Package Index (PyPI):
35
+ [chemrecon](https://pypi.org/project/chemrecon/)
36
+ It can be installed using pip:
37
+
38
+ `pip install chemrecon`
39
+
40
+ Visualizing entry graphs requires [GraphViz](https://www.graphviz.org/) to be installed, and for the `dot` executable,
41
+ which renders the graphs, to be available on your system's `PATH`.
42
+ See the [GraphViz Python package](https://pypi.org/project/graphviz/) for instructions.
43
+
44
+ ***
45
+
46
+ ## Documentation
47
+ The documentation, including instructions on usage, tutorials, and a complete description covering the types of entries
48
+ and relations supported, is available on the [ChemRecon homepage](https://www.cheminf.imada.sdu.dk/chemrecon).
49
+
50
+ ## Usage
51
+ The following is an example of a typical ChemRecon workflow, producing the graph seen above.
52
+ For more detailed examples, see the tutorial section of the documentation.
53
+
54
+ ```python
55
+ from chemrecon import *
56
+
57
+ connect_public()
58
+
59
+ # Perform a database query to find the 'citrate' entry in BiGG.
60
+ citrate_entry = find_entry(id_type = C_BIGG, source_id = 'M_cit')
61
+
62
+ # Define a protocol to find related entries and molecular structures (protocols like this are included)
63
+ compound_structure_protocol = ExplorationProtocol(
64
+ relation_types = {CompoundReference, CompoundHasMolStructure, MolStructureStandardization}
65
+ )
66
+
67
+ # Create and expand an entry graph, according to this protocol, by traversing the database.
68
+ eg = EntryGraph(initial_entries = {citrate_entry})
69
+ explore(eg, compound_structure_protocol, steps = 5)
70
+
71
+ # Score the molecular structures in the graph according to their 'connectedness'
72
+ scorer = Scorer(score_entry_type = MolStructure)
73
+ scores = scorer(citrate_entry) # Result is an OrderedDict
74
+
75
+ # Draw the graph with these scores, producing the image seen on this page
76
+ eg.show(scores = scores)
77
+ ```
78
+
79
+ ***
80
+
81
+ ## Database
82
+ ChemRecon needs to be connected to a database to function.
83
+ The easiest is to connect to the public database, hosted by [SDU](https://sdu.dk):
84
+ ```
85
+ connect_public()
86
+ ```
87
+ Alternatively, a local instance of the database can be hosted via Docker.
88
+ Instructions are given in the [documentation](https://chemrecon.org).
89
+ This has the advantage of lower latency, making queries and entry graph construction faster, and allows adding
90
+ custom data sources.
91
+
92
+ ## Source Databases
93
+ ChemRecon contains compound, molecular structure, reaction, atom-to-atom map, and enzyme entries from the following
94
+ databases.
95
+
96
+ | Source | Compound | Structure | Reaction | AAM | Enzyme | Version |
97
+ |------------|--------------|---------------|------------|---------|----------|---------|
98
+ | BiGG | 20428 | - | 33942 | - | 5705 | 1.6 |
99
+ | BRENDA | - | - | 61129 | - | 8697 | 2025_1 |
100
+ | ChEBI | 224485 | 330207 | - | - | - | 2024-05 |
101
+ | ECMDB | 3760 | 7517 | - | - | - | 2.0 |
102
+ | M-CSA | - | - | 1003 | 342 | 1003 | 2024-11 |
103
+ | MetaMDB | 80815 | 4392 | 74520 | 1003 | - | 2025-02 |
104
+ | MetaNetX | 2601834 | 2297518 | 143880 | - | 48175 | 4.4 |
105
+ | PubChem | 9031498 | 5000000 | - | - | - | 2024-09 |
106
+
107
+ In addition to the source databases, ChemRecon can make use of a greater number of *auxiliary* databases, including
108
+ MetaCyc and KEGG. Data from these sources is not directly included due to being proprietary or difficult to access.
109
+ However, the source databases contain references to the auxiliary databases, so entries are created which contain only
110
+ the identifier and no additional information. This allows users to use ChemRecon workflows based on identifiers from a
111
+ great number of databases, not just the source databases.
@@ -0,0 +1,72 @@
1
+ [project]
2
+ name = 'chemrecon'
3
+ version = "0.1.1"
4
+ description = 'The ChemRecon library for integration and exploration of interconnected biochemical databases.'
5
+ authors = [
6
+ {name = 'Casper Asbjørn Eriksen', email = 'casbjorn@imada.sdu.dk'}
7
+ ]
8
+ maintainers = [
9
+ {name = 'Casper Asbjørn Eriksen', email = 'casbjorn@imada.sdu.dk'}
10
+ ]
11
+ readme = {file = "README.md", content-type = "text/markdown"}
12
+ license = 'GPL-3.0-only'
13
+ keywords = [
14
+ 'bioinformatics',
15
+ ]
16
+
17
+ # Requirements
18
+ requires-python = '>=3.12'
19
+ dependencies = [
20
+ "psycopg[binary] ~= 3.3.2", # Binary also adds Postgres client binaries
21
+ "rustworkx ~= 0.17.1",
22
+ "networkx ~= 3.6.1",
23
+ "matplotlib ~= 3.10",
24
+ "rdkit",
25
+ ]
26
+
27
+ classifiers = [
28
+ 'Programming Language :: Python :: 3.12',
29
+ 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
30
+ 'Topic :: Scientific/Engineering :: Bio-Informatics',
31
+ 'Topic :: Scientific/Engineering :: Chemistry'
32
+ ]
33
+
34
+ [project.urls]
35
+ Homepage = 'https://chemrecon.org'
36
+ Documentation = 'https://docs.chemrecon.org'
37
+ Repository = 'https://gitlab.com/casbjorn/chemrecon'
38
+ #Changelog = ''
39
+
40
+ [project.optional-dependencies]
41
+ docs = [
42
+ 'sphinx==8.3.0', # Downgrade needed for enumtools, see https://github.com/domdfcoding/enum_tools/issues/118
43
+ 'myst-parser',
44
+ 'sphinx-autobuild',
45
+ 'enum-tools[sphinx] == 0.12.0',
46
+ 'sphinx-toolbox',
47
+ 'nbsphinx',
48
+ "ipykernel>=7.1.0",
49
+ 'furo',
50
+ 'sphinxext-opengraph'
51
+ ]
52
+
53
+ [dependency-groups]
54
+ dev = [
55
+ 'pytest >=8.1.1,<9'
56
+ ]
57
+
58
+ # uv
59
+ [build-system]
60
+ requires = [
61
+ 'uv_build ~= 0.9.27'
62
+ ]
63
+ build-backend = 'uv_build'
64
+
65
+ [tool.uv]
66
+ package = true
67
+
68
+ [[tool.uv.index]]
69
+ name = "testpypi"
70
+ url = "https://test.pypi.org/simple/"
71
+ publish-url = "https://test.pypi.org/legacy/"
72
+ explicit = true
@@ -0,0 +1,73 @@
1
+ """ Defines metadata and the most general exports in the chemrecon.* namespace.
2
+ """
3
+ # Metadata
4
+ __version__ = '0.1.1' # Library version
5
+ __db_version__: list[str] = ['0.1.1'] # Compatible database versions
6
+
7
+
8
+ # Imports
9
+ import psycopg
10
+
11
+
12
+ # Connection and handler
13
+ import chemrecon.connection
14
+ from chemrecon.connection import (
15
+ connect, connect_public, connect_local_docker, connect_local_docker_dev, disconnect,
16
+ get_query_handler
17
+ )
18
+ from chemrecon.database.params import Params
19
+
20
+
21
+ # Project initialization
22
+ # Try to set the default database connection
23
try:
    # Only disconnect() runs at import time. The automatic connection to the public
    # database is disabled below, so no connection is opened by importing the package.
    disconnect()
    # connect_public()
except psycopg.OperationalError as e:
    # NOTE(review): the message talks about connecting, but with connect_public()
    # disabled this handler only fires if disconnect() raises OperationalError.
    print(f'Cannot connect to default database: {e}')
28
+
29
+ # Import from schema
30
+ from chemrecon.schema import *
31
+
32
+ from chemrecon.core.query_handler import QueryHandler
33
+ from chemrecon.core.populate_query_handler import PopulateQueryHandler
34
+
35
+ # Export identifier types
36
+ from chemrecon.core.id_types import *
37
+
38
+ # Entry creators
39
+ # from chemrecon.query.create_entry import (
40
+ # entry,
41
+ # compound_entry, reaction_entry, enzyme_entry,
42
+ # aam_representation_entry, structure_representation_entry,
43
+ # structure_entry, aam_entry,
44
+ # enzyme_from_ec_number,
45
+ # entry_from_identifiers_org
46
+ # )
47
+
48
+ from chemrecon.query.find_entry import (
49
+ find_entry,
50
+ find_compound_entry, find_reaction_entry, find_enzyme_entry,
51
+ find_structure_representation_entry, find_aam_representation_entry,
52
+ find_structure_entry, find_aam_entry
53
+ )
54
+
55
+ # Relation getters
56
+ from chemrecon.query.get_relations import *
57
+
58
+ # EntryGraphs
59
+ from chemrecon.entrygraph.entrygraph import EntryGraph, Vertex, Edge
60
+ from chemrecon.schema.direction import Direction
61
+ from chemrecon.entrygraph.filter import (
62
+ EntryFilter, EntryFilterProcedure,
63
+ RelationFilter, RelationFilterProcedure
64
+ )
65
+ from chemrecon.entrygraph.explorationprotocol import ExplorationProtocol
66
+ from chemrecon.entrygraph.explore import explore
67
+ from chemrecon.entrygraph.scoring import Scorer
68
+
69
+ # Pre-defined entrygraph types
70
+ from chemrecon.query.default_protocols import *
71
+
72
+ # Chemistry - Molecules
73
+ from chemrecon.chem.mol import Mol
File without changes
@@ -0,0 +1,223 @@
1
+ """ Implements a wrapper for the RDKit reaction type."""
2
+ from __future__ import annotations
3
+
4
+ from enum import Enum
5
+ from typing import Optional
6
+
7
+ import rdkit.Chem.rdChemReactions as rdk_r
8
+ from rdkit.Chem.rdChemReactions import ChemicalReaction
9
+ from rdkit.Chem import Draw as rdk_draw
10
+
11
+ from chemrecon.chem.mol import MolTemplate, MolInstance
12
+ from chemrecon.chem.sumformula import SumFormula
13
+ from chemrecon.schema.entry_types.aam import AAM
14
+
15
+
16
class Side(Enum):
    """ Side of a reaction, encoded as a signed unit value.
    """
    # Presumably the left-hand (reactant) side; carries the negative sign.
    L = -1
    # Presumably the right-hand (product) side; carries the positive sign.
    R = 1
19
+
20
# Integer count attached to a template; used as the value type of the
# ChemReaction.lhs_templates / rhs_templates dictionaries.
type stoich = int
21
+
22
class ChemReaction:
    """ Wrapper around an RDKit ``ChemicalReaction``.

    On construction, the reactants and products are wrapped as ``MolInstance`` objects, the atom-to-atom map is
    derived from their atom map numbers, and every instance is associated with its ``MolTemplate``.
    """
    reaction: ChemicalReaction
    _reaction_smiles: Optional[str]  # Cache filled lazily by to_reaction_smiles()

    # Templates represent the structure of each compound. (There can be duplicate templates)
    # Each template maps to a signed count: -1 per LHS instance, +1 per RHS instance.
    lhs_templates: dict[MolTemplate, stoich]
    rhs_templates: dict[MolTemplate, stoich]

    # Instances represent specific structures with maps as they participate in the reaction.
    lhs_instances: list[MolInstance]
    rhs_instances: list[MolInstance]

    # Mapping between each instance and its corresponding template
    instance_template_dict: dict[MolInstance, MolTemplate]

    # Properties of the given instances, as read through the input files if possible
    instance_properties: dict[MolInstance, dict[str, str]]
    template_ids: dict[MolTemplate, str]
    template_names: dict[MolTemplate, str]

    # Atom-to-atom map of this reaction
    map: dict[tuple[MolInstance, int], tuple[MolInstance, int]]
    lhs_index: dict[int, tuple[MolInstance, int]]  # The global map number to (molecule, index) tuple
    rhs_index: dict[int, tuple[MolInstance, int]]  # The global map number to (molecule, index) tuple

    def __init__(self, rdk_reaction: ChemicalReaction):
        """ Wrap ``rdk_reaction`` and index its reactants, products, atom map and templates.
        """
        self.reaction = rdk_reaction
        self._reaction_smiles = None

        # These start empty and are not filled by the constructor.
        self.instance_properties = dict()
        self.template_ids = dict()
        self.template_names = dict()

        # Reactant and product MolInstances, in the order reported by RDKit
        self.lhs_instances = [
            MolInstance(rdk_mol, provenance = 'implicit')
            for rdk_mol in self.reaction.GetReactants()
        ]
        self.rhs_instances = [
            MolInstance(rdk_mol, provenance = 'implicit')
            for rdk_mol in self.reaction.GetProducts()
        ]

        # Global map number -> (molecule, index), where index is the position in the instance's native atom order.
        # NOTE(review): if unmapped atoms share one map number (e.g. 0), later atoms overwrite earlier ones here;
        # confirm against MolInstance.get_atom_map_in_native_order().
        self.lhs_index = dict()
        self.rhs_index = dict()
        for index, instances in (
            (self.lhs_index, self.lhs_instances),
            (self.rhs_index, self.rhs_instances),
        ):
            for instance in instances:
                for local_index, global_index in enumerate(instance.get_atom_map_in_native_order()):
                    index[global_index] = (instance, local_index)

        # Atom-to-atom map: LHS atoms whose map number has no RHS counterpart are left out.
        # TODO do something if the unmatched atom is not a hydrogen?
        self.map = {
            lhs_atom: self.rhs_index[global_index]
            for global_index, lhs_atom in self.lhs_index.items()
            if global_index in self.rhs_index
        }

        # Compute the template of every instance and accumulate the signed template counts per side.
        self.lhs_templates = dict()
        self.rhs_templates = dict()
        self.instance_template_dict = dict()
        for sign, instances, template_counts in (
            (-1, self.lhs_instances, self.lhs_templates),
            (1, self.rhs_instances, self.rhs_templates),
        ):
            for mol_instance in instances:
                template = mol_instance.to_mol_template()
                self.instance_template_dict[mol_instance] = template
                template_counts[template] = template_counts.get(template, 0) + sign

    # Getters
    # ------------------------------------------------------------------------------------------------------------------
    def get_lhs_templates(self) -> list[MolTemplate]:
        """ Return the distinct LHS templates, in insertion order.
        """
        return list(self.lhs_templates.keys())

    def get_rhs_templates(self) -> list[MolTemplate]:
        """ Return the distinct RHS templates, in insertion order.
        """
        return list(self.rhs_templates.keys())

    # Balance
    # ------------------------------------------------------------------------------------------------------------------
    def get_balance_difference(self) -> SumFormula:
        """ Get the difference between the LHS and RHS as a (possibly negative) SumFormula.
        Positive counts indicate a surplus on the LHS, negative counts indicate a surplus on the RHS.

        Not implemented yet: always raises NotImplementedError.
        """
        # TODO intended computation: (sum of the LHS instance formulas) - (sum of the RHS instance formulas)
        raise NotImplementedError()

    def is_balanced(self) -> bool:
        """ Returns true if balanced in both atomic composition and charge.
        If not balanced, use .get_balance_difference() to inspect the difference between LHS and RHS.

        Currently raises NotImplementedError, because get_balance_difference() is not implemented.
        """
        return self.get_balance_difference().is_zero()

    # Representations
    # ------------------------------------------------------------------------------------------------------------------
    def to_reaction_smiles(self) -> str:
        """ Return the reaction SMILES of the wrapped reaction, computing it on first use and caching it.
        """
        if self._reaction_smiles is None:
            self._reaction_smiles = rdk_r.ReactionToSmiles(self.reaction)

        return self._reaction_smiles

    # Serialise
    def serialize(self) -> dict:
        """ Return a dictionary with the reaction SMILES and the serialized LHS and RHS instances.

        The 'balanced' and 'balance_difference' entries are placeholders until balancing is implemented.
        """
        return {
            'reaction_smiles': self.to_reaction_smiles(),
            'lhs': [
                mol.serialize() for mol in self.lhs_instances
            ],
            'rhs': [
                mol.serialize() for mol in self.rhs_instances
            ],
            'balanced': 'TODO',  # TODO
            'balance_difference': 'TODO',  # TODO compute difference in sum formulae for LHS and RHS
        }

    # Misc
    # ------------------------------------------------------------------------------------------------------------------
    def __hash__(self):
        # Hash on the reaction SMILES. No __eq__ is defined, so equality remains identity-based.
        return self.to_reaction_smiles().__hash__()

    def sanity_check(self):
        """ Raises an AssertionError if any instance lacks a SMILES, or if the AAM maps atoms of different elements.
        """
        # Check all instances have smiles
        for inst in [*self.lhs_instances, *self.rhs_instances]:
            if inst.smiles is None:
                raise AssertionError('Instance invalid.')

        # Check mapping
        for global_index, (molinst_l, local_index_l) in self.lhs_index.items():
            try:
                molinst_r, local_index_r = self.rhs_index[global_index]
            except KeyError:
                # Atom 'disappears'
                continue

            l_atom = molinst_l.mol.GetAtomWithIdx(local_index_l)
            r_atom = molinst_r.mol.GetAtomWithIdx(local_index_r)
            if l_atom.GetAtomicNum() != r_atom.GetAtomicNum():
                raise AssertionError(f'Element mismatch: L: {local_index_l}, R: {local_index_r}')

    # Visualisation
    # ------------------------------------------------------------------------------------------------------------------
    def show(self):
        """ Render the reaction to an image with RDKit and open it via the image's show() method.
        """
        img = rdk_draw.ReactionToImage(rxn = self.reaction, subImgSize = (800, 800))
        img.show()
199
+
200
+ # Creators
201
+ # ----------------------------------------------------------------------------------------------------------------------
202
def chem_reaction_from_aam_entry(
        entry: AAM
) -> ChemReaction:
    """ Build a ChemReaction from the reaction SMILES stored on the given AAM entry.
    """
    reaction_smiles = entry.reaction_smiles
    return chem_reaction_from_reactionsmiles(reaction_smiles)
210
+
211
def chem_reaction_from_reactionsmiles(
        reactionsmiles: str
) -> ChemReaction:
    """ Parse a reaction string with RDKit and wrap the result in a ChemReaction.

    The string is passed to RDKit's ``ReactionFromSmarts`` with its default arguments.
    """
    return ChemReaction(rdk_r.ReactionFromSmarts(reactionsmiles))
216
+
217
def chem_reaction_from_rxn(
        rxn: str,
        provenance: Optional[str] = None,
        safe: bool = False
) -> ChemReaction:
    """ Parse an RXN block with RDKit and wrap the result in a ChemReaction.

    ``provenance`` is accepted but not used by this function.
    When ``safe`` is true, RDKit sanitization and hydrogen removal are both switched off.
    """
    # Both RDKit post-processing flags are driven by the same switch.
    postprocess = not safe
    parsed = rdk_r.ReactionFromRxnBlock(rxn, sanitize = postprocess, removeHs = postprocess)
    return ChemReaction(parsed)
@@ -0,0 +1,3 @@
1
+ """ Pre-defined molecules which skip lookup (water, co2, ...)
2
+ """
3
+ # TODO