chemrecon 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. chemrecon/__init__.py +73 -0
  2. chemrecon/chem/__init__.py +0 -0
  3. chemrecon/chem/chemreaction.py +223 -0
  4. chemrecon/chem/constant_compounds.py +3 -0
  5. chemrecon/chem/create_mol.py +91 -0
  6. chemrecon/chem/elements.py +141 -0
  7. chemrecon/chem/gml/__init__.py +0 -0
  8. chemrecon/chem/gml/gml.py +324 -0
  9. chemrecon/chem/gml/gml_reactant_matching.py +130 -0
  10. chemrecon/chem/gml/gml_to_rdk.py +217 -0
  11. chemrecon/chem/mol.py +483 -0
  12. chemrecon/chem/sumformula.py +120 -0
  13. chemrecon/connection.py +97 -0
  14. chemrecon/core/__init__.py +0 -0
  15. chemrecon/core/id_types.py +687 -0
  16. chemrecon/core/ontology.py +209 -0
  17. chemrecon/core/populate_query_handler.py +336 -0
  18. chemrecon/core/query_handler.py +587 -0
  19. chemrecon/database/__init__.py +1 -0
  20. chemrecon/database/connect.py +63 -0
  21. chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
  22. chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
  23. chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
  24. chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
  25. chemrecon/database/params.py +88 -0
  26. chemrecon/entrygraph/draw.py +119 -0
  27. chemrecon/entrygraph/entrygraph.py +301 -0
  28. chemrecon/entrygraph/explorationprotocol.py +199 -0
  29. chemrecon/entrygraph/explore.py +421 -0
  30. chemrecon/entrygraph/explore_procedure.py +183 -0
  31. chemrecon/entrygraph/filter.py +88 -0
  32. chemrecon/entrygraph/scoring.py +141 -0
  33. chemrecon/query/__init__.py +26 -0
  34. chemrecon/query/create_entry.py +86 -0
  35. chemrecon/query/default_protocols.py +57 -0
  36. chemrecon/query/find_entry.py +84 -0
  37. chemrecon/query/get_relations.py +143 -0
  38. chemrecon/query/get_structures_from_compound.py +65 -0
  39. chemrecon/schema/__init__.py +86 -0
  40. chemrecon/schema/db_object.py +363 -0
  41. chemrecon/schema/direction.py +10 -0
  42. chemrecon/schema/entry_types/__init__.py +0 -0
  43. chemrecon/schema/entry_types/aam.py +34 -0
  44. chemrecon/schema/entry_types/aam_repr.py +37 -0
  45. chemrecon/schema/entry_types/compound.py +52 -0
  46. chemrecon/schema/entry_types/enzyme.py +49 -0
  47. chemrecon/schema/entry_types/molstructure.py +64 -0
  48. chemrecon/schema/entry_types/molstructure_repr.py +41 -0
  49. chemrecon/schema/entry_types/reaction.py +57 -0
  50. chemrecon/schema/enums.py +154 -0
  51. chemrecon/schema/procedural_relation_entrygraph.py +66 -0
  52. chemrecon/schema/relation_types_composed/__init__.py +0 -0
  53. chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
  54. chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
  55. chemrecon/schema/relation_types_procedural/__init__.py +0 -0
  56. chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
  57. chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
  58. chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
  59. chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
  60. chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
  61. chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
  62. chemrecon/schema/relation_types_source/__init__.py +0 -0
  63. chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
  64. chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
  65. chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
  66. chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
  67. chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
  68. chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
  69. chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
  70. chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
  71. chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
  72. chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
  73. chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
  74. chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
  75. chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
  76. chemrecon/scripts/initialize_database.py +494 -0
  77. chemrecon/utils/copy_signature.py +10 -0
  78. chemrecon/utils/encodeable_list.py +11 -0
  79. chemrecon/utils/get_id_type.py +70 -0
  80. chemrecon/utils/hungarian.py +31 -0
  81. chemrecon/utils/reactant_matching.py +168 -0
  82. chemrecon/utils/rxnutils.py +44 -0
  83. chemrecon/utils/set_cwd.py +12 -0
  84. chemrecon-0.1.1.dist-info/METADATA +143 -0
  85. chemrecon-0.1.1.dist-info/RECORD +86 -0
  86. chemrecon-0.1.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,70 @@
1
+ # Match the identifiers org prefix as group 1, and the actual id as group 2
2
+ import re
3
+ from typing import Optional
4
+
5
+ from chemrecon.core.id_types import (IdentifierType, IdentifierTypeAAM, IdentifierTypeCompound,
6
+ IdentifierTypeEnzyme, IdentifierTypeReaction,
7
+ IdentifierTypeStructureRepresentation,
8
+ id_types_aam, id_types_compound,
9
+ id_types_enzyme, id_types_reaction, id_types_structure_representation,
10
+ identifiers_org_dict, )
11
+
12
# Splits an identifiers.org URL into the prefix (group 1) and the actual id (group 2)
identifiers_org_regex = re.compile(r'(https?://identifiers.org/[a-zA-Z./\-]+/)(.+)')

# The identifier types registered for each entrytype (compounds, reactions, etc)
typelists: dict[type[IdentifierType], list[IdentifierType]] = {
    IdentifierTypeCompound: id_types_compound,
    IdentifierTypeStructureRepresentation: id_types_structure_representation,
    IdentifierTypeReaction: id_types_reaction,
    IdentifierTypeEnzyme: id_types_enzyme,
    IdentifierTypeAAM: id_types_aam
}

# Sub-lookup dicts for identifier type names for each entrytype.
# Every identifier type is reachable through its lower-cased name, shortname and alternative names.
# Entries are inserted in that order, so on a name clash the identifier type listed last wins.
id_type_name_lookup_subsdicts: dict[type[IdentifierType], dict[str, IdentifierType]] = {
    entrytype_idtype: {
        label.lower(): idtype
        for idtype in idtypes
        for label in (idtype.name, idtype.shortname, *idtype.alt_names)
    }
    for entrytype_idtype, idtypes in typelists.items()
}
31
+
32
+
33
+
34
+ def get_id_type[T: IdentifierType](
35
+ type_str: str,
36
+ identifier_supertype: type[T]
37
+ ) -> Optional[T]:
38
+ """ Get an identifier type by a strign description. """
39
+ try:
40
+ return id_type_name_lookup_subsdicts[identifier_supertype][type_str.lower()]
41
+ except KeyError:
42
+ return None
43
+
44
+ def get_id_type_from_name[T: IdentifierType](name: str) -> T:
45
+ """ Get an identifier type (of type T, e.g. Compound, Reaction, Struct representation, ...) """
46
+ raise NotImplementedError
47
+
48
+ def get_possible_id_types_from_source_id[T: IdentifierType](name: str) -> list[T]:
49
+ """ Get a list of possible compatible identifiers (of type T, e.g. Compound, Reaction,
50
+ Struct representation, ...) """
51
+ raise NotImplementedError
52
+
53
def get_id_from_identifiers_org(
        string: str,
        identifier_supertype: type[IdentifierType]
) -> Optional[tuple[IdentifierType, str]]:
    """ Resolve an identifiers.org URL to an identifier type and the id it carries.
    The URL prefix is looked up in identifiers_org_dict. Returns a tuple (identifier type, id), or None
    if the string is not an identifiers.org URL, the prefix is unknown, or the identifier type found
    is not an instance of identifier_supertype.
    """
    url_match = identifiers_org_regex.match(string)
    if url_match is None:
        return None

    prefix = url_match.group(1)
    match_id = url_match.group(2).removesuffix('/')  # TODO should trailing '/' be removed in all cases?

    try:
        id_type: IdentifierType = identifiers_org_dict[prefix]
    except KeyError:
        # Prefix not registered
        return None

    if isinstance(id_type, identifier_supertype):
        return (id_type, match_id)
    return None
@@ -0,0 +1,31 @@
1
+ import networkx as nx
2
+
3
+ def max_weight_matching[T1, T2](
4
+ edges: dict[tuple[T1, T2], float],
5
+ min_weight: bool = False
6
+ ) -> dict[T1, T2]:
7
+ """ Takes as edges a dictionary T1 -> [T2], with a float value giving the weight for each match in the
8
+ second set.
9
+ Returns the maximum weight bipartite matching as a list of tuples (T1, T2).
10
+ If min_weight is true, uses negative weight values.
11
+ """
12
+ # Construct a weighted bipartite graph
13
+ g = nx.Graph()
14
+ for (a, b), w in edges.items():
15
+ g.add_node(a)
16
+ g.add_node(b)
17
+ g.add_edge(a, b, weight = ((2 - w) if min_weight else w))
18
+
19
+ # Find the maximal matching
20
+ matching = nx.max_weight_matching(g, maxcardinality = True)
21
+ keys: set[T1] = {k for k, _ in edges.keys()}
22
+ out: dict[T1, T2] = dict()
23
+ for a, b in matching:
24
+ if a in keys:
25
+ out[a] = b
26
+ elif b in keys:
27
+ out[b] = a
28
+ else:
29
+ raise RuntimeError
30
+
31
+ return out
@@ -0,0 +1,168 @@
1
+ """ Implements a heuristic algorithm to match structures of a ChemReaction to the structures attached to a
2
+ Compound.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ from collections import OrderedDict
7
+ from enum import Enum
8
+ from typing import Hashable
9
+
10
+ from chemrecon import Direction, ChemReaction, Mol
11
+ from chemrecon.chem.create_mol import mol_from_struct_entry
12
+ from chemrecon.entrygraph.entrygraph import EntryGraph
13
+ from chemrecon.entrygraph.explorationprotocol import ExplorationProtocol
14
+ from chemrecon.entrygraph.explore import explore
15
+ from chemrecon.entrygraph.scoring import Scorer
16
+ from chemrecon.schema import (
17
+ Compound, MolStructure,
18
+ )
19
+ from chemrecon.schema.relation_types_procedural.compound_select_structure_proceduralrelation import \
20
+ CompoundSelectStructure
21
+ from chemrecon.utils import hungarian
22
+
23
+
24
class FlipState(Enum):
    """ Orientation reported by match_reactants for the chosen matching. """
    # Left- and right-hand sides were matched as given
    NORMAL = 0
    # The matching with the two sides swapped was chosen
    FLIPPED = 1
27
+
28
+
29
def match_reactants(
        reaction: ChemReaction,
        compound_entries_lhs: dict[str, list[Compound]],
        compound_entries_rhs: dict[str, list[Compound]],
        consider_n_most_confident_structures: int = 1,
        consider_first_entry_primary: bool = True,
        consider_flipped: bool = False,
        search_depth: int = 2
) -> tuple[dict[Mol, str], dict[Mol, str], FlipState]:
    """ Match the template Mols on each side of a ChemReaction to the given compounds.

    Compound entries are given per side as a dict (with key = primary_id) holding a list of associated
    entries, possibly only 1 (a compound can come with multiple entries, for SBML files, for instance).
    For every compound, candidate structures are collected with search_structures and converted to Mols;
    structures for which no Mol can be generated are skipped, and a compound without valid initial entries
    gets no candidates.
    If consider_flipped is set, will also consider the case when LHS and RHS are flipped, and pick the
    orientation with the higher summed score.
    Returns the matching for the reaction's LHS, the matching for its RHS, and whether flipped was best.
    NOTE: consider_first_entry_primary and search_depth are accepted but not read by this function.
    """

    # First, find structures using EntryGraphs
    structures_lhs: dict[str, OrderedDict[Mol, float]] = OrderedDict()
    structures_rhs: dict[str, OrderedDict[Mol, float]] = OrderedDict()
    for entries_by_id, structures_by_id in (
            (compound_entries_lhs, structures_lhs),
            (compound_entries_rhs, structures_rhs),
    ):
        for primary_id, entry_list in entries_by_id.items():
            found = dict()
            structures_by_id[primary_id] = found
            try:
                for struct_entry, score in search_structures(entry_list).items():
                    try:
                        found[mol_from_struct_entry(struct_entry)] = score
                    except AttributeError:
                        # Could not generate Mol
                        continue
            except ValueError:
                # No valid initial entries
                structures_by_id[primary_id] = dict()

    def _match_side(templates, structures):
        # All four matchings below share the same candidate limit
        return match(
            templates,
            structures,
            consider_n_most_confident_structures = consider_n_most_confident_structures
        )

    # Perform matching based on the Mols in the ChemReaction
    match_ll, score_ll = _match_side(reaction.get_lhs_templates(), structures_lhs)
    match_rr, score_rr = _match_side(reaction.get_rhs_templates(), structures_rhs)

    if not consider_flipped:
        return (match_ll, match_rr, FlipState.NORMAL)

    # Consider the reaction flipped
    match_lr, score_lr = _match_side(reaction.get_lhs_templates(), structures_rhs)
    match_rl, score_rl = _match_side(reaction.get_rhs_templates(), structures_lhs)

    if (score_lr + score_rl) > (score_ll + score_rr):
        print(f' -> flipped best ({score_lr:.2f} + {score_rl:.2f}) > ({score_ll:.2f} + {score_rr:.2f})')
        return (match_lr, match_rl, FlipState.FLIPPED)

    print(f' -> non-flipped best ({score_lr:.2f} + {score_rl:.2f}) < ({score_ll:.2f} + {score_rr:.2f})')
    return (match_ll, match_rr, FlipState.NORMAL)
116
+
117
+
118
+ # Matching alg
119
+ def match[T: Hashable](
120
+ reaction_mols: list[Mol],
121
+ compound_mols: dict[T, OrderedDict[Mol, float]],
122
+ consider_n_most_confident_structures: int = 1,
123
+ ) -> tuple[dict[Mol, T], int]:
124
+ """ Given the mols from the reaction and the compound_mols (with confidence), get the best mapping.
125
+ Type parameter T is the key, probalby str (primary_id).
126
+ Returns the matching, as well as a confidence level (0 to 1).
127
+ """
128
+ similarity: dict[tuple[Mol, T], float] = dict()
129
+
130
+ # First, compute the pairwise similarity of all compounds
131
+ for mol in reaction_mols:
132
+ for primary_id, mols_ in compound_mols.items():
133
+ # if len(mols_) == 0:
134
+ # # No structures given
135
+ # continue
136
+
137
+ sims: list[float] = list()
138
+ for mol_, conf in list(mols_.items())[:consider_n_most_confident_structures]:
139
+ # Get distance
140
+ sims.append(mol.get_similarity(mol_) * conf) # 1 if identical, approaches 0 as difference increases.
141
+
142
+ # Calculate the similarity as the maximum of (sim * conf)
143
+ similarity[(mol, primary_id)] = max(sims) if sims else 0
144
+
145
+ # Run the Hungarian algorithm to determine the best matching
146
+ matching = hungarian.max_weight_matching(edges = similarity)
147
+
148
+ # Compute the confidence/score of the matching as avg. weight of matched edges
149
+ confidence = sum(similarity[mol, primary_id] for mol, primary_id in matching.items()) / len(reaction_mols)
150
+ return matching, confidence
151
+
152
+
153
def search_structures(compound_entries: list[Compound]) -> OrderedDict[MolStructure, float]:
    """ Find candidate structures for a compound given by one or more entries.
    Creates an EntryGraph from the entries, explores it for 4 steps following structure_protocol, and
    returns the result of applying scorer_structure to the explored graph.
    """
    entry_graph = EntryGraph(initial_entries = set(compound_entries))
    explore(entry_graph, structure_protocol, steps = 4)
    scores = scorer_structure(entry_graph)
    return scores
157
+
158
+
159
+ # EntryGraph specifications for search
160
+ # ----------------------------------------------------------------------------------------------------------------------
161
# Protocol used by search_structures: only CompoundSelectStructure relations are listed
structure_protocol = ExplorationProtocol(relation_types = {CompoundSelectStructure})

# Scorer used by search_structures, configured for MolStructure entries
scorer_structure = Scorer[MolStructure](score_entry_type = MolStructure)
167
+
168
+ # TODO score depending on whether primary id is weighted higher
@@ -0,0 +1,44 @@
1
+ import re
2
+
3
# A comment is anything enclosed in curly braces on a single line (the braces are part of the match)
re_comment = re.compile(r'{(.*)}')


def extract_molblocks_v2000(rxnblock: str) -> list[tuple[str, str]]:
    """ Gets the individual molblocks for each compound involved in the reaction. Returns tuples of the
    molblock and comment string.

    The rxnblock is split on '$MOL'; the leading part starting with '$RXN' is skipped.
    The comment string is the first '{...}' occurrence in the molblock, including the braces, or '' if
    there is none. The molblock is the text following the '$MOL' line.

    Raises IndexError if a molblock has no line starting with a digit (no count line), and ValueError
    if the first three characters of that line are not an integer.
    """
    molblocks: list[tuple[str, str]] = list()

    for mol_str in rxnblock.split('$MOL'):

        # Skip the first, $RXN block
        if mol_str.startswith('$RXN'):
            continue

        # Comment
        comment_match = re_comment.search(mol_str)
        comment_str = comment_match[0] if comment_match else ''

        # Block
        lines = mol_str.splitlines()

        # Locate the count line: the first line whose first non-blank character is a digit.
        # Empty and whitespace-only lines are skipped.
        countline = None
        for line in lines:
            stripped = line.strip()
            if stripped and stripped[0].isnumeric():
                countline = line
                break
        if countline is None:
            raise IndexError('no count line found in molblock')

        # The number of atoms is parsed only as a sanity check of the count line; it is not used further
        int(countline[0:3])

        # Drop the remainder of the '$MOL' line itself
        molblock = '\n'.join(lines[1:])

        molblocks.append((molblock, comment_str))

    # Finalise
    return molblocks
42
+
43
+
44
+
@@ -0,0 +1,12 @@
1
+ import os
2
+
3
def set_cwd():
    """ Change the working directory to the closest enclosing project directory.

    Moves upwards from the current working directory until the directory name is one of 'chemrecon',
    'ChemRecon', 'src' or 'chemrecon_populator', then prints the resulting directory.
    Raises FileNotFoundError (after restoring the original working directory) if the filesystem root is
    reached without finding such a directory.
    """
    project_dir_names = {
        'chemrecon',
        'ChemRecon',
        'src',
        'chemrecon_populator'
    }
    start_dir = os.getcwd()
    current = start_dir

    # os.path.basename handles the platform's path separator
    while os.path.basename(current) not in project_dir_names:
        os.chdir('..')
        parent = os.getcwd()
        if parent == current:
            # Moving up no longer changes the directory: the root was reached, stop instead of looping
            os.chdir(start_dir)
            raise FileNotFoundError(
                f'No directory named one of {sorted(project_dir_names)} found above {start_dir}'
            )
        current = parent

    print(f'Changed directory: {os.getcwd()}')
@@ -0,0 +1,143 @@
1
+ Metadata-Version: 2.4
2
+ Name: chemrecon
3
+ Version: 0.1.1
4
+ Summary: The ChemRecon library for integration and exploration of interconnected biochemical databases.
5
+ Keywords: bioinformatics
6
+ Author: Casper Asbjørn Eriksen
7
+ Author-email: Casper Asbjørn Eriksen <casbjorn@imada.sdu.dk>
8
+ License-Expression: GPL-3.0-only
9
+ Classifier: Programming Language :: Python :: 3.12
10
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
11
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
12
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
13
+ Requires-Dist: psycopg[binary]~=3.3.2
14
+ Requires-Dist: rustworkx~=0.17.1
15
+ Requires-Dist: networkx~=3.6.1
16
+ Requires-Dist: matplotlib~=3.10
17
+ Requires-Dist: rdkit
18
+ Requires-Dist: sphinx==8.3.0 ; extra == 'docs'
19
+ Requires-Dist: myst-parser ; extra == 'docs'
20
+ Requires-Dist: sphinx-autobuild ; extra == 'docs'
21
+ Requires-Dist: enum-tools[sphinx]==0.12.0 ; extra == 'docs'
22
+ Requires-Dist: sphinx-toolbox ; extra == 'docs'
23
+ Requires-Dist: nbsphinx ; extra == 'docs'
24
+ Requires-Dist: ipykernel>=7.1.0 ; extra == 'docs'
25
+ Requires-Dist: furo ; extra == 'docs'
26
+ Requires-Dist: sphinxext-opengraph ; extra == 'docs'
27
+ Maintainer: Casper Asbjørn Eriksen
28
+ Maintainer-email: Casper Asbjørn Eriksen <casbjorn@imada.sdu.dk>
29
+ Requires-Python: >=3.12
30
+ Provides-Extra: docs
31
+ Description-Content-Type: text/markdown
32
+
33
+ # ChemRecon
34
+ *v. 0.1.1*
35
+
36
+ ChemRecon is a Python library and consolidated meta-database designed to simplify the integration and exploration of
37
+ biochemical data from a range of sources.
38
+ It is built from full-database downloads of compounds, reactions, enzymes, molecular structures, and atom-to-atom maps
39
+ from the following source databases: BiGG, BRENDA, ChEBI, ECMDB, M-CSA, MetaMDB, MetaNetX, and PubChem.
40
+
41
+ Heterogeneous data formats were standardized, and relationships within and between these databases were reconstructed in
42
+ a consistent format.
43
+ The resulting meta-database is freely accessible online and is complemented by a Python library which allows for easy
44
+ integration into existing workflows.
45
+ This enables unified querying of entries from all the source databases, and discovery and visualization of
46
+ relationships between these entries.
47
+
48
+ ![entrygraph](docs/source/resources/eg.svg)
49
+
50
+ ChemRecon was developed at the
51
+ [Algorithmic Cheminformatics Group](https://cheminf.imada.sdu.dk/),
52
+ [Department of Mathematics and Computer Science](https://cheminf.imada.sdu.dk/),
53
+ [University of Southern Denmark](https://sdu.dk).
54
+
55
+ ## Paper
56
+ If ChemRecon proves useful to your research, you may want to cite the following paper.
57
+ * **Title**
58
+
59
+ C. A. Eriksen, J. L. Andersen, R. Fagerberg, D. Merkle
60
+
61
+ Arxiv preprint, submitted to Bioinformatics.
62
+
63
+ TODO more
64
+
65
+ ## Availability and Installation
66
+ ChemRecon is available via your Python package manager from the Python Package Index (PyPI):
67
+ [chemrecon](https://pypi.org/project/chemrecon/)
68
+ It can be installed using pip:
69
+
70
+ `pip install chemrecon`
71
+
72
+ Visualizing entry graphs requires [GraphViz](https://www.graphviz.org/) to be installed, and for the `dot` executable,
73
+ which renders the graphs, to be available on your system's `PATH`.
74
+ See the [GraphViz Python package](https://pypi.org/project/graphviz/) for instructions.
75
+
76
+ ***
77
+
78
+ ## Documentation
79
+ The documentation, including instructions on usage, tutorials, and complete description covering the types of entries
80
+ and relations supported, is available on the [ChemRecon homepage](https://www.cheminf.imada.sdu.dk/chemrecon).
81
+
82
+ ## Usage
83
+ The following is an example of a typical ChemRecon workflow, producing the graph seen above.
84
+ For more detailed examples, see the tutorial section of the documentation.
85
+
86
+ ```python
87
+ from chemrecon import *
88
+
89
+ connect_public()
90
+
91
+ # Perform a database query to find the 'citrate' entry in BiGG.
92
+ citrate_entry = find_entry(id_type = C_BIGG, source_id = 'M_cit')
93
+
94
+ # Define a protocol to find related entries and molecular structures (protocols like this are included)
95
+ compound_structure_protocol = ExplorationProtocol(
96
+ relation_types = {CompoundReference, CompoundHasMolStructure, MolStructureStandardization}
97
+ )
98
+
99
+ # Create and expand an entry graph, according to this protocol, by traversing the database.
100
+ eg = EntryGraph(initial_entries = {citrate_entry})
101
+ explore(eg, compound_structure_protocol, steps = 5)
102
+
103
+ # Score the molecular structures in the graph according to their 'connectedness'
104
+ scorer = Scorer(score_entry_type = MolStructure)
105
+ scores = scorer(citrate_entry) # Result is an OrderedDict
106
+
107
+ # Draw the graph with these scores, producing the image seen on this page
108
+ eg.show(scores = scores)
109
+ ```
110
+
111
+ ***
112
+
113
+ ## Database
114
+ ChemRecon needs to be connected to a database to function.
115
+ The easiest is to connect to the public database, hosted by [SDU](https://sdu.dk):
116
+ ```
117
+ connect_public()
118
+ ```
119
+ Alternatively, a local instance of the database can be hosted via Docker.
120
+ Instructions are given in the [documentation](https://chemrecon.org).
121
+ This has the advantage of lower latency, making queries and entry graph construction faster, and allows adding
122
+ custom data sources.
123
+
124
+ ## Source Databases
125
+ ChemRecon contains compound, molecular structure, reaction, atom-to-atom map, and enzyme entries from the following
126
+ databases.
127
+
128
+ | Source | Compound | Structure | Reaction | AAM | Enzyme | Version |
129
+ |------------|--------------|---------------|------------|---------|----------|---------|
130
+ | BiGG | 20428 | - | 33942 | - | 5705 | 1.6 |
131
+ | BRENDA | - | - | 61129 | - | 8697 | 2025_1 |
132
+ | ChEBI | 224485 | 330207 | - | - | - | 2024-05 |
133
+ | ECMDB | 3760 | 7517 | - | - | - | 2.0 |
134
+ | M-CSA | - | - | 1003 | 342 | 1003 | 2024-11 |
135
+ | MetaMDB | 80815 | 4392 | 74520 | 1003 | - | 2025-02 |
136
+ | MetaNetX | 2601834 | 2297518 | 143880 | - | 48175 | 4.4 |
137
+ | PubChem | 9031498 | 5000000 | - | - | - | 2024-09 |
138
+
139
+ In addition to the source databases, ChemRecon can make use of a greater number of *auxiliary* databases, including
140
+ MetaCyc and KEGG. Data from these sources is not directly included due to being proprietary or difficult to access.
141
+ However, the source databases contain references to the auxiliary databases, so entries are created which contain only
142
+ the identifier and no additional information. This allows users to use ChemRecon workflows based on identifiers from a
143
+ great number of databases, not just the source databases.
@@ -0,0 +1,86 @@
1
+ chemrecon/__init__.py,sha256=BFgYLCNdIybZFfwmD4G6im0vRhFxTvyCB7WMr_Tt1Bc,2098
2
+ chemrecon/chem/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ chemrecon/chem/chemreaction.py,sha256=AA1JsVbYUi7aAbGkxQ027Vou0ZPUPCqAjeaLXwiOIZk,8525
4
+ chemrecon/chem/constant_compounds.py,sha256=3wOvVs7RKRCiV5n4d44NsfDKo4aDcuGXOiSIZc83Wl0,72
5
+ chemrecon/chem/create_mol.py,sha256=5ObHNF30rrdyTuY0jdXjN2Pqgk3cyagpl_vnixo-m3Q,3075
6
+ chemrecon/chem/elements.py,sha256=09QkK8C0BZT1WbTu6pQQqssGmnC8Km0_MCm9xURDADQ,5146
7
+ chemrecon/chem/gml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ chemrecon/chem/gml/gml.py,sha256=AP9VQbTkWaNLwwceSC8KT9iG2wYBsEvFv264JjNpS1I,9468
9
+ chemrecon/chem/gml/gml_reactant_matching.py,sha256=xYw23uNIuuxzaMrtQoHhY7M0ZpBiuSzuszaXKGjr9ko,6010
10
+ chemrecon/chem/gml/gml_to_rdk.py,sha256=gjUiJHR9WrGcvz_2etEOnpgH49vtqIHwMwtgzI6Eo78,7470
11
+ chemrecon/chem/mol.py,sha256=mg0cSFYzWKTPiOrfKlcTLtcRlgQFTWAOArqoR2Rr5kQ,15200
12
+ chemrecon/chem/sumformula.py,sha256=6RuzXvhtKTRWGXpTS-QcfalsqwtxTzEolF5LiVdPVaM,4176
13
+ chemrecon/connection.py,sha256=2I0Y0Nxmd_mXBpU-0bTnAsinrmFfj2Qtw0LadklE0p8,3277
14
+ chemrecon/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ chemrecon/core/id_types.py,sha256=WvuG_AOqznMcj-zsAkQzHxOuelUBZThl5TvIZTJ4nZM,19432
16
+ chemrecon/core/ontology.py,sha256=GJ6qrD6CZNvhT8DEnCnoMa5QjFjwTKqWomrlQp0tNME,5162
17
+ chemrecon/core/populate_query_handler.py,sha256=-IXehaqm-iWNdaOx00_RwW4QwR-xYGWoslVwdDcH8UI,12988
18
+ chemrecon/core/query_handler.py,sha256=5YQBbRSg72gdf1Zkuew1md8mMwyuJK9WlyuLze0AuW0,24039
19
+ chemrecon/database/__init__.py,sha256=nYwlJnBC-i88HqDvoap-ZQHUqKoeuCDSsptJjfqNgxo,53
20
+ chemrecon/database/connect.py,sha256=lobpyqkmiF7VTTgAqzHPANyjtu01i-se9BsENsvnxng,2031
21
+ chemrecon/database/connection_params/chemrecon_pub.dbinfo,sha256=RcotjdR1twQ6uh2OJCFz6r9tq-zAegnSIE91MQ_DSoo,72
22
+ chemrecon/database/connection_params/local_docker_dev.dbinfo,sha256=REtQj-C9delndbH9achchc8FS2TtOGbd7AjsnN49bug,50
23
+ chemrecon/database/connection_params/local_docker_init.dbinfo,sha256=yxe6DPeFcSy_q02L3VwKDsX_Of5AXSogkGSwa4qpFQY,51
24
+ chemrecon/database/connection_params/local_docker_pub.dbinfo,sha256=8LmyxYI5fMIVM6CKKDVLmfWXbYjCI8R79nEOhGNFDKA,67
25
+ chemrecon/database/params.py,sha256=wbo_ChWOYG_mDOyXn1nIAQnt9fe9zlEzXvf7eACUVLY,2566
26
+ chemrecon/entrygraph/draw.py,sha256=alpETBm4m4dusjTmY9iWRAOkq-O8OgMvOmYBaGw4vjU,3601
27
+ chemrecon/entrygraph/entrygraph.py,sha256=cYXC6BORm-cFeyegpXCk3MHghc4ccRzPhEq6Kl1HOp8,11565
28
+ chemrecon/entrygraph/explorationprotocol.py,sha256=vq5djr7J_VhDdVb7LOK2BVpMnW0E2DMCAtymoRd6_mU,10246
29
+ chemrecon/entrygraph/explore.py,sha256=zA_KCCGgfSA15Yn_6P_DSxKiBAm22V05F98kFOvHzYg,18340
30
+ chemrecon/entrygraph/explore_procedure.py,sha256=P_TM7jIRHvxIysN0lcAYCzOH0ic41dr41s1M43E3Dqc,6239
31
+ chemrecon/entrygraph/filter.py,sha256=CN3nw4WRwa4baUtdg-rn5GOEIhJHu-HURe3ie1cDDMY,2553
32
+ chemrecon/entrygraph/scoring.py,sha256=3qQo_hnCH9lMmjVWVcMSte2PXszO-ag__KhqMgH3ezk,5868
33
+ chemrecon/query/__init__.py,sha256=is4JVcbIFPeBOoq7qPYwDFjtYf2EPmzUVvLpG2uOduI,766
34
+ chemrecon/query/create_entry.py,sha256=T8Lb77Is5w5BjL9_N0qxsWhAJMbdRqBhebGm6qI0NUk,3424
35
+ chemrecon/query/default_protocols.py,sha256=gxfWpK2XQVxRwk8trtua1fnwRqrv2N1AzZskpwgbFR0,2799
36
+ chemrecon/query/find_entry.py,sha256=4T3d5pCBiZNmTXAn6lM5NK4NMN6eyD1NsmwWygXypzw,4041
37
+ chemrecon/query/get_relations.py,sha256=blZNbI-dQO7mhPuFXE4YCtpfYRDPXN5JvmcUswjd7fw,5635
38
+ chemrecon/query/get_structures_from_compound.py,sha256=f2QuNulVY2R0-RnIiSbVfb4_b8smW8L1cMmIytKby10,1833
39
+ chemrecon/schema/__init__.py,sha256=Pu_i7-jvKf_KOJxWaTs8_3uMiOv1a45O16QD9Hxkclo,3718
40
+ chemrecon/schema/db_object.py,sha256=6qMgkKApbUsjvv6ocwZ5ifLBCvJsiX4j9dAZe3kfLto,12876
41
+ chemrecon/schema/direction.py,sha256=k4F252HHAt2FqjjhNY-wihTN12iCgx6866LE9MUevnQ,266
42
+ chemrecon/schema/entry_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
+ chemrecon/schema/entry_types/aam.py,sha256=lW-yN8MVEChx5MwXcMw9tGmnGrb-rtYxBk6DNgUsBLU,772
44
+ chemrecon/schema/entry_types/aam_repr.py,sha256=DYjqloFHv0jvq_dQad7TNzzumry_iiA_7y7d2weQILY,982
45
+ chemrecon/schema/entry_types/compound.py,sha256=DBIDPNojqn51dnlRcGeFfgLa-zD_kLVik-GGEvc10SI,1621
46
+ chemrecon/schema/entry_types/enzyme.py,sha256=qpO_z0OZfENptehBo1sbF4jXCUME9f4oOGGpIbR-l_g,1547
47
+ chemrecon/schema/entry_types/molstructure.py,sha256=MFf_kVpM8t2vfXNV8_g_82cJ5b8PEU19BoLonQEr0x4,2043
48
+ chemrecon/schema/entry_types/molstructure_repr.py,sha256=rrz6XqJ7JXd_lmAIPKWbjo1Z9sEK7CWhxF2TgZMlQ-M,1422
49
+ chemrecon/schema/entry_types/reaction.py,sha256=7XPLatMvaU-v2K4Cdlh2ht9BInnp-yNABEJxEnXTW9U,1956
50
+ chemrecon/schema/enums.py,sha256=JlwqU4IqzCkaKEdpUtiE39IibtNz_r3opK0DOitdexM,3780
51
+ chemrecon/schema/procedural_relation_entrygraph.py,sha256=UqlCem7wBLCTq1jbCQB_pvPwf7kF8uFyX-MR7sFOtPM,2120
52
+ chemrecon/schema/relation_types_composed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
+ chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py,sha256=d1GfBPiStbmD_X7WIwWnH0Vl5GLsz_95xlEv4glifJA,2182
54
+ chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py,sha256=0q39smTcByAguDXQgkWGoGv9dGYE50tMslmMjEoBZm8,1744
55
+ chemrecon/schema/relation_types_procedural/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
+ chemrecon/schema/relation_types_procedural/aam_convert_relation.py,sha256=84OCfMK35YAGEDtAmWdnuSOHRSJfuEAk0zzHSOUb_nA,2295
57
+ chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py,sha256=HwRRpFP4ftzwFZexCWfKNelvCnkgN6ai15SDySZOO-M,1545
58
+ chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py,sha256=chG3GNX2BBDTWIuSVfZUJ_YF_ZVBSoel2d_AN0OChS0,6
59
+ chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py,sha256=SKoim_kn0kxkKHKO1EzRNpSnxvt-zh9wB089bJ77zT0,1625
60
+ chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py,sha256=39yXLPRK9k3xUJJV75exVGZZT5-z3dV6cxNVsXCTqS0,1499
61
+ chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py,sha256=chG3GNX2BBDTWIuSVfZUJ_YF_ZVBSoel2d_AN0OChS0,6
62
+ chemrecon/schema/relation_types_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
+ chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py,sha256=Blu1QsIpM8ZEX7pvFR9yUkFPJcAy3gN3EdtUoV4JBk8,2702
64
+ chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py,sha256=OmFGuP4mR6Ctmex-zZyxn0HQsntD0OBkLmYyBq9FSa0,2582
65
+ chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py,sha256=jqdN9w6IPqNH_FhCjsOPezqJL_EMl7K2zzy9z8MGXrQ,1001
66
+ chemrecon/schema/relation_types_source/compound_reference_relation.py,sha256=kiXcxTkYqHxjINhwsC6K7K7BDvJ69BjJ00SH-qiNbQY,907
67
+ chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py,sha256=RRrTPYhAJL17ZAbzb3nOUuhRTVLsDEuWuWauFZ62pQI,2201
68
+ chemrecon/schema/relation_types_source/ontology/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
+ chemrecon/schema/relation_types_source/ontology/compound_ontology.py,sha256=_cFQgMRaX48frvrJ1YzbFNwOyPyL7uR42i8qa3omQT4,10314
70
+ chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py,sha256=KrSprX-pUFzbH82Y1oJtkDP9vwvDUyfvsPKHWyyYHRg,3969
71
+ chemrecon/schema/relation_types_source/ontology/reaction_ontology.py,sha256=VCRrWGGEcz2mDvNucnPWqajbPYqw_QSlgKBEDNnIaEI,3903
72
+ chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py,sha256=eIj2FqFPM2wO5f4GmAWF05hSkHpxH_PB4CrT17vqBBA,947
73
+ chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py,sha256=WEW84bxD2bEFmnnnL9oLVEL78TvR5ZfO5UxgBefv1WQ,1924
74
+ chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py,sha256=Q4ayiZpz0IFPKsm6vr2ZepmZPlI9Cwa2YRmoeLGaXSE,2079
75
+ chemrecon/schema/relation_types_source/reaction_reference_relation.py,sha256=6liEih9Y3sFaQJZ3rEeJQm5C10cj0a6MMaEUeBdX394,908
76
+ chemrecon/scripts/initialize_database.py,sha256=FDMz61-o3BwvQOGbTX6UC3csh08olxYVCfF6-ycEcXk,19855
77
+ chemrecon/utils/copy_signature.py,sha256=pSqKIamm5j24YysvMfB0vbBsQOvsbekW7mtFgJ8ifE4,452
78
+ chemrecon/utils/encodeable_list.py,sha256=2MkzvTQzibUWuDHOL02IByYXv9j3T9QK1Z4qjfHffW0,337
79
+ chemrecon/utils/get_id_type.py,sha256=t93W9wBHvge09gDjbdZjQl57Vzg8wuBYvSGCP27l_uw,2990
80
+ chemrecon/utils/hungarian.py,sha256=Y1JR0r8dFt5lCTuj1Bj_3D4t8xmr57CHv3lkwo2L_rk,986
81
+ chemrecon/utils/reactant_matching.py,sha256=rfRapLqsgDHaiRUP34qvc15eQLOB_Z9CaVJ_ttb7s1o,6857
82
+ chemrecon/utils/rxnutils.py,sha256=7FgDzjY7qfTV32q1RWzFYqjPM1vEW_DXOMcfiQAzNAA,1190
83
+ chemrecon/utils/set_cwd.py,sha256=n7wGQS9MxmxpAUVQla_VRFytXmMcps97WhS57CUrlGU,237
84
+ chemrecon-0.1.1.dist-info/WHEEL,sha256=iHtWm8nRfs0VRdCYVXocAWFW8ppjHL-uTJkAdZJKOBM,80
85
+ chemrecon-0.1.1.dist-info/METADATA,sha256=a8Rvo9fM0sM0azTZ3fr7vW2ETLh5_-UN8ndVJEmr9bY,6618
86
+ chemrecon-0.1.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.9.30
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any