chemrecon 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemrecon/__init__.py +73 -0
- chemrecon/chem/__init__.py +0 -0
- chemrecon/chem/chemreaction.py +223 -0
- chemrecon/chem/constant_compounds.py +3 -0
- chemrecon/chem/create_mol.py +91 -0
- chemrecon/chem/elements.py +141 -0
- chemrecon/chem/gml/__init__.py +0 -0
- chemrecon/chem/gml/gml.py +324 -0
- chemrecon/chem/gml/gml_reactant_matching.py +130 -0
- chemrecon/chem/gml/gml_to_rdk.py +217 -0
- chemrecon/chem/mol.py +483 -0
- chemrecon/chem/sumformula.py +120 -0
- chemrecon/connection.py +97 -0
- chemrecon/core/__init__.py +0 -0
- chemrecon/core/id_types.py +687 -0
- chemrecon/core/ontology.py +209 -0
- chemrecon/core/populate_query_handler.py +336 -0
- chemrecon/core/query_handler.py +587 -0
- chemrecon/database/__init__.py +1 -0
- chemrecon/database/connect.py +63 -0
- chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
- chemrecon/database/params.py +88 -0
- chemrecon/entrygraph/draw.py +119 -0
- chemrecon/entrygraph/entrygraph.py +301 -0
- chemrecon/entrygraph/explorationprotocol.py +199 -0
- chemrecon/entrygraph/explore.py +421 -0
- chemrecon/entrygraph/explore_procedure.py +183 -0
- chemrecon/entrygraph/filter.py +88 -0
- chemrecon/entrygraph/scoring.py +141 -0
- chemrecon/query/__init__.py +26 -0
- chemrecon/query/create_entry.py +86 -0
- chemrecon/query/default_protocols.py +57 -0
- chemrecon/query/find_entry.py +84 -0
- chemrecon/query/get_relations.py +143 -0
- chemrecon/query/get_structures_from_compound.py +65 -0
- chemrecon/schema/__init__.py +86 -0
- chemrecon/schema/db_object.py +363 -0
- chemrecon/schema/direction.py +10 -0
- chemrecon/schema/entry_types/__init__.py +0 -0
- chemrecon/schema/entry_types/aam.py +34 -0
- chemrecon/schema/entry_types/aam_repr.py +37 -0
- chemrecon/schema/entry_types/compound.py +52 -0
- chemrecon/schema/entry_types/enzyme.py +49 -0
- chemrecon/schema/entry_types/molstructure.py +64 -0
- chemrecon/schema/entry_types/molstructure_repr.py +41 -0
- chemrecon/schema/entry_types/reaction.py +57 -0
- chemrecon/schema/enums.py +154 -0
- chemrecon/schema/procedural_relation_entrygraph.py +66 -0
- chemrecon/schema/relation_types_composed/__init__.py +0 -0
- chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
- chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
- chemrecon/schema/relation_types_procedural/__init__.py +0 -0
- chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
- chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
- chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
- chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
- chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
- chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
- chemrecon/schema/relation_types_source/__init__.py +0 -0
- chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
- chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
- chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
- chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
- chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
- chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
- chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
- chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
- chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
- chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
- chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
- chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
- chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
- chemrecon/scripts/initialize_database.py +494 -0
- chemrecon/utils/copy_signature.py +10 -0
- chemrecon/utils/encodeable_list.py +11 -0
- chemrecon/utils/get_id_type.py +70 -0
- chemrecon/utils/hungarian.py +31 -0
- chemrecon/utils/reactant_matching.py +168 -0
- chemrecon/utils/rxnutils.py +44 -0
- chemrecon/utils/set_cwd.py +12 -0
- chemrecon-0.1.1.dist-info/METADATA +143 -0
- chemrecon-0.1.1.dist-info/RECORD +86 -0
- chemrecon-0.1.1.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Optional, ClassVar
|
|
5
|
+
|
|
6
|
+
import psycopg as pg
|
|
7
|
+
from psycopg import sql as sql
|
|
8
|
+
|
|
9
|
+
from chemrecon.entrygraph.filter import EntryFilter, RelationFilter
|
|
10
|
+
from chemrecon.schema.db_object import Entry, Relation
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ExploreProcedure[T_takes: Entry, T_gives: Entry](ABC):
|
|
14
|
+
takes_entrytype: type[T_takes]
|
|
15
|
+
gives_entrytype: type[T_gives]
|
|
16
|
+
relation_type: type[Relation] # Filter on the produced relations
|
|
17
|
+
entry_filter: Optional[EntryFilter[T_gives]] # Filter on T_gives
|
|
18
|
+
relation_filter: Optional[RelationFilter]
|
|
19
|
+
|
|
20
|
+
# Pre-computed SQL snippets
|
|
21
|
+
_opt_relation_filter_clause: sql.Composable
|
|
22
|
+
_opt_entry_filter_clause: sql.Composable
|
|
23
|
+
|
|
24
|
+
n_params: ClassVar[int]
|
|
25
|
+
|
|
26
|
+
# SQL
|
|
27
|
+
q: sql.Composed
|
|
28
|
+
cursor: pg.Cursor
|
|
29
|
+
|
|
30
|
+
def __init__(
|
|
31
|
+
self,
|
|
32
|
+
relationtype: type[Relation],
|
|
33
|
+
relation_filter: Optional[RelationFilter],
|
|
34
|
+
entry_filter: Optional[EntryFilter],
|
|
35
|
+
):
|
|
36
|
+
# Init
|
|
37
|
+
self.relation_type = relationtype
|
|
38
|
+
self.relation_filter = relation_filter
|
|
39
|
+
self.entry_filter = entry_filter
|
|
40
|
+
|
|
41
|
+
if relation_filter is not None:
|
|
42
|
+
#assert relation_filter.relation_type == relationtype
|
|
43
|
+
pass
|
|
44
|
+
if entry_filter is not None:
|
|
45
|
+
#assert entry_filter.entry_type == self.gives_entrytype
|
|
46
|
+
pass
|
|
47
|
+
|
|
48
|
+
# Generate the various clauses ahead of time
|
|
49
|
+
match self.relation_filter:
|
|
50
|
+
# TODO filtering
|
|
51
|
+
#case RelationFilterSource():
|
|
52
|
+
# # Filter
|
|
53
|
+
# self._opt_relation_filter_clause = sql.SQL("""
|
|
54
|
+
# AND (rel_src in {src_set})
|
|
55
|
+
# """).format(
|
|
56
|
+
# src_set = list(relation_filter.allowed_sources)
|
|
57
|
+
# )
|
|
58
|
+
case _:
|
|
59
|
+
# No filter
|
|
60
|
+
self._opt_relation_filter_clause = sql.SQL('')
|
|
61
|
+
|
|
62
|
+
# Entry clause
|
|
63
|
+
# TODO change to filter entries on the DB level (hard for symmetric relations?)
|
|
64
|
+
# for symmetric - BOTH recon ids need to be in the set of allowed id types?
|
|
65
|
+
self._opt_entry_filter_clause = self.make_entry_filter_clause()
|
|
66
|
+
|
|
67
|
+
# Generate the query except for the formats
|
|
68
|
+
# Clause is prepared with as much formating as possible now, %(reconids)s param will be used when
|
|
69
|
+
# calling only
|
|
70
|
+
self.q = sql.SQL("""
|
|
71
|
+
SELECT *
|
|
72
|
+
FROM {rel_view}
|
|
73
|
+
WHERE {where_clause} {opt_relation_clause} {opt_entry_clause}
|
|
74
|
+
;
|
|
75
|
+
""").format(
|
|
76
|
+
rel_view = sql.Identifier(f'{self.relation_type.get_table_name()}_v'),
|
|
77
|
+
where_clause = self.make_where_clause(),
|
|
78
|
+
opt_relation_clause = self._opt_relation_filter_clause,
|
|
79
|
+
opt_entry_clause = self._opt_entry_filter_clause
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
@abstractmethod
|
|
83
|
+
def make_where_clause(self) -> sql.SQL:
|
|
84
|
+
""" Returns a parametrised SQL string to be used when calling. """
|
|
85
|
+
pass
|
|
86
|
+
|
|
87
|
+
@abstractmethod
|
|
88
|
+
def make_entry_filter_clause(self) -> sql.SQL:
|
|
89
|
+
""" Returns an SQL string filtering correctly by recon_id_1, recon_id_2, or both. """
|
|
90
|
+
pass
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class ExploreProcedureT1[T_takes: Entry, T_gives: Entry](ExploreProcedure[T_takes, T_gives]):
|
|
94
|
+
# Explore from T1
|
|
95
|
+
takes_entrytype: type[T_takes]
|
|
96
|
+
gives_entrytype: type[T_gives]
|
|
97
|
+
relation_type: type[Relation[T_takes, T_gives]]
|
|
98
|
+
entry_filter: Optional[EntryFilter[T_gives]]
|
|
99
|
+
relation_filter: Optional[RelationFilter[T_takes, T_gives]]
|
|
100
|
+
|
|
101
|
+
n_params: ClassVar[int] = 1
|
|
102
|
+
|
|
103
|
+
def __init__(
|
|
104
|
+
self,
|
|
105
|
+
relationtype: type[Relation[T_takes, T_gives]],
|
|
106
|
+
relation_filter: Optional[RelationFilter[T_takes, T_gives]],
|
|
107
|
+
entry_filter: Optional[EntryFilter[T_gives]]
|
|
108
|
+
):
|
|
109
|
+
assert not relationtype.symmetric
|
|
110
|
+
self.takes_entrytype = relationtype.source_entrytype
|
|
111
|
+
self.produces_entrytype = relationtype.target_entrytype
|
|
112
|
+
self.relation = relationtype
|
|
113
|
+
|
|
114
|
+
super().__init__(relationtype, relation_filter, entry_filter)
|
|
115
|
+
|
|
116
|
+
def make_where_clause(self) -> sql.SQL:
|
|
117
|
+
return sql.SQL("recon_id_1 = %s")
|
|
118
|
+
|
|
119
|
+
def make_entry_filter_clause(self) -> sql.SQL:
|
|
120
|
+
# TODO
|
|
121
|
+
return sql.SQL('')
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class ExploreProcedureT2[T_takes: Entry, T_gives: Entry](ExploreProcedure[T_takes, T_gives]):
|
|
125
|
+
# Explore from T2
|
|
126
|
+
takes_entrytype: type[T_takes]
|
|
127
|
+
gives_entrytype: type[T_gives]
|
|
128
|
+
relation_type: type[Relation[T_gives, T_takes]]
|
|
129
|
+
entry_filter: Optional[EntryFilter[T_gives]]
|
|
130
|
+
relation_filter: Optional[RelationFilter[T_gives, T_takes]]
|
|
131
|
+
|
|
132
|
+
n_params: ClassVar[int] = 1
|
|
133
|
+
|
|
134
|
+
def __init__(
|
|
135
|
+
self,
|
|
136
|
+
relationtype: type[Relation[T_gives, T_takes]],
|
|
137
|
+
relation_filter: Optional[RelationFilter[T_gives, T_takes]],
|
|
138
|
+
entry_filter: Optional[EntryFilter[T_gives]]
|
|
139
|
+
):
|
|
140
|
+
assert not relationtype.symmetric
|
|
141
|
+
self.takes_entrytype = relationtype.target_entrytype
|
|
142
|
+
self.produces_entrytype = relationtype.source_entrytype
|
|
143
|
+
self.relation = relationtype
|
|
144
|
+
|
|
145
|
+
super().__init__(relationtype, relation_filter, entry_filter)
|
|
146
|
+
|
|
147
|
+
def make_where_clause(self) -> sql.SQL:
|
|
148
|
+
return sql.SQL("recon_id_2 = %s")
|
|
149
|
+
|
|
150
|
+
def make_entry_filter_clause(self) -> sql.SQL:
|
|
151
|
+
# TODO
|
|
152
|
+
return sql.SQL('')
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class ExploreProcedureSym[T: Entry](ExploreProcedure[T, T]):
|
|
156
|
+
# Explore from T
|
|
157
|
+
takes_entrytype: type[T]
|
|
158
|
+
gives_entrytype: type[T]
|
|
159
|
+
relation_type: type[Relation[T, T]]
|
|
160
|
+
entry_filter: Optional[EntryFilter[T]]
|
|
161
|
+
relation_filter: Optional[RelationFilter[T, T]]
|
|
162
|
+
|
|
163
|
+
n_params: ClassVar[int] = 2
|
|
164
|
+
|
|
165
|
+
def __init__(
|
|
166
|
+
self,
|
|
167
|
+
relationtype: type[Relation[T, T]],
|
|
168
|
+
relation_filter: Optional[RelationFilter[T, T]],
|
|
169
|
+
entry_filter: Optional[EntryFilter[T]]
|
|
170
|
+
):
|
|
171
|
+
assert relationtype.symmetric
|
|
172
|
+
self.takes_entrytype = relationtype.source_entrytype
|
|
173
|
+
self.produces_entrytype = relationtype.target_entrytype
|
|
174
|
+
self.relation = relationtype
|
|
175
|
+
|
|
176
|
+
super().__init__(relationtype, relation_filter, entry_filter)
|
|
177
|
+
|
|
178
|
+
def make_where_clause(self) -> sql.SQL:
|
|
179
|
+
return sql.SQL("(recon_id_1 = %s) OR (recon_id_2 = %s)")
|
|
180
|
+
|
|
181
|
+
def make_entry_filter_clause(self) -> sql.SQL:
|
|
182
|
+
# TODO
|
|
183
|
+
return sql.SQL('')
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
""" TODO re-implement this later, for now, filters should just be functions
|
|
2
|
+
"""
|
|
3
|
+
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from typing import Callable
|
|
6
|
+
|
|
7
|
+
from chemrecon.schema.db_object import SourceEntry
|
|
8
|
+
from chemrecon.core.id_types import IdentifierType
|
|
9
|
+
from chemrecon.schema.db_object import Entry, Relation
|
|
10
|
+
from chemrecon.schema.enums import SourceDatabase
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class EntryFilter[T: Entry](ABC):
|
|
14
|
+
""" Base class for filters. """
|
|
15
|
+
entry_type: type[Entry]
|
|
16
|
+
allowed_id_types: set[IdentifierType]
|
|
17
|
+
has_id_type_field: bool
|
|
18
|
+
|
|
19
|
+
@abstractmethod
|
|
20
|
+
def __call__(self, entry: T) -> bool:
|
|
21
|
+
# Base implementation
|
|
22
|
+
return True
|
|
23
|
+
|
|
24
|
+
class RelationFilter[T1: Entry, T2: Entry](ABC):
|
|
25
|
+
""" Base class for filters. """
|
|
26
|
+
relation_type: type[Relation[T1, T2]]
|
|
27
|
+
allowed_sources: set[SourceDatabase]
|
|
28
|
+
has_src_field: bool
|
|
29
|
+
|
|
30
|
+
def __init__(self, relation_type: type[Relation[T1, T2]]):
|
|
31
|
+
self.relation_type = relation_type
|
|
32
|
+
|
|
33
|
+
@abstractmethod
|
|
34
|
+
def __call__(self, relation: Relation[T1, T2]) -> bool:
|
|
35
|
+
# Base implementation
|
|
36
|
+
return True
|
|
37
|
+
|
|
38
|
+
class EntryFilterIdType[T: SourceEntry](EntryFilter[T]):
|
|
39
|
+
"""
|
|
40
|
+
"""
|
|
41
|
+
allowed_id_types: set[IdentifierType]
|
|
42
|
+
# TODO
|
|
43
|
+
|
|
44
|
+
def __init__(self, allowed_id_types: set[IdentifierType]):
|
|
45
|
+
self.allowed_id_types = allowed_id_types
|
|
46
|
+
raise NotImplementedError()
|
|
47
|
+
|
|
48
|
+
def __call__(self, entry: T) -> bool:
|
|
49
|
+
raise NotImplementedError()
|
|
50
|
+
|
|
51
|
+
# TODO similar to EntryFilterIdType, but for source
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class EntryFilterProcedure[T: Entry](EntryFilter):
|
|
55
|
+
""" Allows custom filtering on the application level in addition to type filtering.
|
|
56
|
+
"""
|
|
57
|
+
procedure: Callable[[T], bool]
|
|
58
|
+
|
|
59
|
+
def __init__(self, filter_proc: Callable[[T], bool]):
|
|
60
|
+
self.procedure = filter_proc
|
|
61
|
+
super().__init__()
|
|
62
|
+
|
|
63
|
+
def __call__(self, entry: T) -> bool:
|
|
64
|
+
if super().__call__(entry):
|
|
65
|
+
return self.procedure(entry)
|
|
66
|
+
else:
|
|
67
|
+
return False
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class RelationFilterProcedure[T1: Entry, T2: Entry](RelationFilter):
|
|
72
|
+
""" Allows custom filtering on the application level in addition to source
|
|
73
|
+
"""
|
|
74
|
+
procedure: Callable[[Relation[T1, T2]], bool]
|
|
75
|
+
|
|
76
|
+
def __call__(self, relation: Relation[T1, T2]) -> bool:
|
|
77
|
+
if super().__call__(relation):
|
|
78
|
+
return self.procedure(relation)
|
|
79
|
+
else:
|
|
80
|
+
return False
|
|
81
|
+
|
|
82
|
+
def __init__(
|
|
83
|
+
self,
|
|
84
|
+
relation_type: type[Relation[T1, T2]],
|
|
85
|
+
filter_proc: Callable[[Relation[T1, T2]], bool]
|
|
86
|
+
):
|
|
87
|
+
super().__init__(relation_type)
|
|
88
|
+
self.procedure = filter_proc
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
""" Methods for producing a ranking on an entry graph.
|
|
2
|
+
"""
|
|
3
|
+
from collections import OrderedDict
|
|
4
|
+
from typing import Callable, Optional, TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
import rustworkx
|
|
7
|
+
|
|
8
|
+
from chemrecon.entrygraph.entrygraph import (
|
|
9
|
+
Edge, EntryGraph, SourceEdgeArtificial, SourceVertexArtificial, Vertex,
|
|
10
|
+
)
|
|
11
|
+
from chemrecon.schema import Entry, Relation
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from chemrecon.schema.procedural_relation_entrygraph import ProceduralRelationEG
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Scorer[T_rank: Entry]:
|
|
18
|
+
""" A scorer is a callable which takes an entrygraph and produces a ranking of the vertices according to
|
|
19
|
+
the parameters of the scorer.
|
|
20
|
+
|
|
21
|
+
The score of an entry is (informally) the probability that a random walk starting at one of the initial entries
|
|
22
|
+
of the entry graph will terminate at that entry.
|
|
23
|
+
The parameters of the random walk can be customized by specifying weights (probabilities) using a weight
|
|
24
|
+
function on entries and relations, which alters the probability of choosing a given path.
|
|
25
|
+
The default weight of all entries and relations is 1.
|
|
26
|
+
For example, if you do not trust a particular source, edges and vertices from that source can have their
|
|
27
|
+
weight reduced, making them count less in the scoring algorithm.
|
|
28
|
+
|
|
29
|
+
A damping factor, `alpha` can be specified.
|
|
30
|
+
With probability `1-alpha`, the random walk will choose to go to a random entry rather than continuing the walk.
|
|
31
|
+
Furthermore, a _decay_factor_ can be specified such that entries further away from the initial entries are given
|
|
32
|
+
lower scores.
|
|
33
|
+
A decay factor of `0` disables this adjustment. The default is `0.2`.
|
|
34
|
+
|
|
35
|
+
Scores are normalized such that the sum of scores is **1**, which allows comparing scores across entry graphs.
|
|
36
|
+
|
|
37
|
+
Formally, the scores are computed using the PageRank algorithm (https://en.wikipedia.org/wiki/PageRank),
|
|
38
|
+
starting from the initial vertices, and with dangling vertices pointing back to all initial vertices with
|
|
39
|
+
equal probability.
|
|
40
|
+
|
|
41
|
+
"""
|
|
42
|
+
score_entry_type: type[T_rank]
|
|
43
|
+
|
|
44
|
+
entry_weight: Callable[[Entry], float]
|
|
45
|
+
relation_weight: Callable[[Relation], float]
|
|
46
|
+
|
|
47
|
+
# Algorithm parameters
|
|
48
|
+
alpha: float
|
|
49
|
+
decay_factor: float
|
|
50
|
+
|
|
51
|
+
def __init__(
|
|
52
|
+
self,
|
|
53
|
+
score_entry_type: type[Entry],
|
|
54
|
+
alpha: float = 0.85,
|
|
55
|
+
decay_factor: float = 0.2,
|
|
56
|
+
entry_weight: Optional[Callable[[Entry], float]] = None,
|
|
57
|
+
relation_weight: Optional[Callable[[Relation], float]] = None,
|
|
58
|
+
):
|
|
59
|
+
""" Specify a scorer.
|
|
60
|
+
"""
|
|
61
|
+
self.alpha = alpha
|
|
62
|
+
self.decay_factor = decay_factor
|
|
63
|
+
|
|
64
|
+
self.score_entry_type = score_entry_type
|
|
65
|
+
|
|
66
|
+
if entry_weight is not None:
|
|
67
|
+
self.entry_weight = entry_weight
|
|
68
|
+
else:
|
|
69
|
+
self.entry_weight = lambda e: 1
|
|
70
|
+
|
|
71
|
+
if relation_weight is not None:
|
|
72
|
+
self.relation_weight = relation_weight
|
|
73
|
+
else:
|
|
74
|
+
self.relation_weight = lambda r: 1
|
|
75
|
+
|
|
76
|
+
def __call__(self, entrygraph: EntryGraph) -> OrderedDict[T_rank, float]:
|
|
77
|
+
""" Produces a ranking of the entries of the type `score_entry_type`.
|
|
78
|
+
The result is an `OrderedDict`, with entries given in descending order of score.
|
|
79
|
+
"""
|
|
80
|
+
from chemrecon.schema.procedural_relation_entrygraph import ProceduralRelationEG
|
|
81
|
+
|
|
82
|
+
g = entrygraph.g.copy()
|
|
83
|
+
|
|
84
|
+
# Add source vertex and connect to initial vertices
|
|
85
|
+
source_vertex_index = g.add_node(SourceVertexArtificial())
|
|
86
|
+
for init_v_index in entrygraph.initial_vertices:
|
|
87
|
+
g.add_edge(source_vertex_index, init_v_index, SourceEdgeArtificial())
|
|
88
|
+
|
|
89
|
+
# Get the weight for all edges
|
|
90
|
+
edge_weight_dict: dict[Edge, float] = dict()
|
|
91
|
+
for e_index in g.edge_indices():
|
|
92
|
+
e = g.get_edge_data_by_index(e_index)
|
|
93
|
+
source_index = g.get_edge_endpoints_by_index(e_index)[0]
|
|
94
|
+
if isinstance(e, Edge):
|
|
95
|
+
edge_weight_dict[e] = (
|
|
96
|
+
self.relation_weight(e.relation)
|
|
97
|
+
* self.entry_weight(g.get_node_data(source_index).entry)
|
|
98
|
+
)
|
|
99
|
+
else:
|
|
100
|
+
edge_weight_dict[e] = 1
|
|
101
|
+
|
|
102
|
+
# Modify edges by 'score' parameter of EG-based procedural relations
|
|
103
|
+
if isinstance(e, Edge):
|
|
104
|
+
if isinstance(e.relation, ProceduralRelationEG):
|
|
105
|
+
edge_weight_dict[e] *= e.relation.score
|
|
106
|
+
|
|
107
|
+
for e_index in g.edge_indices():
|
|
108
|
+
e = g.get_edge_data_by_index(e_index)
|
|
109
|
+
|
|
110
|
+
# Produce the initial score (ranking all vertices, not normalized)
|
|
111
|
+
init_scoring = rustworkx.pagerank(
|
|
112
|
+
g,
|
|
113
|
+
alpha = self.alpha,
|
|
114
|
+
weight_fn = lambda e_: edge_weight_dict.get(e_, 1),
|
|
115
|
+
dangling = {source_vertex_index: 1}, # Terminal nodes should loop back to source
|
|
116
|
+
personalization = {source_vertex_index: 1} # Random traversals go back to source
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
# Use only eligible entries (filter by relevant entry type)
|
|
120
|
+
scoring: dict[Vertex, float] = dict()
|
|
121
|
+
for v_idx, score in init_scoring.items():
|
|
122
|
+
v = g.get_node_data(v_idx)
|
|
123
|
+
if isinstance(v, Vertex) and isinstance(v.entry, self.score_entry_type):
|
|
124
|
+
scoring[v] = score
|
|
125
|
+
|
|
126
|
+
# Apply decay based on the generation
|
|
127
|
+
if self.decay_factor != 0:
|
|
128
|
+
for v, score in scoring.items():
|
|
129
|
+
v: Vertex
|
|
130
|
+
scoring[v] = score * ((1 - self.decay_factor)**v.generation)
|
|
131
|
+
|
|
132
|
+
# Normalise s.t. sum(scoring.values()) == 1.0
|
|
133
|
+
score_sum = sum(s for _, s in scoring.items())
|
|
134
|
+
for v, score in scoring.items():
|
|
135
|
+
scoring[v] = score / score_sum
|
|
136
|
+
|
|
137
|
+
# Finalise
|
|
138
|
+
return OrderedDict(
|
|
139
|
+
(k.entry, v) for k, v in
|
|
140
|
+
sorted(scoring.items(), key = lambda pair: pair[1], reverse = True)
|
|
141
|
+
)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module contains functions useful for querying the database.
|
|
3
|
+
These functions are also available in the `chemrecon` scope.
|
|
4
|
+
"""
|
|
5
|
+
from chemrecon.query.find_entry import (
|
|
6
|
+
find_entry,
|
|
7
|
+
find_compound_entry, find_reaction_entry, find_enzyme_entry,
|
|
8
|
+
find_structure_representation_entry, find_aam_representation_entry,
|
|
9
|
+
find_structure_entry, find_aam_entry
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
# Relation getters
|
|
13
|
+
from chemrecon.query.get_relations import (
|
|
14
|
+
get_relations_from_entry,
|
|
15
|
+
get_all_relations,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# from chemrecon.query.create_entry import (
|
|
19
|
+
# entry,
|
|
20
|
+
# compound_entry, reaction_entry, enzyme_entry,
|
|
21
|
+
# aam_representation_entry, structure_representation_entry,
|
|
22
|
+
# structure_entry, aam_entry,
|
|
23
|
+
# enzyme_from_ec_number,
|
|
24
|
+
# entry_from_identifiers_org
|
|
25
|
+
# )
|
|
26
|
+
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# """ Contains methods to create entries to look up in the databases.
|
|
2
|
+
# """
|
|
3
|
+
#
|
|
4
|
+
#
|
|
5
|
+
# from chemrecon.core.id_types import (
|
|
6
|
+
# IdentifierType, IdentifierTypeCompound, IdentifierTypeReaction, IdentifierTypeStructureRepresentation,
|
|
7
|
+
# IdentifierTypeEnzyme, E_EC, IdentifierTypeAAM
|
|
8
|
+
# )
|
|
9
|
+
# from chemrecon.schema import (
|
|
10
|
+
# Entry, Compound, Enzyme, Reaction, MolStructureRepr, MolStructure,
|
|
11
|
+
# AAMRepr, AAM
|
|
12
|
+
# )
|
|
13
|
+
#
|
|
14
|
+
#
|
|
15
|
+
# # Direct creation of prototype entries
|
|
16
|
+
# # ----------------------------------------------------------------------------------------------------------------------
|
|
17
|
+
# def entry(id_type: IdentifierType, source_id: str) -> Entry:
|
|
18
|
+
# """ Create a 'prototype' entry, which may or may not correspond to an actual entry in the database.
|
|
19
|
+
# """
|
|
20
|
+
# match id_type:
|
|
21
|
+
# case IdentifierTypeCompound():
|
|
22
|
+
# return compound_entry(id_type, source_id)
|
|
23
|
+
# case IdentifierTypeReaction():
|
|
24
|
+
# return reaction_entry(id_type, source_id)
|
|
25
|
+
# case IdentifierTypeEnzyme():
|
|
26
|
+
# return enzyme_entry(id_type, source_id)
|
|
27
|
+
# case IdentifierTypeStructureRepresentation():
|
|
28
|
+
# return structure_representation_entry(id_type, source_id)
|
|
29
|
+
# case IdentifierTypeAAM():
|
|
30
|
+
# return aam_representation_entry(id_type, source_id)
|
|
31
|
+
# case _:
|
|
32
|
+
# # Not implemented
|
|
33
|
+
# raise NotImplementedError()
|
|
34
|
+
#
|
|
35
|
+
# def compound_entry(id_type: IdentifierTypeCompound, source_id: str) -> Compound:
|
|
36
|
+
# """ TODO docs
|
|
37
|
+
# """
|
|
38
|
+
# return Compound(id_type = id_type, source_id = id_type.std_identifier(source_id))
|
|
39
|
+
#
|
|
40
|
+
# def reaction_entry(id_type: IdentifierTypeReaction, source_id: str) -> Reaction:
|
|
41
|
+
# """ TODO docs
|
|
42
|
+
# """
|
|
43
|
+
# return Reaction(id_type = id_type, source_id = id_type.std_identifier(source_id))
|
|
44
|
+
#
|
|
45
|
+
# def enzyme_entry(id_type: IdentifierTypeEnzyme, source_id: str) -> Enzyme:
|
|
46
|
+
# """ TODO docs
|
|
47
|
+
# """
|
|
48
|
+
# return Enzyme(id_type = id_type, source_id = id_type.std_identifier(source_id))
|
|
49
|
+
#
|
|
50
|
+
# def structure_representation_entry(id_type: IdentifierTypeStructureRepresentation, source_id: str) -> MolStructureRepr:
|
|
51
|
+
# """ TODO docs
|
|
52
|
+
# """
|
|
53
|
+
# return MolStructureRepr(id_type = id_type, source_id = id_type.std_identifier(source_id))
|
|
54
|
+
#
|
|
55
|
+
# def aam_representation_entry(id_type: IdentifierTypeAAM, source_id: str) -> AAMRepr:
|
|
56
|
+
# """ TODO docs
|
|
57
|
+
# """
|
|
58
|
+
# return AAMRepr(id_type = id_type, source_id = id_type.std_identifier(source_id))
|
|
59
|
+
#
|
|
60
|
+
# def structure_entry(smiles: str) -> MolStructure:
|
|
61
|
+
# """ TODO docs
|
|
62
|
+
# """
|
|
63
|
+
# return MolStructure(smiles = smiles)
|
|
64
|
+
#
|
|
65
|
+
# def aam_entry(reaction_smiles: str) -> AAM:
|
|
66
|
+
# """ TODO docs
|
|
67
|
+
# """
|
|
68
|
+
# return AAM(reaction_smiles = reaction_smiles)
|
|
69
|
+
#
|
|
70
|
+
#
|
|
71
|
+
# # Special for ec
|
|
72
|
+
# def enzyme_from_ec_number(ec_number: str) -> Enzyme:
|
|
73
|
+
# """ Create a prototype enzyme entry from an EC number.
|
|
74
|
+
# """
|
|
75
|
+
# return Enzyme(id_type = E_EC, source_id = E_EC.std_identifier(ec_number))
|
|
76
|
+
#
|
|
77
|
+
#
|
|
78
|
+
# # Creation from identifiers.org strings
|
|
79
|
+
# # ----------------------------------------------------------------------------------------------------------------------
|
|
80
|
+
# def entry_from_identifiers_org(identifiers_org_string: str) -> Entry:
|
|
81
|
+
# """ Create an entry from an identifiers.org string. If the string is not valid for any types in the database, raise
|
|
82
|
+
# ValueError.
|
|
83
|
+
# """
|
|
84
|
+
# # TODO
|
|
85
|
+
# # TODO raise ValueError if invalid string
|
|
86
|
+
# raise NotImplementedError()
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
""" ChemRecon comes with a set of pre-defined exploration protocols for various purposes.
|
|
2
|
+
These are located in the chemrecon.query.default_protocols module.
|
|
3
|
+
We recommend looking in this file for inspiration on how to define custom protocols.
|
|
4
|
+
"""
|
|
5
|
+
# The wheel ships the package as `chemrecon`; there is no `src` package to import from once installed.
from chemrecon import Direction
|
|
6
|
+
from chemrecon.entrygraph.filter import EntryFilterProcedure
|
|
7
|
+
from chemrecon.entrygraph.explorationprotocol import ExplorationProtocol
|
|
8
|
+
from chemrecon.schema.entry_types.compound import Compound
|
|
9
|
+
from chemrecon.schema.entry_types.molstructure_repr import MolStructureRepr
|
|
10
|
+
from chemrecon.schema.entry_types.molstructure import MolStructure
|
|
11
|
+
from chemrecon.schema.relation_types_composed.compound_has_molstructure_relation import CompoundHasMolStructure
|
|
12
|
+
from chemrecon.schema.relation_types_source.compound_reference_relation import CompoundReference
|
|
13
|
+
from chemrecon.schema.relation_types_source.molstructure_standardisation_relation import MolStructureStandardization
|
|
14
|
+
|
|
15
|
+
# TODO filters
|
|
16
|
+
|
|
17
|
+
# General
|
|
18
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
19
|
+
#: The Compound-Structure protocol can be used to quickly gain an overview of the structural information
|
|
20
|
+
#: relating to a given compound.
|
|
21
|
+
#: The database compounds are traversed via the `CompoundReference` relation in order to expand the graph to include
|
|
22
|
+
#: other databases which contain the compound.
|
|
23
|
+
#: The `CompoundHasMolStructure` relation is then used to find the associated structure for each compound.
|
|
24
|
+
#: The `MolStructureStandardization` relation is used to standardize various properties of the structures, which can be
|
|
25
|
+
#: helpful in case the databases simply disagree on easy-to-standardize properties, such as charge or tautomerism.
|
|
26
|
+
protocol_compound_structure = ExplorationProtocol(
    relation_types = {
        (CompoundReference, Direction.SYMMETRIC),               # cross-database compound references
        (CompoundHasMolStructure, Direction.FORWARDS),          # compound -> structure
        (MolStructureStandardization, Direction.FORWARDS),      # structure -> standardized structure
    }
)
|
|
33
|
+
|
|
34
|
+
# Search graphs - for selection rather than manual inspection
|
|
35
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
36
|
+
#: Protocol used when a structure is to be selected programmatically. It currently follows the same three
#: relation types as `protocol_compound_structure`.
protocol_select_structure = ExplorationProtocol(
    relation_types = {
        (CompoundReference, Direction.SYMMETRIC),
        (CompoundHasMolStructure, Direction.FORWARDS),
        (MolStructureStandardization, Direction.FORWARDS),
    },
)
|
|
43
|
+
|
|
44
|
+
# TODO - FILTER - disallow implicit Structures!
|
|
45
|
+
def _select_structure_entry_filter(e: MolStructureRepr) -> bool:
    """ Accept a structure representation only if it is not flagged as implicit. """
    return not e.implicit
|
|
50
|
+
|
|
51
|
+
# Application-level entry filter wrapping `_select_structure_entry_filter`.
struct_repr_filter = EntryFilterProcedure[MolStructureRepr](filter_proc = _select_structure_entry_filter)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# TODO include some ontology relations, such as the old_id and new_id relations
|
|
57
|
+
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
""" Functions for finding entries given an index.
|
|
2
|
+
"""
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import chemrecon.connection as connection
|
|
6
|
+
|
|
7
|
+
from chemrecon import IdentifierType, Entry, IdentifierTypeCompound, IdentifierTypeReaction, IdentifierTypeEnzyme, \
|
|
8
|
+
IdentifierTypeStructureRepresentation, IdentifierTypeAAM
|
|
9
|
+
from chemrecon.schema.entry_types.aam import AAM
|
|
10
|
+
from chemrecon.schema.entry_types.aam_repr import AAMRepr
|
|
11
|
+
from chemrecon.schema.entry_types.compound import Compound
|
|
12
|
+
from chemrecon.schema.entry_types.enzyme import Enzyme
|
|
13
|
+
from chemrecon.schema.entry_types.reaction import Reaction
|
|
14
|
+
from chemrecon.schema.entry_types.molstructure_repr import MolStructureRepr
|
|
15
|
+
from chemrecon.schema.entry_types.molstructure import MolStructure
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def find_entry(id_type: IdentifierType, source_id: str) -> Optional[Entry]:
    """ Look for an entry with the specified type and id in the connected Database.

    Dispatches on the class of `id_type` to the matching ``find_*_entry`` function and returns its result.
    If not found, returns None.

    :raises NotImplementedError: if `id_type` belongs to none of the supported identifier type classes.
    """
    # Checked in order; the first matching identifier class decides which finder is used.
    finders = (
        (IdentifierTypeCompound, find_compound_entry),
        (IdentifierTypeReaction, find_reaction_entry),
        (IdentifierTypeEnzyme, find_enzyme_entry),
        (IdentifierTypeStructureRepresentation, find_structure_representation_entry),
        (IdentifierTypeAAM, find_aam_representation_entry),
    )
    for id_class, finder in finders:
        if isinstance(id_type, id_class):
            return finder(id_type, source_id)

    # Not implemented
    raise NotImplementedError()
|
|
36
|
+
|
|
37
|
+
def find_compound_entry(id_type: IdentifierTypeCompound, source_id: str) -> Optional[Compound]:
    """ Look for the compound with the given identifier type and id in the connected Database.

    The id is normalised with ``id_type.std_identifier`` before the lookup.
    If not found, returns None.
    """
    prototype = Compound(
        id_type = id_type.enum_type,
        source_id = id_type.std_identifier(source_id),
    )
    return connection.handler.get_entry_by_index(prototype)
|
|
43
|
+
|
|
44
|
+
def find_reaction_entry(id_type: IdentifierTypeReaction, source_id: str) -> Optional[Reaction]:
    """ Look for the reaction with the given identifier type and id in the connected Database.

    The id is normalised with ``id_type.std_identifier`` before the lookup.
    If not found, returns None.
    """
    prototype = Reaction(
        id_type = id_type.enum_type,
        source_id = id_type.std_identifier(source_id),
    )
    return connection.handler.get_entry_by_index(prototype)
|
|
50
|
+
|
|
51
|
+
def find_enzyme_entry(id_type: IdentifierTypeEnzyme, source_id: str) -> Optional[Enzyme]:
    """ Look for the enzyme with the given identifier type and id in the connected Database.

    The id is normalised with ``id_type.std_identifier`` before the lookup.
    If not found, returns None.
    """
    prototype = Enzyme(
        id_type = id_type.enum_type,
        source_id = id_type.std_identifier(source_id),
    )
    return connection.handler.get_entry_by_index(prototype)
|
|
57
|
+
|
|
58
|
+
def find_structure_representation_entry(
        id_type: IdentifierTypeStructureRepresentation,
        source_id: str
) -> Optional[MolStructureRepr]:
    """ Look for the structure representation with the given identifier type and id in the connected Database.

    The id is normalised with ``id_type.std_identifier`` before the lookup, and the prototype entry is
    created with ``implicit = False``.
    If not found, returns None.
    """
    prototype = MolStructureRepr(
        id_type = id_type.enum_type,
        source_id = id_type.std_identifier(source_id),
        implicit = False,
    )
    return connection.handler.get_entry_by_index(prototype)
|
|
64
|
+
|
|
65
|
+
def find_aam_representation_entry(id_type: IdentifierTypeAAM, source_id: str) -> Optional[AAMRepr]:
    """ Look for the AAM representation with the given identifier type and id in the connected Database.

    The id is normalised with ``id_type.std_identifier`` before the lookup.
    If not found, returns None.
    """
    prototype = AAMRepr(
        id_type = id_type.enum_type,
        source_id = id_type.std_identifier(source_id),
    )
    return connection.handler.get_entry_by_index(prototype)
|
|
71
|
+
|
|
72
|
+
def find_structure_entry(smiles: str) -> Optional[MolStructure]:
    """ Look for the structure entry with the given SMILES string in the connected Database.

    The string is passed to ``MolStructure`` unchanged.
    If not found, returns None.
    """
    prototype = MolStructure(smiles = smiles)
    return connection.handler.get_entry_by_index(prototype)
|
|
78
|
+
|
|
79
|
+
def find_aam_entry(reaction_smiles: str) -> Optional[AAM]:
    """ Look for the AAM entry with the given reaction SMILES string in the connected Database.

    The string is passed to ``AAM`` unchanged.
    If not found, returns None.
    """
    prototype = AAM(reaction_smiles = reaction_smiles)
    return connection.handler.get_entry_by_index(prototype)
|