chemrecon 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemrecon/__init__.py +73 -0
- chemrecon/chem/__init__.py +0 -0
- chemrecon/chem/chemreaction.py +223 -0
- chemrecon/chem/constant_compounds.py +3 -0
- chemrecon/chem/create_mol.py +91 -0
- chemrecon/chem/elements.py +141 -0
- chemrecon/chem/gml/__init__.py +0 -0
- chemrecon/chem/gml/gml.py +324 -0
- chemrecon/chem/gml/gml_reactant_matching.py +130 -0
- chemrecon/chem/gml/gml_to_rdk.py +217 -0
- chemrecon/chem/mol.py +483 -0
- chemrecon/chem/sumformula.py +120 -0
- chemrecon/connection.py +97 -0
- chemrecon/core/__init__.py +0 -0
- chemrecon/core/id_types.py +687 -0
- chemrecon/core/ontology.py +209 -0
- chemrecon/core/populate_query_handler.py +336 -0
- chemrecon/core/query_handler.py +587 -0
- chemrecon/database/__init__.py +1 -0
- chemrecon/database/connect.py +63 -0
- chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
- chemrecon/database/params.py +88 -0
- chemrecon/entrygraph/draw.py +119 -0
- chemrecon/entrygraph/entrygraph.py +301 -0
- chemrecon/entrygraph/explorationprotocol.py +199 -0
- chemrecon/entrygraph/explore.py +421 -0
- chemrecon/entrygraph/explore_procedure.py +183 -0
- chemrecon/entrygraph/filter.py +88 -0
- chemrecon/entrygraph/scoring.py +141 -0
- chemrecon/query/__init__.py +26 -0
- chemrecon/query/create_entry.py +86 -0
- chemrecon/query/default_protocols.py +57 -0
- chemrecon/query/find_entry.py +84 -0
- chemrecon/query/get_relations.py +143 -0
- chemrecon/query/get_structures_from_compound.py +65 -0
- chemrecon/schema/__init__.py +86 -0
- chemrecon/schema/db_object.py +363 -0
- chemrecon/schema/direction.py +10 -0
- chemrecon/schema/entry_types/__init__.py +0 -0
- chemrecon/schema/entry_types/aam.py +34 -0
- chemrecon/schema/entry_types/aam_repr.py +37 -0
- chemrecon/schema/entry_types/compound.py +52 -0
- chemrecon/schema/entry_types/enzyme.py +49 -0
- chemrecon/schema/entry_types/molstructure.py +64 -0
- chemrecon/schema/entry_types/molstructure_repr.py +41 -0
- chemrecon/schema/entry_types/reaction.py +57 -0
- chemrecon/schema/enums.py +154 -0
- chemrecon/schema/procedural_relation_entrygraph.py +66 -0
- chemrecon/schema/relation_types_composed/__init__.py +0 -0
- chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
- chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
- chemrecon/schema/relation_types_procedural/__init__.py +0 -0
- chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
- chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
- chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
- chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
- chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
- chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
- chemrecon/schema/relation_types_source/__init__.py +0 -0
- chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
- chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
- chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
- chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
- chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
- chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
- chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
- chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
- chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
- chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
- chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
- chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
- chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
- chemrecon/scripts/initialize_database.py +494 -0
- chemrecon/utils/copy_signature.py +10 -0
- chemrecon/utils/encodeable_list.py +11 -0
- chemrecon/utils/get_id_type.py +70 -0
- chemrecon/utils/hungarian.py +31 -0
- chemrecon/utils/reactant_matching.py +168 -0
- chemrecon/utils/rxnutils.py +44 -0
- chemrecon/utils/set_cwd.py +12 -0
- chemrecon-0.1.1.dist-info/METADATA +143 -0
- chemrecon-0.1.1.dist-info/RECORD +86 -0
- chemrecon-0.1.1.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
""" Protocols define how to explore an entrygraph.
|
|
2
|
+
"""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from typing import Optional, Callable
|
|
7
|
+
|
|
8
|
+
from chemrecon.entrygraph.explore_procedure import (
|
|
9
|
+
ExploreProcedure, ExploreProcedureT1, ExploreProcedureT2,
|
|
10
|
+
ExploreProcedureSym,
|
|
11
|
+
)
|
|
12
|
+
from chemrecon.entrygraph.filter import EntryFilter, RelationFilter, EntryFilterProcedure, RelationFilterProcedure
|
|
13
|
+
from chemrecon.schema import Entry, ProceduralRelation, Relation, ComposedRelation
|
|
14
|
+
from chemrecon.schema.direction import Direction
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Protocol specification
|
|
18
|
+
class ExplorationProtocol():
|
|
19
|
+
"""
|
|
20
|
+
Represents a protocol for exploring relationships and entries within a graph structure.
|
|
21
|
+
|
|
22
|
+
This class is designed to define and manage the exploration of entries and relationships in a graph-like
|
|
23
|
+
data model. Users can specify sets of entry and relationship types, as well as define filters and
|
|
24
|
+
procedures for exploration.
|
|
25
|
+
"""
|
|
26
|
+
relation_types: set[tuple[type[Relation], Direction]] #: List of relation types to traverse.
|
|
27
|
+
entry_types: set[type[Entry]] #: List of entry types involved in the specified relations
|
|
28
|
+
relation_types_terminal: set[tuple[type[Relation], Direction]] # Will not be explored, only added.
|
|
29
|
+
entry_filters: dict[type[Entry], EntryFilter] #: Optional filters for each type of entry.
|
|
30
|
+
relation_filters: dict[type[Relation], RelationFilter] #: Ditto, for relations.
|
|
31
|
+
|
|
32
|
+
# Explore procedures
|
|
33
|
+
explore_procedures: list[ExploreProcedure]
|
|
34
|
+
post_explore_procedures: list[ExploreProcedure]
|
|
35
|
+
transitive_subprocedures: dict[type[Relation], tuple[ExploreProcedure, ExploreProcedure]]
|
|
36
|
+
|
|
37
|
+
def __init__(
|
|
38
|
+
self,
|
|
39
|
+
relation_types: set[type[Relation] | tuple[type[Relation], Direction]],
|
|
40
|
+
relation_types_terminal: Optional[set[type[Relation]]] = None,
|
|
41
|
+
entry_filters: Optional[dict[type[Entry], Callable[[Entry], bool]]] = None,
|
|
42
|
+
relation_filters: Optional[dict[type[Relation], Callable[[Relation], bool]]] = None,
|
|
43
|
+
):
|
|
44
|
+
""" Specify an exploration protocol.
|
|
45
|
+
|
|
46
|
+
:param relation_types: A set of relation typess to explore. Optionally, (Relation, Direction) tuples can be
|
|
47
|
+
passed to specify in which direction each relation can be traversed. By default, only the forwards
|
|
48
|
+
direction is traversed. For symmetric relations, only the Direction.SYMMETRIC value is allowed.
|
|
49
|
+
:type relation_types: set[type[Relation] | tuple[type[Relation], Direction]],
|
|
50
|
+
:param relation_types_terminal: A set of relation types which are not used for expanding the graph, but are
|
|
51
|
+
added if both endpoints were already found. Here, directionality is not specified.
|
|
52
|
+
:type relation_types_terminal: Optional[set[type[Relation] | tuple[type[Relation], Direction]]]
|
|
53
|
+
:param entry_filters: An optional dictionary of filters for each entry type. Filters should be a function which
|
|
54
|
+
accepts an entry, and returns False if the entry should not be included by the protocol.
|
|
55
|
+
:type entry_filters: Optional[dict[type[Entry], Callable[[Entry], bool]]]
|
|
56
|
+
:param relation_filters: Ditto, for relations.
|
|
57
|
+
:type relation_filters: Optional[dict[type[Relation], Callable[[Relation], bool]]]
|
|
58
|
+
"""
|
|
59
|
+
# Process relation types (assign forward/symmetric direction by default)
|
|
60
|
+
self.relation_types = set()
|
|
61
|
+
for item in relation_types:
|
|
62
|
+
match item:
|
|
63
|
+
case (reltype, direction):
|
|
64
|
+
# Given with direction
|
|
65
|
+
self.relation_types.add((reltype, direction))
|
|
66
|
+
case reltype:
|
|
67
|
+
# Set forwards/symmetric direction by default
|
|
68
|
+
if reltype.symmetric:
|
|
69
|
+
self.relation_types.add((reltype, Direction.SYMMETRIC))
|
|
70
|
+
else:
|
|
71
|
+
self.relation_types.add((reltype, Direction.FORWARDS))
|
|
72
|
+
|
|
73
|
+
self.relation_types_terminal = set()
|
|
74
|
+
if relation_types_terminal is not None:
|
|
75
|
+
for reltype in relation_types_terminal:
|
|
76
|
+
# Set forwards/symmetric direction by default
|
|
77
|
+
if reltype.symmetric:
|
|
78
|
+
self.relation_types_terminal.add((reltype, Direction.SYMMETRIC))
|
|
79
|
+
else:
|
|
80
|
+
self.relation_types_terminal.add((reltype, Direction.BOTH))
|
|
81
|
+
|
|
82
|
+
# Process filters
|
|
83
|
+
self.entry_filters = dict()
|
|
84
|
+
self.relation_filters = dict()
|
|
85
|
+
if entry_filters is not None:
|
|
86
|
+
for e_type, e_filter in entry_filters.items():
|
|
87
|
+
self.entry_filters[e_type] = EntryFilterProcedure(
|
|
88
|
+
filter_proc = e_filter
|
|
89
|
+
)
|
|
90
|
+
if relation_filters is not None:
|
|
91
|
+
for r_type, r_filter in relation_filters.items():
|
|
92
|
+
self.relation_filters[r_type] = RelationFilterProcedure(
|
|
93
|
+
relation_type = r_type,
|
|
94
|
+
filter_proc = r_filter
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
self.entry_filters = (entry_filters) if entry_filters else dict()
|
|
98
|
+
self.relation_filters: dict[type[Relation], Callable[[Relation], bool]] \
|
|
99
|
+
= relation_filters if relation_filters else dict()
|
|
100
|
+
|
|
101
|
+
# Set entry types based on the relations
|
|
102
|
+
self.entry_types = set()
|
|
103
|
+
for r, direction in self.relation_types:
|
|
104
|
+
self.entry_types.add(r.source_entrytype)
|
|
105
|
+
self.entry_types.add(r.target_entrytype)
|
|
106
|
+
|
|
107
|
+
# List of finished ExplorationProcedure objects to call when exploring
|
|
108
|
+
self.explore_procedures: list[ExploreProcedure] = list()
|
|
109
|
+
self.post_explore_procedures: list[ExploreProcedure] = list()
|
|
110
|
+
self.transitive_subprocedures: dict[type[Relation], tuple[ExploreProcedure, ExploreProcedure]] = dict()
|
|
111
|
+
|
|
112
|
+
# Sanity checks of the given input
|
|
113
|
+
for rtype, direction in self.relation_types:
|
|
114
|
+
if rtype.source_entrytype not in self.entry_types:
|
|
115
|
+
raise ValueError(f'Relation {rtype} refers to entry types not allowed in the graph.')
|
|
116
|
+
if rtype.target_entrytype not in self.entry_types:
|
|
117
|
+
raise ValueError(f'Relation {rtype} refers to entry types not allowed in the graph.')
|
|
118
|
+
|
|
119
|
+
if len(self.relation_types_terminal.intersection(self.relation_types)) > 0:
|
|
120
|
+
raise ValueError('Terminal relation types must not intersect relation types.')
|
|
121
|
+
|
|
122
|
+
# Add 'None' for filters when not specified
|
|
123
|
+
for entrytype in (
|
|
124
|
+
self.entry_types.difference(self.entry_filters.keys())
|
|
125
|
+
):
|
|
126
|
+
self.entry_filters[entrytype] = None
|
|
127
|
+
for reltype in ({rel for rel, _ in self.relation_types}
|
|
128
|
+
.union({rel for rel, _ in self.relation_types_terminal})
|
|
129
|
+
.difference(self.relation_filters.keys())
|
|
130
|
+
):
|
|
131
|
+
self.relation_filters[reltype] = None
|
|
132
|
+
|
|
133
|
+
# Establish a series of exploration procedures
|
|
134
|
+
# TODO accept no arguments, and assume (forwards) or (symmetric)
|
|
135
|
+
|
|
136
|
+
for given_rels, proc_list in [
|
|
137
|
+
[self.relation_types, self.explore_procedures],
|
|
138
|
+
[self.relation_types_terminal, self.post_explore_procedures]
|
|
139
|
+
]:
|
|
140
|
+
for rel_type, direction in given_rels:
|
|
141
|
+
rel_type: type[Relation]
|
|
142
|
+
if issubclass(rel_type, ProceduralRelation):
|
|
143
|
+
if direction not in {Direction.FORWARDS, Direction.SYMMETRIC}:
|
|
144
|
+
raise ValueError('Procedural relations can only be explored forwards.')
|
|
145
|
+
|
|
146
|
+
match direction:
|
|
147
|
+
case Direction.FORWARDS:
|
|
148
|
+
# Forwards, explore from T1
|
|
149
|
+
assert not rel_type.symmetric
|
|
150
|
+
proc_list.append(ExploreProcedureT1(
|
|
151
|
+
relationtype = rel_type,
|
|
152
|
+
relation_filter = self.relation_filters[rel_type],
|
|
153
|
+
entry_filter = self.entry_filters[rel_type.target_entrytype]
|
|
154
|
+
))
|
|
155
|
+
case Direction.BACKWARDS:
|
|
156
|
+
# Backwards, explore from T2
|
|
157
|
+
assert not rel_type.symmetric
|
|
158
|
+
proc_list.append(ExploreProcedureT2(
|
|
159
|
+
relationtype = rel_type,
|
|
160
|
+
relation_filter = self.relation_filters[rel_type],
|
|
161
|
+
entry_filter = self.entry_filters[rel_type.source_entrytype]
|
|
162
|
+
))
|
|
163
|
+
case Direction.BOTH:
|
|
164
|
+
# Both
|
|
165
|
+
assert not rel_type.symmetric
|
|
166
|
+
proc_list.append(ExploreProcedureT1(
|
|
167
|
+
relationtype = rel_type,
|
|
168
|
+
relation_filter = self.relation_filters[rel_type],
|
|
169
|
+
entry_filter = self.entry_filters[rel_type.target_entrytype]
|
|
170
|
+
))
|
|
171
|
+
proc_list.append(ExploreProcedureT2(
|
|
172
|
+
relationtype = rel_type,
|
|
173
|
+
relation_filter = self.relation_filters[rel_type],
|
|
174
|
+
entry_filter = self.entry_filters[rel_type.source_entrytype]
|
|
175
|
+
))
|
|
176
|
+
case Direction.SYMMETRIC:
|
|
177
|
+
# Symmetric
|
|
178
|
+
assert rel_type.symmetric
|
|
179
|
+
proc_list.append(ExploreProcedureSym(
|
|
180
|
+
relationtype = rel_type,
|
|
181
|
+
relation_filter = self.relation_filters[rel_type],
|
|
182
|
+
entry_filter = self.entry_filters[rel_type.target_entrytype]
|
|
183
|
+
))
|
|
184
|
+
|
|
185
|
+
# If transitive, add sub procedures
|
|
186
|
+
if issubclass(rel_type, ComposedRelation):
|
|
187
|
+
self.transitive_subprocedures[rel_type] = (
|
|
188
|
+
ExploreProcedureT1(
|
|
189
|
+
relationtype = rel_type.rel_type_1,
|
|
190
|
+
relation_filter = None, entry_filter = None
|
|
191
|
+
),
|
|
192
|
+
ExploreProcedureT1(
|
|
193
|
+
relationtype = rel_type.rel_type_2,
|
|
194
|
+
relation_filter = None, entry_filter = None
|
|
195
|
+
)
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
# done
|
|
199
|
+
pass
|
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
""" Method for creating an explored Entry Graph based on the database.
|
|
2
|
+
"""
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import psycopg as pg
|
|
6
|
+
|
|
7
|
+
import chemrecon.connection as connection
|
|
8
|
+
|
|
9
|
+
from chemrecon.schema import Entry, Relation, ComposedRelation, ProceduralRelation, ProceduralGeneratorError
|
|
10
|
+
|
|
11
|
+
from chemrecon.entrygraph.entrygraph import EntryGraph, ReconID, VertexIndex
|
|
12
|
+
from chemrecon.entrygraph.explore_procedure import (
|
|
13
|
+
ExploreProcedure, ExploreProcedureT1, ExploreProcedureT2, ExploreProcedureSym
|
|
14
|
+
)
|
|
15
|
+
from chemrecon.entrygraph.explorationprotocol import ExplorationProtocol
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def explore(
        entrygraph: EntryGraph,
        protocol: ExplorationProtocol,
        steps: int = 4
):
    """ Grow ``entrygraph`` in place by following ``protocol`` through the database network.

    :param entrygraph: The graph to expand; its initial vertices are the starting points.
    :param protocol: The exploration protocol providing the procedures to run.
    :param steps: Generation limit handed to the :class:`Explorer` (its ``generations`` argument).
    """
    explorer = Explorer(entrygraph, protocol, generations = steps)
    explorer.explore()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Explorer:
|
|
30
|
+
entrygraph: EntryGraph
|
|
31
|
+
generations: Optional[int] = None
|
|
32
|
+
protocol: ExplorationProtocol
|
|
33
|
+
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
entrygraph: EntryGraph,
|
|
37
|
+
protocol: ExplorationProtocol,
|
|
38
|
+
generations: int = None
|
|
39
|
+
):
|
|
40
|
+
self.entrygraph = entrygraph
|
|
41
|
+
self.generations = generations
|
|
42
|
+
self.protocol = protocol
|
|
43
|
+
|
|
44
|
+
# Generate cursors for each procedure
|
|
45
|
+
for proc in {
|
|
46
|
+
*protocol.explore_procedures,
|
|
47
|
+
*protocol.post_explore_procedures,
|
|
48
|
+
*[p for p, _ in protocol.transitive_subprocedures.values()],
|
|
49
|
+
*[p for _, p in protocol.transitive_subprocedures.values()]
|
|
50
|
+
}:
|
|
51
|
+
proc: ExploreProcedure
|
|
52
|
+
proc.cursor = pg.Cursor(
|
|
53
|
+
connection = connection.handler.conn,
|
|
54
|
+
row_factory = connection.handler.make_relation_entry_view_row_factory(proc.relation_type)
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
# Main exploration alg.
|
|
58
|
+
# --------------------------------------------------------------------------------------------------------------
|
|
59
|
+
def explore(self) -> None:
|
|
60
|
+
""" Explores the graph based on the database connection.
|
|
61
|
+
After execution, the graph will be fully populated.
|
|
62
|
+
"""
|
|
63
|
+
import chemrecon.core.populate_query_handler
|
|
64
|
+
|
|
65
|
+
# Sets of already explored vertices (updated at end of every iteration)
|
|
66
|
+
vs_new: set[VertexIndex] = set()
|
|
67
|
+
vs_explored: set[VertexIndex] = set()
|
|
68
|
+
|
|
69
|
+
# Add initial vertices
|
|
70
|
+
for v_idx in self.entrygraph.initial_vertices:
|
|
71
|
+
vs_new.add(v_idx)
|
|
72
|
+
|
|
73
|
+
# Iteratively explore
|
|
74
|
+
i: int = 0 # Generation
|
|
75
|
+
while True:
|
|
76
|
+
# Main iteration
|
|
77
|
+
i += 1
|
|
78
|
+
|
|
79
|
+
# Compute new set of vertices to explore
|
|
80
|
+
to_explore: dict[type[Entry], set[tuple[VertexIndex, ReconID]]] = {
|
|
81
|
+
etype: set() for etype in self.protocol.entry_types
|
|
82
|
+
}
|
|
83
|
+
for v_idx in vs_new:
|
|
84
|
+
v = self.entrygraph.get_vertex_by_vertex_index(v_idx)
|
|
85
|
+
if v is None:
|
|
86
|
+
assert False, 'unreachable'
|
|
87
|
+
to_explore[type(v.entry)].add((v_idx, v.recon_id))
|
|
88
|
+
|
|
89
|
+
vs_explored.update(vs_new)
|
|
90
|
+
|
|
91
|
+
# For each type of relation, call the explore procedure, and sort out entries according to the filter
|
|
92
|
+
for explr_proc in self.protocol.explore_procedures:
|
|
93
|
+
|
|
94
|
+
# If past generation limit, explore only relations which explicitly ignore this
|
|
95
|
+
if i > self.generations:
|
|
96
|
+
if not explr_proc.relation_type.ignore_generation_limit:
|
|
97
|
+
continue
|
|
98
|
+
|
|
99
|
+
# Run procedure, producing relations and entry endpoints
|
|
100
|
+
input_entries = list(to_explore[explr_proc.takes_entrytype])
|
|
101
|
+
if len(input_entries) > 0:
|
|
102
|
+
try:
|
|
103
|
+
res = self._run_procedure(
|
|
104
|
+
explr_proc,
|
|
105
|
+
[self.entrygraph.g.get_node_data(v).entry for v, _ in input_entries]
|
|
106
|
+
)
|
|
107
|
+
if res is None:
|
|
108
|
+
# Should not happen (?)
|
|
109
|
+
continue
|
|
110
|
+
except ProceduralGeneratorError:
|
|
111
|
+
# Could not generate, so do not add
|
|
112
|
+
continue
|
|
113
|
+
|
|
114
|
+
# res: list of (t_takes, relation[t_takes, t_gives], t_gives)
|
|
115
|
+
for (takes_vertex_index, takes_recon_id), subresult in zip(input_entries, res):
|
|
116
|
+
# TODO error here, sym relations continually gives the e_takes instead of the e_gives
|
|
117
|
+
for rel, e_gives in subresult:
|
|
118
|
+
|
|
119
|
+
# Filter
|
|
120
|
+
if explr_proc.relation_filter is not None:
|
|
121
|
+
if not explr_proc.relation_filter(rel):
|
|
122
|
+
continue
|
|
123
|
+
if explr_proc.entry_filter is not None:
|
|
124
|
+
if not explr_proc.entry_filter(e_gives):
|
|
125
|
+
continue
|
|
126
|
+
|
|
127
|
+
# Add relation to entrygraph
|
|
128
|
+
v_new_idx = self.entrygraph.add_vertex_from(
|
|
129
|
+
from_index = takes_vertex_index,
|
|
130
|
+
relation = rel,
|
|
131
|
+
entry = e_gives,
|
|
132
|
+
generation = i
|
|
133
|
+
)
|
|
134
|
+
vs_new.add(v_new_idx) # Add new vertices to explore
|
|
135
|
+
|
|
136
|
+
# End of iteration, update lists
|
|
137
|
+
vs_new = vs_new - vs_explored
|
|
138
|
+
if len(vs_new) == 0:
|
|
139
|
+
break
|
|
140
|
+
|
|
141
|
+
# Explore terminal relation (does not lead to any new vertices to add)
|
|
142
|
+
# TODO should not add new vertices, only relations between vertices already in the graph
|
|
143
|
+
# TODO !important
|
|
144
|
+
for explr_proc in self.protocol.post_explore_procedures:
|
|
145
|
+
input_entries: list[tuple[VertexIndex, ReconID]] = [
|
|
146
|
+
(v.vertex_index, v.recon_id)
|
|
147
|
+
for v in self.entrygraph.g.nodes()
|
|
148
|
+
if type(v.entry) is explr_proc.takes_entrytype
|
|
149
|
+
]
|
|
150
|
+
if len(input_entries) > 0:
|
|
151
|
+
res = self._run_procedure(
|
|
152
|
+
explr_proc,
|
|
153
|
+
[self.entrygraph.g.get_node_data(v).entry for v, _ in input_entries]
|
|
154
|
+
)
|
|
155
|
+
for (takes_vertex_index, takes_recon_id), subresult in zip(input_entries, res):
|
|
156
|
+
for rel, e_gives in subresult:
|
|
157
|
+
# Add relation to entrygraph only if exists
|
|
158
|
+
lookup = self.entrygraph.get_vertex_by_entry(e_gives)
|
|
159
|
+
if lookup is not None:
|
|
160
|
+
self.entrygraph.add_edge(
|
|
161
|
+
source_v_index = takes_vertex_index,
|
|
162
|
+
target_v_index = lookup,
|
|
163
|
+
relation = rel
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
# TODO add setting to create new entries from terminal relations?
|
|
167
|
+
# Old, added new entries unnecessarily
|
|
168
|
+
# v_new_idx = self.entrygraph.add_vertex_from(
|
|
169
|
+
# from_index = takes_vertex_index,
|
|
170
|
+
# relation = rel,
|
|
171
|
+
# entry = e_gives,
|
|
172
|
+
# generation = -1
|
|
173
|
+
# )
|
|
174
|
+
|
|
175
|
+
# Commit changes made if handler can populate the backend
|
|
176
|
+
if isinstance(connection.handler, chemrecon.core.populate_query_handler.PopulateQueryHandler):
|
|
177
|
+
connection.handler.conn.commit()
|
|
178
|
+
|
|
179
|
+
# Done, finalise
|
|
180
|
+
pass
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# Dispatchers for individual procedures
|
|
184
|
+
# ---------------------------------------------------------------------------------------------------------------
|
|
185
|
+
def _run_procedure[T_takes: Entry, T_gives: Entry](
|
|
186
|
+
self,
|
|
187
|
+
proc: ExploreProcedure[T_takes, T_gives],
|
|
188
|
+
take_entries: list[T_takes]
|
|
189
|
+
) -> list[list[tuple[Relation[T_takes, T_gives], T_gives]]]:
|
|
190
|
+
if len(take_entries) == 0:
|
|
191
|
+
return []
|
|
192
|
+
# Dispatch depending on whether procedural or database-backed relation
|
|
193
|
+
if issubclass(proc.relation_type, ComposedRelation):
|
|
194
|
+
return self._run_procedure_transitive(proc, take_entries)
|
|
195
|
+
elif issubclass(proc.relation_type, ProceduralRelation):
|
|
196
|
+
return self._run_procedure_procedural(proc, take_entries)
|
|
197
|
+
else:
|
|
198
|
+
return self._run_procedure_database(proc, take_entries)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _run_procedure_database[T_takes: Entry, T_gives: Entry](
|
|
202
|
+
self,
|
|
203
|
+
proc: ExploreProcedure[T_takes, T_gives],
|
|
204
|
+
take_entries: list[T_takes],
|
|
205
|
+
) -> list[list[tuple[Relation[T_takes, T_gives], T_gives]]]:
|
|
206
|
+
# The result of the procedure relies only on information in the database
|
|
207
|
+
proc.cursor.executemany(
|
|
208
|
+
query = proc.q,
|
|
209
|
+
params_seq = [[e.recon_id] * proc.n_params for e in take_entries],
|
|
210
|
+
returning = True
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
# Fetch results and return
|
|
214
|
+
result: list[list[tuple[Relation, Entry, Entry]]] = [proc.cursor.fetchall()]
|
|
215
|
+
while proc.cursor.nextset():
|
|
216
|
+
result.append(proc.cursor.fetchall())
|
|
217
|
+
|
|
218
|
+
# How to interpret and add depends on T1, T2, or sym relation
|
|
219
|
+
match proc:
|
|
220
|
+
case ExploreProcedureT1():
|
|
221
|
+
# Result is relation and T2
|
|
222
|
+
return [
|
|
223
|
+
[(r, e2) for r, e1, e2 in subresult]
|
|
224
|
+
for subresult in result
|
|
225
|
+
]
|
|
226
|
+
case ExploreProcedureT2():
|
|
227
|
+
# Result is relation and T1
|
|
228
|
+
return [
|
|
229
|
+
[(r, e1) for r, e1, e2 in subresult]
|
|
230
|
+
for subresult in result
|
|
231
|
+
]
|
|
232
|
+
case ExploreProcedureSym():
|
|
233
|
+
# Result entry is T1 or T2, depending on which is different from the input
|
|
234
|
+
return [
|
|
235
|
+
[
|
|
236
|
+
(r, e2) if e1.recon_id == takes_recon_id else (r, e1)
|
|
237
|
+
for r, e1, e2 in subresult
|
|
238
|
+
]
|
|
239
|
+
for takes_recon_id, subresult in zip((
|
|
240
|
+
e.recon_id for e in take_entries
|
|
241
|
+
), result)
|
|
242
|
+
]
|
|
243
|
+
case _:
|
|
244
|
+
assert False, 'unreachable'
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _run_procedure_procedural[T_takes: Entry, T_gives: Entry](
|
|
248
|
+
self,
|
|
249
|
+
proc: ExploreProcedure[T_takes, T_gives],
|
|
250
|
+
take_entries: list[T_takes],
|
|
251
|
+
) -> list[list[tuple[Relation[T_takes, T_gives], T_gives]]]:
|
|
252
|
+
import chemrecon.core.populate_query_handler
|
|
253
|
+
|
|
254
|
+
# TODO handle catching the ProceduralGeneratorError()!
|
|
255
|
+
|
|
256
|
+
# Run the generate() method of the relation, and possibly update the DB
|
|
257
|
+
# if the handler supports insertion
|
|
258
|
+
assert issubclass(proc.relation_type, ProceduralRelation)
|
|
259
|
+
|
|
260
|
+
# First, check that the relation already exists in the DB.
|
|
261
|
+
db_result = self._run_procedure_database(proc, take_entries)
|
|
262
|
+
|
|
263
|
+
# If we found results for all queries
|
|
264
|
+
if all(len(subresult) > 0 for subresult in db_result):
|
|
265
|
+
return db_result
|
|
266
|
+
|
|
267
|
+
# If not , use generator
|
|
268
|
+
procedural_result: list[list[tuple[Relation[T_takes, T_gives], T_gives]]] = list()
|
|
269
|
+
|
|
270
|
+
for i, take_entry in enumerate(take_entries):
|
|
271
|
+
if len(db_result[i]) > 0:
|
|
272
|
+
procedural_result.append(db_result[i])
|
|
273
|
+
continue
|
|
274
|
+
res = proc.relation_type.generate(
|
|
275
|
+
take_entry
|
|
276
|
+
)
|
|
277
|
+
procedural_result.append([
|
|
278
|
+
(rel, e_gives)
|
|
279
|
+
for rel, e_gives in res
|
|
280
|
+
])
|
|
281
|
+
|
|
282
|
+
# Update the database based on the db_result if possible
|
|
283
|
+
if isinstance(connection.handler, chemrecon.core.populate_query_handler.PopulateQueryHandler):
|
|
284
|
+
for take_reconid, subresult in zip((e.recon_id for e in take_entries), procedural_result):
|
|
285
|
+
assert take_reconid is not None
|
|
286
|
+
if len(subresult) == 0:
|
|
287
|
+
continue
|
|
288
|
+
|
|
289
|
+
assigned_ids = connection.handler.add_relations_to_entry_with_reconid(
|
|
290
|
+
recon_id = take_reconid,
|
|
291
|
+
entry_table = proc.relation_type.source_entrytype,
|
|
292
|
+
relations = [
|
|
293
|
+
(rel, e_gives)
|
|
294
|
+
for rel, e_gives in subresult
|
|
295
|
+
]
|
|
296
|
+
)
|
|
297
|
+
|
|
298
|
+
# Assign recon_ids to the created entries
|
|
299
|
+
for assign_recon_id, (new_rel, new_entry) in zip(assigned_ids, subresult):
|
|
300
|
+
new_entry.recon_id = assign_recon_id
|
|
301
|
+
|
|
302
|
+
else:
|
|
303
|
+
# Assign virtual Recon IDs to the generated entries (do not necessarily correspond with db)
|
|
304
|
+
# These will be negative numbers
|
|
305
|
+
for subresult in procedural_result:
|
|
306
|
+
for rel, e in subresult:
|
|
307
|
+
connection.handler.add_procedural_entry(e)
|
|
308
|
+
|
|
309
|
+
# Return result
|
|
310
|
+
return procedural_result
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _run_procedure_transitive[T_takes: Entry, T_gives: Entry, T_intermediate: Entry](
|
|
314
|
+
self,
|
|
315
|
+
proc: ExploreProcedure[T_takes, T_gives],
|
|
316
|
+
take_entries: list[T_takes],
|
|
317
|
+
) -> list[list[tuple[Relation[T_takes, T_gives], T_gives]]]:
|
|
318
|
+
import chemrecon.core.populate_query_handler
|
|
319
|
+
|
|
320
|
+
assert issubclass(proc.relation_type, ComposedRelation)
|
|
321
|
+
|
|
322
|
+
# First, check that the relation already exists in the DB.
|
|
323
|
+
db_result = self._run_procedure_database(proc, take_entries)
|
|
324
|
+
|
|
325
|
+
# If we found results for all queries
|
|
326
|
+
if all(len(subresult) > 0 for subresult in db_result):
|
|
327
|
+
return db_result
|
|
328
|
+
|
|
329
|
+
# Get rel1 results
|
|
330
|
+
rel_1_proc, rel_2_proc = self.protocol.transitive_subprocedures[proc.relation_type]
|
|
331
|
+
rel_1_result = self._run_procedure(rel_1_proc, take_entries)
|
|
332
|
+
|
|
333
|
+
# Filter by rel1 and e_inter
|
|
334
|
+
rel_1_result_filtered: list[list[tuple[Relation[T_takes, T_intermediate], T_intermediate]]] = list()
|
|
335
|
+
for subresult_1 in rel_1_result:
|
|
336
|
+
subresult_1_filtered: list[tuple[Relation[T_takes, T_intermediate], T_intermediate]] = list()
|
|
337
|
+
for r1, e_inter in subresult_1:
|
|
338
|
+
if proc.relation_type.filter_rel_1(r1) and proc.relation_type.filter_intermediate(e_inter):
|
|
339
|
+
subresult_1_filtered.append((r1, e_inter))
|
|
340
|
+
rel_1_result_filtered.append(subresult_1_filtered)
|
|
341
|
+
|
|
342
|
+
# Results of rel1 are passed as take_entries to rel_2
|
|
343
|
+
intermediate_map: dict[T_takes, list[tuple[Relation[T_takes, T_intermediate], T_intermediate]]] = {
|
|
344
|
+
e_take: subresult
|
|
345
|
+
for e_take, subresult in zip(take_entries, rel_1_result_filtered)
|
|
346
|
+
}
|
|
347
|
+
intermediate_entries: set[T_intermediate] = set()
|
|
348
|
+
for rels in intermediate_map.values():
|
|
349
|
+
intermediate_entries.update(e for _, e in rels)
|
|
350
|
+
intermediate_entries_list = [e for e in intermediate_entries if proc.relation_type.filter_intermediate(e)]
|
|
351
|
+
|
|
352
|
+
# Get rel2 results
|
|
353
|
+
rel_2_result = self._run_procedure(rel_2_proc, intermediate_entries_list)
|
|
354
|
+
rel_2_result_dict: dict[T_intermediate, list[tuple[Relation[T_intermediate, T_gives], T_gives]]] = {
|
|
355
|
+
e_inter: subresult
|
|
356
|
+
for e_inter, subresult in zip(intermediate_entries_list, rel_2_result)
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
# TODO filter rel2 results (not currently used)
|
|
360
|
+
|
|
361
|
+
# Make list of entries to return
|
|
362
|
+
result: list[list[tuple[Relation[T_takes, T_gives], T_gives]]] = list()
|
|
363
|
+
for i, take_entry in enumerate(take_entries):
|
|
364
|
+
# Consider routes through all intermediates
|
|
365
|
+
subresult: list[tuple[Relation[T_takes, T_gives], T_gives]] = list()
|
|
366
|
+
for rel_1, e_inter in intermediate_map[take_entry]:
|
|
367
|
+
# Add all from this intermediate
|
|
368
|
+
subresult.extend(
|
|
369
|
+
(proc.relation_type(rel_1 = rel_1, rel_2 = rel_2, intermediate = e_inter), e_gives)
|
|
370
|
+
for rel_2, e_gives in rel_2_result_dict[e_inter]
|
|
371
|
+
)
|
|
372
|
+
result.append(subresult)
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
if isinstance(connection.handler, chemrecon.core.populate_query_handler.PopulateQueryHandler):
|
|
376
|
+
# Update the database from rel_1 if procedural
|
|
377
|
+
for take_entry, subresult in zip(take_entries, rel_1_result_filtered):
|
|
378
|
+
assert take_entry.recon_id is not None
|
|
379
|
+
assigned_ids = connection.handler.add_relations_to_entry_with_reconid(
|
|
380
|
+
recon_id = take_entry.recon_id,
|
|
381
|
+
entry_table = proc.relation_type.source_entrytype,
|
|
382
|
+
relations = [
|
|
383
|
+
(rel, e_gives)
|
|
384
|
+
for rel, e_gives in subresult
|
|
385
|
+
]
|
|
386
|
+
)
|
|
387
|
+
for assign_recon_id, (new_rel, new_entry) in zip(assigned_ids, subresult):
|
|
388
|
+
new_entry.recon_id = assign_recon_id
|
|
389
|
+
|
|
390
|
+
# Update the database from rel_2 if procedural
|
|
391
|
+
for intermediate_entry, subresult in zip(intermediate_entries_list, rel_2_result):
|
|
392
|
+
assert intermediate_entry.recon_id is not None
|
|
393
|
+
assigned_ids = connection.handler.add_relations_to_entry_with_reconid(
|
|
394
|
+
recon_id = intermediate_entry.recon_id,
|
|
395
|
+
entry_table = proc.relation_type.intermediate_entrytype,
|
|
396
|
+
relations = [
|
|
397
|
+
(rel, e_gives)
|
|
398
|
+
for rel, e_gives in subresult
|
|
399
|
+
]
|
|
400
|
+
)
|
|
401
|
+
for assign_recon_id, (new_rel, new_entry) in zip(assigned_ids, subresult):
|
|
402
|
+
new_entry.recon_id = assign_recon_id
|
|
403
|
+
|
|
404
|
+
# Update the database for the transitive relation
|
|
405
|
+
for take_entry, subresult in zip(take_entries, result):
|
|
406
|
+
connection.handler.add_relations_to_entry_with_reconid(
|
|
407
|
+
recon_id = take_entry.recon_id,
|
|
408
|
+
entry_table = proc.relation_type.source_entrytype,
|
|
409
|
+
relations = [
|
|
410
|
+
(rel, e_gives)
|
|
411
|
+
for rel, e_gives in subresult
|
|
412
|
+
]
|
|
413
|
+
)
|
|
414
|
+
|
|
415
|
+
else:
|
|
416
|
+
# Assign virtual reconids to the generated entries
|
|
417
|
+
for subresult in result:
|
|
418
|
+
for rel, e in subresult:
|
|
419
|
+
connection.handler.add_procedural_entry(e)
|
|
420
|
+
|
|
421
|
+
return result
|