chemrecon 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. chemrecon/__init__.py +73 -0
  2. chemrecon/chem/__init__.py +0 -0
  3. chemrecon/chem/chemreaction.py +223 -0
  4. chemrecon/chem/constant_compounds.py +3 -0
  5. chemrecon/chem/create_mol.py +91 -0
  6. chemrecon/chem/elements.py +141 -0
  7. chemrecon/chem/gml/__init__.py +0 -0
  8. chemrecon/chem/gml/gml.py +324 -0
  9. chemrecon/chem/gml/gml_reactant_matching.py +130 -0
  10. chemrecon/chem/gml/gml_to_rdk.py +217 -0
  11. chemrecon/chem/mol.py +483 -0
  12. chemrecon/chem/sumformula.py +120 -0
  13. chemrecon/connection.py +97 -0
  14. chemrecon/core/__init__.py +0 -0
  15. chemrecon/core/id_types.py +687 -0
  16. chemrecon/core/ontology.py +209 -0
  17. chemrecon/core/populate_query_handler.py +336 -0
  18. chemrecon/core/query_handler.py +587 -0
  19. chemrecon/database/__init__.py +1 -0
  20. chemrecon/database/connect.py +63 -0
  21. chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
  22. chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
  23. chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
  24. chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
  25. chemrecon/database/params.py +88 -0
  26. chemrecon/entrygraph/draw.py +119 -0
  27. chemrecon/entrygraph/entrygraph.py +301 -0
  28. chemrecon/entrygraph/explorationprotocol.py +199 -0
  29. chemrecon/entrygraph/explore.py +421 -0
  30. chemrecon/entrygraph/explore_procedure.py +183 -0
  31. chemrecon/entrygraph/filter.py +88 -0
  32. chemrecon/entrygraph/scoring.py +141 -0
  33. chemrecon/query/__init__.py +26 -0
  34. chemrecon/query/create_entry.py +86 -0
  35. chemrecon/query/default_protocols.py +57 -0
  36. chemrecon/query/find_entry.py +84 -0
  37. chemrecon/query/get_relations.py +143 -0
  38. chemrecon/query/get_structures_from_compound.py +65 -0
  39. chemrecon/schema/__init__.py +86 -0
  40. chemrecon/schema/db_object.py +363 -0
  41. chemrecon/schema/direction.py +10 -0
  42. chemrecon/schema/entry_types/__init__.py +0 -0
  43. chemrecon/schema/entry_types/aam.py +34 -0
  44. chemrecon/schema/entry_types/aam_repr.py +37 -0
  45. chemrecon/schema/entry_types/compound.py +52 -0
  46. chemrecon/schema/entry_types/enzyme.py +49 -0
  47. chemrecon/schema/entry_types/molstructure.py +64 -0
  48. chemrecon/schema/entry_types/molstructure_repr.py +41 -0
  49. chemrecon/schema/entry_types/reaction.py +57 -0
  50. chemrecon/schema/enums.py +154 -0
  51. chemrecon/schema/procedural_relation_entrygraph.py +66 -0
  52. chemrecon/schema/relation_types_composed/__init__.py +0 -0
  53. chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
  54. chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
  55. chemrecon/schema/relation_types_procedural/__init__.py +0 -0
  56. chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
  57. chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
  58. chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
  59. chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
  60. chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
  61. chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
  62. chemrecon/schema/relation_types_source/__init__.py +0 -0
  63. chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
  64. chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
  65. chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
  66. chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
  67. chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
  68. chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
  69. chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
  70. chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
  71. chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
  72. chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
  73. chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
  74. chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
  75. chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
  76. chemrecon/scripts/initialize_database.py +494 -0
  77. chemrecon/utils/copy_signature.py +10 -0
  78. chemrecon/utils/encodeable_list.py +11 -0
  79. chemrecon/utils/get_id_type.py +70 -0
  80. chemrecon/utils/hungarian.py +31 -0
  81. chemrecon/utils/reactant_matching.py +168 -0
  82. chemrecon/utils/rxnutils.py +44 -0
  83. chemrecon/utils/set_cwd.py +12 -0
  84. chemrecon-0.1.1.dist-info/METADATA +143 -0
  85. chemrecon-0.1.1.dist-info/RECORD +86 -0
  86. chemrecon-0.1.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,199 @@
1
+ """ Protocols define how to explore an entrygraph.
2
+ """
3
+
4
+ from __future__ import annotations
5
+
6
+ from typing import Optional, Callable
7
+
8
+ from chemrecon.entrygraph.explore_procedure import (
9
+ ExploreProcedure, ExploreProcedureT1, ExploreProcedureT2,
10
+ ExploreProcedureSym,
11
+ )
12
+ from chemrecon.entrygraph.filter import EntryFilter, RelationFilter, EntryFilterProcedure, RelationFilterProcedure
13
+ from chemrecon.schema import Entry, ProceduralRelation, Relation, ComposedRelation
14
+ from chemrecon.schema.direction import Direction
15
+
16
+
17
+ # Protocol specification
18
+ class ExplorationProtocol():
19
+ """
20
+ Represents a protocol for exploring relationships and entries within a graph structure.
21
+
22
+ This class is designed to define and manage the exploration of entries and relationships in a graph-like
23
+ data model. Users can specify sets of entry and relationship types, as well as define filters and
24
+ procedures for exploration.
25
+ """
26
+ relation_types: set[tuple[type[Relation], Direction]] #: List of relation types to traverse.
27
+ entry_types: set[type[Entry]] #: List of entry types involved in the specified relations
28
+ relation_types_terminal: set[tuple[type[Relation], Direction]] # Will not be explored, only added.
29
+ entry_filters: dict[type[Entry], EntryFilter] #: Optional filters for each type of entry.
30
+ relation_filters: dict[type[Relation], RelationFilter] #: Ditto, for relations.
31
+
32
+ # Explore procedures
33
+ explore_procedures: list[ExploreProcedure]
34
+ post_explore_procedures: list[ExploreProcedure]
35
+ transitive_subprocedures: dict[type[Relation], tuple[ExploreProcedure, ExploreProcedure]]
36
+
37
+ def __init__(
38
+ self,
39
+ relation_types: set[type[Relation] | tuple[type[Relation], Direction]],
40
+ relation_types_terminal: Optional[set[type[Relation]]] = None,
41
+ entry_filters: Optional[dict[type[Entry], Callable[[Entry], bool]]] = None,
42
+ relation_filters: Optional[dict[type[Relation], Callable[[Relation], bool]]] = None,
43
+ ):
44
+ """ Specify an exploration protocol.
45
+
46
+ :param relation_types: A set of relation typess to explore. Optionally, (Relation, Direction) tuples can be
47
+ passed to specify in which direction each relation can be traversed. By default, only the forwards
48
+ direction is traversed. For symmetric relations, only the Direction.SYMMETRIC value is allowed.
49
+ :type relation_types: set[type[Relation] | tuple[type[Relation], Direction]],
50
+ :param relation_types_terminal: A set of relation types which are not used for expanding the graph, but are
51
+ added if both endpoints were already found. Here, directionality is not specified.
52
+ :type relation_types_terminal: Optional[set[type[Relation] | tuple[type[Relation], Direction]]]
53
+ :param entry_filters: An optional dictionary of filters for each entry type. Filters should be a function which
54
+ accepts an entry, and returns False if the entry should not be included by the protocol.
55
+ :type entry_filters: Optional[dict[type[Entry], Callable[[Entry], bool]]]
56
+ :param relation_filters: Ditto, for relations.
57
+ :type relation_filters: Optional[dict[type[Relation], Callable[[Relation], bool]]]
58
+ """
59
+ # Process relation types (assign forward/symmetric direction by default)
60
+ self.relation_types = set()
61
+ for item in relation_types:
62
+ match item:
63
+ case (reltype, direction):
64
+ # Given with direction
65
+ self.relation_types.add((reltype, direction))
66
+ case reltype:
67
+ # Set forwards/symmetric direction by default
68
+ if reltype.symmetric:
69
+ self.relation_types.add((reltype, Direction.SYMMETRIC))
70
+ else:
71
+ self.relation_types.add((reltype, Direction.FORWARDS))
72
+
73
+ self.relation_types_terminal = set()
74
+ if relation_types_terminal is not None:
75
+ for reltype in relation_types_terminal:
76
+ # Set forwards/symmetric direction by default
77
+ if reltype.symmetric:
78
+ self.relation_types_terminal.add((reltype, Direction.SYMMETRIC))
79
+ else:
80
+ self.relation_types_terminal.add((reltype, Direction.BOTH))
81
+
82
+ # Process filters
83
+ self.entry_filters = dict()
84
+ self.relation_filters = dict()
85
+ if entry_filters is not None:
86
+ for e_type, e_filter in entry_filters.items():
87
+ self.entry_filters[e_type] = EntryFilterProcedure(
88
+ filter_proc = e_filter
89
+ )
90
+ if relation_filters is not None:
91
+ for r_type, r_filter in relation_filters.items():
92
+ self.relation_filters[r_type] = RelationFilterProcedure(
93
+ relation_type = r_type,
94
+ filter_proc = r_filter
95
+ )
96
+
97
+ self.entry_filters = (entry_filters) if entry_filters else dict()
98
+ self.relation_filters: dict[type[Relation], Callable[[Relation], bool]] \
99
+ = relation_filters if relation_filters else dict()
100
+
101
+ # Set entry types based on the relations
102
+ self.entry_types = set()
103
+ for r, direction in self.relation_types:
104
+ self.entry_types.add(r.source_entrytype)
105
+ self.entry_types.add(r.target_entrytype)
106
+
107
+ # List of finished ExplorationProcedure objects to call when exploring
108
+ self.explore_procedures: list[ExploreProcedure] = list()
109
+ self.post_explore_procedures: list[ExploreProcedure] = list()
110
+ self.transitive_subprocedures: dict[type[Relation], tuple[ExploreProcedure, ExploreProcedure]] = dict()
111
+
112
+ # Sanity checks of the given input
113
+ for rtype, direction in self.relation_types:
114
+ if rtype.source_entrytype not in self.entry_types:
115
+ raise ValueError(f'Relation {rtype} refers to entry types not allowed in the graph.')
116
+ if rtype.target_entrytype not in self.entry_types:
117
+ raise ValueError(f'Relation {rtype} refers to entry types not allowed in the graph.')
118
+
119
+ if len(self.relation_types_terminal.intersection(self.relation_types)) > 0:
120
+ raise ValueError('Terminal relation types must not intersect relation types.')
121
+
122
+ # Add 'None' for filters when not specified
123
+ for entrytype in (
124
+ self.entry_types.difference(self.entry_filters.keys())
125
+ ):
126
+ self.entry_filters[entrytype] = None
127
+ for reltype in ({rel for rel, _ in self.relation_types}
128
+ .union({rel for rel, _ in self.relation_types_terminal})
129
+ .difference(self.relation_filters.keys())
130
+ ):
131
+ self.relation_filters[reltype] = None
132
+
133
+ # Establish a series of exploration procedures
134
+ # TODO accept no arguments, and assume (forwards) or (symmetric)
135
+
136
+ for given_rels, proc_list in [
137
+ [self.relation_types, self.explore_procedures],
138
+ [self.relation_types_terminal, self.post_explore_procedures]
139
+ ]:
140
+ for rel_type, direction in given_rels:
141
+ rel_type: type[Relation]
142
+ if issubclass(rel_type, ProceduralRelation):
143
+ if direction not in {Direction.FORWARDS, Direction.SYMMETRIC}:
144
+ raise ValueError('Procedural relations can only be explored forwards.')
145
+
146
+ match direction:
147
+ case Direction.FORWARDS:
148
+ # Forwards, explore from T1
149
+ assert not rel_type.symmetric
150
+ proc_list.append(ExploreProcedureT1(
151
+ relationtype = rel_type,
152
+ relation_filter = self.relation_filters[rel_type],
153
+ entry_filter = self.entry_filters[rel_type.target_entrytype]
154
+ ))
155
+ case Direction.BACKWARDS:
156
+ # Backwards, explore from T2
157
+ assert not rel_type.symmetric
158
+ proc_list.append(ExploreProcedureT2(
159
+ relationtype = rel_type,
160
+ relation_filter = self.relation_filters[rel_type],
161
+ entry_filter = self.entry_filters[rel_type.source_entrytype]
162
+ ))
163
+ case Direction.BOTH:
164
+ # Both
165
+ assert not rel_type.symmetric
166
+ proc_list.append(ExploreProcedureT1(
167
+ relationtype = rel_type,
168
+ relation_filter = self.relation_filters[rel_type],
169
+ entry_filter = self.entry_filters[rel_type.target_entrytype]
170
+ ))
171
+ proc_list.append(ExploreProcedureT2(
172
+ relationtype = rel_type,
173
+ relation_filter = self.relation_filters[rel_type],
174
+ entry_filter = self.entry_filters[rel_type.source_entrytype]
175
+ ))
176
+ case Direction.SYMMETRIC:
177
+ # Symmetric
178
+ assert rel_type.symmetric
179
+ proc_list.append(ExploreProcedureSym(
180
+ relationtype = rel_type,
181
+ relation_filter = self.relation_filters[rel_type],
182
+ entry_filter = self.entry_filters[rel_type.target_entrytype]
183
+ ))
184
+
185
+ # If transitive, add sub procedures
186
+ if issubclass(rel_type, ComposedRelation):
187
+ self.transitive_subprocedures[rel_type] = (
188
+ ExploreProcedureT1(
189
+ relationtype = rel_type.rel_type_1,
190
+ relation_filter = None, entry_filter = None
191
+ ),
192
+ ExploreProcedureT1(
193
+ relationtype = rel_type.rel_type_2,
194
+ relation_filter = None, entry_filter = None
195
+ )
196
+ )
197
+
198
+ # done
199
+ pass
@@ -0,0 +1,421 @@
1
+ """ Method for creating an explored Entry Graph based on the database.
2
+ """
3
+ from typing import Optional
4
+
5
+ import psycopg as pg
6
+
7
+ import chemrecon.connection as connection
8
+
9
+ from chemrecon.schema import Entry, Relation, ComposedRelation, ProceduralRelation, ProceduralGeneratorError
10
+
11
+ from chemrecon.entrygraph.entrygraph import EntryGraph, ReconID, VertexIndex
12
+ from chemrecon.entrygraph.explore_procedure import (
13
+ ExploreProcedure, ExploreProcedureT1, ExploreProcedureT2, ExploreProcedureSym
14
+ )
15
+ from chemrecon.entrygraph.explorationprotocol import ExplorationProtocol
16
+
17
+
18
def explore(
        entrygraph: EntryGraph,
        protocol: ExplorationProtocol,
        steps: int = 4
):
    """ Grow ``entrygraph`` in place by walking the database network.

    An :class:`Explorer` is created for the graph and protocol, with ``steps`` as its generation limit, and
    its exploration is run once.
    """
    explorer = Explorer(entrygraph, protocol, generations = steps)
    explorer.explore()
27
+
28
+
29
+ class Explorer:
30
+ entrygraph: EntryGraph
31
+ generations: Optional[int] = None
32
+ protocol: ExplorationProtocol
33
+
34
+ def __init__(
35
+ self,
36
+ entrygraph: EntryGraph,
37
+ protocol: ExplorationProtocol,
38
+ generations: int = None
39
+ ):
40
+ self.entrygraph = entrygraph
41
+ self.generations = generations
42
+ self.protocol = protocol
43
+
44
+ # Generate cursors for each procedure
45
+ for proc in {
46
+ *protocol.explore_procedures,
47
+ *protocol.post_explore_procedures,
48
+ *[p for p, _ in protocol.transitive_subprocedures.values()],
49
+ *[p for _, p in protocol.transitive_subprocedures.values()]
50
+ }:
51
+ proc: ExploreProcedure
52
+ proc.cursor = pg.Cursor(
53
+ connection = connection.handler.conn,
54
+ row_factory = connection.handler.make_relation_entry_view_row_factory(proc.relation_type)
55
+ )
56
+
57
+ # Main exploration alg.
58
+ # --------------------------------------------------------------------------------------------------------------
59
+ def explore(self) -> None:
60
+ """ Explores the graph based on the database connection.
61
+ After execution, the graph will be fully populated.
62
+ """
63
+ import chemrecon.core.populate_query_handler
64
+
65
+ # Sets of already explored vertices (updated at end of every iteration)
66
+ vs_new: set[VertexIndex] = set()
67
+ vs_explored: set[VertexIndex] = set()
68
+
69
+ # Add initial vertices
70
+ for v_idx in self.entrygraph.initial_vertices:
71
+ vs_new.add(v_idx)
72
+
73
+ # Iteratively explore
74
+ i: int = 0 # Generation
75
+ while True:
76
+ # Main iteration
77
+ i += 1
78
+
79
+ # Compute new set of vertices to explore
80
+ to_explore: dict[type[Entry], set[tuple[VertexIndex, ReconID]]] = {
81
+ etype: set() for etype in self.protocol.entry_types
82
+ }
83
+ for v_idx in vs_new:
84
+ v = self.entrygraph.get_vertex_by_vertex_index(v_idx)
85
+ if v is None:
86
+ assert False, 'unreachable'
87
+ to_explore[type(v.entry)].add((v_idx, v.recon_id))
88
+
89
+ vs_explored.update(vs_new)
90
+
91
+ # For each type of relation, call the explore procedure, and sort out entries according to the filter
92
+ for explr_proc in self.protocol.explore_procedures:
93
+
94
+ # If past generation limit, explore only relations which explicitly ignore this
95
+ if i > self.generations:
96
+ if not explr_proc.relation_type.ignore_generation_limit:
97
+ continue
98
+
99
+ # Run procedure, producing relations and entry endpoints
100
+ input_entries = list(to_explore[explr_proc.takes_entrytype])
101
+ if len(input_entries) > 0:
102
+ try:
103
+ res = self._run_procedure(
104
+ explr_proc,
105
+ [self.entrygraph.g.get_node_data(v).entry for v, _ in input_entries]
106
+ )
107
+ if res is None:
108
+ # Should not happen (?)
109
+ continue
110
+ except ProceduralGeneratorError:
111
+ # Could not generate, so do not add
112
+ continue
113
+
114
+ # res: list of (t_takes, relation[t_takes, t_gives], t_gives)
115
+ for (takes_vertex_index, takes_recon_id), subresult in zip(input_entries, res):
116
+ # TODO error here, sym relations continually gives the e_takes instead of the e_gives
117
+ for rel, e_gives in subresult:
118
+
119
+ # Filter
120
+ if explr_proc.relation_filter is not None:
121
+ if not explr_proc.relation_filter(rel):
122
+ continue
123
+ if explr_proc.entry_filter is not None:
124
+ if not explr_proc.entry_filter(e_gives):
125
+ continue
126
+
127
+ # Add relation to entrygraph
128
+ v_new_idx = self.entrygraph.add_vertex_from(
129
+ from_index = takes_vertex_index,
130
+ relation = rel,
131
+ entry = e_gives,
132
+ generation = i
133
+ )
134
+ vs_new.add(v_new_idx) # Add new vertices to explore
135
+
136
+ # End of iteration, update lists
137
+ vs_new = vs_new - vs_explored
138
+ if len(vs_new) == 0:
139
+ break
140
+
141
+ # Explore terminal relation (does not lead to any new vertices to add)
142
+ # TODO should not add new vertices, only relations between vertices already in the graph
143
+ # TODO !important
144
+ for explr_proc in self.protocol.post_explore_procedures:
145
+ input_entries: list[tuple[VertexIndex, ReconID]] = [
146
+ (v.vertex_index, v.recon_id)
147
+ for v in self.entrygraph.g.nodes()
148
+ if type(v.entry) is explr_proc.takes_entrytype
149
+ ]
150
+ if len(input_entries) > 0:
151
+ res = self._run_procedure(
152
+ explr_proc,
153
+ [self.entrygraph.g.get_node_data(v).entry for v, _ in input_entries]
154
+ )
155
+ for (takes_vertex_index, takes_recon_id), subresult in zip(input_entries, res):
156
+ for rel, e_gives in subresult:
157
+ # Add relation to entrygraph only if exists
158
+ lookup = self.entrygraph.get_vertex_by_entry(e_gives)
159
+ if lookup is not None:
160
+ self.entrygraph.add_edge(
161
+ source_v_index = takes_vertex_index,
162
+ target_v_index = lookup,
163
+ relation = rel
164
+ )
165
+
166
+ # TODO add setting to create new entries from terminal relations?
167
+ # Old, added new entries unnecessarily
168
+ # v_new_idx = self.entrygraph.add_vertex_from(
169
+ # from_index = takes_vertex_index,
170
+ # relation = rel,
171
+ # entry = e_gives,
172
+ # generation = -1
173
+ # )
174
+
175
+ # Commit changes made if handler can populate the backend
176
+ if isinstance(connection.handler, chemrecon.core.populate_query_handler.PopulateQueryHandler):
177
+ connection.handler.conn.commit()
178
+
179
+ # Done, finalise
180
+ pass
181
+
182
+
183
+ # Dispatchers for individual procedures
184
+ # ---------------------------------------------------------------------------------------------------------------
185
+ def _run_procedure[T_takes: Entry, T_gives: Entry](
186
+ self,
187
+ proc: ExploreProcedure[T_takes, T_gives],
188
+ take_entries: list[T_takes]
189
+ ) -> list[list[tuple[Relation[T_takes, T_gives], T_gives]]]:
190
+ if len(take_entries) == 0:
191
+ return []
192
+ # Dispatch depending on whether procedural or database-backed relation
193
+ if issubclass(proc.relation_type, ComposedRelation):
194
+ return self._run_procedure_transitive(proc, take_entries)
195
+ elif issubclass(proc.relation_type, ProceduralRelation):
196
+ return self._run_procedure_procedural(proc, take_entries)
197
+ else:
198
+ return self._run_procedure_database(proc, take_entries)
199
+
200
+
201
+ def _run_procedure_database[T_takes: Entry, T_gives: Entry](
202
+ self,
203
+ proc: ExploreProcedure[T_takes, T_gives],
204
+ take_entries: list[T_takes],
205
+ ) -> list[list[tuple[Relation[T_takes, T_gives], T_gives]]]:
206
+ # The result of the procedure relies only on information in the database
207
+ proc.cursor.executemany(
208
+ query = proc.q,
209
+ params_seq = [[e.recon_id] * proc.n_params for e in take_entries],
210
+ returning = True
211
+ )
212
+
213
+ # Fetch results and return
214
+ result: list[list[tuple[Relation, Entry, Entry]]] = [proc.cursor.fetchall()]
215
+ while proc.cursor.nextset():
216
+ result.append(proc.cursor.fetchall())
217
+
218
+ # How to interpret and add depends on T1, T2, or sym relation
219
+ match proc:
220
+ case ExploreProcedureT1():
221
+ # Result is relation and T2
222
+ return [
223
+ [(r, e2) for r, e1, e2 in subresult]
224
+ for subresult in result
225
+ ]
226
+ case ExploreProcedureT2():
227
+ # Result is relation and T1
228
+ return [
229
+ [(r, e1) for r, e1, e2 in subresult]
230
+ for subresult in result
231
+ ]
232
+ case ExploreProcedureSym():
233
+ # Result entry is T1 or T2, depending on which is different from the input
234
+ return [
235
+ [
236
+ (r, e2) if e1.recon_id == takes_recon_id else (r, e1)
237
+ for r, e1, e2 in subresult
238
+ ]
239
+ for takes_recon_id, subresult in zip((
240
+ e.recon_id for e in take_entries
241
+ ), result)
242
+ ]
243
+ case _:
244
+ assert False, 'unreachable'
245
+
246
+
247
+ def _run_procedure_procedural[T_takes: Entry, T_gives: Entry](
248
+ self,
249
+ proc: ExploreProcedure[T_takes, T_gives],
250
+ take_entries: list[T_takes],
251
+ ) -> list[list[tuple[Relation[T_takes, T_gives], T_gives]]]:
252
+ import chemrecon.core.populate_query_handler
253
+
254
+ # TODO handle catching the ProceduralGeneratorError()!
255
+
256
+ # Run the generate() method of the relation, and possibly update the DB
257
+ # if the handler supports insertion
258
+ assert issubclass(proc.relation_type, ProceduralRelation)
259
+
260
+ # First, check that the relation already exists in the DB.
261
+ db_result = self._run_procedure_database(proc, take_entries)
262
+
263
+ # If we found results for all queries
264
+ if all(len(subresult) > 0 for subresult in db_result):
265
+ return db_result
266
+
267
+ # If not , use generator
268
+ procedural_result: list[list[tuple[Relation[T_takes, T_gives], T_gives]]] = list()
269
+
270
+ for i, take_entry in enumerate(take_entries):
271
+ if len(db_result[i]) > 0:
272
+ procedural_result.append(db_result[i])
273
+ continue
274
+ res = proc.relation_type.generate(
275
+ take_entry
276
+ )
277
+ procedural_result.append([
278
+ (rel, e_gives)
279
+ for rel, e_gives in res
280
+ ])
281
+
282
+ # Update the database based on the db_result if possible
283
+ if isinstance(connection.handler, chemrecon.core.populate_query_handler.PopulateQueryHandler):
284
+ for take_reconid, subresult in zip((e.recon_id for e in take_entries), procedural_result):
285
+ assert take_reconid is not None
286
+ if len(subresult) == 0:
287
+ continue
288
+
289
+ assigned_ids = connection.handler.add_relations_to_entry_with_reconid(
290
+ recon_id = take_reconid,
291
+ entry_table = proc.relation_type.source_entrytype,
292
+ relations = [
293
+ (rel, e_gives)
294
+ for rel, e_gives in subresult
295
+ ]
296
+ )
297
+
298
+ # Assign recon_ids to the created entries
299
+ for assign_recon_id, (new_rel, new_entry) in zip(assigned_ids, subresult):
300
+ new_entry.recon_id = assign_recon_id
301
+
302
+ else:
303
+ # Assign virtual Recon IDs to the generated entries (do not necessarily correspond with db)
304
+ # These will be negative numbers
305
+ for subresult in procedural_result:
306
+ for rel, e in subresult:
307
+ connection.handler.add_procedural_entry(e)
308
+
309
+ # Return result
310
+ return procedural_result
311
+
312
+
313
+ def _run_procedure_transitive[T_takes: Entry, T_gives: Entry, T_intermediate: Entry](
314
+ self,
315
+ proc: ExploreProcedure[T_takes, T_gives],
316
+ take_entries: list[T_takes],
317
+ ) -> list[list[tuple[Relation[T_takes, T_gives], T_gives]]]:
318
+ import chemrecon.core.populate_query_handler
319
+
320
+ assert issubclass(proc.relation_type, ComposedRelation)
321
+
322
+ # First, check that the relation already exists in the DB.
323
+ db_result = self._run_procedure_database(proc, take_entries)
324
+
325
+ # If we found results for all queries
326
+ if all(len(subresult) > 0 for subresult in db_result):
327
+ return db_result
328
+
329
+ # Get rel1 results
330
+ rel_1_proc, rel_2_proc = self.protocol.transitive_subprocedures[proc.relation_type]
331
+ rel_1_result = self._run_procedure(rel_1_proc, take_entries)
332
+
333
+ # Filter by rel1 and e_inter
334
+ rel_1_result_filtered: list[list[tuple[Relation[T_takes, T_intermediate], T_intermediate]]] = list()
335
+ for subresult_1 in rel_1_result:
336
+ subresult_1_filtered: list[tuple[Relation[T_takes, T_intermediate], T_intermediate]] = list()
337
+ for r1, e_inter in subresult_1:
338
+ if proc.relation_type.filter_rel_1(r1) and proc.relation_type.filter_intermediate(e_inter):
339
+ subresult_1_filtered.append((r1, e_inter))
340
+ rel_1_result_filtered.append(subresult_1_filtered)
341
+
342
+ # Results of rel1 are passed as take_entries to rel_2
343
+ intermediate_map: dict[T_takes, list[tuple[Relation[T_takes, T_intermediate], T_intermediate]]] = {
344
+ e_take: subresult
345
+ for e_take, subresult in zip(take_entries, rel_1_result_filtered)
346
+ }
347
+ intermediate_entries: set[T_intermediate] = set()
348
+ for rels in intermediate_map.values():
349
+ intermediate_entries.update(e for _, e in rels)
350
+ intermediate_entries_list = [e for e in intermediate_entries if proc.relation_type.filter_intermediate(e)]
351
+
352
+ # Get rel2 results
353
+ rel_2_result = self._run_procedure(rel_2_proc, intermediate_entries_list)
354
+ rel_2_result_dict: dict[T_intermediate, list[tuple[Relation[T_intermediate, T_gives], T_gives]]] = {
355
+ e_inter: subresult
356
+ for e_inter, subresult in zip(intermediate_entries_list, rel_2_result)
357
+ }
358
+
359
+ # TODO filter rel2 results (not currently used)
360
+
361
+ # Make list of entries to return
362
+ result: list[list[tuple[Relation[T_takes, T_gives], T_gives]]] = list()
363
+ for i, take_entry in enumerate(take_entries):
364
+ # Consider routes through all intermediates
365
+ subresult: list[tuple[Relation[T_takes, T_gives], T_gives]] = list()
366
+ for rel_1, e_inter in intermediate_map[take_entry]:
367
+ # Add all from this intermediate
368
+ subresult.extend(
369
+ (proc.relation_type(rel_1 = rel_1, rel_2 = rel_2, intermediate = e_inter), e_gives)
370
+ for rel_2, e_gives in rel_2_result_dict[e_inter]
371
+ )
372
+ result.append(subresult)
373
+
374
+
375
+ if isinstance(connection.handler, chemrecon.core.populate_query_handler.PopulateQueryHandler):
376
+ # Update the database from rel_1 if procedural
377
+ for take_entry, subresult in zip(take_entries, rel_1_result_filtered):
378
+ assert take_entry.recon_id is not None
379
+ assigned_ids = connection.handler.add_relations_to_entry_with_reconid(
380
+ recon_id = take_entry.recon_id,
381
+ entry_table = proc.relation_type.source_entrytype,
382
+ relations = [
383
+ (rel, e_gives)
384
+ for rel, e_gives in subresult
385
+ ]
386
+ )
387
+ for assign_recon_id, (new_rel, new_entry) in zip(assigned_ids, subresult):
388
+ new_entry.recon_id = assign_recon_id
389
+
390
+ # Update the database from rel_2 if procedural
391
+ for intermediate_entry, subresult in zip(intermediate_entries_list, rel_2_result):
392
+ assert intermediate_entry.recon_id is not None
393
+ assigned_ids = connection.handler.add_relations_to_entry_with_reconid(
394
+ recon_id = intermediate_entry.recon_id,
395
+ entry_table = proc.relation_type.intermediate_entrytype,
396
+ relations = [
397
+ (rel, e_gives)
398
+ for rel, e_gives in subresult
399
+ ]
400
+ )
401
+ for assign_recon_id, (new_rel, new_entry) in zip(assigned_ids, subresult):
402
+ new_entry.recon_id = assign_recon_id
403
+
404
+ # Update the database for the transitive relation
405
+ for take_entry, subresult in zip(take_entries, result):
406
+ connection.handler.add_relations_to_entry_with_reconid(
407
+ recon_id = take_entry.recon_id,
408
+ entry_table = proc.relation_type.source_entrytype,
409
+ relations = [
410
+ (rel, e_gives)
411
+ for rel, e_gives in subresult
412
+ ]
413
+ )
414
+
415
+ else:
416
+ # Assign virtual reconids to the generated entries
417
+ for subresult in result:
418
+ for rel, e in subresult:
419
+ connection.handler.add_procedural_entry(e)
420
+
421
+ return result