chemrecon 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. chemrecon/__init__.py +73 -0
  2. chemrecon/chem/__init__.py +0 -0
  3. chemrecon/chem/chemreaction.py +223 -0
  4. chemrecon/chem/constant_compounds.py +3 -0
  5. chemrecon/chem/create_mol.py +91 -0
  6. chemrecon/chem/elements.py +141 -0
  7. chemrecon/chem/gml/__init__.py +0 -0
  8. chemrecon/chem/gml/gml.py +324 -0
  9. chemrecon/chem/gml/gml_reactant_matching.py +130 -0
  10. chemrecon/chem/gml/gml_to_rdk.py +217 -0
  11. chemrecon/chem/mol.py +483 -0
  12. chemrecon/chem/sumformula.py +120 -0
  13. chemrecon/connection.py +97 -0
  14. chemrecon/core/__init__.py +0 -0
  15. chemrecon/core/id_types.py +687 -0
  16. chemrecon/core/ontology.py +209 -0
  17. chemrecon/core/populate_query_handler.py +336 -0
  18. chemrecon/core/query_handler.py +587 -0
  19. chemrecon/database/__init__.py +1 -0
  20. chemrecon/database/connect.py +63 -0
  21. chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
  22. chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
  23. chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
  24. chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
  25. chemrecon/database/params.py +88 -0
  26. chemrecon/entrygraph/draw.py +119 -0
  27. chemrecon/entrygraph/entrygraph.py +301 -0
  28. chemrecon/entrygraph/explorationprotocol.py +199 -0
  29. chemrecon/entrygraph/explore.py +421 -0
  30. chemrecon/entrygraph/explore_procedure.py +183 -0
  31. chemrecon/entrygraph/filter.py +88 -0
  32. chemrecon/entrygraph/scoring.py +141 -0
  33. chemrecon/query/__init__.py +26 -0
  34. chemrecon/query/create_entry.py +86 -0
  35. chemrecon/query/default_protocols.py +57 -0
  36. chemrecon/query/find_entry.py +84 -0
  37. chemrecon/query/get_relations.py +143 -0
  38. chemrecon/query/get_structures_from_compound.py +65 -0
  39. chemrecon/schema/__init__.py +86 -0
  40. chemrecon/schema/db_object.py +363 -0
  41. chemrecon/schema/direction.py +10 -0
  42. chemrecon/schema/entry_types/__init__.py +0 -0
  43. chemrecon/schema/entry_types/aam.py +34 -0
  44. chemrecon/schema/entry_types/aam_repr.py +37 -0
  45. chemrecon/schema/entry_types/compound.py +52 -0
  46. chemrecon/schema/entry_types/enzyme.py +49 -0
  47. chemrecon/schema/entry_types/molstructure.py +64 -0
  48. chemrecon/schema/entry_types/molstructure_repr.py +41 -0
  49. chemrecon/schema/entry_types/reaction.py +57 -0
  50. chemrecon/schema/enums.py +154 -0
  51. chemrecon/schema/procedural_relation_entrygraph.py +66 -0
  52. chemrecon/schema/relation_types_composed/__init__.py +0 -0
  53. chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
  54. chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
  55. chemrecon/schema/relation_types_procedural/__init__.py +0 -0
  56. chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
  57. chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
  58. chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
  59. chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
  60. chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
  61. chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
  62. chemrecon/schema/relation_types_source/__init__.py +0 -0
  63. chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
  64. chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
  65. chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
  66. chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
  67. chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
  68. chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
  69. chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
  70. chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
  71. chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
  72. chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
  73. chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
  74. chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
  75. chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
  76. chemrecon/scripts/initialize_database.py +494 -0
  77. chemrecon/utils/copy_signature.py +10 -0
  78. chemrecon/utils/encodeable_list.py +11 -0
  79. chemrecon/utils/get_id_type.py +70 -0
  80. chemrecon/utils/hungarian.py +31 -0
  81. chemrecon/utils/reactant_matching.py +168 -0
  82. chemrecon/utils/rxnutils.py +44 -0
  83. chemrecon/utils/set_cwd.py +12 -0
  84. chemrecon-0.1.1.dist-info/METADATA +143 -0
  85. chemrecon-0.1.1.dist-info/RECORD +86 -0
  86. chemrecon-0.1.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,183 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Optional, ClassVar
5
+
6
+ import psycopg as pg
7
+ from psycopg import sql as sql
8
+
9
+ from chemrecon.entrygraph.filter import EntryFilter, RelationFilter
10
+ from chemrecon.schema.db_object import Entry, Relation
11
+
12
+
13
+ class ExploreProcedure[T_takes: Entry, T_gives: Entry](ABC):
14
+ takes_entrytype: type[T_takes]
15
+ gives_entrytype: type[T_gives]
16
+ relation_type: type[Relation] # Filter on the produced relations
17
+ entry_filter: Optional[EntryFilter[T_gives]] # Filter on T_gives
18
+ relation_filter: Optional[RelationFilter]
19
+
20
+ # Pre-computed SQL snippets
21
+ _opt_relation_filter_clause: sql.Composable
22
+ _opt_entry_filter_clause: sql.Composable
23
+
24
+ n_params: ClassVar[int]
25
+
26
+ # SQL
27
+ q: sql.Composed
28
+ cursor: pg.Cursor
29
+
30
+ def __init__(
31
+ self,
32
+ relationtype: type[Relation],
33
+ relation_filter: Optional[RelationFilter],
34
+ entry_filter: Optional[EntryFilter],
35
+ ):
36
+ # Init
37
+ self.relation_type = relationtype
38
+ self.relation_filter = relation_filter
39
+ self.entry_filter = entry_filter
40
+
41
+ if relation_filter is not None:
42
+ #assert relation_filter.relation_type == relationtype
43
+ pass
44
+ if entry_filter is not None:
45
+ #assert entry_filter.entry_type == self.gives_entrytype
46
+ pass
47
+
48
+ # Generate the various clauses ahead of time
49
+ match self.relation_filter:
50
+ # TODO filtering
51
+ #case RelationFilterSource():
52
+ # # Filter
53
+ # self._opt_relation_filter_clause = sql.SQL("""
54
+ # AND (rel_src in {src_set})
55
+ # """).format(
56
+ # src_set = list(relation_filter.allowed_sources)
57
+ # )
58
+ case _:
59
+ # No filter
60
+ self._opt_relation_filter_clause = sql.SQL('')
61
+
62
+ # Entry clause
63
+ # TODO change to filter entries on the DB level (hard for symmetric relations?)
64
+ # for symmetric - BOTH recon ids need to be in the set of allowed id types?
65
+ self._opt_entry_filter_clause = self.make_entry_filter_clause()
66
+
67
+ # Generate the query except for the formats
68
+ # Clause is prepared with as much formating as possible now, %(reconids)s param will be used when
69
+ # calling only
70
+ self.q = sql.SQL("""
71
+ SELECT *
72
+ FROM {rel_view}
73
+ WHERE {where_clause} {opt_relation_clause} {opt_entry_clause}
74
+ ;
75
+ """).format(
76
+ rel_view = sql.Identifier(f'{self.relation_type.get_table_name()}_v'),
77
+ where_clause = self.make_where_clause(),
78
+ opt_relation_clause = self._opt_relation_filter_clause,
79
+ opt_entry_clause = self._opt_entry_filter_clause
80
+ )
81
+
82
+ @abstractmethod
83
+ def make_where_clause(self) -> sql.SQL:
84
+ """ Returns a parametrised SQL string to be used when calling. """
85
+ pass
86
+
87
+ @abstractmethod
88
+ def make_entry_filter_clause(self) -> sql.SQL:
89
+ """ Returns an SQL string filtering correctly by recon_id_1, recon_id_2, or both. """
90
+ pass
91
+
92
+
93
+ class ExploreProcedureT1[T_takes: Entry, T_gives: Entry](ExploreProcedure[T_takes, T_gives]):
94
+ # Explore from T1
95
+ takes_entrytype: type[T_takes]
96
+ gives_entrytype: type[T_gives]
97
+ relation_type: type[Relation[T_takes, T_gives]]
98
+ entry_filter: Optional[EntryFilter[T_gives]]
99
+ relation_filter: Optional[RelationFilter[T_takes, T_gives]]
100
+
101
+ n_params: ClassVar[int] = 1
102
+
103
+ def __init__(
104
+ self,
105
+ relationtype: type[Relation[T_takes, T_gives]],
106
+ relation_filter: Optional[RelationFilter[T_takes, T_gives]],
107
+ entry_filter: Optional[EntryFilter[T_gives]]
108
+ ):
109
+ assert not relationtype.symmetric
110
+ self.takes_entrytype = relationtype.source_entrytype
111
+ self.produces_entrytype = relationtype.target_entrytype
112
+ self.relation = relationtype
113
+
114
+ super().__init__(relationtype, relation_filter, entry_filter)
115
+
116
+ def make_where_clause(self) -> sql.SQL:
117
+ return sql.SQL("recon_id_1 = %s")
118
+
119
+ def make_entry_filter_clause(self) -> sql.SQL:
120
+ # TODO
121
+ return sql.SQL('')
122
+
123
+
124
+ class ExploreProcedureT2[T_takes: Entry, T_gives: Entry](ExploreProcedure[T_takes, T_gives]):
125
+ # Explore from T2
126
+ takes_entrytype: type[T_takes]
127
+ gives_entrytype: type[T_gives]
128
+ relation_type: type[Relation[T_gives, T_takes]]
129
+ entry_filter: Optional[EntryFilter[T_gives]]
130
+ relation_filter: Optional[RelationFilter[T_gives, T_takes]]
131
+
132
+ n_params: ClassVar[int] = 1
133
+
134
+ def __init__(
135
+ self,
136
+ relationtype: type[Relation[T_gives, T_takes]],
137
+ relation_filter: Optional[RelationFilter[T_gives, T_takes]],
138
+ entry_filter: Optional[EntryFilter[T_gives]]
139
+ ):
140
+ assert not relationtype.symmetric
141
+ self.takes_entrytype = relationtype.target_entrytype
142
+ self.produces_entrytype = relationtype.source_entrytype
143
+ self.relation = relationtype
144
+
145
+ super().__init__(relationtype, relation_filter, entry_filter)
146
+
147
+ def make_where_clause(self) -> sql.SQL:
148
+ return sql.SQL("recon_id_2 = %s")
149
+
150
+ def make_entry_filter_clause(self) -> sql.SQL:
151
+ # TODO
152
+ return sql.SQL('')
153
+
154
+
155
+ class ExploreProcedureSym[T: Entry](ExploreProcedure[T, T]):
156
+ # Explore from T
157
+ takes_entrytype: type[T]
158
+ gives_entrytype: type[T]
159
+ relation_type: type[Relation[T, T]]
160
+ entry_filter: Optional[EntryFilter[T]]
161
+ relation_filter: Optional[RelationFilter[T, T]]
162
+
163
+ n_params: ClassVar[int] = 2
164
+
165
+ def __init__(
166
+ self,
167
+ relationtype: type[Relation[T, T]],
168
+ relation_filter: Optional[RelationFilter[T, T]],
169
+ entry_filter: Optional[EntryFilter[T]]
170
+ ):
171
+ assert relationtype.symmetric
172
+ self.takes_entrytype = relationtype.source_entrytype
173
+ self.produces_entrytype = relationtype.target_entrytype
174
+ self.relation = relationtype
175
+
176
+ super().__init__(relationtype, relation_filter, entry_filter)
177
+
178
+ def make_where_clause(self) -> sql.SQL:
179
+ return sql.SQL("(recon_id_1 = %s) OR (recon_id_2 = %s)")
180
+
181
+ def make_entry_filter_clause(self) -> sql.SQL:
182
+ # TODO
183
+ return sql.SQL('')
@@ -0,0 +1,88 @@
1
+ """ TODO re-implement this later, for now, filters should just be functions
2
+ """
3
+
4
+ from abc import ABC, abstractmethod
5
+ from typing import Callable
6
+
7
+ from chemrecon.schema.db_object import SourceEntry
8
+ from chemrecon.core.id_types import IdentifierType
9
+ from chemrecon.schema.db_object import Entry, Relation
10
+ from chemrecon.schema.enums import SourceDatabase
11
+
12
+
13
+ class EntryFilter[T: Entry](ABC):
14
+ """ Base class for filters. """
15
+ entry_type: type[Entry]
16
+ allowed_id_types: set[IdentifierType]
17
+ has_id_type_field: bool
18
+
19
+ @abstractmethod
20
+ def __call__(self, entry: T) -> bool:
21
+ # Base implementation
22
+ return True
23
+
24
+ class RelationFilter[T1: Entry, T2: Entry](ABC):
25
+ """ Base class for filters. """
26
+ relation_type: type[Relation[T1, T2]]
27
+ allowed_sources: set[SourceDatabase]
28
+ has_src_field: bool
29
+
30
+ def __init__(self, relation_type: type[Relation[T1, T2]]):
31
+ self.relation_type = relation_type
32
+
33
+ @abstractmethod
34
+ def __call__(self, relation: Relation[T1, T2]) -> bool:
35
+ # Base implementation
36
+ return True
37
+
38
+ class EntryFilterIdType[T: SourceEntry](EntryFilter[T]):
39
+ """
40
+ """
41
+ allowed_id_types: set[IdentifierType]
42
+ # TODO
43
+
44
+ def __init__(self, allowed_id_types: set[IdentifierType]):
45
+ self.allowed_id_types = allowed_id_types
46
+ raise NotImplementedError()
47
+
48
+ def __call__(self, entry: T) -> bool:
49
+ raise NotImplementedError()
50
+
51
+ # TODO similar to EntryFilterIdType, but for source
52
+
53
+
54
+ class EntryFilterProcedure[T: Entry](EntryFilter):
55
+ """ Allows custom filtering on the application level in addition to type filtering.
56
+ """
57
+ procedure: Callable[[T], bool]
58
+
59
+ def __init__(self, filter_proc: Callable[[T], bool]):
60
+ self.procedure = filter_proc
61
+ super().__init__()
62
+
63
+ def __call__(self, entry: T) -> bool:
64
+ if super().__call__(entry):
65
+ return self.procedure(entry)
66
+ else:
67
+ return False
68
+
69
+
70
+
71
+ class RelationFilterProcedure[T1: Entry, T2: Entry](RelationFilter):
72
+ """ Allows custom filtering on the application level in addition to source
73
+ """
74
+ procedure: Callable[[Relation[T1, T2]], bool]
75
+
76
+ def __call__(self, relation: Relation[T1, T2]) -> bool:
77
+ if super().__call__(relation):
78
+ return self.procedure(relation)
79
+ else:
80
+ return False
81
+
82
+ def __init__(
83
+ self,
84
+ relation_type: type[Relation[T1, T2]],
85
+ filter_proc: Callable[[Relation[T1, T2]], bool]
86
+ ):
87
+ super().__init__(relation_type)
88
+ self.procedure = filter_proc
@@ -0,0 +1,141 @@
1
+ """ Methods for producing a ranking on an entry graph.
2
+ """
3
+ from collections import OrderedDict
4
+ from typing import Callable, Optional, TYPE_CHECKING
5
+
6
+ import rustworkx
7
+
8
+ from chemrecon.entrygraph.entrygraph import (
9
+ Edge, EntryGraph, SourceEdgeArtificial, SourceVertexArtificial, Vertex,
10
+ )
11
+ from chemrecon.schema import Entry, Relation
12
+
13
+ if TYPE_CHECKING:
14
+ from chemrecon.schema.procedural_relation_entrygraph import ProceduralRelationEG
15
+
16
+
17
+ class Scorer[T_rank: Entry]:
18
+ """ A scorer is a callable which takes an entrygraph and produces a ranking of the vertices according to
19
+ the parameters of the scorer.
20
+
21
+ The score of an entry is (informally) the probability that a random walk starting at one of the initial entries
22
+ of the entry graph will terminate at that entry.
23
+ The parameters of the random walk can be customized by specifying weights (probabilities) using a weight
24
+ function on entries and relations, which alters the probability of choosing a given path.
25
+ The default weight of all entries and relations is 1.
26
+ For example, if you do not trust a particular source, edges and vertices from that source can have their
27
+ weight reduced, making them count less in the scoring algorithm.
28
+
29
+ A damping factor, `alpha` can be specified.
30
+ With probability `1-alpha`, the random walk will choose to go to a random entry rather than continuing the walk.
31
+ Furthermore, a _decay_factor_ can be specified such that entries further away from the initial entries are given
32
+ lower scores.
33
+ A decay factor of `0` disables this adjustment. The default is `0.2`.
34
+
35
+ Scores are normalized such that the sum of scores is **1**, which allows comparing scores across entry graphs.
36
+
37
+ Formally, the scores are computed using the PageRank algorithm (https://en.wikipedia.org/wiki/PageRank),
38
+ starting from the initial vertices, and with dangling vertices pointing back to all initial vertices with
39
+ equal probability.
40
+
41
+ """
42
+ score_entry_type: type[T_rank]
43
+
44
+ entry_weight: Callable[[Entry], float]
45
+ relation_weight: Callable[[Relation], float]
46
+
47
+ # Algorithm parameters
48
+ alpha: float
49
+ decay_factor: float
50
+
51
+ def __init__(
52
+ self,
53
+ score_entry_type: type[Entry],
54
+ alpha: float = 0.85,
55
+ decay_factor: float = 0.2,
56
+ entry_weight: Optional[Callable[[Entry], float]] = None,
57
+ relation_weight: Optional[Callable[[Relation], float]] = None,
58
+ ):
59
+ """ Specify a scorer.
60
+ """
61
+ self.alpha = alpha
62
+ self.decay_factor = decay_factor
63
+
64
+ self.score_entry_type = score_entry_type
65
+
66
+ if entry_weight is not None:
67
+ self.entry_weight = entry_weight
68
+ else:
69
+ self.entry_weight = lambda e: 1
70
+
71
+ if relation_weight is not None:
72
+ self.relation_weight = relation_weight
73
+ else:
74
+ self.relation_weight = lambda r: 1
75
+
76
+ def __call__(self, entrygraph: EntryGraph) -> OrderedDict[T_rank, float]:
77
+ """ Produces a ranking of the entries of the type `score_entry_type`.
78
+ The result is an `OrderedDict`, with entries given in descending order of score.
79
+ """
80
+ from chemrecon.schema.procedural_relation_entrygraph import ProceduralRelationEG
81
+
82
+ g = entrygraph.g.copy()
83
+
84
+ # Add source vertex and connect to initial vertices
85
+ source_vertex_index = g.add_node(SourceVertexArtificial())
86
+ for init_v_index in entrygraph.initial_vertices:
87
+ g.add_edge(source_vertex_index, init_v_index, SourceEdgeArtificial())
88
+
89
+ # Get the weight for all edges
90
+ edge_weight_dict: dict[Edge, float] = dict()
91
+ for e_index in g.edge_indices():
92
+ e = g.get_edge_data_by_index(e_index)
93
+ source_index = g.get_edge_endpoints_by_index(e_index)[0]
94
+ if isinstance(e, Edge):
95
+ edge_weight_dict[e] = (
96
+ self.relation_weight(e.relation)
97
+ * self.entry_weight(g.get_node_data(source_index).entry)
98
+ )
99
+ else:
100
+ edge_weight_dict[e] = 1
101
+
102
+ # Modify edges by 'score' parameter of EG-based procedural relations
103
+ if isinstance(e, Edge):
104
+ if isinstance(e.relation, ProceduralRelationEG):
105
+ edge_weight_dict[e] *= e.relation.score
106
+
107
+ for e_index in g.edge_indices():
108
+ e = g.get_edge_data_by_index(e_index)
109
+
110
+ # Produce the initial score (ranking all vertices, not normalized)
111
+ init_scoring = rustworkx.pagerank(
112
+ g,
113
+ alpha = self.alpha,
114
+ weight_fn = lambda e_: edge_weight_dict.get(e_, 1),
115
+ dangling = {source_vertex_index: 1}, # Terminal nodes should loop back to source
116
+ personalization = {source_vertex_index: 1} # Random traversals go back to source
117
+ )
118
+
119
+ # Use only eligible entries (filter by relevant entry type)
120
+ scoring: dict[Vertex, float] = dict()
121
+ for v_idx, score in init_scoring.items():
122
+ v = g.get_node_data(v_idx)
123
+ if isinstance(v, Vertex) and isinstance(v.entry, self.score_entry_type):
124
+ scoring[v] = score
125
+
126
+ # Apply decay based on the generation
127
+ if self.decay_factor != 0:
128
+ for v, score in scoring.items():
129
+ v: Vertex
130
+ scoring[v] = score * ((1 - self.decay_factor)**v.generation)
131
+
132
+ # Normalise s.t. sum(scoring.values()) == 1.0
133
+ score_sum = sum(s for _, s in scoring.items())
134
+ for v, score in scoring.items():
135
+ scoring[v] = score / score_sum
136
+
137
+ # Finalise
138
+ return OrderedDict(
139
+ (k.entry, v) for k, v in
140
+ sorted(scoring.items(), key = lambda pair: pair[1], reverse = True)
141
+ )
@@ -0,0 +1,26 @@
1
+ """
2
+ This module contains functions useful for querying the database.
3
+ These functions are also available in the `chemrecon` scope.
4
+ """
5
+ from chemrecon.query.find_entry import (
6
+ find_entry,
7
+ find_compound_entry, find_reaction_entry, find_enzyme_entry,
8
+ find_structure_representation_entry, find_aam_representation_entry,
9
+ find_structure_entry, find_aam_entry
10
+ )
11
+
12
+ # Relation getters
13
+ from chemrecon.query.get_relations import (
14
+ get_relations_from_entry,
15
+ get_all_relations,
16
+ )
17
+
18
+ # from chemrecon.query.create_entry import (
19
+ # entry,
20
+ # compound_entry, reaction_entry, enzyme_entry,
21
+ # aam_representation_entry, structure_representation_entry,
22
+ # structure_entry, aam_entry,
23
+ # enzyme_from_ec_number,
24
+ # entry_from_identifiers_org
25
+ # )
26
+
@@ -0,0 +1,86 @@
1
+ # """ Contains methods to create entries to look up in the databases.
2
+ # """
3
+ #
4
+ #
5
+ # from chemrecon.core.id_types import (
6
+ # IdentifierType, IdentifierTypeCompound, IdentifierTypeReaction, IdentifierTypeStructureRepresentation,
7
+ # IdentifierTypeEnzyme, E_EC, IdentifierTypeAAM
8
+ # )
9
+ # from chemrecon.schema import (
10
+ # Entry, Compound, Enzyme, Reaction, MolStructureRepr, MolStructure,
11
+ # AAMRepr, AAM
12
+ # )
13
+ #
14
+ #
15
+ # # Direct creation of prototype entries
16
+ # # ----------------------------------------------------------------------------------------------------------------------
17
+ # def entry(id_type: IdentifierType, source_id: str) -> Entry:
18
+ # """ Create a 'prototype' entry, which may or may not correspond to an actual entry in the database.
19
+ # """
20
+ # match id_type:
21
+ # case IdentifierTypeCompound():
22
+ # return compound_entry(id_type, source_id)
23
+ # case IdentifierTypeReaction():
24
+ # return reaction_entry(id_type, source_id)
25
+ # case IdentifierTypeEnzyme():
26
+ # return enzyme_entry(id_type, source_id)
27
+ # case IdentifierTypeStructureRepresentation():
28
+ # return structure_representation_entry(id_type, source_id)
29
+ # case IdentifierTypeAAM():
30
+ # return aam_representation_entry(id_type, source_id)
31
+ # case _:
32
+ # # Not implemented
33
+ # raise NotImplementedError()
34
+ #
35
+ # def compound_entry(id_type: IdentifierTypeCompound, source_id: str) -> Compound:
36
+ # """ TODO docs
37
+ # """
38
+ # return Compound(id_type = id_type, source_id = id_type.std_identifier(source_id))
39
+ #
40
+ # def reaction_entry(id_type: IdentifierTypeReaction, source_id: str) -> Reaction:
41
+ # """ TODO docs
42
+ # """
43
+ # return Reaction(id_type = id_type, source_id = id_type.std_identifier(source_id))
44
+ #
45
+ # def enzyme_entry(id_type: IdentifierTypeEnzyme, source_id: str) -> Enzyme:
46
+ # """ TODO docs
47
+ # """
48
+ # return Enzyme(id_type = id_type, source_id = id_type.std_identifier(source_id))
49
+ #
50
+ # def structure_representation_entry(id_type: IdentifierTypeStructureRepresentation, source_id: str) -> MolStructureRepr:
51
+ # """ TODO docs
52
+ # """
53
+ # return MolStructureRepr(id_type = id_type, source_id = id_type.std_identifier(source_id))
54
+ #
55
+ # def aam_representation_entry(id_type: IdentifierTypeAAM, source_id: str) -> AAMRepr:
56
+ # """ TODO docs
57
+ # """
58
+ # return AAMRepr(id_type = id_type, source_id = id_type.std_identifier(source_id))
59
+ #
60
+ # def structure_entry(smiles: str) -> MolStructure:
61
+ # """ TODO docs
62
+ # """
63
+ # return MolStructure(smiles = smiles)
64
+ #
65
+ # def aam_entry(reaction_smiles: str) -> AAM:
66
+ # """ TODO docs
67
+ # """
68
+ # return AAM(reaction_smiles = reaction_smiles)
69
+ #
70
+ #
71
+ # # Special for ec
72
+ # def enzyme_from_ec_number(ec_number: str) -> Enzyme:
73
+ # """ Create a prototype enzyme entry from an EC number.
74
+ # """
75
+ # return Enzyme(id_type = E_EC, source_id = E_EC.std_identifier(ec_number))
76
+ #
77
+ #
78
+ # # Creation from identifiers.org strings
79
+ # # ----------------------------------------------------------------------------------------------------------------------
80
+ # def entry_from_identifiers_org(identifiers_org_string: str) -> Entry:
81
+ # """ Create an entry from an identifiers.org string. If the string is not valid for any types in the database, raise
82
+ # ValueError.
83
+ # """
84
+ # # TODO
85
+ # # TODO raise ValueError if invalid string
86
+ # raise NotImplementedError()
@@ -0,0 +1,57 @@
1
+ """ ChemRecon comes with a set of pre-defined exploration protocols for various purposes.
2
+ These are located in the chemrecon.query.default_protocols module.
3
+ We recommend looking in this file for inspiration on how to define custom protocols.
4
+ """
5
# Import from the package itself: the distributed wheel contains no top-level 'src' package.
from chemrecon import Direction
6
+ from chemrecon.entrygraph.filter import EntryFilterProcedure
7
+ from chemrecon.entrygraph.explorationprotocol import ExplorationProtocol
8
+ from chemrecon.schema.entry_types.compound import Compound
9
+ from chemrecon.schema.entry_types.molstructure_repr import MolStructureRepr
10
+ from chemrecon.schema.entry_types.molstructure import MolStructure
11
+ from chemrecon.schema.relation_types_composed.compound_has_molstructure_relation import CompoundHasMolStructure
12
+ from chemrecon.schema.relation_types_source.compound_reference_relation import CompoundReference
13
+ from chemrecon.schema.relation_types_source.molstructure_standardisation_relation import MolStructureStandardization
14
+
15
+ # TODO filters
16
+
17
+ # General
18
+ # ----------------------------------------------------------------------------------------------------------------------
19
+ #: The Compound-Structure protocol can be used to quickly gain an overview of the structural information
20
+ #: relating to a given compound.
21
+ #: The database compounds are traversed via the `CompoundReference` relation in order to expand the graph to include
22
+ #: other databases which contain the compound.
23
+ #: The `CompoundHasMolStructure` relation is then used to find the associated structure for each compound.
24
+ #: The `MolStructureStandardization` relation is used to standardize various properties of the structures, which can be
25
+ #: helpful in case the databases simply disagree on easy-to-standardize properties, such as charge or tautomerism.
26
protocol_compound_structure = ExplorationProtocol(
    relation_types = {
        (CompoundReference, Direction.SYMMETRIC),
        (CompoundHasMolStructure, Direction.FORWARDS),
        (MolStructureStandardization, Direction.FORWARDS),
    },
)

# Search graphs - for selection rather than manual inspection
# ----------------------------------------------------------------------------------------------------------------------
#: Protocol intended for structure selection. It currently explores the same relation types as
#: `protocol_compound_structure`; the filters that would distinguish it are not attached yet (see the TODOs).
protocol_select_structure = ExplorationProtocol(
    relation_types = {
        (CompoundReference, Direction.SYMMETRIC),
        (CompoundHasMolStructure, Direction.FORWARDS),
        (MolStructureStandardization, Direction.FORWARDS),
    },
)
43
+
44
+ # TODO - FILTER - disallow implicit Structures!
45
def _select_structure_entry_filter(e: MolStructureRepr) -> bool:
    """ Accept a structure representation only if it is not flagged as `implicit`. """
    return False if e.implicit else True
50
+
51
# Application-level filter which rejects implicit structure representations.
struct_repr_filter = EntryFilterProcedure[MolStructureRepr](filter_proc = _select_structure_entry_filter)
54
+
55
+
56
+ # TODO include some ontology relations, such as the old_id and new_id relations
57
+
@@ -0,0 +1,84 @@
1
+ """ Functions for finding entries given an index.
2
+ """
3
+ from typing import Optional
4
+
5
+ import chemrecon.connection as connection
6
+
7
+ from chemrecon import IdentifierType, Entry, IdentifierTypeCompound, IdentifierTypeReaction, IdentifierTypeEnzyme, \
8
+ IdentifierTypeStructureRepresentation, IdentifierTypeAAM
9
+ from chemrecon.schema.entry_types.aam import AAM
10
+ from chemrecon.schema.entry_types.aam_repr import AAMRepr
11
+ from chemrecon.schema.entry_types.compound import Compound
12
+ from chemrecon.schema.entry_types.enzyme import Enzyme
13
+ from chemrecon.schema.entry_types.reaction import Reaction
14
+ from chemrecon.schema.entry_types.molstructure_repr import MolStructureRepr
15
+ from chemrecon.schema.entry_types.molstructure import MolStructure
16
+
17
+
18
def find_entry(id_type: IdentifierType, source_id: str) -> Optional[Entry]:
    """ Look for an entry with the specified type and id in the connected Database.
    If not found, returns None.

    The lookup is dispatched on the class of `id_type` to the matching specialised finder
    (compound, reaction, enzyme, structure representation or AAM representation).

    :raises NotImplementedError: If `id_type` is none of the supported identifier type classes.
    """
    if isinstance(id_type, IdentifierTypeCompound):
        return find_compound_entry(id_type, source_id)
    if isinstance(id_type, IdentifierTypeReaction):
        return find_reaction_entry(id_type, source_id)
    if isinstance(id_type, IdentifierTypeEnzyme):
        return find_enzyme_entry(id_type, source_id)
    if isinstance(id_type, IdentifierTypeStructureRepresentation):
        return find_structure_representation_entry(id_type, source_id)
    if isinstance(id_type, IdentifierTypeAAM):
        return find_aam_representation_entry(id_type, source_id)

    # Not implemented
    raise NotImplementedError()
36
+
37
def find_compound_entry(id_type: IdentifierTypeCompound, source_id: str) -> Optional[Compound]:
    """ Look for a compound entry with the specified identifier type and id in the connected Database.
    If not found, returns None.

    A prototype `Compound` is built from the identifier type and the standardised form of `source_id`,
    and then looked up through the connection handler.
    """
    prototype = Compound(
        id_type = id_type.enum_type,
        source_id = id_type.std_identifier(source_id),
    )
    return connection.handler.get_entry_by_index(prototype)
43
+
44
def find_reaction_entry(id_type: IdentifierTypeReaction, source_id: str) -> Optional[Reaction]:
    """ Look for a reaction entry with the specified identifier type and id in the connected Database.
    If not found, returns None.

    A prototype `Reaction` is built from the identifier type and the standardised form of `source_id`,
    and then looked up through the connection handler.
    """
    prototype = Reaction(
        id_type = id_type.enum_type,
        source_id = id_type.std_identifier(source_id),
    )
    return connection.handler.get_entry_by_index(prototype)
50
+
51
def find_enzyme_entry(id_type: IdentifierTypeEnzyme, source_id: str) -> Optional[Enzyme]:
    """ Look for an enzyme entry with the specified identifier type and id in the connected Database.
    If not found, returns None.

    A prototype `Enzyme` is built from the identifier type and the standardised form of `source_id`,
    and then looked up through the connection handler.
    """
    prototype = Enzyme(
        id_type = id_type.enum_type,
        source_id = id_type.std_identifier(source_id),
    )
    return connection.handler.get_entry_by_index(prototype)
57
+
58
def find_structure_representation_entry(
        id_type: IdentifierTypeStructureRepresentation,
        source_id: str,
) -> Optional[MolStructureRepr]:
    """ Look for a structure representation entry with the specified identifier type and id in the
    connected Database.
    If not found, returns None.

    A prototype `MolStructureRepr` (with `implicit = False`) is built from the identifier type and the
    standardised form of `source_id`, and then looked up through the connection handler.
    """
    prototype = MolStructureRepr(
        id_type = id_type.enum_type,
        source_id = id_type.std_identifier(source_id),
        implicit = False,
    )
    return connection.handler.get_entry_by_index(prototype)
64
+
65
def find_aam_representation_entry(id_type: IdentifierTypeAAM, source_id: str) -> Optional[AAMRepr]:
    """ Look for an AAM representation entry with the specified identifier type and id in the connected
    Database.
    If not found, returns None.

    A prototype `AAMRepr` is built from the identifier type and the standardised form of `source_id`,
    and then looked up through the connection handler.
    """
    prototype = AAMRepr(
        id_type = id_type.enum_type,
        source_id = id_type.std_identifier(source_id),
    )
    return connection.handler.get_entry_by_index(prototype)
71
+
72
def find_structure_entry(smiles: str) -> Optional[MolStructure]:
    """ Look for a structure entry with the specified SMILES string in the connected Database.
    If not found, returns None.

    The SMILES string is passed on as given; a prototype `MolStructure` is built from it and looked up
    through the connection handler.
    """
    prototype = MolStructure(smiles = smiles)
    return connection.handler.get_entry_by_index(prototype)
78
+
79
def find_aam_entry(reaction_smiles: str) -> Optional[AAM]:
    """ Look for an AAM entry with the specified reaction SMILES string in the connected Database.
    If not found, returns None.

    The reaction SMILES string is passed on as given; a prototype `AAM` is built from it and looked up
    through the connection handler.
    """
    prototype = AAM(reaction_smiles = reaction_smiles)
    return connection.handler.get_entry_by_index(prototype)