chemrecon 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. chemrecon/__init__.py +73 -0
  2. chemrecon/chem/__init__.py +0 -0
  3. chemrecon/chem/chemreaction.py +223 -0
  4. chemrecon/chem/constant_compounds.py +3 -0
  5. chemrecon/chem/create_mol.py +91 -0
  6. chemrecon/chem/elements.py +141 -0
  7. chemrecon/chem/gml/__init__.py +0 -0
  8. chemrecon/chem/gml/gml.py +324 -0
  9. chemrecon/chem/gml/gml_reactant_matching.py +130 -0
  10. chemrecon/chem/gml/gml_to_rdk.py +217 -0
  11. chemrecon/chem/mol.py +483 -0
  12. chemrecon/chem/sumformula.py +120 -0
  13. chemrecon/connection.py +97 -0
  14. chemrecon/core/__init__.py +0 -0
  15. chemrecon/core/id_types.py +687 -0
  16. chemrecon/core/ontology.py +209 -0
  17. chemrecon/core/populate_query_handler.py +336 -0
  18. chemrecon/core/query_handler.py +587 -0
  19. chemrecon/database/__init__.py +1 -0
  20. chemrecon/database/connect.py +63 -0
  21. chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
  22. chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
  23. chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
  24. chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
  25. chemrecon/database/params.py +88 -0
  26. chemrecon/entrygraph/draw.py +119 -0
  27. chemrecon/entrygraph/entrygraph.py +301 -0
  28. chemrecon/entrygraph/explorationprotocol.py +199 -0
  29. chemrecon/entrygraph/explore.py +421 -0
  30. chemrecon/entrygraph/explore_procedure.py +183 -0
  31. chemrecon/entrygraph/filter.py +88 -0
  32. chemrecon/entrygraph/scoring.py +141 -0
  33. chemrecon/query/__init__.py +26 -0
  34. chemrecon/query/create_entry.py +86 -0
  35. chemrecon/query/default_protocols.py +57 -0
  36. chemrecon/query/find_entry.py +84 -0
  37. chemrecon/query/get_relations.py +143 -0
  38. chemrecon/query/get_structures_from_compound.py +65 -0
  39. chemrecon/schema/__init__.py +86 -0
  40. chemrecon/schema/db_object.py +363 -0
  41. chemrecon/schema/direction.py +10 -0
  42. chemrecon/schema/entry_types/__init__.py +0 -0
  43. chemrecon/schema/entry_types/aam.py +34 -0
  44. chemrecon/schema/entry_types/aam_repr.py +37 -0
  45. chemrecon/schema/entry_types/compound.py +52 -0
  46. chemrecon/schema/entry_types/enzyme.py +49 -0
  47. chemrecon/schema/entry_types/molstructure.py +64 -0
  48. chemrecon/schema/entry_types/molstructure_repr.py +41 -0
  49. chemrecon/schema/entry_types/reaction.py +57 -0
  50. chemrecon/schema/enums.py +154 -0
  51. chemrecon/schema/procedural_relation_entrygraph.py +66 -0
  52. chemrecon/schema/relation_types_composed/__init__.py +0 -0
  53. chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
  54. chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
  55. chemrecon/schema/relation_types_procedural/__init__.py +0 -0
  56. chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
  57. chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
  58. chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
  59. chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
  60. chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
  61. chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
  62. chemrecon/schema/relation_types_source/__init__.py +0 -0
  63. chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
  64. chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
  65. chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
  66. chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
  67. chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
  68. chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
  69. chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
  70. chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
  71. chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
  72. chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
  73. chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
  74. chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
  75. chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
  76. chemrecon/scripts/initialize_database.py +494 -0
  77. chemrecon/utils/copy_signature.py +10 -0
  78. chemrecon/utils/encodeable_list.py +11 -0
  79. chemrecon/utils/get_id_type.py +70 -0
  80. chemrecon/utils/hungarian.py +31 -0
  81. chemrecon/utils/reactant_matching.py +168 -0
  82. chemrecon/utils/rxnutils.py +44 -0
  83. chemrecon/utils/set_cwd.py +12 -0
  84. chemrecon-0.1.1.dist-info/METADATA +143 -0
  85. chemrecon-0.1.1.dist-info/RECORD +86 -0
  86. chemrecon-0.1.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,143 @@
1
+ """ Contains functions to find relations (including procedural) related to an entry.
2
+ """
3
+ from typing import DefaultDict
4
+
5
+ import chemrecon.schema
6
+
7
+ from chemrecon import Entry, Relation, ProceduralRelation, ComposedRelation
8
+ from chemrecon.schema.direction import Direction
9
+ from chemrecon.entrygraph.entrygraph import EntryGraph, Edge
10
+ from chemrecon.entrygraph.explore import explore
11
+ from chemrecon.schema.procedural_relation_entrygraph import ProceduralRelationEG
12
+ from chemrecon.entrygraph.explorationprotocol import ExplorationProtocol
13
+
14
+ import chemrecon.connection as globals
15
+
16
+ # Relation lists for each entry type
17
+ # ----------------------------------------------------------------------------------------------------------------------
18
# For every entry type: the (relation type, direction, single-relation protocol) triples which can be
# explored when starting from an entry of that type.
_entrytype_relation_types: dict[
    type[Entry],
    list[tuple[type[Relation], Direction, ExplorationProtocol]]
] = DefaultDict(list)

for reltype in chemrecon.schema.relationtypes:
    # Work out from which entry type(s), and in which direction(s), this relation can be followed.
    if reltype.symmetric:
        # Symmetric relations are registered once, under their source entry type.
        _starting_points = [(reltype.source_entrytype, Direction.SYMMETRIC)]
    else:
        _starting_points = [(reltype.source_entrytype, Direction.FORWARDS)]
        if not issubclass(reltype, ProceduralRelation):
            # Procedural relations get no backwards explorer.
            _starting_points.append((reltype.target_entrytype, Direction.BACKWARDS))

    for _start_type, _direction in _starting_points:
        _entrytype_relation_types[_start_type].append(
            (
                reltype,
                _direction,
                ExplorationProtocol(relation_types = {(reltype, _direction)}),
            )
        )
48
+
49
+ # Getter functions
50
+ # ----------------------------------------------------------------------------------------------------------------------
51
+ def get_relations_from_entry[R: Relation](
52
+ entry: Entry,
53
+ relation_type: type[R],
54
+ get_backward_relations: bool = True
55
+ ) -> list[tuple[R, Entry]]:
56
+ """ Get relations of a given type with the specified entry as the source.
57
+ """
58
+ # TODO support for get_backward_relations !
59
+ # TODO entrygraph-based method will save procedural relations to the DB!
60
+ # TODO better generics for this!
61
+
62
+ if entry.recon_id is None:
63
+ # Get recon_id first
64
+ index_result = globals.handler.get_entry_by_index(entry)
65
+ if not index_result:
66
+ raise ValueError(f'Given entry: {entry} not found in database.')
67
+
68
+ res = globals.handler.get_relations_with_entries_by_recon_ids(
69
+ entry_type = type(entry),
70
+ recon_ids = [entry.recon_id],
71
+ relation_type = relation_type
72
+ )
73
+ out_list: list[tuple[Relation, Entry]] = list()
74
+ for target, relations in res[0].items():
75
+ for rel in relations:
76
+ out_list.append((rel, target))
77
+ return out_list
78
+
79
def get_all_relations(
        entry: Entry,
        get_backward_relations: bool = True,
        get_transitive_relations: bool = False,
        get_procedural_relations: bool = False,
        get_procedural_eg_relations: bool = False
) -> list[tuple[Relation, Entry]]:
    """ Get all relations of a given entry.
    Note: Procedural relations cannot be explored backwards.

    :param entry: The entry to start from. If it has no `recon_id`, it is first looked up in the
        database through the global query handler.
    :param get_backward_relations: Also follow non-symmetric relations from target to source.
    :param get_transitive_relations: Include relation types deriving from `ComposedRelation`.
    :param get_procedural_relations: Include relation types deriving from `ProceduralRelation`
        (other than the entrygraph-based ones, which have their own flag).
    :param get_procedural_eg_relations: Include relation types deriving from `ProceduralRelationEG`.
    :return: A list of `(relation, neighbouring entry)` pairs found one exploration step from `entry`.
    :raises ValueError: If the entry has no `recon_id` and the database lookup finds nothing.
    """
    # TODO change to entrygraph-based method to save procedural relations to the DB!
    # TODO better generics for this!

    if entry.recon_id is None:
        # Get recon_id first
        # NOTE(review): only the truthiness of the lookup is used here - presumably the handler
        # fills in `recon_id` on `entry`; confirm.
        index_result = globals.handler.get_entry_by_index(entry)
        if not index_result:
            raise ValueError(f'Given entry: {entry} not found in database.')

    entry_type = type(entry)

    # Collect the (relation type, direction) pairs to explore
    relation_types: set[tuple[type[Relation], Direction]] = set()
    for r_type, _, _ in _entrytype_relation_types[entry_type]:

        # Skip relation kinds which were not requested
        if issubclass(r_type, ComposedRelation) and not get_transitive_relations:
            continue
        if issubclass(r_type, ProceduralRelationEG):
            # Checked before the generic procedural case so that each flag governs only its own kind,
            # whether or not ProceduralRelationEG derives from ProceduralRelation.
            if not get_procedural_eg_relations:
                continue
        elif issubclass(r_type, ProceduralRelation):
            if not get_procedural_relations:
                continue
        if (not get_backward_relations) and (entry_type is not r_type.source_entrytype):
            continue

        # Add to specification
        if r_type.symmetric:
            relation_types.add((r_type, Direction.SYMMETRIC))
        else:
            if r_type.source_entrytype == entry_type:
                relation_types.add((r_type, Direction.FORWARDS))
            if get_backward_relations and r_type.target_entrytype == entry_type:
                relation_types.add((r_type, Direction.BACKWARDS))

    protocol_all = ExplorationProtocol(
        relation_types = relation_types,
    )

    # Create an EntryGraph and explore to depth 1
    eg = EntryGraph(
        initial_entries = {entry}
    )
    explore(eg, protocol = protocol_all, steps = 1)

    # Get out-edges of a node (vertex with index 0 is the starting node)
    res = eg.get_out_edges_of_vertex(0)  # list of (Edge, Vertex)
    return [
        (edge.relation, vertex.entry)
        for edge, vertex in res if isinstance(edge, Edge)  # Filter out artificial edges
    ]
@@ -0,0 +1,65 @@
1
+ from collections import OrderedDict
2
+
3
+ from src.chemrecon import Direction
4
+ from chem.mol import MolTemplate, Mol
5
+ from chem.create_mol import mol_from_struct_entry
6
+ from chemrecon.entrygraph.explore import explore
7
+ from chemrecon.entrygraph.scoring import Scorer
8
+ from chemrecon.schema import (
9
+ Compound, MolStructure
10
+ )
11
+ from chemrecon.schema.relation_types_procedural.compound_select_structure_proceduralrelation import \
12
+ CompoundSelectStructure
13
+ from chemrecon.entrygraph.explorationprotocol import ExplorationProtocol
14
+
15
+
16
def get_structures_from_compound(
        entries: Compound | list[Compound],
        consider_first_entry_primary: bool = False,
) -> OrderedDict[MolTemplate, float]:
    """ Intended to return scored molecular structures for one or more compounds.

    Not implemented yet: the function raises `NotImplementedError` unconditionally, and everything
    after the `raise` is an unreachable draft kept for the pending rewrite.

    :param entries: A single compound or a list of compounds.
    :param consider_first_entry_primary: Currently unused (see TODO in the draft).
    :raises NotImplementedError: Always.
    """
    # TODO update with new protocol syntax
    raise NotImplementedError()

    # ---- Unreachable draft below ----
    # Normalise the input to a set of compounds
    if isinstance(entries, Compound):
        entry_set: set[Compound] = {entries}
    elif isinstance(entries, list):
        entry_set = set(entries)
    else:
        raise ValueError()

    # Construct entrygraph and run ranking  # TODO set first entry as primary if specified!
    try:
        eg = EG_Structure(
            initial_entries = entry_set
        )
    except ValueError as e:
        return OrderedDict()
    explore(entrygraph = eg, steps = 2)

    # Score, skipping structure entries which cannot be converted (AttributeError)
    scoring_out: OrderedDict[Mol, float] = OrderedDict()
    for struct_entry, score in scorer_default(eg).items():
        try:
            scoring_out[mol_from_struct_entry(struct_entry)] = score
        except AttributeError:
            continue

    return scoring_out
52
+
53
+
54
+ # Specification
55
# Protocol: start from compounds and follow CompoundSelectStructure forwards to molecular structures.
EG_Structure = ExplorationProtocol(
    entry_types = {Compound, MolStructure},
    relation_types = {(CompoundSelectStructure, Direction.FORWARDS)},
    entry_types_initial = {Compound},
)

# Default scorer, configured to score MolStructure entries.
scorer_default = Scorer[MolStructure](score_entry_type = MolStructure)
@@ -0,0 +1,86 @@
1
+ """ Re-exports all concrete schema objects
2
+ """
3
+ from __future__ import annotations
4
+
5
+ from chemrecon.schema.db_object import (Entry, SourceEntry, Relation, DatabaseObject, Column,
6
+ ProceduralRelation, ProceduralGeneratorError, ComposedRelation)
7
+
8
+ from chemrecon.schema.enums import *
9
+
10
+ # Re-export entry types
11
+ from chemrecon.schema.entry_types.aam import AAM
12
+ from chemrecon.schema.entry_types.aam_repr import AAMRepr
13
+ from chemrecon.schema.entry_types.compound import Compound
14
+ from chemrecon.schema.entry_types.enzyme import Enzyme
15
+ from chemrecon.schema.entry_types.reaction import Reaction
16
+ from chemrecon.schema.entry_types.molstructure_repr import MolStructureRepr
17
+ from chemrecon.schema.entry_types.molstructure import MolStructure
18
+
19
+ # Re-export relation types (with corresponding inverses, defined in the same file as the main)
20
+ from chemrecon.schema.relation_types_source.aam_involves_molstructure_relation import *
21
+ from chemrecon.schema.relation_types_source.aam_repr_involves_molstructure_repr_relation import *
22
+ from chemrecon.schema.relation_types_procedural.aam_convert_relation import *
23
+ from chemrecon.schema.relation_types_composed.compound_has_molstructure_relation import *
24
+ from chemrecon.schema.relation_types_source.compound_has_structure_representation_relation import *
25
+ from chemrecon.schema.relation_types_source.compound_reference_relation import *
26
+ from chemrecon.schema.relation_types_composed.reaction_has_aam_relation import *
27
+ from chemrecon.schema.relation_types_source.reaction_has_aam_representation_relation import *
28
+ from chemrecon.schema.relation_types_source.reaction_has_enzyme_relation import *
29
+ from chemrecon.schema.relation_types_source.reaction_involves_compound_relation import *
30
+ from chemrecon.schema.relation_types_source.reaction_reference_relation import *
31
+ from chemrecon.schema.relation_types_procedural.molstructure_convert_relation import *
32
+ from chemrecon.schema.relation_types_source.molstructure_standardisation_relation import *
33
+
34
+ # Ontology
35
+ from chemrecon.schema.relation_types_source.ontology.compound_ontology import *
36
+ from chemrecon.schema.relation_types_source.ontology.reaction_ontology import *
37
+ from chemrecon.schema.relation_types_source.ontology.enzyme_ontology import *
38
+
39
+ # Procedural relation types, import only the relation, not the protocol
40
+ from chemrecon.schema.relation_types_procedural.compound_select_structure_proceduralrelation import (
41
+ CompoundSelectStructure
42
+ )
43
+ from chemrecon.schema.relation_types_procedural.reaction_select_aam_proceduralrelation import (
44
+ ReactionSelectAAM
45
+ )
46
+
47
+ # Export lists
48
# All concrete entry types re-exported by this package.
entrytypes: list[type[Entry]] = [
    Compound,
    MolStructureRepr,
    MolStructure,
    Reaction,
    Enzyme,
    AAMRepr,
    AAM,
]

# All concrete relation types re-exported by this package.
relationtypes: list[type[Relation]] = [
    # Main relations
    CompoundReference,
    CompoundHasStructureRepresentation,
    CompoundHasMolStructure,
    MolStructureConvert,
    MolStructureStandardization,
    ReactionReference,
    ReactionInvolvesCompound,
    CompoundParticipatesInReaction,
    ReactionHasEnzyme,
    ReactionHasAAMRepr,
    ReactionHasAAM,
    AAMReprInvolvesMolStructureRepr,
    AAMConvert,
    AAMInvolvesMolStructure,

    # Ontology, Compound
    CompoundIsA,
    CompoundHasInstance,
    CompoundHasNewID,
    CompoundHasOldID,
    CompoundHasPart,
    CompoundIsPartOf,
    CompoundHasConjugateAcid,
    CompoundHasConjugateBase,
    CompoundHasTautomer,
    CompoundHasStereoIsomer,
    CompoundHasIsotopologue,

    # Ontology, Reaction
    ReactionIsA,
    ReactionHasInstance,
    ReactionHasNewID,
    ReactionHasOldID,

    # Ontology, Enzyme
    EnzymeIsA,
    EnzymeHasInstance,
    EnzymeHasNewID,
    EnzymeHasOldID,

    # EntryGraph Procedural
    CompoundSelectStructure,
    ReactionSelectAAM,
]
@@ -0,0 +1,363 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from enum import Enum
5
+ from typing import Any, ClassVar, Optional, OrderedDict
6
+
7
+ from chemrecon.core.id_types import IdentifierType
8
+ from chemrecon.schema.enums import Quality, SourceDatabase
9
+
10
+
11
class DatabaseObject(ABC):
    """ Common root of everything stored in the database, i.e. both entries and relations.

    Concrete subclasses are expected to set the class-level names declared below.
    """
    entrytype_name: ClassVar[str]
    _table_name: ClassVar[str]

    @classmethod
    def get_table_name(cls) -> str:
        """ Return the class-level `_table_name` of this object type. """
        table_name: str = cls._table_name
        return table_name
20
+
21
+
22
class Column:
    """ Description of a single database column.
    """
    name: str
    col_type: type
    serial: bool      # If numeric, whether serial
    index_hash: bool  # Whether this should be hashed if used in an index.

    def __init__(self, name: str, col_type: type, serial: bool = False, index_hash: bool = False):
        """ Store the column name, its Python type and the two flags as given. """
        self.name, self.col_type = name, col_type
        self.serial, self.index_hash = serial, index_hash

    def __repr__(self):
        return 'col: {}'.format(self.name)
38
+
39
+
40
+ # Entry abstract base class
41
+ # --------------------------------------------------------------------------------------------------------------
42
class Entry(DatabaseObject, ABC):
    """ Generic base class for entries.

    Subclasses provide the class-level `_columns`, `_index` and `_draw_colour`, and implement `_vis_str`.
    Two entries of the same type are equal if their `recon_id` match (when both have one), otherwise if
    their index columns match. The hash is computed from the index columns only.
    """
    # Attributes

    #: Internal identifier in the *ChemRecon* database.
    #: Normally, entries have a nonnegative `recon_id`, unique to the table.
    #: A negative `recon_id` indicates that the object is 'virtual', that is, it was created by a procedural relation,
    #: and does not exist in the database.
    #: A `recon_id` of `None` indicates that the entry is not stored in the database.
    recon_id: Optional[int]

    # Database
    _columns: ClassVar[list[Column]]  # List of columns. The zeroth (recon_id) will be considered the PK.
    _index: ClassVar[list[int]]  # List of the columns on which to create a (possibly hashed) index.

    # For visualization
    _draw_colour: ClassVar[str]

    def get_columns_with_values(self, include_recon_id = True) -> dict[Column, Any]:
        """ Get the columns of this entry with values, in column order.

        :param include_recon_id: If false, the `recon_id` column is left out.
        """
        return OrderedDict(
            (c, self.__getattribute__(c.name)) for c in self._columns if
            (c is not col_recon_id) or include_recon_id
        )

    def get_index_columns_with_values(self) -> dict[Column, Any]:
        """ Get the index columns of this entry with values. """
        return OrderedDict(
            (c, self.__getattribute__(c.name)) for c in self.get_index_columns()
        )

    # Methods for database interaction
    @classmethod
    def get_columns(cls, include_recon_id = True) -> list[Column]:
        """ Get the columns of this entry type, optionally without the leading `recon_id` column. """
        if include_recon_id:
            return cls._columns
        else:
            return cls._columns[1:]  # Assuming recon_id is columns[0]

    @classmethod
    def get_index_indices(cls) -> list[int]:
        """ Get the positions (in the column list) of the index columns. """
        return cls._index

    @classmethod
    def get_index_columns(cls) -> list[Column]:
        """ Get the columns which make up the index. """
        return [cls._columns[i] for i in cls._index]

    # Misc
    def __init__(self, recon_id: Optional[int] = None):
        super().__init__()
        self.recon_id = recon_id

    def __repr__(self):
        # Only show attributes which carry a value
        attr_cols: str = ', '.join(
            f'{c.name}: {v}'
            for c, v in self.get_columns_with_values(include_recon_id = False).items()
            if (v is not None) and (v != [])
        )
        # Test against None explicitly: 0 is a valid (nonnegative) recon_id and must not be shown as '-'.
        shown_id = '-' if self.recon_id is None else self.recon_id
        return f'<{self._table_name} {shown_id}: {attr_cols}>'

    # Comparison and identity
    def __eq__(self, other: Entry):
        if type(self) is not type(other):
            return False
        elif self.recon_id is not None and other.recon_id is not None:
            # Compare recon_id if applicable
            return self.recon_id == other.recon_id
        else:
            # Else, compare by index columns
            return all(
                self.__getattribute__(col.name) == other.__getattribute__(col.name)
                for col in self.get_index_columns()
            )

    def __hash__(self):
        # NOTE(review): consistent with __eq__ only as long as entries sharing a recon_id also share
        # their index column values - confirm this invariant holds.
        return hash(tuple(self.__getattribute__(col.name) for col in self.get_index_columns()))

    def __lt__(self, other: Entry):
        # Compare by recon_id if both have one, otherwise by the remaining column values
        if (self.recon_id is not None) and (other.recon_id is not None):
            return self.recon_id.__lt__(other.recon_id)
        else:
            # At least one recon_id is None here. Leave recon_id out of the compared tuples, so that a
            # stored and an unstored entry do not end up comparing None with an int (TypeError).
            return tuple(self.get_columns_with_values(include_recon_id = False).values()) < tuple(
                other.get_columns_with_values(include_recon_id = False).values())

    @classmethod
    def get_supertype_of_id_types(cls) -> Optional[type[IdentifierType]]:
        """ Get the declared type of the `id_type` column, or `None` if there is no such column.

        :raises ValueError: If the `id_type` column's type is not a subclass of `IdentifierType`.
        """
        for c in cls.get_columns():
            if c.name == 'id_type':
                if not issubclass(c.col_type, IdentifierType):
                    raise ValueError('Error in identifier type assignment of column.')
                return c.col_type
        return None

    # Visualisation
    @abstractmethod
    def _vis_str(self) -> str:
        """ Get the string of the primary information for visualization.
        """
        pass

    def _vis_attrs(self) -> dict[str, str]:
        """ Define extra attributes used in visualization.
        """
        return {
            'fillcolor': f'"#{self._draw_colour}"'
        }

    # Encoding (for memoization/caching)
    def encode(self):
        """ Encode the entry as the bytes of its stringified `recon_id`. """
        return str(self.recon_id).encode()

    def serialise(self, index_only: bool = False) -> dict:
        """ Build a dict from column name to serialised value.

        :param index_only: If true, only the index columns are included.
        """
        if index_only:
            cols = self.get_index_columns_with_values()
        else:
            cols = self.get_columns_with_values()
        return {col.name: serialise_col(col, val) for col, val in cols.items()}
165
+
166
+
167
+ # Entry as a database stand-in
168
class SourceEntry(Entry, ABC):
    """ Entry standing in for a record of one of the source databases.
    It is identified by an identifier (`source_id`) together with the type of that identifier (`id_type`).
    """
    source_id: str
    id_type: Enum

    def _vis_str(self) -> str:
        # Shown as '<identifier type name>: <identifier>'
        return '{}: {}'.format(self.id_type.name, self.source_id)
176
+
177
+
178
+ # Relation
179
+ # --------------------------------------------------------------------------------------------------------------
180
+ class Relation[T1, T2](DatabaseObject, ABC):
181
+ """ Generic base class for relations between entries.
182
+ """
183
+ # Attributes
184
+ recon_id_1: Optional[int] #: Recon ID of source
185
+ recon_id_2: Optional[int] #: Recon ID of target
186
+ entry_1: Optional[T1] # Entries on either end. Used on prototypes, and can also be fetched via the db.
187
+ entry_2: Optional[T2]
188
+
189
+ # Database
190
+ ignore_generation_limit: ClassVar[bool] = False # If true, will continue exploring after generation limit
191
+ symmetric: ClassVar[bool] # If symmetric, will always have recon_id_1 <= recon_id_2
192
+
193
+ # Set if this is the main table of another inverse relation
194
+ has_inverse: ClassVar[Optional[type[Relation]]] #: :meta private:
195
+
196
+ _attribute_columns: ClassVar[list[Column]]
197
+ _index: ClassVar[list[int]] # List of col index in attr list on which to create an index.
198
+
199
+ source_entrytype: ClassVar[type[Entry]] #: :meta private:
200
+ target_entrytype: ClassVar[type[Entry]] #: :meta private:
201
+
202
+ # Methods for database interaction
203
+ @classmethod
204
+ def get_attribute_columns(cls) -> list[Column]:
205
+ # Note: does not include source/target columns
206
+ return cls._attribute_columns
207
+
208
+ @classmethod
209
+ def get_columns(cls, include_recon_ids: bool = True) -> list[Column]:
210
+ if include_recon_ids:
211
+ return [col_recon_id_1, col_recon_id_2, *cls._attribute_columns]
212
+ else:
213
+ return cls._attribute_columns
214
+
215
+ @classmethod
216
+ def get_index_indices(cls) -> list[int]:
217
+ return cls._index
218
+
219
+ @classmethod
220
+ def get_index_columns(cls) -> list[Column]:
221
+ return [col_recon_id_1, col_recon_id_2, *[
222
+ cls._attribute_columns[i]
223
+ for i in cls._index
224
+ ]]
225
+
226
+ def get_columns_with_values(self, include_recon_ids: bool = True) -> dict[Column, Any]:
227
+ """ Get the columns of this entry with values. """
228
+ return OrderedDict(
229
+ (c, self.__getattribute__(c.name)) for c in self.get_columns(include_recon_ids)
230
+ )
231
+
232
+ def get_index_columns_with_values(self) -> dict[Column, Any]:
233
+ """ Get the index (primary key) columns of this entry with values.
234
+ This always includes `recon_id_1` and `recon_id_2`, and may include attribute columns.
235
+ """
236
+ return OrderedDict(
237
+ (c, self.__getattribute__(c.name)) for c in self.get_index_columns()
238
+ )
239
+
240
+ @classmethod
241
+ def get_entry_table_names(cls) -> tuple[str, str]:
242
+ return cls.source_entrytype.get_table_name(), cls.target_entrytype.get_table_name()
243
+
244
+ # Misc
245
+ def __init__(
246
+ self,
247
+ recon_id_1: Optional[int] = None,
248
+ recon_id_2: Optional[int] = None,
249
+ ):
250
+ super().__init__()
251
+ self.recon_id_1, self.recon_id_2 = recon_id_1, recon_id_2
252
+
253
+ def __repr__(self):
254
+ arrow = '<->' if self.symmetric else '->'
255
+ attr_cols: str = ', '.join(
256
+ f'{c.name}: {v}'
257
+ for c, v in self.get_columns_with_values(include_recon_ids = False).items()
258
+ )
259
+ return f'<{self._table_name} {self.recon_id_1} {arrow} {self.recon_id_2}] {attr_cols}>'
260
+
261
+ # Visualisation
262
+ def _vis_attrs(self) -> dict[str, str]:
263
+ """ Define extra attributes used in visualization.
264
+ """
265
+ return dict()
266
+
267
+ def _vis_str(self) -> str:
268
+ """ Define extra attributes
269
+ """
270
+ return ''
271
+
272
+ # Inverse relations
273
+ class InverseRelation[T1: Entry, T2: Entry](Relation[T1, T2], ABC):
274
+ """ Represents the inverse of another (main) relation.
275
+ The main relation should have the has_inverse tag set.
276
+ Exists in the database only as views of the main relation.
277
+ """
278
+ inverse_main_relation: ClassVar[type[Relation]] #: :meta private:
279
+
280
+ # Check that attribute columns are the same as for the main relation
281
+
282
+
283
+ # Procedural Relations
284
+ class ProceduralRelation[T1: Entry, T2: Entry](Relation[T1, T2], ABC):
285
+ """ Procedural relations are not stored in the database but computed at runtime.
286
+ Other than this, they have the same interface as normal relations.
287
+ Some can be computed only "one-way" (e.g. a compound can be standardized, but not un-standardized)
288
+ """
289
+
290
+ @classmethod
291
+ @abstractmethod
292
+ def generate(
293
+ cls,
294
+ take_entry: T1,
295
+ ) -> list[tuple[ProceduralRelation[T1, T2], T2]]:
296
+ """ Given a T1, generate relations from that.
297
+ """
298
+ raise NotImplementedError()
299
+
300
+
301
class ProceduralGeneratorError(Exception):
    """ Error type which generator methods should raise.
    """
305
+
306
+
307
+ # Composed Relations
308
+ class ComposedRelation[T1, T2, T_intermediate](Relation[T1, T2], ABC):
309
+ # Composed of two other relations
310
+ rel_type_1: ClassVar[type[Relation]] #: :meta private:
311
+ rel_type_2: ClassVar[type[Relation]] #: :meta private:
312
+ intermediate_entrytype: ClassVar[type[Entry]] #: :meta private:
313
+
314
+ # Init based on the given relations and intermediate
315
+ @abstractmethod
316
+ def __init__(
317
+ self,
318
+ rel_1: Relation[T1, T_intermediate],
319
+ rel_2: Relation[T_intermediate, T2],
320
+ intermediate: T_intermediate,
321
+ recon_id_1: Optional[int] = None,
322
+ recon_id_2: Optional[int] = None,
323
+ ):
324
+ """ Init for composed relations should be overriden to set attributes of the composed relation based on the
325
+ attributes of relations and entries used to create it.
326
+ """
327
+ super().__init__(recon_id_1, recon_id_2)
328
+
329
+ # Filter functions on the composed edges, which determine whether to create the procedural relations
330
+ @classmethod
331
+ def filter_rel_1(cls, r: Relation[T1, T_intermediate]) -> bool:
332
+ return True
333
+
334
+ @classmethod
335
+ def filter_intermediate(cls, e: T_intermediate) -> bool:
336
+ return True
337
+
338
+ @classmethod
339
+ def filter_rel_2(cls, r: Relation[T_intermediate, T2]) -> bool:
340
+ return True
341
+
342
+
343
+ # Predefined columns
344
+ # ----------------------------------------------------------------------------------------------------------------------
345
# Identifier columns
col_recon_id = Column('recon_id', int, serial = True)
col_recon_id_1 = Column('recon_id_1', int)
col_recon_id_2 = Column('recon_id_2', int)

# Source identifier, in a plain variant and in one which is hashed when used in an index
col_source_id = Column('source_id', str)
col_source_id_hashed = Column('source_id', str, index_hash = True)

# Common attribute columns
col_name = Column('name', str)
col_src = Column('src', SourceDatabase)
col_quality = Column('quality', Quality)
col_properties = Column('properties', list[str])
col_score = Column('score', float)
355
+
356
+
357
+ # Util
358
+ # ----------------------------------------------------------------------------------------------------------------------
359
def serialise_col(col: Column, value: Any) -> Any:
    """ Serialise a single column value: enum members become their name, anything else is returned as-is.

    :param col: The column the value belongs to (not used by the conversion).
    :param value: The value to serialise.
    """
    return value.name if isinstance(value, Enum) else value
@@ -0,0 +1,10 @@
1
+ from __future__ import annotations
2
+
3
+ from enum import Enum
4
+
5
+
6
class Direction(Enum):
    """ The direction in which a relation is followed.
    """
    FORWARDS = 1   #: From source to target.
    BACKWARDS = 2  #: From target to source.
    BOTH = 3       #: Both of the above.
    SYMMETRIC = 4  #: To be used for symmetric relations.
File without changes