chemrecon 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemrecon/__init__.py +73 -0
- chemrecon/chem/__init__.py +0 -0
- chemrecon/chem/chemreaction.py +223 -0
- chemrecon/chem/constant_compounds.py +3 -0
- chemrecon/chem/create_mol.py +91 -0
- chemrecon/chem/elements.py +141 -0
- chemrecon/chem/gml/__init__.py +0 -0
- chemrecon/chem/gml/gml.py +324 -0
- chemrecon/chem/gml/gml_reactant_matching.py +130 -0
- chemrecon/chem/gml/gml_to_rdk.py +217 -0
- chemrecon/chem/mol.py +483 -0
- chemrecon/chem/sumformula.py +120 -0
- chemrecon/connection.py +97 -0
- chemrecon/core/__init__.py +0 -0
- chemrecon/core/id_types.py +687 -0
- chemrecon/core/ontology.py +209 -0
- chemrecon/core/populate_query_handler.py +336 -0
- chemrecon/core/query_handler.py +587 -0
- chemrecon/database/__init__.py +1 -0
- chemrecon/database/connect.py +63 -0
- chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
- chemrecon/database/params.py +88 -0
- chemrecon/entrygraph/draw.py +119 -0
- chemrecon/entrygraph/entrygraph.py +301 -0
- chemrecon/entrygraph/explorationprotocol.py +199 -0
- chemrecon/entrygraph/explore.py +421 -0
- chemrecon/entrygraph/explore_procedure.py +183 -0
- chemrecon/entrygraph/filter.py +88 -0
- chemrecon/entrygraph/scoring.py +141 -0
- chemrecon/query/__init__.py +26 -0
- chemrecon/query/create_entry.py +86 -0
- chemrecon/query/default_protocols.py +57 -0
- chemrecon/query/find_entry.py +84 -0
- chemrecon/query/get_relations.py +143 -0
- chemrecon/query/get_structures_from_compound.py +65 -0
- chemrecon/schema/__init__.py +86 -0
- chemrecon/schema/db_object.py +363 -0
- chemrecon/schema/direction.py +10 -0
- chemrecon/schema/entry_types/__init__.py +0 -0
- chemrecon/schema/entry_types/aam.py +34 -0
- chemrecon/schema/entry_types/aam_repr.py +37 -0
- chemrecon/schema/entry_types/compound.py +52 -0
- chemrecon/schema/entry_types/enzyme.py +49 -0
- chemrecon/schema/entry_types/molstructure.py +64 -0
- chemrecon/schema/entry_types/molstructure_repr.py +41 -0
- chemrecon/schema/entry_types/reaction.py +57 -0
- chemrecon/schema/enums.py +154 -0
- chemrecon/schema/procedural_relation_entrygraph.py +66 -0
- chemrecon/schema/relation_types_composed/__init__.py +0 -0
- chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
- chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
- chemrecon/schema/relation_types_procedural/__init__.py +0 -0
- chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
- chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
- chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
- chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
- chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
- chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
- chemrecon/schema/relation_types_source/__init__.py +0 -0
- chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
- chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
- chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
- chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
- chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
- chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
- chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
- chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
- chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
- chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
- chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
- chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
- chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
- chemrecon/scripts/initialize_database.py +494 -0
- chemrecon/utils/copy_signature.py +10 -0
- chemrecon/utils/encodeable_list.py +11 -0
- chemrecon/utils/get_id_type.py +70 -0
- chemrecon/utils/hungarian.py +31 -0
- chemrecon/utils/reactant_matching.py +168 -0
- chemrecon/utils/rxnutils.py +44 -0
- chemrecon/utils/set_cwd.py +12 -0
- chemrecon-0.1.1.dist-info/METADATA +143 -0
- chemrecon-0.1.1.dist-info/RECORD +86 -0
- chemrecon-0.1.1.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
""" Contains functions to find relations (including procedural) related to an entry.
|
|
2
|
+
"""
|
|
3
|
+
from typing import DefaultDict
|
|
4
|
+
|
|
5
|
+
import chemrecon.schema
|
|
6
|
+
|
|
7
|
+
from chemrecon import Entry, Relation, ProceduralRelation, ComposedRelation
|
|
8
|
+
from chemrecon.schema.direction import Direction
|
|
9
|
+
from chemrecon.entrygraph.entrygraph import EntryGraph, Edge
|
|
10
|
+
from chemrecon.entrygraph.explore import explore
|
|
11
|
+
from chemrecon.schema.procedural_relation_entrygraph import ProceduralRelationEG
|
|
12
|
+
from chemrecon.entrygraph.explorationprotocol import ExplorationProtocol
|
|
13
|
+
|
|
14
|
+
import chemrecon.connection as globals
|
|
15
|
+
|
|
16
|
+
# Relation lists for each entry type
|
|
17
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
18
|
+
# For every entry type: the (relation type, direction, single-relation protocol) triples that can be
# explored when starting from an entry of that type.
_entrytype_relation_types: dict[
    type[Entry],
    list[tuple[type[Relation], Direction, ExplorationProtocol]]
] = DefaultDict(list)

for reltype in chemrecon.schema.relationtypes:
    if reltype.symmetric:
        # Symmetric relations are registered once, under their source entry type.
        protocol_spec = ExplorationProtocol(
            relation_types = {(reltype, Direction.SYMMETRIC)}
        )
        _entrytype_relation_types[reltype.source_entrytype].append(
            (reltype, Direction.SYMMETRIC, protocol_spec)
        )
        continue

    # Directed relation: always explorable forwards from its source entry type.
    protocol_spec_forward = ExplorationProtocol(
        relation_types = {(reltype, Direction.FORWARDS)},
    )
    _entrytype_relation_types[reltype.source_entrytype].append(
        (reltype, Direction.FORWARDS, protocol_spec_forward)
    )

    # Procedural relations get no backwards explorer.
    if issubclass(reltype, ProceduralRelation):
        continue

    protocol_spec_backward = ExplorationProtocol(
        relation_types = {(reltype, Direction.BACKWARDS)},
    )
    _entrytype_relation_types[reltype.target_entrytype].append(
        (reltype, Direction.BACKWARDS, protocol_spec_backward)
    )
|
|
48
|
+
|
|
49
|
+
# Getter functions
|
|
50
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
51
|
+
def get_relations_from_entry[R: Relation](
|
|
52
|
+
entry: Entry,
|
|
53
|
+
relation_type: type[R],
|
|
54
|
+
get_backward_relations: bool = True
|
|
55
|
+
) -> list[tuple[R, Entry]]:
|
|
56
|
+
""" Get relations of a given type with the specified entry as the source.
|
|
57
|
+
"""
|
|
58
|
+
# TODO support for get_backward_relations !
|
|
59
|
+
# TODO entrygraph-based method will save procedural relations to the DB!
|
|
60
|
+
# TODO better generics for this!
|
|
61
|
+
|
|
62
|
+
if entry.recon_id is None:
|
|
63
|
+
# Get recon_id first
|
|
64
|
+
index_result = globals.handler.get_entry_by_index(entry)
|
|
65
|
+
if not index_result:
|
|
66
|
+
raise ValueError(f'Given entry: {entry} not found in database.')
|
|
67
|
+
|
|
68
|
+
res = globals.handler.get_relations_with_entries_by_recon_ids(
|
|
69
|
+
entry_type = type(entry),
|
|
70
|
+
recon_ids = [entry.recon_id],
|
|
71
|
+
relation_type = relation_type
|
|
72
|
+
)
|
|
73
|
+
out_list: list[tuple[Relation, Entry]] = list()
|
|
74
|
+
for target, relations in res[0].items():
|
|
75
|
+
for rel in relations:
|
|
76
|
+
out_list.append((rel, target))
|
|
77
|
+
return out_list
|
|
78
|
+
|
|
79
|
+
def get_all_relations(
        entry: Entry,
        get_backward_relations: bool = True,
        get_transitive_relations: bool = False,
        get_procedural_relations: bool = False,
        get_procedural_eg_relations: bool = False
) -> list[tuple[Relation, Entry]]:
    """ Get all relations of a given entry.

    Selects the relation types registered for ``type(entry)``, builds one exploration protocol from
    them, explores an entry graph seeded with ``entry`` for a single step, and returns the out-edges of
    the starting vertex.

    Note: Procedural relations cannot be explored backwards.

    :param entry: The entry whose relations are requested.
    :param get_backward_relations: If False, relations registered in the backwards direction are skipped.
    :param get_transitive_relations: If False, subclasses of ``ComposedRelation`` are skipped.
    :param get_procedural_relations: If False, subclasses of ``ProceduralRelation`` (other than
        ``ProceduralRelationEG`` subclasses) are skipped.
    :param get_procedural_eg_relations: If False, subclasses of ``ProceduralRelationEG`` are skipped.
    :return: A list of ``(relation, neighbouring entry)`` pairs.
    :raises ValueError: If the entry has no ``recon_id`` and the index lookup yields nothing.
    """
    # TODO change to entrygraph-based method to save procedural relations to the DB!
    # TODO better generics for this!

    if entry.recon_id is None:
        # Make sure the entry exists in the database before exploring from it.
        index_result = globals.handler.get_entry_by_index(entry)
        if not index_result:
            raise ValueError(f'Given entry: {entry} not found in database.')

    # Collect the (relation type, direction) pairs to explore
    relation_types: set[tuple[type[Relation], Direction]] = set()
    for r_type, direction, _ in _entrytype_relation_types[type(entry)]:

        # Skip relation kinds which were not requested
        if issubclass(r_type, ComposedRelation) and not get_transitive_relations:
            continue
        if issubclass(r_type, ProceduralRelationEG):
            # Entry-graph procedural relations have their own flag; it is checked first so that this
            # flag alone still enables them.
            if not get_procedural_eg_relations:
                continue
        elif issubclass(r_type, ProceduralRelation):
            # Previously this branch tested the entry-graph flag, leaving `get_procedural_relations`
            # without any effect.
            if not get_procedural_relations:
                continue
        if direction is Direction.BACKWARDS and not get_backward_relations:
            continue

        # The registry only lists directions that are valid for the relation type (no backwards
        # entries for procedural relations), so the registered direction is used as-is.
        relation_types.add((r_type, direction))

    protocol_all = ExplorationProtocol(
        relation_types = relation_types,
    )

    # Create an EntryGraph and explore to depth 1
    eg = EntryGraph(
        initial_entries = {entry}
    )
    explore(eg, protocol = protocol_all, steps = 1)

    # Get out-edges of a node (vertex with index 0 is the starting node)
    res = eg.get_out_edges_of_vertex(0)  # list of (Edge, Vertex)
    return [
        (edge.relation, vertex.entry)
        for edge, vertex in res if isinstance(edge, Edge)  # Filter out artificial edges
    ]
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from collections import OrderedDict
|
|
2
|
+
|
|
3
|
+
from src.chemrecon import Direction
|
|
4
|
+
from chem.mol import MolTemplate, Mol
|
|
5
|
+
from chem.create_mol import mol_from_struct_entry
|
|
6
|
+
from chemrecon.entrygraph.explore import explore
|
|
7
|
+
from chemrecon.entrygraph.scoring import Scorer
|
|
8
|
+
from chemrecon.schema import (
|
|
9
|
+
Compound, MolStructure
|
|
10
|
+
)
|
|
11
|
+
from chemrecon.schema.relation_types_procedural.compound_select_structure_proceduralrelation import \
|
|
12
|
+
CompoundSelectStructure
|
|
13
|
+
from chemrecon.entrygraph.explorationprotocol import ExplorationProtocol
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_structures_from_compound(
        entries: Compound | list[Compound],
        consider_first_entry_primary: bool = False,
) -> OrderedDict[MolTemplate, float]:
    """ Intended to score the structures reachable from one or more compounds.

    Currently disabled: the function raises ``NotImplementedError`` before doing any work. The code
    after the ``raise`` is unreachable and kept only as a starting point for the TODO below.

    :param entries: A single compound or a list of compounds.
    :param consider_first_entry_primary: Not used by the current code.
    :raises NotImplementedError: Always.
    """
    # TODO update with new protocol syntax
    raise NotImplementedError()

    # ---- Unreachable legacy implementation ----
    if isinstance(entries, Compound):
        entry_set: set[Compound] = {entries}
    elif isinstance(entries, list):
        entry_set = set(entries)
    else:
        raise ValueError()

    # Construct entrygraph and run ranking  # TODO set first entry as primary if specified!
    try:
        eg = EG_Structure(
            initial_entries = entry_set
        )
    except ValueError:
        return OrderedDict()
    explore(entrygraph = eg, steps = 2)

    # Score, keeping only those keys which can be converted by mol_from_struct_entry
    scoring_out: OrderedDict[Mol, float] = OrderedDict()
    for struct_entry, score in scorer_default(eg).items():
        try:
            scoring_out[mol_from_struct_entry(struct_entry)] = score
        except AttributeError:
            continue

    return scoring_out
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Specification
|
|
55
|
+
# Protocol over compounds and structures: starts from compounds and follows
# CompoundSelectStructure in the forwards direction.
EG_Structure = ExplorationProtocol(
    entry_types = {Compound, MolStructure},
    entry_types_initial = {Compound},
    relation_types = {(CompoundSelectStructure, Direction.FORWARDS)},
)

# Scorer configured to score MolStructure entries.
scorer_default = Scorer[MolStructure](score_entry_type = MolStructure)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
""" Re-exports all concrete schema objects
|
|
2
|
+
"""
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from chemrecon.schema.db_object import (Entry, SourceEntry, Relation, DatabaseObject, Column,
|
|
6
|
+
ProceduralRelation, ProceduralGeneratorError, ComposedRelation)
|
|
7
|
+
|
|
8
|
+
from chemrecon.schema.enums import *
|
|
9
|
+
|
|
10
|
+
# Re-export entry types
|
|
11
|
+
from chemrecon.schema.entry_types.aam import AAM
|
|
12
|
+
from chemrecon.schema.entry_types.aam_repr import AAMRepr
|
|
13
|
+
from chemrecon.schema.entry_types.compound import Compound
|
|
14
|
+
from chemrecon.schema.entry_types.enzyme import Enzyme
|
|
15
|
+
from chemrecon.schema.entry_types.reaction import Reaction
|
|
16
|
+
from chemrecon.schema.entry_types.molstructure_repr import MolStructureRepr
|
|
17
|
+
from chemrecon.schema.entry_types.molstructure import MolStructure
|
|
18
|
+
|
|
19
|
+
# Re-export relation types (with corresponding inverses, defined in the same file as the main)
|
|
20
|
+
from chemrecon.schema.relation_types_source.aam_involves_molstructure_relation import *
|
|
21
|
+
from chemrecon.schema.relation_types_source.aam_repr_involves_molstructure_repr_relation import *
|
|
22
|
+
from chemrecon.schema.relation_types_procedural.aam_convert_relation import *
|
|
23
|
+
from chemrecon.schema.relation_types_composed.compound_has_molstructure_relation import *
|
|
24
|
+
from chemrecon.schema.relation_types_source.compound_has_structure_representation_relation import *
|
|
25
|
+
from chemrecon.schema.relation_types_source.compound_reference_relation import *
|
|
26
|
+
from chemrecon.schema.relation_types_composed.reaction_has_aam_relation import *
|
|
27
|
+
from chemrecon.schema.relation_types_source.reaction_has_aam_representation_relation import *
|
|
28
|
+
from chemrecon.schema.relation_types_source.reaction_has_enzyme_relation import *
|
|
29
|
+
from chemrecon.schema.relation_types_source.reaction_involves_compound_relation import *
|
|
30
|
+
from chemrecon.schema.relation_types_source.reaction_reference_relation import *
|
|
31
|
+
from chemrecon.schema.relation_types_procedural.molstructure_convert_relation import *
|
|
32
|
+
from chemrecon.schema.relation_types_source.molstructure_standardisation_relation import *
|
|
33
|
+
|
|
34
|
+
# Ontology
|
|
35
|
+
from chemrecon.schema.relation_types_source.ontology.compound_ontology import *
|
|
36
|
+
from chemrecon.schema.relation_types_source.ontology.reaction_ontology import *
|
|
37
|
+
from chemrecon.schema.relation_types_source.ontology.enzyme_ontology import *
|
|
38
|
+
|
|
39
|
+
# Procedural relation types, import only the relation, not the protocol
|
|
40
|
+
from chemrecon.schema.relation_types_procedural.compound_select_structure_proceduralrelation import (
|
|
41
|
+
CompoundSelectStructure
|
|
42
|
+
)
|
|
43
|
+
from chemrecon.schema.relation_types_procedural.reaction_select_aam_proceduralrelation import (
|
|
44
|
+
ReactionSelectAAM
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
# Export lists
|
|
48
|
+
# All concrete entry types, in a fixed order.
entrytypes: list[type[Entry]] = [
    Compound,
    MolStructureRepr,
    MolStructure,
    Reaction,
    Enzyme,
    AAMRepr,
    AAM,
]

# All concrete relation types, in a fixed order.
relationtypes: list[type[Relation]] = [
    # Main source relations
    CompoundReference,
    CompoundHasStructureRepresentation,
    CompoundHasMolStructure,
    MolStructureConvert,
    MolStructureStandardization,
    ReactionReference,
    ReactionInvolvesCompound,
    CompoundParticipatesInReaction,
    ReactionHasEnzyme,
    ReactionHasAAMRepr,
    ReactionHasAAM,
    AAMReprInvolvesMolStructureRepr,
    AAMConvert,
    AAMInvolvesMolStructure,

    # Ontology, Compound
    CompoundIsA,
    CompoundHasInstance,
    CompoundHasNewID,
    CompoundHasOldID,
    CompoundHasPart,
    CompoundIsPartOf,
    CompoundHasConjugateAcid,
    CompoundHasConjugateBase,
    CompoundHasTautomer,
    CompoundHasStereoIsomer,
    CompoundHasIsotopologue,

    # Ontology, Reaction
    ReactionIsA,
    ReactionHasInstance,
    ReactionHasNewID,
    ReactionHasOldID,

    # Ontology, Enzyme
    EnzymeIsA,
    EnzymeHasInstance,
    EnzymeHasNewID,
    EnzymeHasOldID,

    # EntryGraph Procedural
    CompoundSelectStructure,
    ReactionSelectAAM,
]
|
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Any, ClassVar, Optional, OrderedDict
|
|
6
|
+
|
|
7
|
+
from chemrecon.core.id_types import IdentifierType
|
|
8
|
+
from chemrecon.schema.enums import Quality, SourceDatabase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DatabaseObject(ABC):
    """ Root class shared by entries and relations.
    """
    entrytype_name: ClassVar[str]
    _table_name: ClassVar[str]  # Value handed out by get_table_name()

    @classmethod
    def get_table_name(cls) -> str:
        """ Return the `_table_name` declared on the class. """
        table_name = cls._table_name
        return table_name
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Column:
    """ Description of a single database column.

    Instances compare and hash by identity.
    """
    name: str
    col_type: type
    serial: bool  # If numeric, whether serial
    index_hash: bool  # Whether this should be hashed if used in an index.

    def __init__(self, name: str, col_type: type, serial: bool = False, index_hash: bool = False):
        self.name, self.col_type = name, col_type
        self.serial, self.index_hash = serial, index_hash

    def __repr__(self):
        return 'col: {}'.format(self.name)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Entry abstract base class
|
|
41
|
+
# --------------------------------------------------------------------------------------------------------------
|
|
42
|
+
class Entry(DatabaseObject, ABC):
    """ Generic base class for entries.

    Subclasses declare their columns in `_columns` and the positions of the index columns in `_index`.
    Equality uses `recon_id` when both sides have one and the index columns otherwise; hashing always
    uses the index columns.
    """
    # Attributes

    #: Internal identifier in the *ChemRecon* database.
    #: Normally, entries have a nonnegative `recon_id`, unique to the table.
    #: A negative `recon_id` indicates that the object is 'virtual', that is, it was created by a procedural relation,
    #: and does not exist in the database.
    #: A `recon_id` of `None` indicates that the entry is not stored in the database.
    recon_id: Optional[int]

    # Database
    _columns: ClassVar[list[Column]]  # List of columns. The zeroth (recon_id) will be considered the PK.
    _index: ClassVar[list[int]]  # List of the columns on which to create a (possibly hashed) index.

    # For visualization
    _draw_colour: ClassVar[str]

    def get_columns_with_values(self, include_recon_id = True) -> dict[Column, Any]:
        """ Get the columns of this entry with values, in column order.

        :param include_recon_id: If False, the predefined `col_recon_id` column is left out.
        """
        return OrderedDict(
            (c, getattr(self, c.name)) for c in self._columns
            if include_recon_id or (c is not col_recon_id)
        )

    def get_index_columns_with_values(self) -> dict[Column, Any]:
        """ Get the index columns of this entry with values, in index order. """
        return OrderedDict(
            (c, getattr(self, c.name)) for c in self.get_index_columns()
        )

    # Methods for database interaction
    @classmethod
    def get_columns(cls, include_recon_id = True) -> list[Column]:
        """ Get the column list, optionally without the leading `recon_id` column. """
        if include_recon_id:
            return cls._columns
        return cls._columns[1:]  # Assuming recon_id is columns[0]

    @classmethod
    def get_index_indices(cls) -> list[int]:
        """ Get the positions (into the column list) of the index columns. """
        return cls._index

    @classmethod
    def get_index_columns(cls) -> list[Column]:
        """ Get the columns referred to by the index positions. """
        return [cls._columns[i] for i in cls._index]

    # Misc
    def __init__(self, recon_id: Optional[int] = None):
        super().__init__()
        self.recon_id = recon_id

    def __repr__(self):
        # Columns holding None or an empty list are left out to keep the output short.
        attr_cols: str = ', '.join(
            f'{c.name}: {v}'
            for c, v in self.get_columns_with_values(include_recon_id = False).items()
            if (v is not None) and (v != [])
        )
        # Only a missing recon_id is shown as '-'; 0 is a valid (nonnegative) id.
        shown_id = '-' if self.recon_id is None else self.recon_id
        return f'<{self._table_name} {shown_id}: {attr_cols}>'

    # Comparison and identity
    def __eq__(self, other: Entry):
        if type(self) is not type(other):
            return False
        if self.recon_id is not None and other.recon_id is not None:
            # Compare recon_id if applicable
            return self.recon_id == other.recon_id
        # Else, compare by index columns
        return all(
            getattr(self, col.name) == getattr(other, col.name)
            for col in self.get_index_columns()
        )

    def __hash__(self):
        return hash(tuple(getattr(self, col.name) for col in self.get_index_columns()))

    def __lt__(self, other: Entry):
        # Compare by recon_id when both sides have one
        if (self.recon_id is not None) and (other.recon_id is not None):
            return self.recon_id < other.recon_id
        # Otherwise compare the remaining column values in column order. recon_id is excluded so that
        # an entry with a recon_id can be ordered against one without (None is not orderable
        # against int).
        return tuple(self.get_columns_with_values(include_recon_id = False).values()) < tuple(
            other.get_columns_with_values(include_recon_id = False).values())

    @classmethod
    def get_supertype_of_id_types(cls) -> Optional[type[IdentifierType]]:
        """ Get the type of the column named 'id_type', or None if there is no such column.

        :raises ValueError: If that column's type is not a subclass of `IdentifierType`.
        """
        for c in cls.get_columns():
            if c.name == 'id_type':
                if not issubclass(c.col_type, IdentifierType):
                    raise ValueError('Error in identifier type assignment of column.')
                return c.col_type
        return None

    # Visualisation
    @abstractmethod
    def _vis_str(self) -> str:
        """ Get the string of the primary information for visualization.
        """
        pass

    def _vis_attrs(self) -> dict[str, str]:
        """ Define extra attributes used in visualization.
        """
        return {
            'fillcolor': f'"#{self._draw_colour}"'
        }

    # Encoding (for memoization/caching)
    def encode(self):
        """ Encode the string form of `recon_id` as bytes. """
        return str(self.recon_id).encode()

    def serialise(self, index_only: bool = False) -> dict:
        """ Build a dict from column name to the value passed through `serialise_col`.

        :param index_only: If True, only the index columns are included; otherwise all columns.
        """
        if index_only:
            columns = self.get_index_columns_with_values()
        else:
            columns = self.get_columns_with_values()
        return {col.name: serialise_col(col, val) for col, val in columns.items()}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Entry as a database stand-in
|
|
168
|
+
class SourceEntry(Entry, ABC):
    """ Entry which represents an entry of one of the source databases.
    """
    source_id: str
    id_type: Enum

    def _vis_str(self) -> str:
        """ Visualization label: the name of the identifier type followed by the source id. """
        return '{}: {}'.format(self.id_type.name, self.source_id)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# Relation
|
|
179
|
+
# --------------------------------------------------------------------------------------------------------------
|
|
180
|
+
class Relation[T1, T2](DatabaseObject, ABC):
|
|
181
|
+
""" Generic base class for relations between entries.
|
|
182
|
+
"""
|
|
183
|
+
# Attributes
|
|
184
|
+
recon_id_1: Optional[int] #: Recon ID of source
|
|
185
|
+
recon_id_2: Optional[int] #: Recon ID of target
|
|
186
|
+
entry_1: Optional[T1] # Entries on either end. Used on prototypes, and can also be fetched via the db.
|
|
187
|
+
entry_2: Optional[T2]
|
|
188
|
+
|
|
189
|
+
# Database
|
|
190
|
+
ignore_generation_limit: ClassVar[bool] = False # If true, will continue exploring after generation limit
|
|
191
|
+
symmetric: ClassVar[bool] # If symmetric, will always have recon_id_1 <= recon_id_2
|
|
192
|
+
|
|
193
|
+
# Set if this is the main table of another inverse relation
|
|
194
|
+
has_inverse: ClassVar[Optional[type[Relation]]] #: :meta private:
|
|
195
|
+
|
|
196
|
+
_attribute_columns: ClassVar[list[Column]]
|
|
197
|
+
_index: ClassVar[list[int]] # List of col index in attr list on which to create an index.
|
|
198
|
+
|
|
199
|
+
source_entrytype: ClassVar[type[Entry]] #: :meta private:
|
|
200
|
+
target_entrytype: ClassVar[type[Entry]] #: :meta private:
|
|
201
|
+
|
|
202
|
+
# Methods for database interaction
|
|
203
|
+
@classmethod
|
|
204
|
+
def get_attribute_columns(cls) -> list[Column]:
|
|
205
|
+
# Note: does not include source/target columns
|
|
206
|
+
return cls._attribute_columns
|
|
207
|
+
|
|
208
|
+
@classmethod
|
|
209
|
+
def get_columns(cls, include_recon_ids: bool = True) -> list[Column]:
|
|
210
|
+
if include_recon_ids:
|
|
211
|
+
return [col_recon_id_1, col_recon_id_2, *cls._attribute_columns]
|
|
212
|
+
else:
|
|
213
|
+
return cls._attribute_columns
|
|
214
|
+
|
|
215
|
+
@classmethod
|
|
216
|
+
def get_index_indices(cls) -> list[int]:
|
|
217
|
+
return cls._index
|
|
218
|
+
|
|
219
|
+
@classmethod
|
|
220
|
+
def get_index_columns(cls) -> list[Column]:
|
|
221
|
+
return [col_recon_id_1, col_recon_id_2, *[
|
|
222
|
+
cls._attribute_columns[i]
|
|
223
|
+
for i in cls._index
|
|
224
|
+
]]
|
|
225
|
+
|
|
226
|
+
def get_columns_with_values(self, include_recon_ids: bool = True) -> dict[Column, Any]:
|
|
227
|
+
""" Get the columns of this entry with values. """
|
|
228
|
+
return OrderedDict(
|
|
229
|
+
(c, self.__getattribute__(c.name)) for c in self.get_columns(include_recon_ids)
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
def get_index_columns_with_values(self) -> dict[Column, Any]:
|
|
233
|
+
""" Get the index (primary key) columns of this entry with values.
|
|
234
|
+
This always includes `recon_id_1` and `recon_id_2`, and may include attribute columns.
|
|
235
|
+
"""
|
|
236
|
+
return OrderedDict(
|
|
237
|
+
(c, self.__getattribute__(c.name)) for c in self.get_index_columns()
|
|
238
|
+
)
|
|
239
|
+
|
|
240
|
+
@classmethod
|
|
241
|
+
def get_entry_table_names(cls) -> tuple[str, str]:
|
|
242
|
+
return cls.source_entrytype.get_table_name(), cls.target_entrytype.get_table_name()
|
|
243
|
+
|
|
244
|
+
# Misc
|
|
245
|
+
def __init__(
|
|
246
|
+
self,
|
|
247
|
+
recon_id_1: Optional[int] = None,
|
|
248
|
+
recon_id_2: Optional[int] = None,
|
|
249
|
+
):
|
|
250
|
+
super().__init__()
|
|
251
|
+
self.recon_id_1, self.recon_id_2 = recon_id_1, recon_id_2
|
|
252
|
+
|
|
253
|
+
def __repr__(self):
|
|
254
|
+
arrow = '<->' if self.symmetric else '->'
|
|
255
|
+
attr_cols: str = ', '.join(
|
|
256
|
+
f'{c.name}: {v}'
|
|
257
|
+
for c, v in self.get_columns_with_values(include_recon_ids = False).items()
|
|
258
|
+
)
|
|
259
|
+
return f'<{self._table_name} {self.recon_id_1} {arrow} {self.recon_id_2}] {attr_cols}>'
|
|
260
|
+
|
|
261
|
+
# Visualisation
|
|
262
|
+
def _vis_attrs(self) -> dict[str, str]:
|
|
263
|
+
""" Define extra attributes used in visualization.
|
|
264
|
+
"""
|
|
265
|
+
return dict()
|
|
266
|
+
|
|
267
|
+
def _vis_str(self) -> str:
|
|
268
|
+
""" Define extra attributes
|
|
269
|
+
"""
|
|
270
|
+
return ''
|
|
271
|
+
|
|
272
|
+
# Inverse relations
|
|
273
|
+
class InverseRelation[T1: Entry, T2: Entry](Relation[T1, T2], ABC):
|
|
274
|
+
""" Represents the inverse of another (main) relation.
|
|
275
|
+
The main relation should have the has_inverse tag set.
|
|
276
|
+
Exists in the database only as views of the main relation.
|
|
277
|
+
"""
|
|
278
|
+
inverse_main_relation: ClassVar[type[Relation]] #: :meta private:
|
|
279
|
+
|
|
280
|
+
# Check that attribute columns are the same as for the main relation
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
# Procedural Relations
|
|
284
|
+
class ProceduralRelation[T1: Entry, T2: Entry](Relation[T1, T2], ABC):
|
|
285
|
+
""" Procedural relations are not stored in the database but computed at runtime.
|
|
286
|
+
Other than this, they have the same interface as normal relations.
|
|
287
|
+
Some can be computed only "one-way" (e.g. a compound can be standardized, but not un-standardized)
|
|
288
|
+
"""
|
|
289
|
+
|
|
290
|
+
@classmethod
|
|
291
|
+
@abstractmethod
|
|
292
|
+
def generate(
|
|
293
|
+
cls,
|
|
294
|
+
take_entry: T1,
|
|
295
|
+
) -> list[tuple[ProceduralRelation[T1, T2], T2]]:
|
|
296
|
+
""" Given a T1, generate relations from that.
|
|
297
|
+
"""
|
|
298
|
+
raise NotImplementedError()
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
class ProceduralGeneratorError(Exception):
    """ Exception type which generator methods should raise.
    """
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
# Composed Relations
|
|
308
|
+
class ComposedRelation[T1, T2, T_intermediate](Relation[T1, T2], ABC):
|
|
309
|
+
# Composed of two other relations
|
|
310
|
+
rel_type_1: ClassVar[type[Relation]] #: :meta private:
|
|
311
|
+
rel_type_2: ClassVar[type[Relation]] #: :meta private:
|
|
312
|
+
intermediate_entrytype: ClassVar[type[Entry]] #: :meta private:
|
|
313
|
+
|
|
314
|
+
# Init based on the given relations and intermediate
|
|
315
|
+
@abstractmethod
|
|
316
|
+
def __init__(
|
|
317
|
+
self,
|
|
318
|
+
rel_1: Relation[T1, T_intermediate],
|
|
319
|
+
rel_2: Relation[T_intermediate, T2],
|
|
320
|
+
intermediate: T_intermediate,
|
|
321
|
+
recon_id_1: Optional[int] = None,
|
|
322
|
+
recon_id_2: Optional[int] = None,
|
|
323
|
+
):
|
|
324
|
+
""" Init for composed relations should be overriden to set attributes of the composed relation based on the
|
|
325
|
+
attributes of relations and entries used to create it.
|
|
326
|
+
"""
|
|
327
|
+
super().__init__(recon_id_1, recon_id_2)
|
|
328
|
+
|
|
329
|
+
# Filter functions on the composed edges, which determine whether to create the procedural relations
|
|
330
|
+
@classmethod
|
|
331
|
+
def filter_rel_1(cls, r: Relation[T1, T_intermediate]) -> bool:
|
|
332
|
+
return True
|
|
333
|
+
|
|
334
|
+
@classmethod
|
|
335
|
+
def filter_intermediate(cls, e: T_intermediate) -> bool:
|
|
336
|
+
return True
|
|
337
|
+
|
|
338
|
+
@classmethod
|
|
339
|
+
def filter_rel_2(cls, r: Relation[T_intermediate, T2]) -> bool:
|
|
340
|
+
return True
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
# Predefined columns
|
|
344
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
345
|
+
# Shared column instances. Some code compares columns by identity (e.g. `c is not col_recon_id`),
# so these exact objects must be reused rather than re-created.
col_recon_id = Column(name = 'recon_id', col_type = int, serial = True)
col_recon_id_1 = Column(name = 'recon_id_1', col_type = int)
col_recon_id_2 = Column(name = 'recon_id_2', col_type = int)
col_source_id = Column(name = 'source_id', col_type = str)
col_source_id_hashed = Column(name = 'source_id', col_type = str, index_hash = True)
col_name = Column(name = 'name', col_type = str)
col_src = Column(name = 'src', col_type = SourceDatabase)
col_quality = Column(name = 'quality', col_type = Quality)
col_properties = Column(name = 'properties', col_type = list[str])
col_score = Column(name = 'score', col_type = float)
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
# Util
|
|
358
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
359
|
+
def serialise_col(col: Column, value: Any) -> Any:
    """ Convert a column value for serialisation.

    Enum members are replaced by their name; every other value is returned unchanged.
    The `col` argument is not consulted.
    """
    return value.name if isinstance(value, Enum) else value
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Direction(Enum):
    """ Direction in which a relation is followed.
    """
    #: From source to target.
    FORWARDS = 1
    #: From target to source.
    BACKWARDS = 2
    #: Both of the above.
    BOTH = 3
    #: To be used for symmetric relations.
    SYMMETRIC = 4
|
|
File without changes
|