chemrecon 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemrecon/__init__.py +73 -0
- chemrecon/chem/__init__.py +0 -0
- chemrecon/chem/chemreaction.py +223 -0
- chemrecon/chem/constant_compounds.py +3 -0
- chemrecon/chem/create_mol.py +91 -0
- chemrecon/chem/elements.py +141 -0
- chemrecon/chem/gml/__init__.py +0 -0
- chemrecon/chem/gml/gml.py +324 -0
- chemrecon/chem/gml/gml_reactant_matching.py +130 -0
- chemrecon/chem/gml/gml_to_rdk.py +217 -0
- chemrecon/chem/mol.py +483 -0
- chemrecon/chem/sumformula.py +120 -0
- chemrecon/connection.py +97 -0
- chemrecon/core/__init__.py +0 -0
- chemrecon/core/id_types.py +687 -0
- chemrecon/core/ontology.py +209 -0
- chemrecon/core/populate_query_handler.py +336 -0
- chemrecon/core/query_handler.py +587 -0
- chemrecon/database/__init__.py +1 -0
- chemrecon/database/connect.py +63 -0
- chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
- chemrecon/database/params.py +88 -0
- chemrecon/entrygraph/draw.py +119 -0
- chemrecon/entrygraph/entrygraph.py +301 -0
- chemrecon/entrygraph/explorationprotocol.py +199 -0
- chemrecon/entrygraph/explore.py +421 -0
- chemrecon/entrygraph/explore_procedure.py +183 -0
- chemrecon/entrygraph/filter.py +88 -0
- chemrecon/entrygraph/scoring.py +141 -0
- chemrecon/query/__init__.py +26 -0
- chemrecon/query/create_entry.py +86 -0
- chemrecon/query/default_protocols.py +57 -0
- chemrecon/query/find_entry.py +84 -0
- chemrecon/query/get_relations.py +143 -0
- chemrecon/query/get_structures_from_compound.py +65 -0
- chemrecon/schema/__init__.py +86 -0
- chemrecon/schema/db_object.py +363 -0
- chemrecon/schema/direction.py +10 -0
- chemrecon/schema/entry_types/__init__.py +0 -0
- chemrecon/schema/entry_types/aam.py +34 -0
- chemrecon/schema/entry_types/aam_repr.py +37 -0
- chemrecon/schema/entry_types/compound.py +52 -0
- chemrecon/schema/entry_types/enzyme.py +49 -0
- chemrecon/schema/entry_types/molstructure.py +64 -0
- chemrecon/schema/entry_types/molstructure_repr.py +41 -0
- chemrecon/schema/entry_types/reaction.py +57 -0
- chemrecon/schema/enums.py +154 -0
- chemrecon/schema/procedural_relation_entrygraph.py +66 -0
- chemrecon/schema/relation_types_composed/__init__.py +0 -0
- chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
- chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
- chemrecon/schema/relation_types_procedural/__init__.py +0 -0
- chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
- chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
- chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
- chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
- chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
- chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
- chemrecon/schema/relation_types_source/__init__.py +0 -0
- chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
- chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
- chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
- chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
- chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
- chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
- chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
- chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
- chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
- chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
- chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
- chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
- chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
- chemrecon/scripts/initialize_database.py +494 -0
- chemrecon/utils/copy_signature.py +10 -0
- chemrecon/utils/encodeable_list.py +11 -0
- chemrecon/utils/get_id_type.py +70 -0
- chemrecon/utils/hungarian.py +31 -0
- chemrecon/utils/reactant_matching.py +168 -0
- chemrecon/utils/rxnutils.py +44 -0
- chemrecon/utils/set_cwd.py +12 -0
- chemrecon-0.1.1.dist-info/METADATA +143 -0
- chemrecon-0.1.1.dist-info/RECORD +86 -0
- chemrecon-0.1.1.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from chemrecon.schema.db_object import Relation, SourceDatabase, InverseRelation, col_src
|
|
4
|
+
from chemrecon.schema.entry_types.enzyme import Enzyme
|
|
5
|
+
from chemrecon.schema.entry_types.reaction import Reaction
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ReactionHasEnzyme(Relation[Reaction, Enzyme]):
    """ Directed relation from a reaction entry to an enzyme entry that catalyses it.

    The relation carries a single attribute column, the source database the link was taken from.
    """
    # Attributes
    src: SourceDatabase                 #: Database from which this relation originates

    # Endpoints of the relation
    source_entrytype = Reaction
    target_entrytype = Enzyme
    symmetric = False

    # Database
    entrytype_name = 'Reaction has Enzyme'
    _table_name = 'ReactionHasEnzyme'
    _attribute_columns = [col_src]
    _index = [0]

    def __init__(
            self,
            src: SourceDatabase = SourceDatabase.unknown,
            recon_id_1: Optional[int] = None,
            recon_id_2: Optional[int] = None
    ):
        """ Create the relation between the entries with the given ids and record its source database.
        """
        super().__init__(recon_id_1, recon_id_2)
        self.src = src

    def _vis_str(self) -> str:
        """ Short label for this relation (constant text).
        """
        return 'catalysed by'
|
|
36
|
+
|
|
37
|
+
class EnzymeCatalyzesReaction(InverseRelation[Enzyme, Reaction]):
    """ Directed relation from an enzyme entry to a reaction entry which it catalyses.

    This is the inverse of ReactionHasEnzyme and carries the same attribute column.
    """
    # Attributes
    src: SourceDatabase                 #: Database from which this relation originates

    # Inverse
    inverse_main_relation = ReactionHasEnzyme

    # Endpoints of the relation (swapped with respect to the main relation)
    source_entrytype = Enzyme
    target_entrytype = Reaction
    symmetric = False

    # Database
    entrytype_name = 'Enzyme Catalyzes Reaction'
    _table_name = 'EnzymeCatalyzesReaction'
    _attribute_columns = [col_src]
    _index = [0]

    def __init__(
            self,
            src: SourceDatabase = SourceDatabase.unknown,
            recon_id_1: Optional[int] = None,
            recon_id_2: Optional[int] = None
    ):
        """ Create the relation between the entries with the given ids and record its source database.
        """
        super().__init__(recon_id_1, recon_id_2)
        self.src = src

    def _vis_str(self) -> str:
        """ Short label for this relation (constant text).
        """
        return 'catalyses'
|
|
69
|
+
|
|
70
|
+
# Set inverse: register EnzymeCatalyzesReaction on the main relation. This is done here, after both
# class bodies, because EnzymeCatalyzesReaction is defined after ReactionHasEnzyme.
ReactionHasEnzyme.has_inverse = EnzymeCatalyzesReaction
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from chemrecon.schema import Column, Relation, Reaction, Compound
|
|
4
|
+
from chemrecon.schema.db_object import InverseRelation
|
|
5
|
+
class ReactionInvolvesCompound(Relation[Reaction, Compound]):
    """ Directed relation from a reaction to a compound taking part in it.

    Each such relation is annotated with the stoichiometric coefficient of the compound in the reaction.
    """
    # Attributes
    n: int                              #: Stoichiometric coefficient of the compound in the reaction

    # Endpoints of the relation
    source_entrytype = Reaction
    target_entrytype = Compound
    symmetric = False

    # Database
    entrytype_name = 'Reaction involves Compound'
    _table_name = 'ReactionInvolvesCompound'
    _attribute_columns = [Column('n', int)]
    _index = [0]

    def __init__(
            self,
            n: int,
            recon_id_1: Optional[int] = None,
            recon_id_2: Optional[int] = None
    ):
        """ Create the relation between the entries with the given ids and store the coefficient `n`.
        """
        super().__init__(recon_id_1, recon_id_2)
        self.n = n

    def _vis_str(self) -> str:
        """ Short label for this relation, including the coefficient.
        """
        coefficient = self.n
        return f'has compound ({coefficient})'
|
|
34
|
+
|
|
35
|
+
class CompoundParticipatesInReaction(InverseRelation[Compound, Reaction]):
    """ Directed relation from a compound to a reaction in which it takes part.

    This is the inverse of ReactionInvolvesCompound and is annotated with the same stoichiometric coefficient.
    """
    # Attributes
    n: int                              #: Stoichiometric coefficient of the compound in the reaction

    # Inverse
    inverse_main_relation = ReactionInvolvesCompound

    # Endpoints of the relation (swapped with respect to the main relation)
    source_entrytype = Compound
    target_entrytype = Reaction
    symmetric = False

    # Database
    entrytype_name = 'Compound Participates in Reaction'
    _table_name = 'CompoundParticipatesInReaction'
    _attribute_columns = [Column('n', int)]
    _index = [0]

    def __init__(
            self,
            n: int,
            recon_id_1: Optional[int] = None,
            recon_id_2: Optional[int] = None
    ):
        """ Create the relation between the entries with the given ids and store the coefficient `n`.
        """
        super().__init__(recon_id_1, recon_id_2)
        self.n = n

    def _vis_str(self) -> str:
        """ Short label for this relation, including the coefficient.
        """
        coefficient = self.n
        return f'in reaction ({coefficient})'
|
|
67
|
+
|
|
68
|
+
# Set inverse: register CompoundParticipatesInReaction on the main relation. This is done here, after
# both class bodies, because CompoundParticipatesInReaction is defined after ReactionInvolvesCompound.
ReactionInvolvesCompound.has_inverse = CompoundParticipatesInReaction
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from chemrecon.schema import Relation, Reaction, SourceDatabase
|
|
4
|
+
from chemrecon.schema.db_object import col_src
|
|
5
|
+
|
|
6
|
+
class ReactionReference(Relation[Reaction, Reaction]):
    """ Cross-reference between two reaction entries, within one database or across databases.

    The relation is symmetric and records the source database of the reference.
    """
    # Attributes
    src: SourceDatabase                 #: Database from which this reference originates

    # Endpoints of the relation (both are reactions)
    source_entrytype = Reaction
    target_entrytype = Reaction
    symmetric = True

    # Database
    entrytype_name = 'Reaction Reference'
    _table_name = 'ReactionReference'
    _attribute_columns = [col_src]
    _index = [0]

    def __init__(
            self,
            src: SourceDatabase = SourceDatabase.unknown,
            recon_id_1: Optional[int] = None,
            recon_id_2: Optional[int] = None
    ):
        """ Create the reference between the entries with the given ids and record its source database.
        """
        super().__init__(recon_id_1, recon_id_2)
        self.src = src

    def _vis_str(self) -> str:
        """ Short label for this relation, including the name of the source database.
        """
        source_name = self.src.name
        return f'ref ({source_name})'
|
|
@@ -0,0 +1,494 @@
|
|
|
1
|
+
""" Handles the creation of the tables described in 'core/schema.py'.
|
|
2
|
+
All tables and objects are placed in the 'chemrecon' schema.
The connection should have admin access before attempting to create tables.
If tables already exist, they should be verified to be identical.

This module has the following expectations of the database being connected to:
- Database named 'chemrecon_db'
- Initial login with the 'postgres' user
- Afterwards, login with either local_docker or local_docker_dev
|
|
13
|
+
"""
|
|
14
|
+
import argparse
|
|
15
|
+
import builtins
|
|
16
|
+
import os
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from typing import get_origin, get_args
|
|
19
|
+
from types import GenericAlias
|
|
20
|
+
|
|
21
|
+
import psycopg as pg
|
|
22
|
+
import psycopg.errors
|
|
23
|
+
import psycopg.sql as sql
|
|
24
|
+
|
|
25
|
+
import chemrecon.schema as schema
|
|
26
|
+
from chemrecon import Params
|
|
27
|
+
|
|
28
|
+
from chemrecon.database import params
|
|
29
|
+
from chemrecon.database.connect import postgres_connect
|
|
30
|
+
from chemrecon.utils.set_cwd import set_cwd
|
|
31
|
+
|
|
32
|
+
from chemrecon.schema import entrytypes, relationtypes, InverseRelation, Relation
|
|
33
|
+
|
|
34
|
+
# Command-line arguments. The parser is built at import time and consumed by main().
parser = argparse.ArgumentParser()
# Paths to the parameter files for the public user (--params) and the dev user (--devparams).
# main() reads five lines from each: database name, username, password, host, port.
parser.add_argument('--params', action = 'store')
parser.add_argument('--devparams', action = 'store')

# Custom params: the connection used to run the initialisation itself
parser.add_argument('--host', action = 'store')
parser.add_argument('--port', action = 'store')
parser.add_argument('--database', action = 'store')
parser.add_argument('--username', action = 'store')
parser.add_argument('--password', action = 'store')
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _read_params_file(path: str, connection_title: str) -> Params:
    """ Build a Params object from a connection parameter file.

    The first five lines of the file are used, in this order: database name, username, password, host, port.
    Surrounding whitespace on each line is stripped.

    :param path: Path of the parameter file.
    :param connection_title: Title passed on to the Params object.
    :raises ValueError: If the file has fewer than five lines.
    """
    with open(path) as f:
        lines = [line.strip() for line in f]
    if len(lines) < 5:
        raise ValueError(
            f'Parameters file {path!r} must contain 5 lines (database, username, password, host, port).'
        )
    db_name, username, password, db_host, db_port = lines[:5]
    return Params(
        connection_title = connection_title,
        db_name = db_name,
        username = username,
        password = password,
        db_host = db_host,
        db_port = db_port
    )


def main():
    """ Command-line entry point: parse the arguments, read both parameter files and initialise the database.

    The connection used for the initialisation is built from --database/--host/--port/--username/--password.
    The accounts to create are read from the files given by --params (public user) and --devparams (dev user).

    :raises ValueError: If --params or --devparams is not given, or if a parameter file is too short.
    """
    print(f'Running ChemRecon database initialisation script.')
    args = vars(parser.parse_args())

    # NOTE(review): this echoes the value of --password to stdout.
    print(f'\nARGS\n')
    print(args)

    # argparse always creates the key (with value None when the option is omitted), so the value has to be
    # tested rather than the presence of the key.
    if args.get('params') is None:
        raise ValueError('Must provide parameters file')
    if args.get('devparams') is None:
        raise ValueError('Must provide dev parameters file.')

    # Connection params
    connection_params = Params(
        connection_title = 'CLI',
        db_name = args['database'],
        db_host = args['host'],
        db_port = args['port'],
        username = args['username'],
        password = args['password']
    )

    print(os.getcwd())

    # User parameters
    params_user = _read_params_file(args['params'], 'CLI-user')
    # NOTE(review): the dev parameters reuse the title 'CLI-user', as in the original code - confirm intended.
    params_dev = _read_params_file(args['devparams'], 'CLI-user')

    conn = postgres_connect(
        connection_params,
        initialise_enums = False
    )

    # Initialize
    initialise_database(
        conn,
        dev_parameters = params_dev,
        pub_parameters = params_user,
    )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def initialise_database(
        conn: pg.Connection,
        dev_parameters: Params,
        pub_parameters: Params,
        only_create_users: bool = False
) -> None:
    """ Create the roles, schemata, enum types, tables, indexes, views and grants on the given connection.

    Steps, in order:
      1. Create the group 'public_group', the dev user and the public user (member of 'public_group').
         If psycopg reports a DuplicateObject, the step is skipped.
      2. If `only_create_users` is set, commit and return.
      3. Create the schemata 'meta' and 'chemrecon', both authorised to the dev user, and the table
         meta.procedures.
      4. Create one postgres enum type per enum in schema.enums.enum_register.
      5. Create one table per entry type (first column is the primary key) plus a unique index on its index columns.
      6. Create one table per non-inverse relation type plus a unique index on its index columns.
      7. Create one view per inverse relation type, selecting from the main relation's table with the two id
         columns swapped.
      8. Create a '<table>_v' view per relation type, joining the relation with both adjacent entry tables.
      9. Grant read/write to the dev user and read-only to 'public_group' on both schemata, then commit.

    :param conn: Open connection used for all statements. It is committed but not closed here.
    :param dev_parameters: Username and password of the dev user to create.
    :param pub_parameters: Username and password of the public user to create.
    :param only_create_users: If True, stop after the users have been created.
    """
    # Login with the 'postgres' user to create the DB.
    # Create the database, schema and accounts from an otherwise empty database

    # Get connection parameters
    set_cwd()

    print(f'\nCreating database...')

    with conn.cursor() as c:

        # Check if DB is already populated
        # TODO

        # Load parameters
        # ----------------------------------------------------------------------------------------------------------
        db_user_dev = dev_parameters.username
        db_pass_dev = dev_parameters.password

        db_user_pub = pub_parameters.username
        db_pass_pub = pub_parameters.password

        # Create users if not exist
        try:

            # Create group for public users
            # ------------------------------------------------------------------------------------------------------

            c.execute(sql.SQL(
                'CREATE GROUP {public_group};'
            ).format(
                public_group = sql.Identifier('public_group')
            ))

            # Create users/roles
            # ------------------------------------------------------------------------------------------------------
            # The passwords are passed as plain strings; psycopg.sql is expected to render non-Composable
            # arguments as SQL literals - see the psycopg.sql documentation.
            c.execute(sql.SQL(
                'CREATE USER {dev_username} WITH PASSWORD {dev_password};'
            ).format(
                dev_username = sql.Identifier(db_user_dev),
                dev_password = db_pass_dev,
            ))
            c.execute(sql.SQL(
                'CREATE USER {pub_username} WITH PASSWORD {pub_password} IN GROUP {public_group};'
            ).format(
                pub_username = sql.Identifier(db_user_pub),
                pub_password = db_pass_pub,
                public_group = sql.Identifier('public_group')
            ))

        except psycopg.errors.DuplicateObject as e:
            # Already exists, reset
            # NOTE(review): the first failing CREATE skips the remaining ones in this block. The commit()
            # presumably serves to end the failed transaction - confirm that rollback() is not the intended call.
            conn.commit()
            print(f' -> Users already exist, skipping.')
            pass

        # If set to only prepare for restoring with psql, exit here
        # ----------------------------------------------------------------------------------------------------------
        if only_create_users:
            conn.commit()
            return

        # Create schemata
        # ----------------------------------------------------------------------------------------------------------
        c.execute(sql.SQL(  # 'meta' schema for dev purposes
            'CREATE SCHEMA {meta_schema} AUTHORIZATION {dev_username};'
        ).format(
            meta_schema = sql.Identifier('meta'),
            dev_username = sql.Identifier(db_user_dev)
        ))
        c.execute(sql.SQL(  # 'chemrecon' schema for data
            'CREATE SCHEMA {chemrecon_schema} AUTHORIZATION {dev_username};'
        ).format(
            chemrecon_schema = sql.Identifier('chemrecon'),
            dev_username = sql.Identifier(db_user_dev)
        ))

        # Create the 'meta' tables
        # ----------------------------------------------------------------------------------------------------------
        c.execute(sql.SQL("""
            CREATE TABLE IF NOT EXISTS meta.procedures
            (
                procedure_name
                TEXT,
                procedure_version
                INT,
                data_source_version
                TEXT,
                finished_time
                TIMESTAMP,
                progress
                INT
            );
        """))
        conn.commit()

        # Create and adapt enums
        # ----------------------------------------------------------------------------------------------------------
        print(f'Creating enums')
        for enum in schema.enums.enum_register:
            # Create enum in db: the type is named after the Python enum class, its labels are the member names
            c.execute(sql.SQL(
                'CREATE TYPE {chemrecon}.{enum_name} AS ENUM ({enum_list});'
            ).format(
                chemrecon = sql.Identifier('chemrecon'),
                enum_name = sql.Identifier(enum.__name__),
                enum_list = sql.SQL(', ').join(
                    e.name for e in enum
                )
            ))
        conn.commit()

        # Create tables
        # ----------------------------------------------------------------------------------------------------------
        # Entry tables
        # ----------------------------------------------------------------------------------------------------------
        for entrytype in entrytypes:
            table = sql.Identifier(entrytype.get_table_name())
            column_sqls: list[sql.Composable] = list()

            # Compute the columns. The first column becomes the primary key; columns flagged as serial use
            # the SERIAL type instead of the type derived from the Python annotation.
            for i, col in enumerate(entrytype.get_columns()):
                col: schema.db_object.Column
                colname = sql.Identifier(col.name)
                coltype = get_postgres_type(col.col_type)
                col_sql = sql.SQL(
                    '{colname} {coltype} {opt_primary_key}'
                ).format(
                    colname = colname,
                    coltype = coltype if not col.serial else sql.SQL('SERIAL'),
                    opt_primary_key = sql.SQL('PRIMARY KEY') if i == 0 else sql.SQL('')
                )
                column_sqls.append(col_sql)

            # Create the table
            q = sql.SQL(
                'CREATE TABLE IF NOT EXISTS {table} ({cols});'
            ).format(
                table = table,
                cols = sql.SQL(', ').join(column_sqls),
            )
            print(f' - {q.as_string()}')
            c.execute(q)

            # Create a unique index on the index columns (only if the entry type declares any)
            if len(entrytype.get_index_indices()) > 0:
                index_cols: list[sql.Composable] = list()
                for col in entrytype.get_index_columns():
                    if col.index_hash:
                        # If column has potential to be long, index the md5 value of the column instead
                        index_cols.append(sql.SQL('md5({colname})').format(colname = sql.Identifier(col.name)))
                    else:
                        index_cols.append(sql.Identifier(col.name))
                # Note: 'constraint_name' is passed below but not referenced by the template.
                q = sql.SQL(
                    'CREATE UNIQUE INDEX IF NOT EXISTS {index_name} ON {table} ({index_cols});'
                ).format(
                    index_name = sql.Identifier(f'{entrytype.get_table_name()}_index'),
                    constraint_name = sql.Identifier(f'{entrytype.get_table_name()}_constraint'),
                    table = table,
                    index_cols = sql.SQL(', ').join(index_cols)
                )
                print(f' - {q.as_string()}')
                c.execute(q)

        # Relation Tables
        # ----------------------------------------------------------------------------------------------------------
        for relationtype in relationtypes:
            if issubclass(relationtype, InverseRelation):
                # Inverse relations are handled in the next step (they are created as views)
                continue

            table = relationtype.get_table_name()
            column_sqls: list[sql.Composable] = get_column_sql_terms(relationtype)

            # Table query
            q = sql.SQL(
                'CREATE TABLE {rel_table_name} ({cols} {optional_check});'
            ).format(
                rel_table_name = sql.Identifier(table),
                cols = sql.SQL(', ').join(column_sqls),
                optional_check = (  # If the relation is symmetric, ensure invariant id_1 <= id_2
                    sql.SQL(', CHECK (recon_id_1 <= recon_id_2)') if relationtype.symmetric
                    else sql.SQL('')
                )
            )
            print(f' - {q.as_string()}')
            c.execute(q)

            # Create unique index on the relation's index columns
            # (presumably recon_id_1, recon_id_2 and the indexed attribute columns - verify get_index_columns())
            index_cols: list[sql.Composable] = [
                sql.Identifier(col.name) for col in relationtype.get_index_columns()
            ]
            q = sql.SQL(
                'CREATE UNIQUE INDEX {index_name} ON {table} ({index_cols})'
            ).format(
                index_name = sql.Identifier(f'{relationtype.get_table_name()}_index'),
                table = sql.Identifier(relationtype.get_table_name()),
                index_cols = sql.SQL(', ').join(index_cols)
            )
            print(f' - {q.as_string()}')
            c.execute(q)

        # Create views for inverse relations
        # ----------------------------------------------------------------------------------------------------------
        for relationtype in relationtypes:
            if not issubclass(relationtype, InverseRelation):
                # Main relations were created as tables in the previous step; only inverse relations get a view
                continue

            # Sanity checks: the inverse must connect the same entry types as its main relation, swapped
            assert relationtype.inverse_main_relation.target_entrytype is relationtype.source_entrytype
            assert relationtype.inverse_main_relation.source_entrytype is relationtype.target_entrytype

            # Create table as view: same rows as the main relation's table with the two id columns swapped.
            # NOTE(review): with no attribute columns the SELECT list would end in a comma - confirm every
            # inverse relation declares at least one attribute column.
            table = relationtype.get_table_name()
            q = sql.SQL("""
                CREATE VIEW {view_name} AS
                SELECT recon_id_1 AS recon_id_2, recon_id_2 AS recon_id_1, {attr_cols}
                FROM {parent_table_name}
                ;
            """).format(
                view_name = sql.Identifier(table),
                parent_table_name = sql.Identifier(relationtype.inverse_main_relation.get_table_name()),
                attr_cols = sql.SQL(', ').join(
                    sql.Identifier(attr_col.name) for attr_col in relationtype.get_attribute_columns()
                )
            )
            print(f' - {q.as_string()}')
            c.execute(q)

        # Create corresponding views for ALL relations
        # ----------------------------------------------------------------------------------------------------------
        for relationtype in relationtypes:
            # Create view corresponding to this relation (relation with adjacent entries).
            # Columns are aliased with a 'rel_', 't1_' or 't2_' prefix according to their origin.
            # The name 'c' inside the generator expressions below is local to them and does not rebind the cursor.
            table = relationtype.get_table_name()
            q = sql.SQL("""
                CREATE VIEW {view_name} AS
                SELECT rel.recon_id_1, rel.recon_id_2, {rel_attr_cols} {t1_attr_cols}, {t2_attr_cols}
                FROM {rel_table} rel
                INNER JOIN {table_1} t1 ON t1.recon_id = rel.recon_id_1
                INNER JOIN {table_2} t2 ON t2.recon_id = rel.recon_id_2
                ;
            """).format(
                view_name = sql.Identifier(f'{table}_v'),
                rel_table = sql.Identifier(table),
                table_1 = sql.Identifier(relationtype.source_entrytype.get_table_name()),
                table_2 = sql.Identifier(relationtype.target_entrytype.get_table_name()),
                rel_attr_cols = sql.SQL(', ').join(
                    sql.SQL('rel.') + sql.Identifier(c.name)
                    + sql.SQL(' ') + sql.Identifier(f'rel_{c.name}')
                    for c in relationtype.get_attribute_columns()
                ) + (sql.SQL(', ') if len(relationtype.get_attribute_columns()) > 0 else sql.SQL('')),
                t1_attr_cols = sql.SQL(', ').join(
                    sql.SQL('t1.') + sql.Identifier(c.name)
                    + sql.SQL(' ') + sql.Identifier(f't1_{c.name}')
                    for c in relationtype.source_entrytype.get_columns(include_recon_id = False)
                ),
                t2_attr_cols = sql.SQL(', ').join(
                    sql.SQL('t2.') + sql.Identifier(c.name)
                    + sql.SQL(' ') + sql.Identifier(f't2_{c.name}')
                    for c in relationtype.target_entrytype.get_columns(include_recon_id = False)
                ),
            )

            print(f' -> {relationtype.get_table_name()}')
            print(f' - {q.as_string()}')
            c.execute(q)

        # Grant read/write permissions to 'dev' user on all tables
        # ----------------------------------------------------------------------------------------------------------
        for schema_name in {'meta', 'chemrecon'}:
            c.execute(sql.SQL(
                """ GRANT SELECT, INSERT, UPDATE, DELETE
                ON ALL TABLES IN SCHEMA {schema_name}
                TO {dev_username};
                GRANT USAGE, SELECT
                ON ALL SEQUENCES IN SCHEMA {schema_name}
                TO {dev_username};
                """
            ).format(
                schema_name = sql.Identifier(schema_name),
                dev_username = sql.Identifier(db_user_dev)
            ))

        # Grant USAGE, UPDATE on sequences
        # TODO

        # Grant read permissions to 'public' group on all tables
        # ----------------------------------------------------------------------------------------------------------
        for schema_name in {'meta', 'chemrecon'}:
            # The placeholder is called 'pub_username' but is filled with the group 'public_group'
            c.execute(sql.SQL(
                """ GRANT USAGE ON SCHEMA {schema_name} TO {pub_username};
                GRANT SELECT
                ON ALL TABLES IN SCHEMA {schema_name}
                TO {pub_username};
                GRANT SELECT
                ON ALL SEQUENCES IN SCHEMA {schema_name}
                TO {pub_username};
                """
            ).format(
                schema_name = sql.Identifier(schema_name),
                pub_username = sql.Identifier('public_group')
            ))

        # Grant USAGE on sequences
        # TODO

        # Commit and finalise
        # ----------------------------------------------------------------------------------------------------------
        print('Database initialized.')
        conn.commit()
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
# Utility functions
|
|
426
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
427
|
+
def get_column_sql_terms(relationtype: type[Relation], only_attrs: bool = False) -> list[sql.Composable]:
    """ Generate the SQL column definitions for the table of the given relation type.

    Unless `only_attrs` is set, the list starts with the two id columns 'recon_id_1' and 'recon_id_2', which
    reference the 'recon_id' column of the source and target entry tables respectively. These are followed by
    one '<name> <postgres type>' term per attribute column of the relation.
    """
    terms: list[sql.Composable] = []

    # Source and target columns
    if not only_attrs:
        terms += [
            sql.SQL('recon_id_1 int references {table_1}(recon_id)').format(
                table_1 = sql.Identifier(relationtype.source_entrytype.get_table_name()),
            ),
            sql.SQL('recon_id_2 int references {table_2}(recon_id)').format(
                table_2 = sql.Identifier(relationtype.target_entrytype.get_table_name()),
            ),
        ]

    # Attribute columns
    terms.extend(
        sql.SQL('{colname} {coltype}').format(
            colname = sql.Identifier(attribute.name),
            coltype = get_postgres_type(attribute.col_type)
        )
        for attribute in relationtype.get_attribute_columns()
    )

    return terms
|
|
455
|
+
|
|
456
|
+
def get_postgres_type(t: type) -> sql.Composable:
|
|
457
|
+
# Get the postgres type as an SQL string of the given type
|
|
458
|
+
match t:
|
|
459
|
+
case builtins.str:
|
|
460
|
+
return sql.SQL('text')
|
|
461
|
+
case builtins.int:
|
|
462
|
+
return sql.SQL('integer')
|
|
463
|
+
case builtins.float:
|
|
464
|
+
return sql.SQL('float8')
|
|
465
|
+
case builtins.bool:
|
|
466
|
+
return sql.SQL('boolean')
|
|
467
|
+
case GenericAlias():
|
|
468
|
+
# Subscripted type, e.g. list[int] -> 'integer[]'
|
|
469
|
+
origin_type = get_origin(t)
|
|
470
|
+
match origin_type:
|
|
471
|
+
case builtins.list:
|
|
472
|
+
# List gets translated into postgres array
|
|
473
|
+
subscript = get_args(t)[0]
|
|
474
|
+
subscript_type = get_postgres_type(subscript)
|
|
475
|
+
q = sql.SQL('{subscript_sql} ARRAY').format(
|
|
476
|
+
subscript_sql = subscript_type
|
|
477
|
+
)
|
|
478
|
+
return q
|
|
479
|
+
case _:
|
|
480
|
+
raise NotImplementedError('Implement creating table from whatever type this is.')
|
|
481
|
+
case _:
|
|
482
|
+
# If it is an enum
|
|
483
|
+
if issubclass(t, Enum):
|
|
484
|
+
return sql.SQL('{chemrecon}.{enumname}').format(
|
|
485
|
+
chemrecon = sql.Identifier('chemrecon'),
|
|
486
|
+
enumname = sql.Identifier(t.__name__)
|
|
487
|
+
)
|
|
488
|
+
else:
|
|
489
|
+
raise NotImplementedError('!')
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
# Script functionality: run main() only when this module is executed directly, not when it is imported
if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from typing import Any, cast, ParamSpec, TypeVar
|
|
3
|
+
|
|
4
|
+
def copy_signature[**P, T](_origin: Callable[P, T]) -> Callable[[Callable[..., Any]], Callable[P, T]]:
|
|
5
|
+
""" Decorates a function to inform type checker that it takes the same arguments as another function.
|
|
6
|
+
"""
|
|
7
|
+
def decorator(target_function: Callable[..., Any]) -> Callable[P, T]:
|
|
8
|
+
return cast(Callable[P, T], target_function)
|
|
9
|
+
|
|
10
|
+
return decorator
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class EncodableList[T: Any](list[T]):
|
|
5
|
+
""" Simple wrapper for built-in list class to allow for encoding for memoization purposes.
|
|
6
|
+
"""
|
|
7
|
+
def encode(self, *args, **kwargs):
|
|
8
|
+
return str([x.encode() for x in self.__iter__()]).encode()
|
|
9
|
+
|
|
10
|
+
def __hash__(self):
|
|
11
|
+
return self.encode().__hash__()
|