chemrecon 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemrecon/__init__.py +73 -0
- chemrecon/chem/__init__.py +0 -0
- chemrecon/chem/chemreaction.py +223 -0
- chemrecon/chem/constant_compounds.py +3 -0
- chemrecon/chem/create_mol.py +91 -0
- chemrecon/chem/elements.py +141 -0
- chemrecon/chem/gml/__init__.py +0 -0
- chemrecon/chem/gml/gml.py +324 -0
- chemrecon/chem/gml/gml_reactant_matching.py +130 -0
- chemrecon/chem/gml/gml_to_rdk.py +217 -0
- chemrecon/chem/mol.py +483 -0
- chemrecon/chem/sumformula.py +120 -0
- chemrecon/connection.py +97 -0
- chemrecon/core/__init__.py +0 -0
- chemrecon/core/id_types.py +687 -0
- chemrecon/core/ontology.py +209 -0
- chemrecon/core/populate_query_handler.py +336 -0
- chemrecon/core/query_handler.py +587 -0
- chemrecon/database/__init__.py +1 -0
- chemrecon/database/connect.py +63 -0
- chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
- chemrecon/database/params.py +88 -0
- chemrecon/entrygraph/draw.py +119 -0
- chemrecon/entrygraph/entrygraph.py +301 -0
- chemrecon/entrygraph/explorationprotocol.py +199 -0
- chemrecon/entrygraph/explore.py +421 -0
- chemrecon/entrygraph/explore_procedure.py +183 -0
- chemrecon/entrygraph/filter.py +88 -0
- chemrecon/entrygraph/scoring.py +141 -0
- chemrecon/query/__init__.py +26 -0
- chemrecon/query/create_entry.py +86 -0
- chemrecon/query/default_protocols.py +57 -0
- chemrecon/query/find_entry.py +84 -0
- chemrecon/query/get_relations.py +143 -0
- chemrecon/query/get_structures_from_compound.py +65 -0
- chemrecon/schema/__init__.py +86 -0
- chemrecon/schema/db_object.py +363 -0
- chemrecon/schema/direction.py +10 -0
- chemrecon/schema/entry_types/__init__.py +0 -0
- chemrecon/schema/entry_types/aam.py +34 -0
- chemrecon/schema/entry_types/aam_repr.py +37 -0
- chemrecon/schema/entry_types/compound.py +52 -0
- chemrecon/schema/entry_types/enzyme.py +49 -0
- chemrecon/schema/entry_types/molstructure.py +64 -0
- chemrecon/schema/entry_types/molstructure_repr.py +41 -0
- chemrecon/schema/entry_types/reaction.py +57 -0
- chemrecon/schema/enums.py +154 -0
- chemrecon/schema/procedural_relation_entrygraph.py +66 -0
- chemrecon/schema/relation_types_composed/__init__.py +0 -0
- chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
- chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
- chemrecon/schema/relation_types_procedural/__init__.py +0 -0
- chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
- chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
- chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
- chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
- chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
- chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
- chemrecon/schema/relation_types_source/__init__.py +0 -0
- chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
- chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
- chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
- chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
- chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
- chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
- chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
- chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
- chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
- chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
- chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
- chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
- chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
- chemrecon/scripts/initialize_database.py +494 -0
- chemrecon/utils/copy_signature.py +10 -0
- chemrecon/utils/encodeable_list.py +11 -0
- chemrecon/utils/get_id_type.py +70 -0
- chemrecon/utils/hungarian.py +31 -0
- chemrecon/utils/reactant_matching.py +168 -0
- chemrecon/utils/rxnutils.py +44 -0
- chemrecon/utils/set_cwd.py +12 -0
- chemrecon-0.1.1.dist-info/METADATA +143 -0
- chemrecon-0.1.1.dist-info/RECORD +86 -0
- chemrecon-0.1.1.dist-info/WHEEL +4 -0
chemrecon/__init__.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
""" Defines metadata and the most general exports in the chemrecon.* namespace.
|
|
2
|
+
"""
|
|
3
|
+
# Metadata
|
|
4
|
+
__version__ = '0.1.1' # Library version
|
|
5
|
+
__db_version__: list[str] = ['0.1.1'] # Compatible database versions
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Imports
|
|
9
|
+
import psycopg
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Connection and handler
|
|
13
|
+
import chemrecon.connection
|
|
14
|
+
from chemrecon.connection import (
|
|
15
|
+
connect, connect_public, connect_local_docker, connect_local_docker_dev, disconnect,
|
|
16
|
+
get_query_handler
|
|
17
|
+
)
|
|
18
|
+
from chemrecon.database.params import Params
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Project initialization
|
|
22
|
+
# Try to set the default database connection
|
|
23
|
+
# Executed once when the package is imported. Only disconnect() is called;
# the connect_public() call is currently commented out.
try:
    disconnect()
    # connect_public()
except psycopg.OperationalError as exc:
    # Report the failure on stdout instead of aborting the import.
    print(f'Cannot connect to default database: {exc}')
|
|
28
|
+
|
|
29
|
+
# Import from schema
|
|
30
|
+
from chemrecon.schema import *
|
|
31
|
+
|
|
32
|
+
from chemrecon.core.query_handler import QueryHandler
|
|
33
|
+
from chemrecon.core.populate_query_handler import PopulateQueryHandler
|
|
34
|
+
|
|
35
|
+
# Export identifier types
|
|
36
|
+
from chemrecon.core.id_types import *
|
|
37
|
+
|
|
38
|
+
# Entry creators
|
|
39
|
+
# from chemrecon.query.create_entry import (
|
|
40
|
+
# entry,
|
|
41
|
+
# compound_entry, reaction_entry, enzyme_entry,
|
|
42
|
+
# aam_representation_entry, structure_representation_entry,
|
|
43
|
+
# structure_entry, aam_entry,
|
|
44
|
+
# enzyme_from_ec_number,
|
|
45
|
+
# entry_from_identifiers_org
|
|
46
|
+
# )
|
|
47
|
+
|
|
48
|
+
from chemrecon.query.find_entry import (
|
|
49
|
+
find_entry,
|
|
50
|
+
find_compound_entry, find_reaction_entry, find_enzyme_entry,
|
|
51
|
+
find_structure_representation_entry, find_aam_representation_entry,
|
|
52
|
+
find_structure_entry, find_aam_entry
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# Relation getters
|
|
56
|
+
from chemrecon.query.get_relations import *
|
|
57
|
+
|
|
58
|
+
# EntryGraphs
|
|
59
|
+
from chemrecon.entrygraph.entrygraph import EntryGraph, Vertex, Edge
|
|
60
|
+
from chemrecon.schema.direction import Direction
|
|
61
|
+
from chemrecon.entrygraph.filter import (
|
|
62
|
+
EntryFilter, EntryFilterProcedure,
|
|
63
|
+
RelationFilter, RelationFilterProcedure
|
|
64
|
+
)
|
|
65
|
+
from chemrecon.entrygraph.explorationprotocol import ExplorationProtocol
|
|
66
|
+
from chemrecon.entrygraph.explore import explore
|
|
67
|
+
from chemrecon.entrygraph.scoring import Scorer
|
|
68
|
+
|
|
69
|
+
# Pre-defined entrygraph types
|
|
70
|
+
from chemrecon.query.default_protocols import *
|
|
71
|
+
|
|
72
|
+
# Chemistry - Molecules
|
|
73
|
+
from chemrecon.chem.mol import Mol
|
|
File without changes
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
""" Implements a wrapper for the RDKit reaction type."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import rdkit.Chem.rdChemReactions as rdk_r
|
|
8
|
+
from rdkit.Chem.rdChemReactions import ChemicalReaction
|
|
9
|
+
from rdkit.Chem import Draw as rdk_draw
|
|
10
|
+
|
|
11
|
+
from chemrecon.chem.mol import MolTemplate, MolInstance
|
|
12
|
+
from chemrecon.chem.sumformula import SumFormula
|
|
13
|
+
from chemrecon.schema.entry_types.aam import AAM
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Side(Enum):
    """ The two sides of a reaction, encoded as signed integers.
    """
    L = -1  # Left-hand side
    R = 1   # Right-hand side
|
|
19
|
+
|
|
20
|
+
type stoich = int
|
|
21
|
+
|
|
22
|
+
class ChemReaction:
    """ Wrapper around an RDKit ``ChemicalReaction``.

    On construction, the reactants and products of the RDKit reaction are wrapped as MolInstance objects,
    a global-map-number index is built for each side, the atom-to-atom map is derived from those indices,
    and a MolTemplate is computed for every instance.
    """
    reaction: ChemicalReaction
    _reaction_smiles: Optional[str]  # Cache filled lazily by to_reaction_smiles()

    # Templates represent the structure of each compound. (There can be duplicate templates)
    # NOTE: counts are accumulated with -1 per LHS instance and +1 per RHS instance (see __init__).
    lhs_templates: dict[MolTemplate, stoich]
    rhs_templates: dict[MolTemplate, stoich]

    # Instances represent specific structures with maps as they participate in the reaction.
    lhs_instances: list[MolInstance]
    rhs_instances: list[MolInstance]

    # Mapping between each instance and its corresponding template
    instance_template_dict: dict[MolInstance, MolTemplate]

    # Properties of the given instances, as read through the input files if possible
    instance_properties: dict[MolInstance, dict[str, str]]
    template_ids: dict[MolTemplate, str]
    template_names: dict[MolTemplate, str]

    # Atom-to-atom map of this reaction
    map: dict[tuple[MolInstance, int], tuple[MolInstance, int]]
    lhs_index: dict[int, tuple[MolInstance, int]]  # The global map number to (molecule, index) tuple
    rhs_index: dict[int, tuple[MolInstance, int]]  # The global map number to (molecule, index) tuple

    def __init__(self, rdk_reaction: ChemicalReaction):
        """ Wrap ``rdk_reaction`` and derive instances, indices, the atom map and templates from it.

        :param rdk_reaction: The RDKit reaction to wrap.
        """
        self.reaction = rdk_reaction
        self._reaction_smiles = None

        self.lhs_instances = list()
        self.rhs_instances = list()
        self.lhs_templates = dict()
        self.rhs_templates = dict()

        self.instance_template_dict = dict()
        self.map = dict()

        self.instance_properties = dict()
        self.template_ids = dict()
        self.template_names = dict()

        # Set reactant and product MolInstances
        for rdk_mol in self.reaction.GetReactants():
            self.lhs_instances.append(
                MolInstance(rdk_mol, provenance = 'implicit')
            )

        for rdk_mol in self.reaction.GetProducts():
            self.rhs_instances.append(
                MolInstance(rdk_mol, provenance = 'implicit')
            )

        # Global map number -> molecule, index (referring to the order of atoms in the SMILES string)
        self.lhs_index = dict()
        self.rhs_index = dict()
        for lhs_instance in self.lhs_instances:
            for local_index, global_index in enumerate(lhs_instance.get_atom_map_in_native_order()):
                self.lhs_index[global_index] = (lhs_instance, local_index)
        for rhs_instance in self.rhs_instances:
            for local_index, global_index in enumerate(rhs_instance.get_atom_map_in_native_order()):
                self.rhs_index[global_index] = (rhs_instance, local_index)

        # Populate map: pair up the LHS and RHS atoms that share a global map number.
        for global_index, lhs_atom in self.lhs_index.items():
            rhs_atom = self.rhs_index.get(global_index)
            if rhs_atom is None:
                # Missing map
                # TODO do something if this is not a hydrogen?
                continue
            self.map[lhs_atom] = rhs_atom

        # Compute templates and add
        for side, instance_list, template_counts in [
            (-1, self.lhs_instances, self.lhs_templates),
            (1, self.rhs_instances, self.rhs_templates)
        ]:
            mol_instance: MolInstance
            for mol_instance in instance_list:
                template = mol_instance.to_mol_template()

                # Add to instance-template dict
                self.instance_template_dict[mol_instance] = template

                # Accumulate the signed count of this template on its side
                template_counts[template] = template_counts.get(template, 0) + side

    # Getters
    # ------------------------------------------------------------------------------------------------------------------
    def get_lhs_templates(self) -> list[MolTemplate]:
        """ Return the distinct templates on the left-hand side, in insertion order. """
        return list(self.lhs_templates.keys())

    def get_rhs_templates(self) -> list[MolTemplate]:
        """ Return the distinct templates on the right-hand side, in insertion order. """
        return list(self.rhs_templates.keys())

    # Balance
    # ------------------------------------------------------------------------------------------------------------------
    def get_balance_difference(self) -> SumFormula:
        """ Get the difference between the LHS and RHS as a (possible negative) MolFormula.
        Positive counts indicate a surplus on the LHS, negative counts indicate a surplus on the RHS.

        :raises NotImplementedError: Always; the computation is not implemented yet.
        """
        # TODO: the intended implementation sums the formulae of the LHS and RHS instances
        #  and returns (lhs_sum - rhs_sum).
        raise NotImplementedError()

    def is_balanced(self) -> bool:
        """ Returns true if balanced in both atomic composition and charge.
        If not balanced, use .get_balance_difference() to inspect the difference between LHS and RHS.

        :raises NotImplementedError: Currently always, because get_balance_difference() is not implemented.
        """
        return self.get_balance_difference().is_zero()

    # Representations
    # ------------------------------------------------------------------------------------------------------------------
    def to_reaction_smiles(self) -> str:
        """ Return the reaction SMILES of the wrapped reaction, computing and caching it on first use. """
        if self._reaction_smiles is None:
            self._reaction_smiles = rdk_r.ReactionToSmiles(self.reaction)

        return self._reaction_smiles

    # Serialise
    def serialize(self) -> dict:
        """ Return a dict with the reaction SMILES and the serialised LHS / RHS instances.

        The 'balanced' and 'balance_difference' entries are placeholders ('TODO').
        """
        return {
            'reaction_smiles': self.to_reaction_smiles(),
            'lhs': [
                mol.serialize() for mol in self.lhs_instances
            ],
            'rhs': [
                mol.serialize() for mol in self.rhs_instances
            ],
            'balanced': 'TODO',  # TODO
            'balance_difference': 'TODO',  # TODO compute difference in sum formulae for LHS and RHS
        }

    # Misc
    # ------------------------------------------------------------------------------------------------------------------
    def __hash__(self):
        """ Hash by reaction SMILES. """
        # NOTE(review): no matching __eq__ is defined, so equality remains identity-based - confirm intended.
        return self.to_reaction_smiles().__hash__()

    def sanity_check(self):
        """ Raises an exception if the AAM maps atoms of different elements.

        :raises AssertionError: If an instance has no SMILES, or if two mapped atoms differ in atomic number.
        """
        # Check all instances have smiles
        for inst in [*self.lhs_instances, *self.rhs_instances]:
            if inst.smiles is None:
                raise AssertionError('Instance invalid.')

        # Check mapping
        for global_index, (molinst_l, local_index_l) in self.lhs_index.items():
            try:
                molinst_r, local_index_r = self.rhs_index[global_index]
            except KeyError:
                # Atom 'disappears'
                continue

            l_atom = molinst_l.mol.GetAtomWithIdx(local_index_l)
            r_atom = molinst_r.mol.GetAtomWithIdx(local_index_r)
            if l_atom.GetAtomicNum() != r_atom.GetAtomicNum():
                raise AssertionError(f'Element mismatch: L: {local_index_l}, R: {local_index_r}')

    # Visualisation
    # ------------------------------------------------------------------------------------------------------------------
    def show(self):
        """ Render the reaction to an image with RDKit and open it via the image's ``show()``. """
        img = rdk_draw.ReactionToImage(rxn = self.reaction, subImgSize = (800, 800))
        img.show()
|
|
199
|
+
|
|
200
|
+
# Creators
|
|
201
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
202
|
+
def chem_reaction_from_aam_entry(
        entry: AAM
) -> ChemReaction:
    """ Build a ChemReaction from the reaction SMILES stored on an AAM entry.

    :param entry: AAM entry whose ``reaction_smiles`` attribute is parsed.
    :return: The ChemReaction produced by chem_reaction_from_reactionsmiles().
    """
    aam_smiles = entry.reaction_smiles
    return chem_reaction_from_reactionsmiles(aam_smiles)
|
|
210
|
+
|
|
211
|
+
def chem_reaction_from_reactionsmiles(
        reactionsmiles: str
) -> ChemReaction:
    """ Parse a reaction string with RDKit and wrap the result in a ChemReaction.

    :param reactionsmiles: The reaction string handed to ``ReactionFromSmarts``.
    :return: A ChemReaction wrapping the parsed RDKit reaction.
    """
    # NOTE(review): ReactionFromSmarts is called with its default arguments (no useSmiles flag),
    #  although the input is named a reaction SMILES - confirm this is intended.
    return ChemReaction(rdk_r.ReactionFromSmarts(reactionsmiles))
|
|
216
|
+
|
|
217
|
+
def chem_reaction_from_rxn(
        rxn: str,
        provenance: Optional[str] = None,
        safe: bool = False
) -> ChemReaction:
    """ Parse an RXN block with RDKit and wrap the result in a ChemReaction.

    :param rxn: The RXN block text.
    :param provenance: Accepted but not used by this function.
    :param safe: If True, RDKit is asked neither to sanitize nor to remove hydrogens.
    :return: A ChemReaction wrapping the parsed RDKit reaction.
    """
    # Sanitization and hydrogen removal are both switched by the same flag.
    process = not safe
    parsed = rdk_r.ReactionFromRxnBlock(rxn, sanitize = process, removeHs = process)
    return ChemReaction(parsed)
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from rdkit import Chem as rdk
|
|
6
|
+
|
|
7
|
+
from chemrecon.chem.gml.gml_to_rdk import gml_to_rdkit_mol
|
|
8
|
+
from chemrecon.chem.mol import MolTemplate, MolInstance, feat_enum_map
|
|
9
|
+
from chemrecon.core.id_types import S_SMILES, S_INCHI, S_GML, S_MOLFILE
|
|
10
|
+
from chemrecon.chem.mol import Mol
|
|
11
|
+
from chemrecon.schema.entry_types.molstructure import MolStructure
|
|
12
|
+
from chemrecon.schema.entry_types.molstructure_repr import MolStructureRepr
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def mol_from_smiles(smiles: str, provenance: Optional[str] = None, template: bool = False, safe = False) -> Mol:
    """ Create a Mol from a SMILES string.

    :param smiles: SMILES string parsed with RDKit's ``MolFromSmiles``.
    :param provenance: Passed through to the created object.
    :param template: If True a MolTemplate is created, otherwise a MolInstance.
    :param safe: Accepted but not used by this function.
    :return: The created MolTemplate or MolInstance.
    """
    mol_cls = MolTemplate if template else MolInstance
    return mol_cls(
        rdk.MolFromSmiles(smiles),
        provenance = provenance
    )
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def mol_from_inchi(inchi: str, provenance: Optional[str] = None, template: bool = False, safe = False) -> Mol:
    """ Create a Mol from an InChI string.

    :param inchi: InChI string parsed with RDKit's ``MolFromInchi``.
    :param provenance: Passed through to the created object.
    :param template: If True a MolTemplate is created, otherwise a MolInstance.
    :param safe: Accepted but not used by this function.
    :return: The created MolTemplate or MolInstance.
    """
    mol_cls = MolTemplate if template else MolInstance
    return mol_cls(rdk.MolFromInchi(inchi), provenance = provenance)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def mol_from_molblock(molblock: str, provenance: Optional[str] = None, template: bool = False, safe = False) -> Mol:
    """ Create a Mol from a MOL block.

    :param molblock: MOL block text parsed with RDKit's ``MolFromMolBlock``.
    :param provenance: Passed through to the created object.
    :param template: If True a MolTemplate is created, otherwise a MolInstance.
    :param safe: If truthy, RDKit is asked neither to sanitize nor to remove hydrogens.
    :return: The created MolTemplate or MolInstance.
    """
    mol_cls = MolTemplate if template else MolInstance
    return mol_cls(
        rdk.MolFromMolBlock(molblock, sanitize = not safe, removeHs = not safe),
        provenance = provenance
    )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def mol_from_gml(gml: str, provenance: Optional[str] = None, template: bool = False) -> Mol:
    """ Create a Mol from a GML string.

    The GML is first converted with gml_to_rdkit_mol(), then wrapped via mol_from_rdk_mol().

    :param gml: The GML text.
    :param provenance: Passed through to the created object.
    :param template: If True a MolTemplate is created, otherwise a MolInstance.
    :return: The created MolTemplate or MolInstance.
    """
    converted: rdk.Mol = gml_to_rdkit_mol(gml)
    return mol_from_rdk_mol(converted, provenance, template)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def mol_from_rdk_mol(rdk_mol: rdk.Mol, provenance: Optional[str] = None, template: bool = False) -> Mol:
    """ Wrap an existing RDKit molecule in a Mol.

    :param rdk_mol: The RDKit molecule to wrap.
    :param provenance: Passed through to the created object.
    :param template: If True a MolTemplate is created, otherwise a MolInstance.
    :return: The created MolTemplate or MolInstance.
    """
    mol_cls = MolTemplate if template else MolInstance
    return mol_cls(rdk_mol, provenance = provenance)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def mol_from_structurerepresentation(
|
|
66
|
+
entry: MolStructureRepr,
|
|
67
|
+
provenance: Optional[str] = None,
|
|
68
|
+
template: bool = False,
|
|
69
|
+
safe: bool = False
|
|
70
|
+
):
|
|
71
|
+
match entry.id_type:
|
|
72
|
+
case S_SMILES.enum_type:
|
|
73
|
+
return mol_from_smiles(entry.source_id, provenance, template = template, safe = safe)
|
|
74
|
+
case S_INCHI.enum_type:
|
|
75
|
+
return mol_from_inchi(entry.source_id, provenance, template = template, safe = safe)
|
|
76
|
+
case S_MOLFILE.enum_type:
|
|
77
|
+
return mol_from_molblock(entry.source_id, provenance, template = template, safe = safe)
|
|
78
|
+
case S_GML.enum_type:
|
|
79
|
+
return mol_from_gml(entry.source_id, provenance, template = template)
|
|
80
|
+
case _:
|
|
81
|
+
raise ValueError
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def mol_from_struct_entry(entry: MolStructure):
    """ Create a Mol from a MolStructure entry.

    The entry's SMILES is parsed with RDKit, and each of its ``std_feats`` is translated
    through ``feat_enum_map`` to form the ``set_features`` of the Mol.

    :param entry: The MolStructure entry.
    :return: The created Mol.
    """
    rdk_mol = rdk.MolFromSmiles(entry.smiles)
    features = {feat_enum_map[feat] for feat in entry.std_feats}
    return Mol(rdk_mol, set_features = features)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
""" Basic chemical information.
|
|
2
|
+
"""
|
|
3
|
+
from typing import NamedTuple
|
|
4
|
+
|
|
5
|
+
class Element(NamedTuple):
    """ A chemical element: atomic number, symbol and name.
    """
    num: int     # Atomic number
    symbol: str  # Element symbol, e.g. 'Fe'
    name: str    # Element name, e.g. 'Iron'


n = Element(0, 'n', 'Neutron')  # Included for correct list indices
H = Element(1, 'H', 'Hydrogen')
He = Element(2, 'He', 'Helium')
Li = Element(3, 'Li', 'Lithium')
Be = Element(4, 'Be', 'Beryllium')
B = Element(5, 'B', 'Boron')
C = Element(6, 'C', 'Carbon')
N = Element(7, 'N', 'Nitrogen')
O = Element(8, 'O', 'Oxygen')
F = Element(9, 'F', 'Fluorine')
Ne = Element(10, 'Ne', 'Neon')
Na = Element(11, 'Na', 'Sodium')
Mg = Element(12, 'Mg', 'Magnesium')
Al = Element(13, 'Al', 'Aluminium')
Si = Element(14, 'Si', 'Silicon')
P = Element(15, 'P', 'Phosphorus')
S = Element(16, 'S', 'Sulfur')
Cl = Element(17, 'Cl', 'Chlorine')
Ar = Element(18, 'Ar', 'Argon')
K = Element(19, 'K', 'Potassium')
Ca = Element(20, 'Ca', 'Calcium')
Sc = Element(21, 'Sc', 'Scandium')
Ti = Element(22, 'Ti', 'Titanium')
V = Element(23, 'V', 'Vanadium')
Cr = Element(24, 'Cr', 'Chromium')
Mn = Element(25, 'Mn', 'Manganese')
Fe = Element(26, 'Fe', 'Iron')
Co = Element(27, 'Co', 'Cobalt')
Ni = Element(28, 'Ni', 'Nickel')
Cu = Element(29, 'Cu', 'Copper')
Zn = Element(30, 'Zn', 'Zinc')
Ga = Element(31, 'Ga', 'Gallium')
Ge = Element(32, 'Ge', 'Germanium')
As = Element(33, 'As', 'Arsenic')
Se = Element(34, 'Se', 'Selenium')
Br = Element(35, 'Br', 'Bromine')
Kr = Element(36, 'Kr', 'Krypton')
Rb = Element(37, 'Rb', 'Rubidium')
Sr = Element(38, 'Sr', 'Strontium')
Y = Element(39, 'Y', 'Yttrium')
Zr = Element(40, 'Zr', 'Zirconium')
Nb = Element(41, 'Nb', 'Niobium')
Mo = Element(42, 'Mo', 'Molybdenum')
Tc = Element(43, 'Tc', 'Technetium')
Ru = Element(44, 'Ru', 'Ruthenium')
Rh = Element(45, 'Rh', 'Rhodium')
Pd = Element(46, 'Pd', 'Palladium')
Ag = Element(47, 'Ag', 'Silver')
Cd = Element(48, 'Cd', 'Cadmium')
In = Element(49, 'In', 'Indium')
Sn = Element(50, 'Sn', 'Tin')
Sb = Element(51, 'Sb', 'Antimony')
Te = Element(52, 'Te', 'Tellurium')
I = Element(53, 'I', 'Iodine')
Xe = Element(54, 'Xe', 'Xenon')
Cs = Element(55, 'Cs', 'Caesium')
Ba = Element(56, 'Ba', 'Barium')
La = Element(57, 'La', 'Lanthanum')
Ce = Element(58, 'Ce', 'Cerium')
Pr = Element(59, 'Pr', 'Praseodymium')
Nd = Element(60, 'Nd', 'Neodymium')
Pm = Element(61, 'Pm', 'Promethium')
Sm = Element(62, 'Sm', 'Samarium')
Eu = Element(63, 'Eu', 'Europium')
Gd = Element(64, 'Gd', 'Gadolinium')
Tb = Element(65, 'Tb', 'Terbium')
Dy = Element(66, 'Dy', 'Dysprosium')
Ho = Element(67, 'Ho', 'Holmium')
Er = Element(68, 'Er', 'Erbium')
Tm = Element(69, 'Tm', 'Thulium')
Yb = Element(70, 'Yb', 'Ytterbium')
Lu = Element(71, 'Lu', 'Lutetium')
Hf = Element(72, 'Hf', 'Hafnium')
Ta = Element(73, 'Ta', 'Tantalum')
W = Element(74, 'W', 'Tungsten')
Re = Element(75, 'Re', 'Rhenium')
Os = Element(76, 'Os', 'Osmium')
Ir = Element(77, 'Ir', 'Iridium')
Pt = Element(78, 'Pt', 'Platinum')
Au = Element(79, 'Au', 'Gold')
Hg = Element(80, 'Hg', 'Mercury')
Tl = Element(81, 'Tl', 'Thallium')
Pb = Element(82, 'Pb', 'Lead')
Bi = Element(83, 'Bi', 'Bismuth')
Po = Element(84, 'Po', 'Polonium')
At = Element(85, 'At', 'Astatine')
Rn = Element(86, 'Rn', 'Radon')
Fr = Element(87, 'Fr', 'Francium')
Ra = Element(88, 'Ra', 'Radium')
Ac = Element(89, 'Ac', 'Actinium')
Th = Element(90, 'Th', 'Thorium')
Pa = Element(91, 'Pa', 'Protactinium')
U = Element(92, 'U', 'Uranium')
Np = Element(93, 'Np', 'Neptunium')
Pu = Element(94, 'Pu', 'Plutonium')
Am = Element(95, 'Am', 'Americium')
Cm = Element(96, 'Cm', 'Curium')
Bk = Element(97, 'Bk', 'Berkelium')
Cf = Element(98, 'Cf', 'Californium')
Es = Element(99, 'Es', 'Einsteinium')
Fm = Element(100, 'Fm', 'Fermium')
Md = Element(101, 'Md', 'Mendelevium')
No = Element(102, 'No', 'Nobelium')
Lr = Element(103, 'Lr', 'Lawrencium')
Rf = Element(104, 'Rf', 'Rutherfordium')
Db = Element(105, 'Db', 'Dubnium')
Sg = Element(106, 'Sg', 'Seaborgium')
Bh = Element(107, 'Bh', 'Bohrium')
Hs = Element(108, 'Hs', 'Hassium')
Mt = Element(109, 'Mt', 'Meitnerium')
Ds = Element(110, 'Ds', 'Darmstadtium')
Rg = Element(111, 'Rg', 'Roentgenium')
Cn = Element(112, 'Cn', 'Copernicium')
Nh = Element(113, 'Nh', 'Nihonium')
Fl = Element(114, 'Fl', 'Flerovium')
Mc = Element(115, 'Mc', 'Moscovium')
Lv = Element(116, 'Lv', 'Livermorium')
Ts = Element(117, 'Ts', 'Tennessine')
Og = Element(118, 'Og', 'Oganesson')

# All elements ordered by atomic number, so that elements[z] is the element with atomic number z.
elements: list[Element] = [
    n,
    H, He,
    Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, Mn, Fe,
    Co, Ni, Cu, Zn, Ga, Ge, As, Se, Br, Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, Pd, Ag, Cd, In,
    Sn, Sb, Te, I, Xe, Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, Ho, Er, Tm, Yb, Lu, Hf,
    Ta, W, Re, Os, Ir, Pt, Au, Hg, Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac, Th, Pa, U, Np, Pu, Am, Cm,
    Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, Bh, Hs, Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og,
]

# Lookup tables by symbol and by atomic number.
symbol_element: dict[str, Element] = {e.symbol: e for e in elements}
atomicnum_element: dict[int, Element] = {e.num: e for e in elements}
|
|
File without changes
|