chemrecon 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemrecon/__init__.py +73 -0
- chemrecon/chem/__init__.py +0 -0
- chemrecon/chem/chemreaction.py +223 -0
- chemrecon/chem/constant_compounds.py +3 -0
- chemrecon/chem/create_mol.py +91 -0
- chemrecon/chem/elements.py +141 -0
- chemrecon/chem/gml/__init__.py +0 -0
- chemrecon/chem/gml/gml.py +324 -0
- chemrecon/chem/gml/gml_reactant_matching.py +130 -0
- chemrecon/chem/gml/gml_to_rdk.py +217 -0
- chemrecon/chem/mol.py +483 -0
- chemrecon/chem/sumformula.py +120 -0
- chemrecon/connection.py +97 -0
- chemrecon/core/__init__.py +0 -0
- chemrecon/core/id_types.py +687 -0
- chemrecon/core/ontology.py +209 -0
- chemrecon/core/populate_query_handler.py +336 -0
- chemrecon/core/query_handler.py +587 -0
- chemrecon/database/__init__.py +1 -0
- chemrecon/database/connect.py +63 -0
- chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
- chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
- chemrecon/database/params.py +88 -0
- chemrecon/entrygraph/draw.py +119 -0
- chemrecon/entrygraph/entrygraph.py +301 -0
- chemrecon/entrygraph/explorationprotocol.py +199 -0
- chemrecon/entrygraph/explore.py +421 -0
- chemrecon/entrygraph/explore_procedure.py +183 -0
- chemrecon/entrygraph/filter.py +88 -0
- chemrecon/entrygraph/scoring.py +141 -0
- chemrecon/query/__init__.py +26 -0
- chemrecon/query/create_entry.py +86 -0
- chemrecon/query/default_protocols.py +57 -0
- chemrecon/query/find_entry.py +84 -0
- chemrecon/query/get_relations.py +143 -0
- chemrecon/query/get_structures_from_compound.py +65 -0
- chemrecon/schema/__init__.py +86 -0
- chemrecon/schema/db_object.py +363 -0
- chemrecon/schema/direction.py +10 -0
- chemrecon/schema/entry_types/__init__.py +0 -0
- chemrecon/schema/entry_types/aam.py +34 -0
- chemrecon/schema/entry_types/aam_repr.py +37 -0
- chemrecon/schema/entry_types/compound.py +52 -0
- chemrecon/schema/entry_types/enzyme.py +49 -0
- chemrecon/schema/entry_types/molstructure.py +64 -0
- chemrecon/schema/entry_types/molstructure_repr.py +41 -0
- chemrecon/schema/entry_types/reaction.py +57 -0
- chemrecon/schema/enums.py +154 -0
- chemrecon/schema/procedural_relation_entrygraph.py +66 -0
- chemrecon/schema/relation_types_composed/__init__.py +0 -0
- chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
- chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
- chemrecon/schema/relation_types_procedural/__init__.py +0 -0
- chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
- chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
- chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
- chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
- chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
- chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
- chemrecon/schema/relation_types_source/__init__.py +0 -0
- chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
- chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
- chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
- chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
- chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
- chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
- chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
- chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
- chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
- chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
- chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
- chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
- chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
- chemrecon/scripts/initialize_database.py +494 -0
- chemrecon/utils/copy_signature.py +10 -0
- chemrecon/utils/encodeable_list.py +11 -0
- chemrecon/utils/get_id_type.py +70 -0
- chemrecon/utils/hungarian.py +31 -0
- chemrecon/utils/reactant_matching.py +168 -0
- chemrecon/utils/rxnutils.py +44 -0
- chemrecon/utils/set_cwd.py +12 -0
- chemrecon-0.1.1.dist-info/METADATA +143 -0
- chemrecon-0.1.1.dist-info/RECORD +86 -0
- chemrecon-0.1.1.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,687 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Callable
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
# Lookup lists and dictionaries
|
|
7
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
8
|
+
id_types: list[IdentifierType] = []                       # every IdentifierType ever constructed, in creation order
id_type_name_lookup: dict[str, IdentifierType] = {}       # name / shortname / alt name -> type
identifiers_org_dict: dict[str, IdentifierType] = {}      # identifiers.org URL prefix -> type


# Types of ID types
# ----------------------------------------------------------------------------------------------------------------------
class IdentifierType:
    """
    Represents a general type of identifier with attributes and methods for standardization,
    recognition, and manipulation.

    This class is designed to encapsulate information about a specific type of identifier,
    including its primary name, alternative names, recognizable patterns, and
    standardization logic.

    Constructing an instance has module-level side effects: the instance is appended to
    ``id_types``, registered in ``id_type_name_lookup`` under its name, shortname and every
    alternative name, and registered in ``identifiers_org_dict`` under each of its
    identifiers.org URL prefixes.
    """
    name: str  #: Primary name of this identifier type.
    shortname: str  #: The name used for the type in the database.
    alt_names: set[str]  #: Alternative names to search for.
    prefixes: set[str]  #: Prefixes removed by :meth:`trim`.
    suffixes: set[str]  #: Suffixes removed by :meth:`trim`.
    enum_type: 'IdType'  #: The corresponding Enum value, as present in entries
    stdfunc: Callable[[str], str] | None  #: Function used to standardize identifiers.
    recogniser: re.Pattern | None  #: Pattern used to recognize identifiers of this type.
    identifiers_org_prefix: str  #: Prefix in identifiers.org ('' when the type has none).
    id_org_prefix: str  #: Alias of ``identifiers_org_prefix`` (the originally annotated name).
    objectname: str | None  # name of the assigned object

    def __init__(
            self,
            name: str,
            shortname: str,
            alt_names: set[str] | None = None,
            prefixes: set[str] | None = None,
            suffixes: set[str] | None = None,
            stdfunc: Callable[[str], str] | None = None,
            recogniser: re.Pattern | None = None,
            objectname: str | None = None
    ):
        """
        Create the identifier type and register it in the module-level lookups.

        :param name: Primary, human-readable name.
        :param shortname: Short name; also used as the hash key of the instance.
        :param alt_names: Additional names under which the type is registered.
        :param prefixes: String prefixes that :meth:`trim` strips from identifiers.
        :param suffixes: String suffixes that :meth:`trim` strips from identifiers.
        :param stdfunc: Optional standardisation callable used by :meth:`std_identifier`.
        :param recogniser: Optional compiled pattern describing valid identifiers.
        :param objectname: Name of the module-level variable holding the instance (used by ``repr``).
        """
        self.name = name
        self.shortname = shortname
        # Omitted collections become empty *sets*. Previously a missing ``alt_names`` stayed
        # ``None`` (crashing the registration loop below) and missing prefixes/suffixes became
        # an empty dict rather than a set.
        self.alt_names = set(alt_names) if alt_names else set()
        self.prefixes = set(prefixes) if prefixes else set()
        self.suffixes = set(suffixes) if suffixes else set()
        self.stdfunc = stdfunc
        self.recogniser = recogniser
        self.objectname = objectname

        # Make identifiers org link
        # TODO doesn't always work (mnx?)
        self.identifiers_org_prefix = ''
        # Sorted iteration makes the retained prefix deterministic: 'http://…' sorts before
        # 'https://…', so the https variant is the one kept when both are present.
        for prefix in sorted(self.prefixes):
            if prefix.startswith(('https://identifiers.org/', 'http://identifiers.org/')):
                identifiers_org_dict[prefix] = self
                self.identifiers_org_prefix = prefix
        # The class annotation historically used this shorter name; keep both usable.
        self.id_org_prefix = self.identifiers_org_prefix

        # Register in lookup
        id_types.append(self)

        # Register names
        id_type_name_lookup[self.name] = self
        id_type_name_lookup[self.shortname] = self
        for alt_name in self.alt_names:
            id_type_name_lookup[alt_name] = self

    def __repr__(self):
        # ``repr`` must return a str; fall back to a descriptive form when no objectname was given.
        return self.objectname or f'{type(self).__name__}({self.shortname!r})'

    def __str__(self):
        return self.name

    def __hash__(self):
        return hash(self.shortname)

    def std_identifier(self, s: str) -> str:
        """ Standardize a given identifier of this type (``str(s)`` when no stdfunc is set). """
        if self.stdfunc:
            return self.stdfunc(s)
        return str(s)

    def trim(self, s: str) -> str:
        """ Remove pre- and suffixes of the string, including identifiers.org urls. """
        s = s.strip()
        for prefix in self.prefixes:
            s = s.removeprefix(prefix)
        for suffix in self.suffixes:
            s = s.removesuffix(suffix)
        return s
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# Register subtypes
|
|
102
|
+
class IdentifierTypeCompound(IdentifierType):
    """ Identifier type whose ``enum_type`` is annotated as ``IdTypeCompoundEnum``. """
    enum_type: 'IdTypeCompoundEnum'


class IdentifierTypeStructureRepresentation(IdentifierType):
    """ Identifier type whose ``enum_type`` is annotated as ``IdTypeStructureRepresentationEnum``. """
    enum_type: 'IdTypeStructureRepresentationEnum'


class IdentifierTypeReaction(IdentifierType):
    """ Identifier type whose ``enum_type`` is annotated as ``IdTypeReactionEnum``. """
    enum_type: 'IdTypeReactionEnum'


class IdentifierTypeEnzyme(IdentifierType):
    """ Identifier type whose ``enum_type`` is annotated as ``IdTypeEnzymeEnum``. """
    enum_type: 'IdTypeEnzymeEnum'


class IdentifierTypeAAM(IdentifierType):
    """ Identifier type whose ``enum_type`` is annotated as ``IdTypeAAMEnum``. """
    enum_type: 'IdTypeAAMEnum'
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# Standardisation functions
|
|
123
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
124
|
+
# BiGG
|
|
125
|
+
_compartment_suffixes = [
|
|
126
|
+
'_e', '_ex', '_c', '_p', '_m', '_x', '_b',
|
|
127
|
+
'_E', '_EX', '_C', '_P', '_M', '_X', '_B'
|
|
128
|
+
]
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _std_bigg(s: str) -> str:
|
|
132
|
+
""" Create a universal BIGG id without the compartment suffix"""
|
|
133
|
+
# TODO double underscore is necessary (indicates stereo - "phe__L" is L-Phenylalanine)?
|
|
134
|
+
s_ = s.strip()
|
|
135
|
+
for suffix in _compartment_suffixes:
|
|
136
|
+
s_ = s_.removesuffix(suffix)
|
|
137
|
+
s_ = s_.replace('_DASH', '') # Fix weird 'DASH' notation, i.e. M_12ppd_DASH_S -> M_12ppd__S
|
|
138
|
+
s_ = s_.replace('__', '_') # Hacky fix for double underscore notation
|
|
139
|
+
s_ = s_.removeprefix('m_')
|
|
140
|
+
if not s_.startswith('M_'):
|
|
141
|
+
s_ = f'M_{s_}'
|
|
142
|
+
|
|
143
|
+
if s_.isupper():
|
|
144
|
+
# Fix upper case
|
|
145
|
+
s_ = f'M_{s_.removeprefix("M_").lower()}'
|
|
146
|
+
|
|
147
|
+
return s_
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _std_bigg_r(s: str) -> str:
|
|
151
|
+
s_ = s.strip()
|
|
152
|
+
for suffix in _compartment_suffixes:
|
|
153
|
+
s_ = s_.removesuffix(suffix)
|
|
154
|
+
s_ = s_.replace('_DASH', '').replace('__', '_').removeprefix('r_')
|
|
155
|
+
if not s_.startswith('R_'):
|
|
156
|
+
s_ = f'R_{s_}'
|
|
157
|
+
if s_.isupper():
|
|
158
|
+
# Fix upper case
|
|
159
|
+
s_ = f'R_{s_.removeprefix("M_").lower()}'
|
|
160
|
+
|
|
161
|
+
# Replace LPAREN, RPAREN notation in names
|
|
162
|
+
s_ = s_.replace('LPAREN_', '_').replace('RPAREN_', '_')
|
|
163
|
+
|
|
164
|
+
return s_
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _std_chebi(chebi: str) -> str:
|
|
168
|
+
""" Chebi IDs should be prefixed with CHEBI: """
|
|
169
|
+
if not chebi:
|
|
170
|
+
raise ValueError(f'Received empty CHEBI')
|
|
171
|
+
chebi_new = chebi.strip()
|
|
172
|
+
if chebi_new.startswith('http://identifiers.org/chebi/CHEBI'):
|
|
173
|
+
return chebi_new.removeprefix('http://identifiers.org/chebi/')
|
|
174
|
+
if chebi_new.startswith('https://identifiers.org/chebi/CHEBI'):
|
|
175
|
+
return chebi_new.removeprefix('https://identifiers.org/chebi/')
|
|
176
|
+
|
|
177
|
+
if chebi_new.startswith('CHEBI:'):
|
|
178
|
+
return chebi_new
|
|
179
|
+
else:
|
|
180
|
+
return f'CHEBI:{chebi_new}'
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# C_NAME
|
|
184
|
+
def _standardise_cname(cname: str) -> str:
|
|
185
|
+
""" Lowercase, etc """
|
|
186
|
+
return cname.lower().strip()
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# Compound Identifier Types
|
|
190
|
+
# ---------------------------------------------------------------------------------------------------------------------
|
|
191
|
+
|
|
192
|
+
# Unknown
|
|
193
|
+
C_UNKNOWN = IdentifierTypeCompound(
    name='Unknown Compound',
    shortname='unknown_c',
    alt_names=set(),
    prefixes=set(),
    objectname='C_UNKNOWN',
)

# Name
# NOTE(review): _standardise_cname is defined in this module but is not passed as stdfunc here —
# confirm whether that is intentional.
C_NAME = IdentifierTypeCompound(
    name='Compound Name',
    shortname='cname',
    alt_names=set(),
    prefixes=set(),
    objectname='C_NAME',
)

# MetaNetX
C_MNX = IdentifierTypeCompound(
    name='MetaNetX',
    shortname='mnx',
    alt_names={'metanetx', 'mnx'},
    prefixes={
        'http://identifiers.org/metanetx.chemical/',
        'https://identifiers.org/metanetx.chemical/',
    },
    recogniser=re.compile(r'^(MNXM\d+|MNX\d+|BIOMASS|WATER)$'),
    objectname='C_MNX',
)

# BiGG (identifiers are standardised with _std_bigg)
C_BIGG = IdentifierTypeCompound(
    name='BiGG',
    shortname='bigg',
    alt_names={'bigg', 'biggM', 'bigg.metabolite'},
    prefixes={
        'http://bigg.ucsd.edu/models/universal/metabolites/',
        'http://identifiers.org/bigg.metabolite/',
        'https://identifiers.org/bigg.metabolite/',
        'bigg.metabolite:',
    },
    suffixes={'_e', '_ex', '_c', '_p', '_m', '_x', '_b'},
    stdfunc=_std_bigg,
    recogniser=re.compile(r'^[a-z_A-Z0-9]+$'),
    objectname='C_BIGG',
)

# PubChem CID
C_PUBCHEM = IdentifierTypeCompound(
    name='PubChem CiD',
    shortname='pubchem_cid',
    alt_names={'pubchem', 'pc_cid', 'cid'},
    prefixes={
        'http://identifiers.org/pubchem.compound/',
        'https://identifiers.org/pubchem.compound/',
    },
    recogniser=re.compile(r'^\d+$'),
    objectname='C_PUBCHEM',
)

# KEGG Compound
C_KEGG = IdentifierTypeCompound(
    name='KEGG',
    shortname='kegg',
    alt_names={'kegg', 'keggC', 'kegg.compound', 'KEGG COMPOUND', 'KEGG', 'KEGG COMPOUND accession'},
    prefixes={
        'http://identifiers.org/kegg.compound/',
        'https://identifiers.org/kegg.compound/',
    },
    recogniser=re.compile(r'^C\d+$'),
    objectname='C_KEGG',
)

# ChEBI (identifiers are standardised with _std_chebi)
C_CHEBI = IdentifierTypeCompound(
    name='ChEBI',
    shortname='chebi',
    alt_names={'chebi', 'CHEBI', 'ChEBI'},
    prefixes={
        'http://identifiers.org/chebi/',
        'https://identifiers.org/chebi/',
    },
    recogniser=re.compile(r'^CHEBI:\d+$'),
    stdfunc=_std_chebi,
    objectname='C_CHEBI',
)

# ECMDB
C_ECMDB = IdentifierTypeCompound(
    name='ECMDB',
    shortname='ecmdb',
    alt_names={'ecmdb'},
    recogniser=re.compile(r'^ECMDB\d+$'),
    objectname='C_ECMDB',
)

# InChI Key
C_INCHIKEY = IdentifierTypeCompound(
    name='InChI key',
    shortname='inchi_key',
    alt_names={'inchi_key', 'inchikey'},
    prefixes={
        'http://identifiers.org/inchikey/',
        'https://identifiers.org/inchikey/',
    },
    recogniser=re.compile(r'^[A-Z]{14}-[A-Z]{10}(-[A-Z])?$'),
    objectname='C_INCHIKEY',
)

# SLM
C_SLM = IdentifierTypeCompound(
    name='SwissLipids',
    shortname='slm',
    alt_names={'slm', 'SLM'},
    prefixes={
        'http://identifiers.org/slm/',
        'https://identifiers.org/slm/',
    },
    recogniser=re.compile(r'^SLM:\d+$'),  # TODO check?
    objectname='C_SLM',
)

# LipidMaps
C_LIPIDMAPS = IdentifierTypeCompound(
    name='LipidMaps',
    shortname='lipidmapsm',
    alt_names={'LipidMapsM', 'lipidmapsM', 'lipidmaps', 'LIPID MAPS'},
    prefixes={
        'http://identifiers.org/lipidmaps/',
        'https://identifiers.org/lipidmaps/',
    },
    recogniser=re.compile(r'^LM(FA|GL|GP|SP|ST|PR|SL|PK)[0-9]{4}([0-9a-zA-Z]{4,6})?$'),
    objectname='C_LIPIDMAPS',
)

# MetaCyc Compound
C_METACYC = IdentifierTypeCompound(
    name='MetaCyc Compound',
    shortname='metacyc',
    alt_names={'metacycM', 'metacyc.compound'},
    prefixes={
        'http://identifiers.org/metacyc.compound/',
        'https://identifiers.org/metacyc.compound/',
    },
    recogniser=re.compile(r'^[A-Za-z0-9+_.%-:]+$'),
    objectname='C_METACYC',
)

# EnviPath (Can be both compound and reaction?)
C_ENVIPATH = IdentifierTypeCompound(
    name='enviPath',
    shortname='envipath',
    alt_names={'envipathM'},
    prefixes={
        'http://identifiers.org/envipath/',
        'https://identifiers.org/envipath/',
    },
    recogniser=re.compile(
        r'^[\w^_]{8}-[\w^_]{4}-[\w^_]{4}-[\w^_]{4}-[\w^_]{12}/[\w-]+/[\w^_]{8}-'
        r'[\w^_]{4}-[\w^_]{4}-[\w^_]{4}-[\w^_]{12}$'
    ),
    objectname='C_ENVIPATH',
)

# Seed Compounds
C_SEED = IdentifierTypeCompound(
    name='Seed Compound',
    shortname='seed',
    alt_names={'seedM', 'seed.compound'},
    prefixes={
        'http://identifiers.org/seed.compound/',
        'https://identifiers.org/seed.compound/',
    },
    recogniser=re.compile(r'^cpd\d+$'),
    objectname='C_SEED',
)

# Sabio-RK compound
C_SABIORK = IdentifierTypeCompound(
    name='Sabio-RK Compound',
    shortname='sabiork',
    alt_names={'sabiork', 'sabiorkM', 'sabiork.compound'},
    prefixes={
        'http://identifiers.org/sabiork.compound/',
        'https://identifiers.org/sabiork.compound/',
    },
    recogniser=re.compile(r'^\d+$'),
    objectname='C_SABIORK',
)

# HMDB
C_HMDB = IdentifierTypeCompound(
    name='HMDB',
    shortname='hmdb',
    alt_names={'hmdb', 'HMDB'},
    prefixes={
        'http://identifiers.org/hmdb/',
        'https://identifiers.org/hmdb/',
    },
    objectname='C_HMDB',
)

# Reactome
C_REACTOME = IdentifierTypeCompound(
    name='Reactome',
    shortname='reactome',
    alt_names={'reactomeM', 'reactome.compound'},
    prefixes={
        'http://identifiers.org/reactome/',
        'https://identifiers.org/reactome/',
    },
    recogniser=re.compile(r'(^R-[A-Z]{3}-\d+(-\d+)?(\.\d+)?$)|(^REACT_\d+(\.\d+)?$)'),
    objectname='C_REACTOME',
)

# PDBe
# NOTE(review): the name reads 'PBDE' while shortname and objectname use 'pdbe' — possibly a typo;
# left unchanged because the name is a runtime string.
C_PDBE = IdentifierTypeCompound(
    name='PBDE Compound',
    shortname='pdbe',
    alt_names={'PDBeChem'},
    objectname='C_PDBE',
)

# BioCyc
C_BIOCYC = IdentifierTypeCompound(
    name='BioCyc Compound',
    shortname='biocyc',
    alt_names={'biocyc'},
    prefixes={
        'https://identifiers.org/biocyc/',
        'http://identifiers.org/biocyc/',
    },
    recogniser=re.compile(r'^[A-Z-0-9]+(:)?[A-Za-z0-9+_.%-:]+$'),
    objectname='C_BIOCYC',
)

# MetaMDB
C_METAMDB = IdentifierTypeCompound(
    name='MetaMDB Compound',
    shortname='metamdb_c',
    alt_names={'metamdb_c'},
    objectname='C_METAMDB',
)

# Brenda
C_BRENDA = IdentifierTypeCompound(
    name='Brenda Compound',
    shortname='brenda_c',
    alt_names={'brenda_c'},
    objectname='C_BRENDA',
)
|
|
444
|
+
|
|
445
|
+
# TODO ChemSpider
|
|
446
|
+
|
|
447
|
+
# Structure representation Identifier Types
|
|
448
|
+
# ---------------------------------------------------------------------------------------------------------------------
|
|
449
|
+
|
|
450
|
+
S_UNKNOWN = IdentifierTypeStructureRepresentation(
    name='Unknown structure',
    shortname='unknown_s',
    alt_names=set(),
    prefixes=set(),
    objectname='S_UNKNOWN',
)

# S_SMILES
# NOTE(review): the display name is 'S_SMILES' (same as the object name) — possibly meant to be
# 'SMILES'; left unchanged because it is a runtime string.
S_SMILES = IdentifierTypeStructureRepresentation(
    name='S_SMILES',
    shortname='smiles',
    alt_names={'smiles'},
    objectname='S_SMILES',
)

# InChI
S_INCHI = IdentifierTypeStructureRepresentation(
    name='InChI',
    shortname='inchi',
    alt_names={'inchi'},
    objectname='S_INCHI',
)

# MolFile
S_MOLFILE = IdentifierTypeStructureRepresentation(
    name='Molfile',
    shortname='molfile',
    alt_names={'molfile'},
    objectname='S_MOLFILE',
)

# S_GML
S_GML = IdentifierTypeStructureRepresentation(
    name='GML',
    shortname='gml',
    alt_names={'gml'},
    objectname='S_GML',
)
|
|
489
|
+
|
|
490
|
+
# Reaction Identifier Types
|
|
491
|
+
# ---------------------------------------------------------------------------------------------------------------------
|
|
492
|
+
R_UNKNOWN = IdentifierTypeReaction(
    name='Unknown Reaction',
    shortname='unknown_r',
    alt_names=set(),
    prefixes=set(),
    objectname='R_UNKNOWN',
)

R_NAME = IdentifierTypeReaction(
    name='Reaction Name',
    shortname='rname',
    alt_names={'rname'},
    objectname='R_NAME',
)

R_MNX = IdentifierTypeReaction(
    name='MetaNetX Reaction',
    shortname='mnx_r',
    alt_names={'mnxr'},
    prefixes={
        'https://identifiers.org/metanetx.reaction/',
        'http://identifiers.org/metanetx.reaction/',
    },
    recogniser=re.compile(r'^(MNXR\d+|EMPTY)$'),
    objectname='R_MNX',
)

R_METACYC = IdentifierTypeReaction(
    name='MetaCyc Reaction',
    shortname='metacyc_r',
    alt_names={'metacycr', 'metacycR', 'metacyc.reaction'},
    prefixes={
        'https://identifiers.org/metacyc.reaction/',
        'http://identifiers.org/metacyc.reaction/',
    },
    recogniser=re.compile(r'^[A-Za-z0-9+_.%-:]+$'),
    objectname='R_METACYC',
)

# BiGG reactions (identifiers are standardised with _std_bigg_r).
# The http prefix now ends in '/' like the https one, so trimming no longer leaves a leading slash.
R_BIGG = IdentifierTypeReaction(
    name='BiGG Reaction',
    shortname='bigg_r',
    alt_names={'biggr', 'bigg_r', 'biggR', 'bigg.reaction'},
    prefixes={
        'https://identifiers.org/bigg.reaction/',
        'http://identifiers.org/bigg.reaction/',
    },
    recogniser=re.compile(r'^[a-z_A-Z0-9]+$'),
    stdfunc=_std_bigg_r,
    objectname='R_BIGG',
)

# The seed.reaction URL prefixes belong here; they were previously attached to R_KEGG.
R_SEED = IdentifierTypeReaction(
    name='SEED Reaction',
    shortname='seed_r',
    alt_names={'seed_r', 'seedR', 'seed.reaction'},
    prefixes={
        'https://identifiers.org/seed.reaction/',
        'http://identifiers.org/seed.reaction/',
    },
    recogniser=re.compile(r'^rxn\d+$'),
    objectname='R_SEED',
)

# KEGG reactions use the kegg.reaction namespace (the prefixes used to point at seed.reaction).
R_KEGG = IdentifierTypeReaction(
    name='KEGG Reaction',
    shortname='kegg_r',
    alt_names={'kegg_r', 'keggR', 'kegg.reaction'},
    prefixes={
        'https://identifiers.org/kegg.reaction/',
        'http://identifiers.org/kegg.reaction/',
    },
    recogniser=re.compile(r'^R\d+$'),
    objectname='R_KEGG',
)

R_RHEA = IdentifierTypeReaction(
    name='RHEA Reaction',
    shortname='rhea_r',
    alt_names={'rheaR', 'rhea'},
    prefixes={
        'https://identifiers.org/rhea/',
        'http://identifiers.org/rhea/',
    },
    recogniser=re.compile(r'^\d{5}$'),
    objectname='R_RHEA',
)

# The http prefix now ends in '/' like the https one.
R_SABIORK = IdentifierTypeReaction(
    name='Sabio RK Reaction',
    shortname='sabiork_r',
    alt_names={'sabiorkR', 'sabiork.reaction'},
    prefixes={
        'https://identifiers.org/sabiork.reaction/',
        'http://identifiers.org/sabiork.reaction/',
    },
    recogniser=re.compile(r'^\d+$'),
    objectname='R_SABIORK',
)

R_METAMDB = IdentifierTypeReaction(
    name='MetaMDB reaction',
    shortname='metamdb_r',
    alt_names={'metamdb_r'},
    objectname='R_METAMDB',
)

R_MCSA = IdentifierTypeReaction(
    name='MCSA reaction',
    shortname='mcsa_r',
    alt_names={'mcsa_r'},
    objectname='R_MCSA',
)

R_BRENDA = IdentifierTypeReaction(
    name='Brenda Reaction',
    shortname='brenda_r',
    alt_names={'brenda_r'},
    objectname='R_BRENDA',
)
|
|
609
|
+
|
|
610
|
+
# Enzyme Identifier Types
|
|
611
|
+
# --------------------------------------------------------------------------------------------------------------
|
|
612
|
+
E_UNKNOWN = IdentifierTypeEnzyme(
    name='Unknown enzyme',
    shortname='unknown_e',
    alt_names=set(),
    prefixes=set(),
    objectname='E_UNKNOWN',
)

E_NAME = IdentifierTypeEnzyme(
    name='Enzyme Name',
    shortname='ename',
    alt_names={'ename'},
    objectname='E_NAME',
)

E_EC = IdentifierTypeEnzyme(
    name='EC',
    shortname='ec',
    alt_names={'ec'},
    prefixes={
        'http://identifiers.org/ec-code/',
        'https://identifiers.org/ec-code/',
    },
    # The alternatives are wrapped in a non-capturing group so that '^' and '$' apply to every
    # one of them; ungrouped, '^' bound only to the first alternative and '$' only to the last.
    # The '(n)?' capture group keeps its number (1).
    recogniser=re.compile(
        r'^(?:\d+\.-\.-\.-|\d+\.\d+\.-\.-|\d+\.\d+\.\d+\.-|\d+\.\d+\.\d+\.(n)?\d+)$'
    ),
    objectname='E_EC',
)
|
|
638
|
+
|
|
639
|
+
# Atom-to-Atom Map Identifier Types
|
|
640
|
+
# --------------------------------------------------------------------------------------------------------------
|
|
641
|
+
A_UNKNOWN = IdentifierTypeAAM(
    name='Unknown AAM',
    shortname='unknown_a',
    alt_names=set(),
    prefixes=set(),
    objectname='A_UNKNOWN',
)

# Reaction SMILES
A_REACTIONSMILES = IdentifierTypeAAM(
    name='Reaction Smiles',
    shortname='rsmiles',
    alt_names={'rsmiles'},
    objectname='A_REACTIONSMILES',
)

# RXN
A_RXN = IdentifierTypeAAM(
    name='RXN',
    shortname='rxn',
    alt_names={'rxn'},
    objectname='A_RXN',
)

# GML rule
# NOTE(review): the display name is 'S_GML rule' — possibly meant to be 'GML rule'; left unchanged
# because it is a runtime string.
A_GML_RULE = IdentifierTypeAAM(
    name='S_GML rule',
    shortname='gml_rule',
    alt_names={'gml_rule'},
    objectname='A_GML_RULE',
)
|
|
669
|
+
|
|
670
|
+
# Lists of ID types
|
|
671
|
+
# --------------------------------------------------------------------------------------------------------------
|
|
672
|
+
# NOTE(review): C_UNKNOWN, C_NAME, C_PDBE and C_BRENDA are not part of this list — confirm that
# leaving out C_PDBE and C_BRENDA is intentional.
id_types_compound: list[IdentifierTypeCompound] = [
    C_MNX,
    C_BIGG,
    C_PUBCHEM,
    C_KEGG,
    C_CHEBI,
    C_ECMDB,
    C_INCHIKEY,
    C_SLM,
    C_LIPIDMAPS,
    C_METACYC,
    C_ENVIPATH,
    C_SEED,
    C_SABIORK,
    C_HMDB,
    C_REACTOME,
    C_BIOCYC,
    C_METAMDB,
]

id_types_structure_representation: list[IdentifierTypeStructureRepresentation] = [
    S_SMILES,
    S_INCHI,
    S_MOLFILE,
    S_GML,
]

# NOTE(review): R_UNKNOWN, R_NAME and R_BRENDA are not part of this list — confirm that leaving
# out R_BRENDA is intentional.
id_types_reaction: list[IdentifierTypeReaction] = [
    R_BIGG,
    R_MNX,
    R_METACYC,
    R_SEED,
    R_KEGG,
    R_RHEA,
    R_SABIORK,
    R_METAMDB,
    R_MCSA,
]

id_types_enzyme: list[IdentifierTypeEnzyme] = [
    E_EC,
]

# NOTE(review): unlike the lists above, this one contains its *_UNKNOWN member — confirm.
id_types_aam: list[IdentifierTypeAAM] = [
    A_UNKNOWN,
    A_REACTIONSMILES,
    A_RXN,
    A_GML_RULE,
]
|