chemrecon 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. chemrecon/__init__.py +73 -0
  2. chemrecon/chem/__init__.py +0 -0
  3. chemrecon/chem/chemreaction.py +223 -0
  4. chemrecon/chem/constant_compounds.py +3 -0
  5. chemrecon/chem/create_mol.py +91 -0
  6. chemrecon/chem/elements.py +141 -0
  7. chemrecon/chem/gml/__init__.py +0 -0
  8. chemrecon/chem/gml/gml.py +324 -0
  9. chemrecon/chem/gml/gml_reactant_matching.py +130 -0
  10. chemrecon/chem/gml/gml_to_rdk.py +217 -0
  11. chemrecon/chem/mol.py +483 -0
  12. chemrecon/chem/sumformula.py +120 -0
  13. chemrecon/connection.py +97 -0
  14. chemrecon/core/__init__.py +0 -0
  15. chemrecon/core/id_types.py +687 -0
  16. chemrecon/core/ontology.py +209 -0
  17. chemrecon/core/populate_query_handler.py +336 -0
  18. chemrecon/core/query_handler.py +587 -0
  19. chemrecon/database/__init__.py +1 -0
  20. chemrecon/database/connect.py +63 -0
  21. chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
  22. chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
  23. chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
  24. chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
  25. chemrecon/database/params.py +88 -0
  26. chemrecon/entrygraph/draw.py +119 -0
  27. chemrecon/entrygraph/entrygraph.py +301 -0
  28. chemrecon/entrygraph/explorationprotocol.py +199 -0
  29. chemrecon/entrygraph/explore.py +421 -0
  30. chemrecon/entrygraph/explore_procedure.py +183 -0
  31. chemrecon/entrygraph/filter.py +88 -0
  32. chemrecon/entrygraph/scoring.py +141 -0
  33. chemrecon/query/__init__.py +26 -0
  34. chemrecon/query/create_entry.py +86 -0
  35. chemrecon/query/default_protocols.py +57 -0
  36. chemrecon/query/find_entry.py +84 -0
  37. chemrecon/query/get_relations.py +143 -0
  38. chemrecon/query/get_structures_from_compound.py +65 -0
  39. chemrecon/schema/__init__.py +86 -0
  40. chemrecon/schema/db_object.py +363 -0
  41. chemrecon/schema/direction.py +10 -0
  42. chemrecon/schema/entry_types/__init__.py +0 -0
  43. chemrecon/schema/entry_types/aam.py +34 -0
  44. chemrecon/schema/entry_types/aam_repr.py +37 -0
  45. chemrecon/schema/entry_types/compound.py +52 -0
  46. chemrecon/schema/entry_types/enzyme.py +49 -0
  47. chemrecon/schema/entry_types/molstructure.py +64 -0
  48. chemrecon/schema/entry_types/molstructure_repr.py +41 -0
  49. chemrecon/schema/entry_types/reaction.py +57 -0
  50. chemrecon/schema/enums.py +154 -0
  51. chemrecon/schema/procedural_relation_entrygraph.py +66 -0
  52. chemrecon/schema/relation_types_composed/__init__.py +0 -0
  53. chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
  54. chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
  55. chemrecon/schema/relation_types_procedural/__init__.py +0 -0
  56. chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
  57. chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
  58. chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
  59. chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
  60. chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
  61. chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
  62. chemrecon/schema/relation_types_source/__init__.py +0 -0
  63. chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
  64. chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
  65. chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
  66. chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
  67. chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
  68. chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
  69. chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
  70. chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
  71. chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
  72. chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
  73. chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
  74. chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
  75. chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
  76. chemrecon/scripts/initialize_database.py +494 -0
  77. chemrecon/utils/copy_signature.py +10 -0
  78. chemrecon/utils/encodeable_list.py +11 -0
  79. chemrecon/utils/get_id_type.py +70 -0
  80. chemrecon/utils/hungarian.py +31 -0
  81. chemrecon/utils/reactant_matching.py +168 -0
  82. chemrecon/utils/rxnutils.py +44 -0
  83. chemrecon/utils/set_cwd.py +12 -0
  84. chemrecon-0.1.1.dist-info/METADATA +143 -0
  85. chemrecon-0.1.1.dist-info/RECORD +86 -0
  86. chemrecon-0.1.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,687 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Callable
4
+ import re
5
+
6
+ # Lookup lists and dictionaries
7
+ # ----------------------------------------------------------------------------------------------------------------------
8
# Global registries, filled as a side effect of constructing IdentifierType instances.
id_types: list[IdentifierType] = []  #: Every identifier type, in creation order.
id_type_name_lookup: dict[str, IdentifierType] = {}  #: name / shortname / alt name -> type.
identifiers_org_dict: dict[str, IdentifierType] = {}  #: identifiers.org URL prefix -> type.


# Types of ID types
# ----------------------------------------------------------------------------------------------------------------------
class IdentifierType:
    """
    Represents a general type of identifier with attributes and methods for standardization,
    recognition, and manipulation.

    This class is designed to encapsulate information about a specific type of identifier,
    including its primary name, alternative names, recognizable patterns, and
    standardization logic.

    Constructing an instance registers it in the module-level lookups ``id_types``,
    ``id_type_name_lookup`` and ``identifiers_org_dict``.
    """
    name: str  #: Primary name of this identifier type.
    shortname: str  #: The name used for the type in the database.
    alt_names: set[str]  #: Alternative names to search for.
    prefixes: set[str]  #: Prefixes removed by :meth:`trim`.
    suffixes: set[str]  #: Suffixes removed by :meth:`trim`.
    enum_type: 'IdType'  #: The corresponding Enum value, as present in entries
    stdfunc: Callable[[str], str] | None  #: Function used to standardize identifiers.
    recogniser: re.Pattern | None  #: Pattern used to recognize identifiers of this type.
    id_org_prefix: str  #: Prefix in identifiers.org (same value as ``identifiers_org_prefix``).
    identifiers_org_prefix: str  #: Prefix in identifiers.org, '' if none is known.
    objectname: str | None  # name of the assigned object

    def __init__(
            self,
            name: str,
            shortname: str,
            alt_names: set[str] | None = None,
            prefixes: set[str] | None = None,
            suffixes: set[str] | None = None,
            stdfunc: Callable[[str], str] | None = None,
            recogniser: re.Pattern | None = None,
            objectname: str | None = None,
    ):
        """
        Create the identifier type and register it in the module-level lookups.

        :param name: Primary name; registered in ``id_type_name_lookup``.
        :param shortname: Short name; registered in ``id_type_name_lookup`` and used for hashing.
        :param alt_names: Further names to register; defaults to no alternative names.
        :param prefixes: Prefixes stripped by :meth:`trim`; identifiers.org URLs among them
            are registered in ``identifiers_org_dict``.
        :param suffixes: Suffixes stripped by :meth:`trim`.
        :param stdfunc: Optional standardisation function used by :meth:`std_identifier`.
        :param recogniser: Optional compiled pattern for identifiers of this type.
        :param objectname: Name returned by ``repr()``.
        """
        self.name = name
        self.shortname = shortname
        # Omitted collections become empty *sets* (not None / dict) so that the loops
        # below and the declared attribute types hold for every instance.
        self.alt_names = alt_names if alt_names is not None else set()
        self.prefixes = prefixes if prefixes else set()
        self.suffixes = suffixes if suffixes else set()
        self.stdfunc = stdfunc
        self.recogniser = recogniser
        self.objectname = objectname

        # Make identifiers org link
        # TODO doesn't always work (mnx?)
        self.identifiers_org_prefix = ''
        # Sorted iteration makes the retained prefix independent of set ordering:
        # 'https://...' sorts after 'http://...', so the https form is kept when both exist.
        for prefix in sorted(self.prefixes):
            if prefix.startswith(('https://identifiers.org/', 'http://identifiers.org/')):
                identifiers_org_dict[prefix] = self
                self.identifiers_org_prefix = prefix
        # Keep the attribute declared on the class in sync with the one set historically.
        self.id_org_prefix = self.identifiers_org_prefix

        # Register in lookup
        id_types.append(self)

        # Register names
        id_type_name_lookup[self.name] = self
        id_type_name_lookup[self.shortname] = self
        for alt_name in self.alt_names:
            id_type_name_lookup[alt_name] = self

    def __repr__(self):
        # __repr__ must return a str; fall back to a generic form when no object name was given.
        if self.objectname:
            return self.objectname
        return f'{type(self).__name__}({self.shortname!r})'

    def __str__(self):
        return self.name

    def __hash__(self):
        return hash(self.shortname)

    def std_identifier(self, s: str) -> str:
        """ Standardize a given identifier of this type (``str(s)`` if no stdfunc is set). """
        if self.stdfunc:
            return self.stdfunc(s)
        return str(s)

    def trim(self, s: str) -> str:
        """ Remove pre- and suffixes of the string, including identifiers.org urls. """
        s = s.strip()
        for prefix in self.prefixes:
            s = s.removeprefix(prefix)
        for suffix in self.suffixes:
            s = s.removesuffix(suffix)
        return s
99
+
100
+
101
+ # Register subtypes
102
class IdentifierTypeCompound(IdentifierType):
    """ Identifier type for compound entries. """
    enum_type: 'IdTypeCompoundEnum'


class IdentifierTypeStructureRepresentation(IdentifierType):
    """ Identifier type for molecular structure representations. """
    enum_type: 'IdTypeStructureRepresentationEnum'


class IdentifierTypeReaction(IdentifierType):
    """ Identifier type for reaction entries. """
    enum_type: 'IdTypeReactionEnum'


class IdentifierTypeEnzyme(IdentifierType):
    """ Identifier type for enzyme entries. """
    enum_type: 'IdTypeEnzymeEnum'


class IdentifierTypeAAM(IdentifierType):
    """ Identifier type for atom-to-atom map entries. """
    enum_type: 'IdTypeAAMEnum'
120
+
121
+
122
+ # Standardisation functions
123
+ # ----------------------------------------------------------------------------------------------------------------------
124
+ # BiGG
125
_compartment_suffixes = [
    '_e', '_ex', '_c', '_p', '_m', '_x', '_b',
    '_E', '_EX', '_C', '_P', '_M', '_X', '_B'
]


def _strip_bigg_decorations(s: str) -> str:
    """ Shared first stage of BiGG standardisation: drop compartment suffixes and legacy notation. """
    s_ = s.strip()
    # Suffixes are tried in list order, each against the already-shortened string.
    for suffix in _compartment_suffixes:
        s_ = s_.removesuffix(suffix)
    s_ = s_.replace('_DASH', '')  # Fix weird 'DASH' notation, i.e. M_12ppd_DASH_S -> M_12ppd_S
    s_ = s_.replace('__', '_')  # Hacky fix for double underscore notation
    return s_


def _std_bigg(s: str) -> str:
    """
    Create a universal BiGG metabolite id without the compartment suffix.

    The result always carries an ``M_`` prefix; an id that is entirely upper case
    is lower-cased after the prefix.
    """
    # TODO double underscore is necessary (indicates stereo - "phe__L" is L-Phenylalanine)?
    s_ = _strip_bigg_decorations(s).removeprefix('m_')
    if not s_.startswith('M_'):
        s_ = f'M_{s_}'

    if s_.isupper():
        # Fix upper case
        s_ = f'M_{s_.removeprefix("M_").lower()}'

    return s_


def _std_bigg_r(s: str) -> str:
    """
    Create a universal BiGG reaction id without the compartment suffix.

    The result always carries an ``R_`` prefix; an id that is entirely upper case
    is lower-cased after the prefix.
    """
    s_ = _strip_bigg_decorations(s).removeprefix('r_')
    if not s_.startswith('R_'):
        s_ = f'R_{s_}'
    if s_.isupper():
        # Fix upper case. The prefix to strip here is "R_": stripping "M_" (as the
        # metabolite variant does) left the prefix in place and produced ids like "R_r_pgi".
        s_ = f'R_{s_.removeprefix("R_").lower()}'

    # Replace LPAREN, RPAREN notation in names
    s_ = s_.replace('LPAREN_', '_').replace('RPAREN_', '_')

    return s_
165
+
166
+
167
def _std_chebi(chebi: str) -> str:
    """
    Standardise a ChEBI identifier to the form ``CHEBI:<id>``.

    Surrounding whitespace and a leading identifiers.org URL (http or https) are removed,
    and an existing ``chebi:`` prefix is normalised to upper case instead of being doubled.

    :raises ValueError: if the identifier is empty or consists only of whitespace.
    """
    if not chebi or not chebi.strip():
        raise ValueError('Received empty CHEBI')
    chebi_new = chebi.strip()

    # Drop the identifiers.org URL whether or not the remainder already carries "CHEBI:".
    for url_prefix in ('https://identifiers.org/chebi/', 'http://identifiers.org/chebi/'):
        chebi_new = chebi_new.removeprefix(url_prefix)

    # Accept the prefix in any letter case, but always emit it in upper case.
    if chebi_new[:6].upper() == 'CHEBI:':
        return f'CHEBI:{chebi_new[6:]}'
    return f'CHEBI:{chebi_new}'
181
+
182
+
183
+ # C_NAME
184
def _standardise_cname(cname: str) -> str:
    """ Standardise a compound name: lower-case it and remove surrounding whitespace. """
    lowered = cname.lower()
    return lowered.strip()
187
+
188
+
189
+ # Compound Identifier Types
190
+ # ---------------------------------------------------------------------------------------------------------------------
191
+
192
+ # Unknown
193
+ C_UNKNOWN = IdentifierTypeCompound(
194
+ name = 'Unknown Compound',
195
+ shortname = 'unknown_c',
196
+ alt_names = set(),
197
+ prefixes = set(),
198
+ objectname = 'C_UNKNOWN'
199
+ )
200
+
201
+ # Name
202
+ C_NAME = IdentifierTypeCompound(
203
+ name = 'Compound Name',
204
+ shortname = 'cname',
205
+ alt_names = set(),
206
+ prefixes = set(),
207
+ objectname = 'C_NAME',
208
+ )
209
+
210
+ # MetaNetX
211
+ C_MNX = IdentifierTypeCompound(
212
+ name = 'MetaNetX',
213
+ shortname = 'mnx',
214
+ alt_names = {'metanetx', 'mnx'},
215
+ prefixes = {
216
+ 'http://identifiers.org/metanetx.chemical/',
217
+ 'https://identifiers.org/metanetx.chemical/',
218
+ },
219
+ recogniser = re.compile(r'^(MNXM\d+|MNX\d+|BIOMASS|WATER)$'),
220
+ objectname = 'C_MNX',
221
+ )
222
+
223
+ # BiGG
224
+ C_BIGG = IdentifierTypeCompound(
225
+ name = 'BiGG',
226
+ shortname = 'bigg',
227
+ alt_names = {'bigg', 'biggM', 'bigg.metabolite'},
228
+ prefixes = {
229
+ 'http://bigg.ucsd.edu/models/universal/metabolites/',
230
+ 'http://identifiers.org/bigg.metabolite/',
231
+ 'https://identifiers.org/bigg.metabolite/',
232
+ 'bigg.metabolite:'
233
+ },
234
+ suffixes = {
235
+ '_e', '_ex', '_c', '_p', '_m', '_x', '_b'
236
+ },
237
+ stdfunc = _std_bigg,
238
+ recogniser = re.compile(r'^[a-z_A-Z0-9]+$'),
239
+ objectname = 'C_BIGG',
240
+ ) #: BiGG metabolite (compound) identifier type.
241
+
242
+ # PubChem CID
243
+ C_PUBCHEM = IdentifierTypeCompound(
244
+ name = 'PubChem CiD',
245
+ shortname = 'pubchem_cid',
246
+ alt_names = {'pubchem', 'pc_cid', 'cid'},
247
+ prefixes = {
248
+ 'http://identifiers.org/pubchem.compound/',
249
+ 'https://identifiers.org/pubchem.compound/'
250
+ },
251
+ recogniser = re.compile(r'^\d+$'),
252
+ objectname = 'C_PUBCHEM',
253
+ )
254
+
255
+ # KEGG Compound
256
+ C_KEGG = IdentifierTypeCompound(
257
+ name = 'KEGG',
258
+ shortname = 'kegg',
259
+ alt_names = {'kegg', 'keggC', 'kegg.compound', 'KEGG COMPOUND', 'KEGG', 'KEGG COMPOUND accession'},
260
+ prefixes = {
261
+ 'http://identifiers.org/kegg.compound/',
262
+ 'https://identifiers.org/kegg.compound/'
263
+ },
264
+ recogniser = re.compile(r'^C\d+$'),
265
+ objectname = 'C_KEGG',
266
+ )
267
+
268
+ # ChEBI
269
+ C_CHEBI = IdentifierTypeCompound(
270
+ name = 'ChEBI',
271
+ shortname = 'chebi',
272
+ alt_names = {'chebi', 'CHEBI', 'ChEBI'},
273
+ prefixes = {
274
+ 'http://identifiers.org/chebi/',
275
+ 'https://identifiers.org/chebi/'
276
+ },
277
+ recogniser = re.compile(r'^CHEBI:\d+$'),
278
+ stdfunc = _std_chebi,
279
+ objectname = 'C_CHEBI',
280
+ )
281
+
282
+ # ECMDB
283
+ C_ECMDB = IdentifierTypeCompound(
284
+ name = 'ECMDB',
285
+ shortname = 'ecmdb',
286
+ alt_names = {'ecmdb'},
287
+ recogniser = re.compile(r'^ECMDB\d+$'),
288
+ objectname = 'C_ECMDB',
289
+ )
290
+
291
+ # InChI Key
292
+ C_INCHIKEY = IdentifierTypeCompound(
293
+ name = 'InChI key',
294
+ shortname = 'inchi_key',
295
+ alt_names = {'inchi_key', 'inchikey'},
296
+ prefixes = {
297
+ 'http://identifiers.org/inchikey/',
298
+ 'https://identifiers.org/inchikey/'
299
+ },
300
+ recogniser = re.compile(r'^[A-Z]{14}-[A-Z]{10}(-[A-Z])?$'),
301
+ objectname = 'C_INCHIKEY',
302
+ )
303
+
304
+ # SLM
305
+ C_SLM = IdentifierTypeCompound(
306
+ name = 'SwissLipids',
307
+ shortname = 'slm',
308
+ alt_names = {'slm', 'SLM'},
309
+ prefixes = {
310
+ 'http://identifiers.org/slm/',
311
+ 'https://identifiers.org/slm/'
312
+ },
313
+ recogniser = re.compile(r'^SLM:\d+$'), # TODO check?
314
+ objectname = 'C_SLM',
315
+ )
316
+
317
+ # LipidMaps
318
+ C_LIPIDMAPS = IdentifierTypeCompound(
319
+ name = 'LipidMaps',
320
+ shortname = 'lipidmapsm',
321
+ alt_names = {'LipidMapsM', 'lipidmapsM', 'lipidmaps', 'LIPID MAPS'},
322
+ prefixes = {
323
+ 'http://identifiers.org/lipidmaps/',
324
+ 'https://identifiers.org/lipidmaps/'
325
+ },
326
+ recogniser = re.compile(r'^LM(FA|GL|GP|SP|ST|PR|SL|PK)[0-9]{4}([0-9a-zA-Z]{4,6})?$'),
327
+ objectname = 'C_LIPIDMAPS',
328
+ )
329
+
330
+ # MetaCyc Compound
331
+ C_METACYC = IdentifierTypeCompound(
332
+ name = 'MetaCyc Compound',
333
+ shortname = 'metacyc',
334
+ alt_names = {'metacycM', 'metacyc.compound'},
335
+ prefixes = {
336
+ 'http://identifiers.org/metacyc.compound/',
337
+ 'https://identifiers.org/metacyc.compound/'
338
+ },
339
+ recogniser = re.compile(r'^[A-Za-z0-9+_.%-:]+$'),
340
+ objectname = 'C_METACYC',
341
+ )
342
+
343
+ # EnviPath (Can be both compound and reaction?)
344
+ C_ENVIPATH = IdentifierTypeCompound(
345
+ name = 'enviPath',
346
+ shortname = 'envipath',
347
+ alt_names = {'envipathM'},
348
+ prefixes = {
349
+ 'http://identifiers.org/envipath/',
350
+ 'https://identifiers.org/envipath/'
351
+ },
352
+ recogniser = re.compile(
353
+ r'^[\w^_]{8}-[\w^_]{4}-[\w^_]{4}-[\w^_]{4}-[\w^_]{12}/[\w-]+/[\w^_]{8}-'
354
+ r'[\w^_]{4}-[\w^_]{4}-[\w^_]{4}-[\w^_]{12}$'
355
+ ),
356
+ objectname = 'C_ENVIPATH',
357
+ )
358
+
359
+ # Seed Compounds
360
+ C_SEED = IdentifierTypeCompound(
361
+ name = 'Seed Compound',
362
+ shortname = 'seed',
363
+ alt_names = {'seedM', 'seed.compound'},
364
+ prefixes = {
365
+ 'http://identifiers.org/seed.compound/',
366
+ 'https://identifiers.org/seed.compound/'
367
+ },
368
+ recogniser = re.compile(r'^cpd\d+$'),
369
+ objectname = 'C_SEED',
370
+ )
371
+
372
+ # Sabio-RK compound
373
+ C_SABIORK = IdentifierTypeCompound(
374
+ name = 'Sabio-RK Compound',
375
+ shortname = 'sabiork',
376
+ alt_names = {'sabiork', 'sabiorkM', 'sabiork.compound'},
377
+ prefixes = {
378
+ 'http://identifiers.org/sabiork.compound/',
379
+ 'https://identifiers.org/sabiork.compound/'
380
+ },
381
+ recogniser = re.compile(r'^\d+$'),
382
+ objectname = 'C_SABIORK',
383
+ )
384
+
385
+ # HMDB
386
+ C_HMDB = IdentifierTypeCompound(
387
+ name = 'HMDB',
388
+ shortname = 'hmdb',
389
+ alt_names = {'hmdb', 'HMDB'},
390
+ prefixes = {
391
+ 'http://identifiers.org/hmdb/',
392
+ 'https://identifiers.org/hmdb/'
393
+ },
394
+ objectname = 'C_HMDB',
395
+ )
396
+
397
+ # Reactome
398
+ C_REACTOME = IdentifierTypeCompound(
399
+ name = 'Reactome',
400
+ shortname = 'reactome',
401
+ alt_names = {'reactomeM', 'reactome.compound'},
402
+ prefixes = {
403
+ 'http://identifiers.org/reactome/',
404
+ 'https://identifiers.org/reactome/'
405
+ },
406
+ recogniser = re.compile(r'(^R-[A-Z]{3}-\d+(-\d+)?(\.\d+)?$)|(^REACT_\d+(\.\d+)?$)'),
407
+ objectname = 'C_REACTOME',
408
+ )
409
+
410
+ # PDBe
411
+ C_PDBE = IdentifierTypeCompound(
412
+ name = 'PBDE Compound',
413
+ shortname = 'pdbe',
414
+ alt_names = {'PDBeChem'},
415
+ objectname = 'C_PDBE',
416
+ )
417
+
418
+ C_BIOCYC = IdentifierTypeCompound(
419
+ name = 'BioCyc Compound',
420
+ shortname = 'biocyc',
421
+ alt_names = {'biocyc'},
422
+ prefixes = {
423
+ 'https://identifiers.org/biocyc/',
424
+ 'http://identifiers.org/biocyc/'
425
+ },
426
+ recogniser = re.compile(r'^[A-Z-0-9]+(:)?[A-Za-z0-9+_.%-:]+$'),
427
+ objectname = 'C_BIOCYC',
428
+ )
429
+
430
+ # MetaMDB
431
+ C_METAMDB = IdentifierTypeCompound(
432
+ name = 'MetaMDB Compound',
433
+ shortname = 'metamdb_c',
434
+ alt_names = {'metamdb_c'},
435
+ objectname = 'C_METAMDB',
436
+ )
437
+
438
+ C_BRENDA = IdentifierTypeCompound(
439
+ name = 'Brenda Compound',
440
+ shortname = 'brenda_c',
441
+ alt_names = {'brenda_c'},
442
+ objectname = 'C_BRENDA',
443
+ )
444
+
445
+ # TODO ChemSpider
446
+
447
+ # Structure representation Identifier Types
448
+ # ---------------------------------------------------------------------------------------------------------------------
449
+
450
+ S_UNKNOWN = IdentifierTypeStructureRepresentation(
451
+ name = 'Unknown structure',
452
+ shortname = 'unknown_s',
453
+ alt_names = set(),
454
+ prefixes = set(),
455
+ objectname = 'S_UNKNOWN',
456
+ )
457
+
458
+ # S_SMILES
459
+ S_SMILES = IdentifierTypeStructureRepresentation(
460
+ name = 'S_SMILES',
461
+ shortname = 'smiles',
462
+ alt_names = {'smiles'},
463
+ objectname = 'S_SMILES',
464
+ )
465
+
466
+ # InChI
467
+ S_INCHI = IdentifierTypeStructureRepresentation(
468
+ name = 'InChI',
469
+ shortname = 'inchi',
470
+ alt_names = {'inchi'},
471
+ objectname = 'S_INCHI',
472
+ )
473
+
474
+ # MolFile
475
+ S_MOLFILE = IdentifierTypeStructureRepresentation(
476
+ name = 'Molfile',
477
+ shortname = 'molfile',
478
+ alt_names = {'molfile'},
479
+ objectname = 'S_MOLFILE',
480
+ )
481
+
482
+ # S_GML
483
+ S_GML = IdentifierTypeStructureRepresentation(
484
+ name = 'GML',
485
+ shortname = 'gml',
486
+ alt_names = {'gml'},
487
+ objectname = 'S_GML',
488
+ )
489
+
490
+ # Reaction Identifier Types
491
+ # ---------------------------------------------------------------------------------------------------------------------
492
+ R_UNKNOWN = IdentifierTypeReaction(
493
+ name = 'Unknown Reaction',
494
+ shortname = 'unknown_r',
495
+ alt_names = set(),
496
+ prefixes = set(),
497
+ objectname = 'R_UNKNOWN',
498
+ )
499
+
500
+ R_NAME = IdentifierTypeReaction(
501
+ name = 'Reaction Name',
502
+ shortname = 'rname',
503
+ alt_names = {'rname'},
504
+ objectname = 'R_NAME',
505
+ )
506
+
507
+ R_MNX = IdentifierTypeReaction(
508
+ name = 'MetaNetX Reaction',
509
+ shortname = 'mnx_r',
510
+ alt_names = {'mnxr'},
511
+ prefixes = {
512
+ 'https://identifiers.org/metanetx.reaction/',
513
+ 'http://identifiers.org/metanetx.reaction/'
514
+ },
515
+ recogniser = re.compile(r'^(MNXR\d+|EMPTY)$'),
516
+ objectname = 'R_MNX',
517
+ )
518
+
519
+ R_METACYC = IdentifierTypeReaction(
520
+ name = 'MetaCyc Reaction',
521
+ shortname = 'metacyc_r',
522
+ alt_names = {'metacycr', 'metacycR', 'metacyc.reaction'},
523
+ prefixes = {
524
+ 'https://identifiers.org/metacyc.reaction/',
525
+ 'http://identifiers.org/metacyc.reaction/'
526
+ },
527
+ recogniser = re.compile(r'^[A-Za-z0-9+_.%-:]+$'),
528
+ objectname = 'R_METACYC',
529
+
530
+ )
531
+
532
# BiGG reaction
R_BIGG = IdentifierTypeReaction(
    name = 'BiGG Reaction',
    shortname = 'bigg_r',
    alt_names = {'biggr', 'bigg_r', 'biggR', 'bigg.reaction'},
    prefixes = {
        # Both URL forms end in '/' so that trimming a prefix leaves no leading slash on the id.
        'https://identifiers.org/bigg.reaction/',
        'http://identifiers.org/bigg.reaction/'
    },
    recogniser = re.compile(r'^[a-z_A-Z0-9]+$'),
    stdfunc = _std_bigg_r,
    objectname = 'R_BIGG',
)
544
+
545
# SEED reaction
R_SEED = IdentifierTypeReaction(
    name = 'SEED Reaction',
    shortname = 'seed_r',
    alt_names = {'seed_r', 'seedR', 'seed.reaction'},
    prefixes = {
        # The seed.reaction URLs belong to this type (they were previously attached to R_KEGG).
        'https://identifiers.org/seed.reaction/',
        'http://identifiers.org/seed.reaction/'
    },
    recogniser = re.compile(r'^rxn\d+$'),
    objectname = 'R_SEED',
)

# KEGG reaction
R_KEGG = IdentifierTypeReaction(
    name = 'KEGG Reaction',
    shortname = 'kegg_r',
    alt_names = {'kegg_r', 'keggR', 'kegg.reaction'},
    prefixes = {
        # kegg.reaction is the namespace matching the 'kegg.reaction' alt name above.
        'https://identifiers.org/kegg.reaction/',
        'http://identifiers.org/kegg.reaction/'
    },
    recogniser = re.compile(r'^R\d+$'),
    objectname = 'R_KEGG',
)
564
+
565
+ R_RHEA = IdentifierTypeReaction(
566
+ name = 'RHEA Reaction',
567
+ shortname = 'rhea_r',
568
+ alt_names = {'rheaR', 'rhea'},
569
+ prefixes = {
570
+ 'https://identifiers.org/rhea/',
571
+ 'http://identifiers.org/rhea/'
572
+ },
573
+ recogniser = re.compile(r'^\d{5}$'),
574
+ objectname = 'R_RHEA',
575
+ )
576
+
577
# Sabio-RK reaction
R_SABIORK = IdentifierTypeReaction(
    name = 'Sabio RK Reaction',
    shortname = 'sabiork_r',
    alt_names = {'sabiorkR', 'sabiork.reaction'},
    prefixes = {
        # Both URL forms end in '/' so that trimming a prefix leaves no leading slash on the id.
        'https://identifiers.org/sabiork.reaction/',
        'http://identifiers.org/sabiork.reaction/'
    },
    recogniser = re.compile(r'^\d+$'),
    objectname = 'R_SABIORK',
)
588
+
589
+ R_METAMDB = IdentifierTypeReaction(
590
+ name = 'MetaMDB reaction',
591
+ shortname = 'metamdb_r',
592
+ alt_names = {'metamdb_r'},
593
+ objectname = 'R_METAMDB',
594
+ )
595
+
596
+ R_MCSA = IdentifierTypeReaction(
597
+ name = 'MCSA reaction',
598
+ shortname = 'mcsa_r',
599
+ alt_names = {'mcsa_r'},
600
+ objectname = 'R_MCSA',
601
+ )
602
+
603
+ R_BRENDA = IdentifierTypeReaction(
604
+ name = 'Brenda Reaction',
605
+ shortname = 'brenda_r',
606
+ alt_names = {'brenda_r'},
607
+ objectname = 'R_BRENDA',
608
+ )
609
+
610
+ # Enzyme Identifier Types
611
+ # --------------------------------------------------------------------------------------------------------------
612
+ E_UNKNOWN = IdentifierTypeEnzyme(
613
+ name = 'Unknown enzyme',
614
+ shortname = 'unknown_e',
615
+ alt_names = set(),
616
+ prefixes = set(),
617
+ objectname = 'E_UNKNOWN',
618
+ )
619
+
620
+ E_NAME = IdentifierTypeEnzyme(
621
+ name = 'Enzyme Name',
622
+ shortname = 'ename',
623
+ alt_names = {'ename'},
624
+ objectname = 'E_NAME',
625
+ )
626
+
627
# EC number (complete, or partial with trailing '-' levels, e.g. '1.1.-.-')
E_EC = IdentifierTypeEnzyme(
    name = 'EC',
    shortname = 'ec',
    alt_names = {'ec'},
    prefixes = {
        'http://identifiers.org/ec-code/',
        'https://identifiers.org/ec-code/'
    },
    # The alternatives are grouped so that '^' and '$' anchor every one of them; ungrouped,
    # only the first alternative was anchored at the start and only the last at the end.
    recogniser = re.compile(
        r'^(?:\d+\.-\.-\.-|\d+\.\d+\.-\.-|\d+\.\d+\.\d+\.-|\d+\.\d+\.\d+\.(n)?\d+)$'
    ),
    objectname = 'E_EC',
)
638
+
639
+ # Atom-to-Atom Map Identifier Types
640
+ # --------------------------------------------------------------------------------------------------------------
641
+ A_UNKNOWN = IdentifierTypeAAM(
642
+ name = 'Unknown AAM',
643
+ shortname = 'unknown_a',
644
+ alt_names = set(),
645
+ prefixes = set(),
646
+ objectname = 'A_UNKNOWN',
647
+ )
648
+
649
+ A_REACTIONSMILES = IdentifierTypeAAM(
650
+ name = 'Reaction Smiles',
651
+ shortname = 'rsmiles',
652
+ alt_names = {'rsmiles'},
653
+ objectname = 'A_REACTIONSMILES',
654
+ )
655
+
656
+ A_RXN = IdentifierTypeAAM(
657
+ name = 'RXN',
658
+ shortname = 'rxn',
659
+ alt_names = {'rxn'},
660
+ objectname = 'A_RXN',
661
+ )
662
+
663
+ A_GML_RULE = IdentifierTypeAAM(
664
+ name = 'S_GML rule',
665
+ shortname = 'gml_rule',
666
+ alt_names = {'gml_rule'},
667
+ objectname = 'A_GML_RULE',
668
+ )
669
+
670
+ # Lists of ID types
671
+ # --------------------------------------------------------------------------------------------------------------
672
+ id_types_compound: list[IdentifierTypeCompound] = [
673
+ C_MNX, C_BIGG, C_PUBCHEM, C_KEGG, C_CHEBI, C_ECMDB, C_INCHIKEY, C_SLM, C_LIPIDMAPS, C_METACYC, C_ENVIPATH,
674
+ C_SEED, C_SABIORK, C_HMDB, C_REACTOME, C_BIOCYC, C_METAMDB,
675
+ ]
676
+ id_types_structure_representation: list[IdentifierTypeStructureRepresentation] = [
677
+ S_SMILES, S_INCHI, S_MOLFILE, S_GML,
678
+ ]
679
+ id_types_reaction: list[IdentifierTypeReaction] = [
680
+ R_BIGG, R_MNX, R_METACYC, R_SEED, R_KEGG, R_RHEA, R_SABIORK, R_METAMDB, R_MCSA,
681
+ ]
682
+ id_types_enzyme: list[IdentifierTypeEnzyme] = [
683
+ E_EC
684
+ ]
685
+ id_types_aam: list[IdentifierTypeAAM] = [
686
+ A_UNKNOWN, A_REACTIONSMILES, A_RXN, A_GML_RULE
687
+ ]