chemrecon 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. chemrecon/__init__.py +73 -0
  2. chemrecon/chem/__init__.py +0 -0
  3. chemrecon/chem/chemreaction.py +223 -0
  4. chemrecon/chem/constant_compounds.py +3 -0
  5. chemrecon/chem/create_mol.py +91 -0
  6. chemrecon/chem/elements.py +141 -0
  7. chemrecon/chem/gml/__init__.py +0 -0
  8. chemrecon/chem/gml/gml.py +324 -0
  9. chemrecon/chem/gml/gml_reactant_matching.py +130 -0
  10. chemrecon/chem/gml/gml_to_rdk.py +217 -0
  11. chemrecon/chem/mol.py +483 -0
  12. chemrecon/chem/sumformula.py +120 -0
  13. chemrecon/connection.py +97 -0
  14. chemrecon/core/__init__.py +0 -0
  15. chemrecon/core/id_types.py +687 -0
  16. chemrecon/core/ontology.py +209 -0
  17. chemrecon/core/populate_query_handler.py +336 -0
  18. chemrecon/core/query_handler.py +587 -0
  19. chemrecon/database/__init__.py +1 -0
  20. chemrecon/database/connect.py +63 -0
  21. chemrecon/database/connection_params/chemrecon_pub.dbinfo +5 -0
  22. chemrecon/database/connection_params/local_docker_dev.dbinfo +5 -0
  23. chemrecon/database/connection_params/local_docker_init.dbinfo +5 -0
  24. chemrecon/database/connection_params/local_docker_pub.dbinfo +5 -0
  25. chemrecon/database/params.py +88 -0
  26. chemrecon/entrygraph/draw.py +119 -0
  27. chemrecon/entrygraph/entrygraph.py +301 -0
  28. chemrecon/entrygraph/explorationprotocol.py +199 -0
  29. chemrecon/entrygraph/explore.py +421 -0
  30. chemrecon/entrygraph/explore_procedure.py +183 -0
  31. chemrecon/entrygraph/filter.py +88 -0
  32. chemrecon/entrygraph/scoring.py +141 -0
  33. chemrecon/query/__init__.py +26 -0
  34. chemrecon/query/create_entry.py +86 -0
  35. chemrecon/query/default_protocols.py +57 -0
  36. chemrecon/query/find_entry.py +84 -0
  37. chemrecon/query/get_relations.py +143 -0
  38. chemrecon/query/get_structures_from_compound.py +65 -0
  39. chemrecon/schema/__init__.py +86 -0
  40. chemrecon/schema/db_object.py +363 -0
  41. chemrecon/schema/direction.py +10 -0
  42. chemrecon/schema/entry_types/__init__.py +0 -0
  43. chemrecon/schema/entry_types/aam.py +34 -0
  44. chemrecon/schema/entry_types/aam_repr.py +37 -0
  45. chemrecon/schema/entry_types/compound.py +52 -0
  46. chemrecon/schema/entry_types/enzyme.py +49 -0
  47. chemrecon/schema/entry_types/molstructure.py +64 -0
  48. chemrecon/schema/entry_types/molstructure_repr.py +41 -0
  49. chemrecon/schema/entry_types/reaction.py +57 -0
  50. chemrecon/schema/enums.py +154 -0
  51. chemrecon/schema/procedural_relation_entrygraph.py +66 -0
  52. chemrecon/schema/relation_types_composed/__init__.py +0 -0
  53. chemrecon/schema/relation_types_composed/compound_has_molstructure_relation.py +59 -0
  54. chemrecon/schema/relation_types_composed/reaction_has_aam_relation.py +50 -0
  55. chemrecon/schema/relation_types_procedural/__init__.py +0 -0
  56. chemrecon/schema/relation_types_procedural/aam_convert_relation.py +69 -0
  57. chemrecon/schema/relation_types_procedural/compound_select_structure_proceduralrelation.py +36 -0
  58. chemrecon/schema/relation_types_procedural/compound_similarlity_proceduralrelation.py +1 -0
  59. chemrecon/schema/relation_types_procedural/molstructure_convert_relation.py +49 -0
  60. chemrecon/schema/relation_types_procedural/reaction_select_aam_proceduralrelation.py +38 -0
  61. chemrecon/schema/relation_types_procedural/reaction_similarity_proceduralrelation.py +1 -0
  62. chemrecon/schema/relation_types_source/__init__.py +0 -0
  63. chemrecon/schema/relation_types_source/aam_involves_molstructure_relation.py +77 -0
  64. chemrecon/schema/relation_types_source/aam_repr_involves_molstructure_repr_relation.py +79 -0
  65. chemrecon/schema/relation_types_source/compound_has_structure_representation_relation.py +33 -0
  66. chemrecon/schema/relation_types_source/compound_reference_relation.py +34 -0
  67. chemrecon/schema/relation_types_source/molstructure_standardisation_relation.py +71 -0
  68. chemrecon/schema/relation_types_source/ontology/__init__.py +0 -0
  69. chemrecon/schema/relation_types_source/ontology/compound_ontology.py +369 -0
  70. chemrecon/schema/relation_types_source/ontology/enzyme_ontology.py +142 -0
  71. chemrecon/schema/relation_types_source/ontology/reaction_ontology.py +140 -0
  72. chemrecon/schema/relation_types_source/reaction_has_aam_representation_relation.py +34 -0
  73. chemrecon/schema/relation_types_source/reaction_has_enzyme_relation.py +71 -0
  74. chemrecon/schema/relation_types_source/reaction_involves_compound_relation.py +69 -0
  75. chemrecon/schema/relation_types_source/reaction_reference_relation.py +33 -0
  76. chemrecon/scripts/initialize_database.py +494 -0
  77. chemrecon/utils/copy_signature.py +10 -0
  78. chemrecon/utils/encodeable_list.py +11 -0
  79. chemrecon/utils/get_id_type.py +70 -0
  80. chemrecon/utils/hungarian.py +31 -0
  81. chemrecon/utils/reactant_matching.py +168 -0
  82. chemrecon/utils/rxnutils.py +44 -0
  83. chemrecon/utils/set_cwd.py +12 -0
  84. chemrecon-0.1.1.dist-info/METADATA +143 -0
  85. chemrecon-0.1.1.dist-info/RECORD +86 -0
  86. chemrecon-0.1.1.dist-info/WHEEL +4 -0
chemrecon/chem/mol.py ADDED
@@ -0,0 +1,483 @@
1
+ """ Defines a wrapper class for RDKit molecules.
2
+ A Mol may either be a MolTemplate or a MolInstance.
3
+ - MolInstance: The molecule as it appears in a reaction concretely.
4
+ - MolTemplate: More abstract representation of the molecule in general.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import ast
9
+ from copy import copy, deepcopy
10
+ from typing import Type, Optional, Any
11
+
12
+ import rdkit.Chem as rdk
13
+ from rdkit.Chem import rdFingerprintGenerator as rdk_fp
14
+ import rdkit.Chem.MolStandardize.rdMolStandardize as rdk_std
15
+ from rdkit import DataStructs as rdk_ds
16
+
17
+ from chemrecon.chem.elements import atomicnum_element
18
+ from chemrecon.chem.sumformula import SumFormula
19
+ from chemrecon.schema import MolStructure
20
+ from chemrecon.schema.enums import FeatureEnum
21
+
22
+ # RDkit generators
23
+ fpgen = rdk_fp.GetRDKitFPGenerator()
24
+
25
+ # Main class
26
+ # --------------------------------------------------------------------------------------------------------------
27
class Mol:
    """ Wrapper for RDKit Mol.

    Holds a normalized RDKit molecule together with its SMILES string, the set of FICTS features for which
    the molecule is standardized, and a few derived properties (atom count, formal charge, sum formula).

    Two Mols compare equal when their SMILES strings and their feature sets are equal.
    """
    # TODO rename/refactor to 'MolStructure'?
    mol: rdk.Mol

    smiles: Optional[str]
    features: set[Type[Feature]]    # Features for which the rdk_mol is standardized
    n_atoms: int
    mass: float                     # NOTE: annotated only; __init__ does not assign it yet (see TODO there)
    charge: int
    molformula: SumFormula
    provenance: Optional[str]
    propdict: dict[str, str]        # Properties

    # Optional fingerprint, computed lazily by generate_fingerprint()
    fp: Optional[Any]

    def __init__(
            self,
            rdk_mol: rdk.Mol,
            set_features: set[Type[Feature]] = None,
            provenance: Optional[str] = None
    ):
        """ Create a Mol from an RDKit molecule.

        rdk_mol:        The RDKit molecule to wrap. It is passed through rdMolStandardize.Normalize.
        set_features:   If given, taken as the features the molecule is standardized for. If None, every
                        feature in `feats` is tested with its `is_standardized` check.
        provenance:     Optional free-text description of where the molecule came from.

        Raises ValueError if `rdk_mol` is None, if normalization fails, or if no SMILES can be generated.
        """
        if rdk_mol is None:
            raise ValueError('Cannot create Mol from None.')

        # Normalize the molecule
        try:
            self.mol = rdk_std.Normalize(rdk_mol)  # TODO what exactly does normalisation do?
        except ValueError as e:
            # Could not create molecule, or molecule is None?
            print(e)
            self.mol = None
            self.smiles = None
            # Chain the original error so the RDKit traceback is not lost.
            raise ValueError(f'Could not create molecule: {e}') from e

        # Set smiles for printing. The None check has to happen before any string method is called on it.
        smiles = rdk.MolToSmiles(self.mol)
        if smiles is None:
            raise ValueError('Could not generate a SMILES string for the molecule.')

        # Fix SMILES bug (non-C lowercase atom symbols)
        # https://github.com/rdkit/rdkit/issues/3697
        self.smiles = smiles.replace('[o', '[O')

        # Compute features
        if set_features is not None:
            # Features set in constructor. Coerced to a set so that equality checks against other Mols
            # work even if the caller handed in a list.
            self.features = set(set_features)
        else:
            # Check standardization for every known feature
            self.features = {f for f in feats if f.is_standardized(mol = self)}

        # Get properties from name if initialized through an RXN file
        self.propdict = dict()
        try:
            molprops_rxn = rdk_mol.GetProp('_Name')
        except KeyError:
            # Not applicable, skip
            pass
        else:
            try:
                rxn_propdict = ast.literal_eval(molprops_rxn)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                # The name is not a Python literal; no properties to extract.
                rxn_propdict = None
            # A name such as '42' evaluates to a non-dict literal; only accept real dicts.
            if isinstance(rxn_propdict, dict):
                self.propdict = rxn_propdict

        # Populate misc fields
        self.n_atoms = self.mol.GetNumAtoms(onlyExplicit = False)
        self.provenance = provenance

        # Get MolFormula
        self.molformula = self.get_molformula()

        # TODO n_atoms_tracked
        # TODO mass (simple)

        self.charge = rdk.GetFormalCharge(self.mol)

        # Set fp
        self.fp = None

        # TODO fix: decide what to do when no provenance is given

    # Converters
    # ----------------------------------------------------------------------------------------------------------
    def to_smiles(self) -> str:
        """ Returns the SMILES string stored at construction time. """
        # TODO return more details
        return self.smiles

    # Database interfacing
    # ----------------------------------------------------------------------------------------------------------
    def to_database_struct(self) -> MolStructure:
        """ Create a database row to be inserted.

        Raises ValueError if this Mol has no SMILES string.
        """
        smiles = self.to_smiles()
        if smiles is None:
            raise ValueError('Cannot convert None entry')

        # Fix SMILES bug (non-C lowercase atom symbols)
        # https://github.com/rdkit/rdkit/issues/3697
        # __init__ already applies this; it is repeated on the local value in case `smiles` was reassigned.
        smiles = smiles.replace('[o', '[O')

        return MolStructure(
            smiles = smiles,
            std_feats = [
                f.feature_enum for f in self.features
            ]
        )

        # TODO insert calculated properties

    # Calculate properties
    # ----------------------------------------------------------------------------------------------------------
    # TODO

    # Standardisation
    def get_standardised(self, feat_list: Optional[list[Type[Feature]]] = None) -> Mol:
        """ Get a new Mol standardised according to the given features. If None (or an empty list) is given,
        standardise according to all features. The features are applied in the order given.
        """
        feat_list = feat_list or list(feats)
        rdk_s: rdk.Mol = self.mol
        for f in feat_list:
            rdk_s = f.standardise_rdk(rdk_s)
        # `features` is a set everywhere else; passing the list through would break __eq__.
        return Mol(rdk_s, set_features = set(feat_list), provenance = self.provenance)

    # Identity and similarity
    # ------------------------------------------------------------------------------------------------------------------
    def is_identical_up_to_map(self, other: Mol):
        """ Checks whether two Mols are identical except for extra details such as atom mapping numbers. """
        m: rdk.Mol = deepcopy(self.mol)
        m_: rdk.Mol = deepcopy(other.mol)

        # Clear the atom map numbers on the copies so they do not influence the SMILES comparison.
        for a in m.GetAtoms():
            a.SetAtomMapNum(0)
        for a in m_.GetAtoms():
            a.SetAtomMapNum(0)

        return rdk.MolToSmiles(m) == rdk.MolToSmiles(m_)

    def is_identical_up_to(self, other: Mol, features: set[Feature]) -> bool:
        """ Not implemented yet. """
        # TODO
        raise NotImplementedError

    def generate_fingerprint(self):
        """ Compute and cache the fingerprint of the molecule, unless it is already cached. """
        if self.fp is None:
            self.fp = fpgen.GetFingerprint(self.mol)

    def get_similarity(self, other: Mol) -> float:
        """ Get a similarity metric between two Mols. Should be 1 if the mols are identical,
        and approach zero as the difference increases.
        Computed as the Tanimoto similarity of the two (cached) fingerprints.
        """
        self.generate_fingerprint()
        other.generate_fingerprint()
        return rdk_ds.TanimotoSimilarity(
            self.fp, other.fp
        )

    # Misc
    # ----------------------------------------------------------------------------------------------------------
    def feature_string(self) -> str:
        """ One character per entry of `feats`: the feature's symbol if this Mol is standardized
        for it, '-' otherwise.
        """
        return ''.join(
            f.symbol if f in self.features else '-'
            for f in feats
        )

    def __repr__(self):
        return f'{self.smiles} [{self.feature_string()}]'

    def __hash__(self):
        return hash(f'{self.smiles}:::{self.feature_string()}')

    def __eq__(self, other: Mol):
        if not isinstance(other, Mol):
            return False
        return self.smiles == other.smiles and self.features == other.features

    # Serialise
    def serialize(self) -> dict:
        """ Returns a dict with the SMILES string, the feature string and a placeholder molformula. """
        return {
            'smiles':     self.to_smiles(),
            'features':   self.feature_string(),
            'molformula': 'TODO'  # TODO
        }

    # Get molecular formula
    def get_molformula(self) -> SumFormula:
        """ Returns the molecular formula: the number of atoms per element as listed by GetAtoms(),
        together with the formal charge of the molecule.
        """
        # TODO requires rdkit documentation
        formula_num: dict[int, int] = dict()
        for a in self.mol.GetAtoms():
            n: int = a.GetAtomicNum()
            formula_num[n] = formula_num.get(n, 0) + 1

        charge = rdk.GetFormalCharge(self.mol)

        # Translate to formula of elements and return
        return SumFormula(
            formula = {
                atomicnum_element[n]: i for n, i in formula_num.items()
            },
            charge = charge
        )
245
+
246
+ # Mol Instance and Template
247
+ # ----------------------------------------------------------------------------------------------------------------------
248
class MolInstance(Mol):
    """ MolInstance represents a particular instance of a molecule as it participates in a reaction.
    """

    n_atoms_tracked: int
    _atom_map: Optional[list[int]]  # For each atom in the MolInstance, gives the mapping.
    _atom_map_smiles_order: Optional[list[int]]  # Same, but in the atom order of the SMILES string.

    def __init__(self, rdk_mol: rdk.Mol, set_features: set[Type[Feature]] = None, provenance: Optional[str] = None):
        """ Same arguments as Mol; additionally initialises the (lazily computed) atom map caches. """
        super().__init__(rdk_mol, set_features = set_features, provenance = provenance)
        self._atom_map = None
        self._atom_map_smiles_order = None

    def to_mol_template(self) -> MolTemplate:
        """ Remove all information of the instance as it participates in a reaction (atom-to-atom map etc.)
        to generate a generic template of the structure.
        """
        mtemp = MolTemplate(
            rdk_mol = self.mol,
            # Hand over a copy so that template and instance do not share one mutable feature set.
            set_features = set(self.features),
            provenance = self.provenance
        )

        # Carry over certain properties (copied, so later changes to one object do not leak into the other)
        mtemp.propdict = copy(self.propdict)
        return mtemp

    def get_atom_map_in_native_order(self) -> list[int]:
        """ The numbering of the atoms as in the reaction it is an instance of, in the atom order
        of the wrapped RDKit molecule. The result is cached.
        """
        # `is None` rather than truthiness: an empty map is a valid cached value.
        if self._atom_map is None:
            self._atom_map = [
                a.GetAtomMapNum()
                for a in self.mol.GetAtoms()
            ]

        return self._atom_map

    def get_atom_map_in_smiles_order(self, safe: bool = False) -> list[int]:
        """ Same as getting the atom map, but with the ordering of the atoms in the smiles string.
        Needed to populate DB with atom-to-atom maps.

        The SMILES string is re-parsed with `sanitize = not safe`. The result is cached on first use, so
        `safe` only has an effect on the first call.

        Raises ValueError if the SMILES string cannot be parsed.
        """
        if self._atom_map_smiles_order is None:
            smilesmol = rdk.MolFromSmiles(self.smiles, sanitize = not safe)
            if smilesmol is None:
                # MolFromSmiles signals failure by returning None; fail with a readable message.
                raise ValueError(f'Could not parse SMILES: {self.smiles}')
            self._atom_map_smiles_order = [
                a.GetAtomMapNum()
                for a in smilesmol.GetAtoms()
            ]

        return self._atom_map_smiles_order

    def serialize(self) -> dict:
        """ Serialisation of Mol, extended with the SMILES string of the corresponding template. """
        d = super().serialize()
        d['template'] = self.to_mol_template().to_smiles()
        return d
304
+
305
+
306
class MolTemplate(Mol):
    """ MolTemplate represents a consistent type of molecule, independent of its particular participation and
    mapping in a reaction.
    """

    def __init__(
            self,
            rdk_mol: rdk.Mol,
            set_features: set[Type[Feature]] = None,
            provenance: Optional[str] = None
    ):
        """ Builds the template from a deep copy of `rdk_mol` whose atom map numbers are all reset to 0.
        The molecule passed in is left untouched. Raises ValueError if the copy is falsy (e.g. None).
        """
        unmapped = deepcopy(rdk_mol)
        if not unmapped:
            raise ValueError('Could not create molecule')

        # Remove atom-to-atom maps
        for atom in unmapped.GetAtoms():
            atom.SetAtomMapNum(0)

        super().__init__(rdk_mol = unmapped, set_features = set_features, provenance = provenance)

    def instantiate(self, mapping: list[int]) -> MolInstance:
        """ Instantiate a template with a map, to produce an instance of the template as it participates in a
        reaction. Map numbers are assigned to the atoms of a deep copy, pairing atoms and `mapping` in order.
        """
        # NOTE(review): zip() stops at the shorter of the two, so a mapping of the wrong length is
        # accepted silently - confirm whether callers rely on that.
        mapped = deepcopy(self.mol)
        for atom, mapnum in zip(mapped.GetAtoms(), mapping):
            atom.SetAtomMapNum(mapnum)
        return MolInstance(rdk_mol = mapped, set_features = self.features, provenance = 'from_template')
335
+
336
+
337
+ # Features and standardization
338
+ # ----------------------------------------------------------------------------------------------------------------------
339
+ # ABC
340
class Feature:
    """ Base class for the standardisation features. Subclasses are used as classes (never instantiated
    in this file) and are expected to override `is_standardized` and `standardise_rdk`.
    """
    symbol: str                 # One-character symbol
    name: str                   # Human-readable name
    feature_enum: FeatureEnum   # Corresponding enum member

    @classmethod
    def is_standardized(cls, mol: Mol) -> bool:
        """ Whether `mol` is already standardized with respect to this feature. To be overridden. """
        raise NotImplementedError

    @classmethod
    def standardise(cls, mol: Mol) -> rdk.Mol:
        """ Standardise the RDKit molecule wrapped by `mol` and return the resulting RDKit molecule.
        Raises ValueError if `mol` wraps no molecule.
        """
        wrapped = mol.mol
        if wrapped is None:
            raise ValueError
        return cls.standardise_rdk(wrapped)

    @classmethod
    def standardise_rdk(cls, rdk_mol: rdk.Mol) -> rdk.Mol:
        """ Standardise an RDKit molecule with respect to this feature. To be overridden. """
        raise NotImplementedError
358
+
359
+
360
+ # FICTS features
361
+ # --------------------------------------------------------------------------------------------------------------
362
+
363
+ # [F] - Fragments
364
class F(Feature):
    """ Fragments feature: a molecule is standardized if it consists of at most one fragment. """
    symbol = 'F'
    name = 'Fragments'
    feature_enum = FeatureEnum.F

    @classmethod
    def is_standardized(cls, mol: Mol) -> bool:
        """ True if GetMolFrags reports at most one fragment for the wrapped molecule. """
        return len(rdk.GetMolFrags(mol.mol)) <= 1

    @classmethod
    def standardise_rdk(cls, rdk_mol: rdk.Mol) -> rdk.Mol:
        """ Returns the fragment parent of `rdk_mol` (rdMolStandardize.FragmentParent, with RDKit's own
        standardisation step skipped).
        """
        return rdk_std.FragmentParent(rdk_mol, skipStandardize = True)
378
+
379
+
380
+ # [I] - Isotope
381
class I(Feature):
    """ Isotope feature: a molecule is standardized if no atom carries an isotope label. """
    symbol = 'I'
    name = 'Isotope'
    feature_enum = FeatureEnum.I

    @classmethod
    def is_standardized(cls, mol: Mol) -> bool:
        """ True if GetIsotope() is 0 for every atom of the wrapped molecule. """
        a: rdk.Atom
        for a in mol.mol.GetAtoms():
            if a.GetIsotope():
                return False
        return True

    @classmethod
    def standardise_rdk(cls, rdk_mol: rdk.Mol) -> rdk.Mol:
        """ Returns a copy of `rdk_mol` in which every isotope label is reset to 0. """
        # First parameter renamed from `self` to `cls`: this is a classmethod, like in the sibling classes.
        mol_ = copy(rdk_mol)
        a: rdk.Atom
        for a in mol_.GetAtoms():
            if a.GetIsotope():
                a.SetIsotope(0)
        return mol_
402
+
403
+
404
+ # [C] - Charge
405
class C(Feature):
    """ Charge feature: a molecule is standardized if running it through the uncharger does not change it. """
    symbol = 'C'
    name = 'Charge'
    feature_enum = FeatureEnum.C

    # TODO use other uncharging approach?
    # One shared rdMolStandardize.Uncharger, created when the class body is executed.
    uncharger = rdk_std.Uncharger()

    @classmethod
    def is_standardized(cls, mol: Mol) -> bool:
        """ Uncharges the wrapped molecule and compares the result with the input via mol_identical. """
        uncharged = cls.uncharger.uncharge(mol.mol)
        return mol_identical(mol.mol, uncharged)

    @classmethod
    def standardise_rdk(cls, rdk_mol: rdk.Mol) -> rdk.Mol:
        """ Returns the result of running `rdk_mol` through the uncharger. """
        return cls.uncharger.uncharge(rdk_mol)
422
+
423
+
424
+ # [T] - Tautomer
425
class T(Feature):
    """ Tautomer feature: a molecule is standardized if it equals its canonical tautomer. """
    symbol = 'T'
    name = 'Tautomer'
    feature_enum = FeatureEnum.T

    # One shared rdMolStandardize.TautomerEnumerator, created when the class body is executed.
    tautomer_enumerator = rdk_std.TautomerEnumerator()

    @classmethod
    def is_standardized(cls, mol: Mol) -> bool:
        """ Checks whether the canonical tautomer is identical to the wrapped molecule.
        Returns False if an AtomKekulizeException is raised along the way.
        """
        original = mol.mol
        try:
            canonical = cls.tautomer_enumerator.Canonicalize(original)
            return mol_identical(original, canonical)
        except rdk.rdchem.AtomKekulizeException:
            return False

    @classmethod
    def standardise_rdk(cls, rdk_mol: rdk.Mol) -> rdk.Mol:
        """ Returns the canonical tautomer of `rdk_mol`. """
        # NOTE(review): unlike is_standardized, an AtomKekulizeException is not caught here.
        return cls.tautomer_enumerator.Canonicalize(rdk_mol)
445
+
446
+
447
+ # [S] - Stereo
448
class S(Feature):
    """ Stereo feature: a molecule is standardized if its SMILES string carries no stereo markers. """
    symbol = 'S'
    name = 'Stereo'
    feature_enum = FeatureEnum.S

    @classmethod
    def is_standardized(cls, mol: Mol) -> bool:
        """ True if none of the characters '/', '\\' and '@' occur in the SMILES string of `mol`. """
        # TODO redo this check
        return not any(marker in mol.smiles for marker in ('/', '\\', '@'))

    @classmethod
    def standardise_rdk(cls, rdk_mol: rdk.Mol) -> rdk.Mol:
        """ Returns a copy of `rdk_mol` on which RemoveStereochemistry has been applied. """
        stripped = copy(rdk_mol)
        rdk.RemoveStereochemistry(stripped)
        return stripped
463
+
464
+
465
# List of features, in F-I-C-T-S order
feats: list[Type[Feature]] = [F, I, C, T, S]

# Lookup from enum member to feature class, derived from the `feature_enum` attribute of each class
feat_enum_map: dict[FeatureEnum, Type[Feature]] = {
    feature.feature_enum: feature
    for feature in feats
}
475
+
476
+
477
+ # Utils
478
+ # --------------------------------------------------------------------------------------------------------------
479
def mol_identical(a: rdk.Mol, b: rdk.Mol) -> bool:
    """ Treats two RDKit molecules as identical if each one has a substructure match in the other.
    The second match is only attempted if the first one succeeds.
    """
    b_in_a = a.HasSubstructMatch(b)
    return b_in_a and b.HasSubstructMatch(a)
481
+
482
+
483
+
@@ -0,0 +1,120 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from chemrecon.chem.elements import Element
6
+ from chemrecon.chem import elements as chem
7
+
8
+
9
class SumFormula:
    """ Encapsulates a molecular formula.

    `formula` maps each element to its atom count. Counts may be zero or negative when the object
    represents a difference between two formulae. `charge` is the total charge, or None if unknown.

    Note that the class defines __eq__ but no __hash__, so instances are not hashable.
    """
    formula: dict[Element, int]
    charge: Optional[int]

    def __init__(self, formula: dict[Element, int], charge: Optional[int] = 0):
        """ Assumes 0 (neutral charge) rather than None (unknown). If charge is unknown, specify explicitly. """
        self.formula = formula
        self.charge = charge

    def __getitem__(self, element: Element):
        """ Atom count of `element`; raises KeyError if the element is not in the formula. """
        return self.formula[element]

    def __iter__(self):
        """ Iterates over the elements of the formula. """
        yield from self.formula

    def __or__(self, other: SumFormula):
        """ Dict union of the two element-count mappings. Returns a plain dict, not a SumFormula;
        for shared elements the count of `other` wins.
        """
        return self.formula | other.formula

    def get(self, element: Element, default: int):
        """ Atom count of `element`, or `default` if the element is not in the formula. """
        return self.formula.get(element, default)

    def elements(self) -> set[Element]:
        """ The set of elements that appear in the formula (including those with a count of 0). """
        return set(self.formula.keys())

    def is_zero(self) -> bool:
        """ Returns true if all elements are 0, and charge is 0 or unknown.
        """
        if any(count != 0 for count in self.formula.values()):
            return False
        if self.charge is not None and self.charge != 0:
            return False
        return True

    def has_negative(self) -> bool:
        """ Returns true if the formula has any negative atom counts (e.g. when it represents a difference).
        """
        return any(v < 0 for v in self.formula.values())

    # Arithmetic operations
    def __add__(self, other: SumFormula):
        """ Add the elements and charges. If at least one has undefined charge, the result has undefined charge.
        """
        sumformula = {
            e: self.get(e, 0) + other.get(e, 0)
            for e in self.elements() | other.elements()
        }
        charge: Optional[int] = None
        if self.charge is not None and other.charge is not None:
            charge = self.charge + other.charge

        return SumFormula(sumformula, charge)

    def __sub__(self, other: SumFormula):
        """ Computes the difference between sum formulae: element counts and charges of `other` are
        subtracted from those of `self`. If at least one has undefined charge, the result has undefined charge.
        """
        sumformula = {
            e: self.get(e, 0) - other.get(e, 0)
            for e in self.elements() | other.elements()
        }
        charge: Optional[int] = None
        if self.charge is not None and other.charge is not None:
            # A difference must subtract the charge as well (this used to add it).
            charge = self.charge - other.charge

        return SumFormula(sumformula, charge)

    def __eq__(self, other: SumFormula):
        """ Equal if both the element counts and the charges are equal. """
        # TODO how to handle equality when one has charge 0 and other has unknown charge?
        if not isinstance(other, SumFormula):
            # Let Python fall back to its default comparison instead of failing on a missing attribute.
            return NotImplemented
        return self.formula == other.formula and self.charge == other.charge

    # Misc
    # ------------------------------------------------------------------------------------------------------------------
    def __str__(self):
        """ Prints output in Hill order: if carbon is present, C first, then H, then all other elements
        alphabetically by symbol; otherwise all elements alphabetically. A non-zero known charge is
        appended as ' <n>+' or ' <n>-'.
        """
        # https://en.wikipedia.org/wiki/Chemical_formula#Condensed_formula
        alphabetical: list[tuple[Element, int]] = sorted(
            self.formula.items(), key = lambda pair: pair[0].symbol
        )
        if chem.C in self.formula:
            # C, H first order
            elist: list[tuple[Element, int]] = [(chem.C, self[chem.C])]
            if chem.H in self.formula:
                elist.append((chem.H, self[chem.H]))
            elist.extend(
                (e, i)
                for e, i in alphabetical
                if e not in {chem.C, chem.H}
            )
        else:
            # Alphabetical order
            elist = alphabetical

        chargestr: str = ''
        if self.charge is not None:
            if self.charge > 0:
                chargestr = f' {self.charge}+'
            elif self.charge < 0:
                chargestr = f' {abs(self.charge)}-'

        # Print as string
        return ''.join(f'{e.symbol}{i}' for e, i in elist) + chargestr
115
+
116
def molformula_from_str(s: str) -> SumFormula:
    """ Convert a molecular formula string to an object representation.

    Not implemented yet: always raises NotImplementedError, carrying the input string as its argument.
    """
    # TODO implement the parser
    raise NotImplementedError(s)
@@ -0,0 +1,97 @@
1
+ """ Handles the global connection state of ChemRecon.
2
+ """
3
+ from __future__ import annotations
4
+
5
+ import os.path
6
+
7
+ import chemrecon.core.query_handler
8
+ import chemrecon.core.populate_query_handler
9
+ from chemrecon.database.connect import postgres_connect
10
+ from chemrecon.database.params import Params, local_docker_pub, local_docker_dev, chemrecon_pub, chemrecon_dev
11
+
12
+ # Handler, default to public production database
13
+ # ----------------------------------------------------------------------------------------------------------------------
14
+ handler: chemrecon.core.query_handler.QueryHandler # QueryHandler or PopulateQueryHandler
15
+
16
def get_query_handler() -> chemrecon.core.query_handler.QueryHandler:
    """ Returns the current database query handler.

    Returns None after disconnect() has been called.
    Raises NameError (with an explanatory message) if no handler has been set at all, i.e. connect()
    has not been called yet.
    """
    try:
        return handler
    except NameError:
        # `handler` is only annotated at module level; it does not exist until connect() assigns it.
        # Same exception type as before, but with a message that says what to do.
        raise NameError(
            'No ChemRecon database handler is set; call connect() or one of the connect_*() functions first.'
        ) from None
19
+
20
def connect(params: Params, can_write: bool):
    """ Sets the database handler for the ChemRecon library to a custom database connection.

    params:     Connection parameters, handed to postgres_connect.
    can_write:  Whether the library is allowed to write/cache results in the database. If True a
                PopulateQueryHandler is installed, otherwise a plain QueryHandler.
    """
    global handler

    conn = postgres_connect(params)
    handler_cls = (
        chemrecon.core.populate_query_handler.PopulateQueryHandler
        if can_write
        else chemrecon.core.query_handler.QueryHandler
    )
    handler = handler_cls(conn)

    # Done
    # NOTE(review): this prints the full connection string - confirm it does not contain credentials.
    print(f'Handler set: {params.connection_string()}')
34
+
35
def disconnect():
    """ Drops the current database handler by resetting the module-level `handler` to None. """
    # NOTE(review): the handler's underlying connection is not closed explicitly here - confirm
    # whether it needs to be.
    global handler
    handler = None
38
+
39
def connect_public():
    """ Connects the ChemRecon library to the public ChemRecon database maintained by the developer,
    using the `chemrecon_pub` parameters and without write access.
    """
    connect(params = chemrecon_pub, can_write = False)
44
+
45
def connect_public_dev():
    """ For developer use: connects with the `chemrecon_dev` parameters and with write access.
    """
    connect(params = chemrecon_dev, can_write = True)
49
+
50
def connect_local_docker():
    """ Connects the ChemRecon library to a database in a local Docker container, using the
    `local_docker_pub` parameters and without write access. Refer to the documentation for details on
    how to run and administrate this database.
    """
    connect(params = local_docker_pub, can_write = False)
55
+
56
def connect_local_docker_dev():
    """ Connects the ChemRecon library to a database in a local Docker container, using the
    `local_docker_dev` parameters and with write access. Refer to the documentation for details on
    how to run and administrate this database.
    """
    connect(params = local_docker_dev, can_write = True)
61
+
62
+
63
+ # Local cache files location
64
+ # ----------------------------------------------------------------------------------------------------------------------
65
+ cache_dir: str = 'cache/'
66
+
67
def init_cache():
    """ If the cache dir doesn't exist, create it (including any missing parent directories).
    Does nothing if something already exists at that path.
    """
    if not os.path.exists(cache_dir):
        # exist_ok guards against another process creating the directory between the check and the call.
        os.makedirs(cache_dir, exist_ok = True)
72
+
73
def flush_cache():
    """ Remove all cached results.

    Not implemented yet: always raises NotImplementedError.
    """
    # TODO implement
    raise NotImplementedError()
77
+
78
def set_cache_dir(new_cache_dir: str):
    """ Points the module-level `cache_dir` at `new_cache_dir`. The directory itself is not created
    here; init_cache() does that.
    """
    global cache_dir
    cache_dir = new_cache_dir
81
+
82
+
83
+ # Initialise the cache
84
+ init_cache()
85
+
86
+ # External tools / dependencies
87
+ # ----------------------------------------------------------------------------------------------------------------------
88
+ # Reaction Decoder Tool, RDT
89
+ # rdt_jar_location: str = './chemrecon/redistribute/rdt/'
90
+ # rdt_jar_filename: str = 'rdt-2.4.1-jar-with-dependencies.jar'
91
+ # rdt_jar_path: str = './chemrecon/redistribute/rdt/rdt-2.4.1-jar-with-dependencies.jar'
92
+ # TODO by default in chemrecon/dependencies/rdt/...
93
+
94
+ # def set_rdt_jar_path(path: str):
95
+ # # TODO
96
+ # raise NotImplementedError()
97
+ # pass
File without changes