rdworks 0.25.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. rdworks/__init__.py +35 -0
  2. rdworks/autograph/__init__.py +4 -0
  3. rdworks/autograph/autograph.py +184 -0
  4. rdworks/autograph/centroid.py +90 -0
  5. rdworks/autograph/dynamictreecut.py +135 -0
  6. rdworks/autograph/nmrclust.py +123 -0
  7. rdworks/autograph/rckmeans.py +74 -0
  8. rdworks/bitqt/__init__.py +1 -0
  9. rdworks/bitqt/bitqt.py +355 -0
  10. rdworks/conf.py +374 -0
  11. rdworks/descriptor.py +36 -0
  12. rdworks/display.py +206 -0
  13. rdworks/ionized.py +170 -0
  14. rdworks/matchedseries.py +260 -0
  15. rdworks/mol.py +1522 -0
  16. rdworks/mollibr.py +887 -0
  17. rdworks/pka.py +38 -0
  18. rdworks/predefined/Asinex_fragment.xml +20 -0
  19. rdworks/predefined/Astex_RO3.xml +16 -0
  20. rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +52 -0
  21. rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +169 -0
  22. rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +1231 -0
  23. rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +2048 -0
  24. rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +278 -0
  25. rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +83 -0
  26. rdworks/predefined/Baell2010_PAINS/makexml.py +70 -0
  27. rdworks/predefined/Brenk2008_Dundee/makexml.py +21 -0
  28. rdworks/predefined/CNS.xml +18 -0
  29. rdworks/predefined/ChEMBL_Walters/BMS.xml +543 -0
  30. rdworks/predefined/ChEMBL_Walters/Dundee.xml +318 -0
  31. rdworks/predefined/ChEMBL_Walters/Glaxo.xml +168 -0
  32. rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +276 -0
  33. rdworks/predefined/ChEMBL_Walters/LINT.xml +174 -0
  34. rdworks/predefined/ChEMBL_Walters/MLSMR.xml +351 -0
  35. rdworks/predefined/ChEMBL_Walters/PAINS.xml +1446 -0
  36. rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +501 -0
  37. rdworks/predefined/ChEMBL_Walters/makexml.py +40 -0
  38. rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +168 -0
  39. rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +102 -0
  40. rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +6 -0
  41. rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +6 -0
  42. rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +6 -0
  43. rdworks/predefined/Hann1999_Glaxo/makexml.py +83 -0
  44. rdworks/predefined/Kazius2005/Kazius2005.xml +114 -0
  45. rdworks/predefined/Kazius2005/makexml.py +66 -0
  46. rdworks/predefined/ZINC_druglike.xml +24 -0
  47. rdworks/predefined/ZINC_fragment.xml +14 -0
  48. rdworks/predefined/ZINC_leadlike.xml +15 -0
  49. rdworks/predefined/fragment.xml +7 -0
  50. rdworks/predefined/ionized/simple_smarts_pattern.csv +57 -0
  51. rdworks/predefined/ionized/smarts_pattern.csv +107 -0
  52. rdworks/predefined/misc/makexml.py +119 -0
  53. rdworks/predefined/misc/reactive-part-2.xml +104 -0
  54. rdworks/predefined/misc/reactive-part-3.xml +74 -0
  55. rdworks/predefined/misc/reactive.xml +321 -0
  56. rdworks/readin.py +312 -0
  57. rdworks/rgroup.py +2173 -0
  58. rdworks/scaffold.py +520 -0
  59. rdworks/std.py +143 -0
  60. rdworks/stereoisomers.py +127 -0
  61. rdworks/tautomers.py +20 -0
  62. rdworks/units.py +63 -0
  63. rdworks/utils.py +495 -0
  64. rdworks/xml.py +260 -0
  65. rdworks-0.25.7.dist-info/METADATA +37 -0
  66. rdworks-0.25.7.dist-info/RECORD +69 -0
  67. rdworks-0.25.7.dist-info/WHEEL +5 -0
  68. rdworks-0.25.7.dist-info/licenses/LICENSE +21 -0
  69. rdworks-0.25.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,127 @@
1
+ from typing import List, Tuple, Union, Optional
2
+
3
+ from rdkit import Chem
4
+ from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions
5
+
6
+ from .mol import Mol
7
+ from .mollibr import MolLibr
8
+
9
+
10
def _enum_stereoisomers(rdmol:Chem.Mol) -> List[Chem.Mol]:
    """Enumerates stereoisomers of a molecule with RDKit's EnumerateStereoisomers.

    The enumeration is configured to vary only unassigned stereo elements
    (`onlyUnassigned=True`), to skip the embedding check
    (`tryEmbedding=False`), to return unique isomers (`unique=True`) and
    to stop at 1024 isomers (`maxIsomers=1024`).

    Args:
        rdmol (Chem.Mol): input molecule.

    Returns:
        List[Chem.Mol]: a list of enumerated stereoisomers.
    """
    enum_options = StereoEnumerationOptions(
        tryEmbedding=False,
        onlyUnassigned=True,
        maxIsomers=1024,
        rand=None,
        unique=True,
        onlyStereoGroups=False,
    )
    isomer_iter = EnumerateStereoisomers(rdmol, options=enum_options)
    return list(isomer_iter)
29
+
30
+
31
def _enum_ring_bond_stereo(rdmol:Chem.Mol, ring_bond_stereo_info:List[Tuple],
                           override:bool=False) -> List[Chem.Mol]:
    r"""Enumerates unspecified ring double bond stereochemistry (cis/trans).

    <pre>
    a1      a4      a1
      \    /          \
       a2=a3           a2=a3
                            \
                             a4
    </pre>

    Every selected bond is assigned both cis and trans, and all
    combinations over the selected bonds are generated, so `k` selected
    bonds yield `2**k` molecules. The input molecule is not modified;
    each returned isomer is an independent copy.

    Args:
        rdmol (Chem.Mol): input molecule.
        ring_bond_stereo_info (List[Tuple]):
            ring_bond_stereo_info will be set when .remove_stereo() is called.
            bond_stereo_info = [(bond_idx, bond_stereo_descriptor), ..] where
            bond_stereo_descriptor is `Chem.StereoDescriptor.Bond_Cis` or
            `Chem.StereoDescriptor.Bond_Trans`, or `Chem.StereoDescriptor.NoValue`.
        override (bool, optional): if True, enumerate every listed bond
            regardless of its recorded descriptor; otherwise only bonds
            recorded as `NoValue` are enumerated. Defaults to False.

    Returns:
        List[Chem.Mol]: list of enumerated stereoisomers; empty if no bond
            qualifies for enumeration.
    """
    from itertools import product

    def _heaviest_other_neighbor_idx(atom, excluded_idx):
        # Reference atom: the neighbor with the highest atomic number,
        # ignoring the atom across the double bond. max() returns the first
        # maximal element, which matches a stable descending sort.
        candidates = [nbr for nbr in atom.GetNeighbors() if nbr.GetIdx() != excluded_idx]
        return max(candidates, key=lambda nbr: nbr.GetAtomicNum()).GetIdx()

    # Collect (bond_idx, a1_idx, a4_idx) for each bond that must be enumerated.
    targets = []
    for bond_idx, bond_stereo_desc in ring_bond_stereo_info:
        if (bond_stereo_desc == Chem.StereoDescriptor.NoValue) or override:
            bond = rdmol.GetBondWithIdx(bond_idx)
            a2, a3 = bond.GetBeginAtom(), bond.GetEndAtom()
            a1_idx = _heaviest_other_neighbor_idx(a2, a3.GetIdx())
            a4_idx = _heaviest_other_neighbor_idx(a3, a2.GetIdx())
            targets.append((bond_idx, a1_idx, a4_idx))

    if not targets:
        return []

    choices = (Chem.BondStereo.STEREOCIS, Chem.BondStereo.STEREOTRANS)
    isomers = []
    # Full cartesian product so that every isomer has all target bonds assigned.
    for assignment in product(choices, repeat=len(targets)):
        isomer = Chem.Mol(rdmol)  # copy: never mutate the caller's molecule
        for (bond_idx, a1_idx, a4_idx), stereo in zip(targets, assignment):
            bond = isomer.GetBondWithIdx(bond_idx)
            bond.SetStereoAtoms(a1_idx, a4_idx) # need to set reference atoms
            bond.SetStereo(stereo)
        isomers.append(isomer)
    return isomers
72
+
73
+
74
def complete_stereoisomers(molecular_input:Union[Mol, str, Chem.Mol], name:Optional[str]=None,
                           std:bool=False, override:bool=False, **kwargs) -> MolLibr:
    """Completes stereoisomers and returns a rdworks.MolLibr.

    Args:
        molecular_input (Union[Mol, str, Chem.Mol]): input molecule.
        name (Optional[str], optional): name of the molecule. Defaults to None.
        std (bool, optional): whether to standardize the input. Only used when
            a SMILES or rdkit.Chem.Mol is given. Defaults to False.
        override (bool, optional): whether to override input stereoisomers. Defaults to False.
        **kwargs: forwarded to `MolLibr.compute()`.

    Raises:
        TypeError: if `molecular_input` is not rdworks.Mol, SMILES, or rdkit.Chem.Mol object.

    Returns:
        MolLibr: a library of complete stereoisomers.
    """
    if isinstance(molecular_input, Mol):
        mol = molecular_input.rename(name) if name else molecular_input
    elif isinstance(molecular_input, (str, Chem.Mol)):
        mol = Mol(molecular_input, name, std)
    else:
        raise TypeError('complete_stereoisomers() expects rdworks.Mol, SMILES or rdkit.Chem.Mol object')

    # Record ring bond stereo before stereochemistry is (optionally) removed.
    ring_bond_stereo_info = mol.get_ring_bond_stereo()
    if override:
        mol = mol.remove_stereo()

    rdmols = _enum_stereoisomers(mol.rdmol)

    # Ring bond stereo is not properly enumerated by EnumerateStereoisomers():
    # cis/trans information is lost once stereochemistry is removed, so the
    # ring double bonds are enumerated separately by _enum_ring_bond_stereo().
    if len(ring_bond_stereo_info) > 0:
        ring_cis_trans = []
        for parent_rdmol in rdmols:
            ring_cis_trans.extend(
                _enum_ring_bond_stereo(parent_rdmol, ring_bond_stereo_info, override=override)
            )
        if len(ring_cis_trans) > 0:
            rdmols = ring_cis_trans

    libr = MolLibr(rdmols)
    if len(rdmols) > 1:
        libr = libr.unique().rename(mol.name, sep='.')
    else:
        libr = libr.rename(mol.name)
    libr = libr.compute(**kwargs)

    # Carry the properties of the input molecule over to every isomer.
    for isomer in libr:
        isomer.props.update(mol.props)

    return libr
rdworks/tautomers.py ADDED
@@ -0,0 +1,20 @@
1
+ from rdkit.Chem.MolStandardize import rdMolStandardize
2
+
3
+ from .mol import Mol
4
+ from .mollibr import MolLibr
5
+
6
+
7
def complete_tautomers(mol:Mol, **kwargs) -> MolLibr:
    """Returns a library of enumerated tautomers.

    Tautomers are generated with RDKit's `rdMolStandardize.TautomerEnumerator`.
    When more than one tautomer is produced, the library is de-duplicated and
    its members are renamed from `mol.name` using `.` as separator.

    Args:
        mol (Mol): input molecule.
        **kwargs: forwarded to `MolLibr.compute()`.

    Returns:
        MolLibr: a library of enumerated tautomers.
    """
    tautomer_rdmols = list(rdMolStandardize.TautomerEnumerator().Enumerate(mol.rdmol))
    libr = MolLibr(tautomer_rdmols)
    if len(tautomer_rdmols) > 1:
        libr = libr.unique().rename(mol.name, sep='.')
    # NOTE(review): with a single tautomer the library is not renamed to
    # mol.name -- confirm whether that is intended.
    return libr.compute(**kwargs)
rdworks/units.py ADDED
@@ -0,0 +1,63 @@
1
+ from types import SimpleNamespace
2
+
3
# Energy conversion factors (CODATA 2018).
# In ASE, the default energy unit is eV (electron volt);
# these factors are used to convert energies to kcal/mol.
hartree2ev = 27.211386245988
ev2kcalpermol = 23.060547830619026
# Derived from the two factors above so that all three stay mutually
# consistent (~627.509474063 kcal/mol per hartree). The previous literal,
# 627.50947337481, differed from this product by about 7e-7.
hartree2kcalpermol = hartree2ev * ev2kcalpermol
9
+
10
# Lookup tables keyed by atomic number (1..118):
#   periodictable.symbol[Z]      -> element symbol, e.g. 6 -> 'C'
#   periodictable.long_symbol[Z] -> element name,   e.g. 6 -> 'Carbon'
# Both tables are listed in order of atomic number; enumerate() assigns the keys.
periodictable = SimpleNamespace(
    symbol = dict(enumerate((
        'H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne',
        'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca',
        'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
        'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr',
        'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn',
        'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd',
        'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb',
        'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg',
        'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th',
        'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm',
        'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds',
        'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og',
    ), start=1)),
    long_symbol = dict(enumerate((
        'Hydrogen', 'Helium', 'Lithium', 'Beryllium', 'Boron',
        'Carbon', 'Nitrogen', 'Oxygen', 'Fluorine', 'Neon',
        'Sodium', 'Magnesium', 'Aluminum', 'Silicon', 'Phosphorus',
        'Sulfur', 'Chlorine', 'Argon', 'Potassium', 'Calcium',
        'Scandium', 'Titanium', 'Vanadium', 'Chromium', 'Manganese',
        'Iron', 'Cobalt', 'Nickel', 'Copper', 'Zinc',
        'Gallium', 'Germanium', 'Arsenic', 'Selenium', 'Bromine',
        'Krypton', 'Rubidium', 'Strontium', 'Yttrium', 'Zirconium',
        'Niobium', 'Molybdenum', 'Technetium', 'Ruthenium', 'Rhodium',
        'Palladium', 'Silver', 'Cadmium', 'Indium', 'Tin',
        'Antimony', 'Tellurium', 'Iodine', 'Xenon', 'Cesium',
        'Barium', 'Lanthanum', 'Cerium', 'Praseodymium', 'Neodymium',
        'Promethium', 'Samarium', 'Europium', 'Gadolinium', 'Terbium',
        'Dysprosium', 'Holmium', 'Erbium', 'Thulium', 'Ytterbium',
        'Lutetium', 'Hafnium', 'Tantalum', 'Tungsten', 'Rhenium',
        'Osmium', 'Iridium', 'Platinum', 'Gold', 'Mercury',
        'Thallium', 'Lead', 'Bismuth', 'Polonium', 'Astatine',
        'Radon', 'Francium', 'Radium', 'Actinium', 'Thorium',
        'Protactinium', 'Uranium', 'Neptunium', 'Plutonium', 'Americium',
        'Curium', 'Berkelium', 'Californium', 'Einsteinium', 'Fermium',
        'Mendelevium', 'Nobelium', 'Lawrencium', 'Rutherfordium', 'Dubnium',
        'Seaborgium', 'Bohrium', 'Hassium', 'Meitnerium', 'Darmstadtium',
        'Roentgenium', 'Copernicium', 'Nihonium', 'Flerovium', 'Moscovium',
        'Livermorium', 'Tennessine', 'Oganesson',
    ), start=1)),
)
52
+
53
+
54
# UFF bond radii (Rappe et al. JACS 1992), in angstroms, keyed by atomic number.
# The bond-order and electronegativity corrections of the original paper are
# neglected. Where several values exist for the same atom, the largest was used.
radii = {
    1: 0.354,    # H
    5: 0.838,    # B
    6: 0.757,    # C
    7: 0.700,    # N
    8: 0.658,    # O
    9: 0.668,    # F
    14: 1.117,   # Si
    # NOTE(review): P carries the same value as Si -- confirm against the UFF table.
    15: 1.117,   # P
    16: 1.064,   # S
    17: 1.044,   # Cl
    32: 1.197,   # Ge
    33: 1.211,   # As
    34: 1.190,   # Se
    35: 1.192,   # Br
    51: 1.407,   # Sb
    52: 1.386,   # Te
    53: 1.382,   # I
}