rdworks 0.47.1__py3-none-any.whl → 0.48.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
rdworks/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = '0.47.1'
1
+ __version__ = '0.48.1'
2
2
 
3
3
  from rdworks.conf import Conf
4
4
  from rdworks.mol import Mol
rdworks/ionized.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import importlib.resources
2
+ from types import SimpleNamespace
2
3
  import pandas as pd
3
4
 
4
5
  from rdkit import Chem
@@ -6,6 +7,11 @@ from rdkit import Chem
6
7
  # adapted from https://github.com/dptech-corp/Uni-pKa/enumerator
7
8
 
8
9
  class IonizedStates:
10
+ """Knowledge-based enumeration of (de)protonated states"""
11
+
12
+ smarts_path = importlib.resources.files('rdworks.predefined.ionized')
13
+ ionization_patterns = pd.read_csv(smarts_path / 'simple_smarts_pattern.csv')
14
+
9
15
  # Unreasonable chemical structures
10
16
  unreasonable_patterns = [
11
17
  Chem.MolFromSmarts(s) for s in [
@@ -31,67 +37,38 @@ class IonizedStates:
31
37
  "[N+1](=O)-[O]-[H]",
32
38
  ]]
33
39
 
34
- smarts_path = importlib.resources.files('rdworks.predefined.ionized')
35
- protonation_patterns = pd.read_csv(smarts_path / 'simple_smarts_pattern.csv')
36
40
 
37
- def __init__(self, smiles:str):
41
+ def __init__(self, smiles: str, charge_min: int = -2, charge_max: int = 2):
38
42
  self.smiles = Chem.CanonSmiles(smiles)
43
+ self.charge_max = charge_max
44
+ self.charge_min = charge_min
45
+
39
46
  self.rdmol = Chem.MolFromSmiles(self.smiles)
40
47
  self.rdmol_H = Chem.AddHs(self.rdmol)
41
48
  self.charge = Chem.GetFormalCharge(self.rdmol_H)
42
- self.charge_max = 2
43
- self.charge_min = -2
49
+
44
50
  # initial states
45
51
  self.states = {self.smiles : (self.rdmol_H, self.charge)}
46
- # initial protonation sites
47
- self.protonation_sites = {self.smiles : self.set_protonation_sites(self.smiles)}
48
- # generate initial states
49
- self.protonate(self.smiles)
52
+
53
+ # initial ionization sites
54
+ self.sites = {self.smiles: self.set_ionization_sites(self.smiles)}
50
55
 
51
-
52
- def get_protonation_sites(self) -> dict:
53
- return self.protonation_sites
56
+ # pKa pairs:
57
+ # HA(acid) + H2O == A-(base) + H3O+ or HA+(acid) + H2O == A(base) + H3O+
58
+ self.pairs = []
54
59
 
55
-
56
- def get_states_by_charge(self) -> dict:
60
+ # iteratively build an ensemble of ionized states
57
61
  self.ensemble()
58
- data = {}
59
- for smiles, (romol, charge) in self.states.items():
60
- if charge in data:
61
- data[charge].append(smiles)
62
- else:
63
- data[charge] = [smiles]
64
62
 
65
- return data
66
-
67
- def get_states(self) -> list:
68
- return [smiles for smiles in self.states]
69
-
70
-
71
- def get_states_mol(self) -> list[Chem.Mol]:
72
- return [romol for smiles, (romol, charge) in self.states.items()]
73
-
74
-
75
- def get_num_states(self) -> int:
76
- return len(self.states)
77
63
 
78
64
 
79
65
  @staticmethod
80
- def clean_smiles(rdmol:Chem.Mol) -> str:
81
- Chem.SanitizeMol(rdmol)
82
- rdmol = Chem.MolFromSmiles(Chem.MolToSmiles(rdmol))
83
- rdmol_H = Chem.AddHs(rdmol)
84
- rdmol = Chem.RemoveHs(rdmol_H)
85
- return Chem.CanonSmiles(Chem.MolToSmiles(rdmol))
86
-
87
-
88
- @staticmethod
89
- def set_protonation_sites(smiles:str) -> tuple:
66
+ def set_ionization_sites(smiles: str) -> tuple:
90
67
  subject = Chem.MolFromSmiles(smiles)
91
68
  subject = Chem.AddHs(subject)
92
69
  charge = Chem.GetFormalCharge(subject)
93
70
  indices = [] # atom indices of protonation/deprotonation site(s)
94
- for i, name, smarts, smarts_index, acid_or_base in IonizedStates.protonation_patterns.itertuples():
71
+ for i, name, smarts, smarts_index, acid_or_base in IonizedStates.ionization_patterns.itertuples():
95
72
  pattern = Chem.MolFromSmarts(smarts)
96
73
  matches = subject.GetSubstructMatches(pattern)
97
74
  # returns a list of tuples, where each tuple contains the indices
@@ -100,21 +77,34 @@ class IonizedStates:
100
77
  if len(matches) > 0:
101
78
  smarts_index = int(smarts_index)
102
79
  indices += [(match[smarts_index], acid_or_base) for match in matches]
80
+
103
81
  return (list(set(indices)), subject, charge)
82
+
83
+
84
+ @staticmethod
85
+ def clean_smiles(rdmol: Chem.Mol) -> str:
86
+ Chem.SanitizeMol(rdmol)
87
+ rdmol = Chem.MolFromSmiles(Chem.MolToSmiles(rdmol))
88
+ rdmol_H = Chem.AddHs(rdmol)
89
+ rdmol = Chem.RemoveHs(rdmol_H)
90
+ return Chem.CanonSmiles(Chem.MolToSmiles(rdmol))
104
91
 
105
92
 
106
93
  @staticmethod
107
- def reasonable(romol:Chem.Mol) -> bool:
94
+ def reasonable(romol: Chem.Mol) -> bool:
108
95
  return all([len(romol.GetSubstructMatches(p)) == 0 for p in IonizedStates.unreasonable_patterns])
96
+
109
97
 
110
-
111
- def protonate(self, smiles:str) -> int:
98
+ def ionize(self, smiles: str | None = None) -> int:
112
99
  num_added_states = 0
113
100
 
114
- if smiles not in self.protonation_sites:
115
- self.protonation_sites[smiles] = self.set_protonation_sites(smiles)
101
+ if smiles is None:
102
+ smiles = self.smiles
103
+
104
+ if smiles not in self.sites:
105
+ self.sites[smiles] = self.set_ionization_sites(smiles)
116
106
 
117
- (indices, subject, charge) = self.protonation_sites[smiles]
107
+ (indices, subject, charge) = self.sites[smiles]
118
108
 
119
109
  if (charge >= self.charge_max) or (charge <= self.charge_min):
120
110
  # formal charge will be increased or decreased by protonation/deprotonation
@@ -149,22 +139,56 @@ class IonizedStates:
149
139
  edmol = Chem.AddHs(edmol)
150
140
 
151
141
  # Clean up and save SMILES
152
- state_smiles = IonizedStates.clean_smiles(edmol)
153
- state_mol = Chem.MolFromSmiles(state_smiles)
154
- state_mol = Chem.AddHs(state_mol)
155
- state_charge = Chem.GetFormalCharge(state_mol)
156
- if self.reasonable(state_mol):
157
- if state_smiles in self.states:
142
+ ionized_smiles = IonizedStates.clean_smiles(edmol)
143
+ ionized_mol = Chem.MolFromSmiles(ionized_smiles)
144
+ ionized_mol = Chem.AddHs(ionized_mol)
145
+ ionized_charge = Chem.GetFormalCharge(ionized_mol)
146
+ if self.reasonable(ionized_mol):
147
+ if ionized_smiles in self.states:
158
148
  continue
159
- self.states[state_smiles] = (state_mol, state_charge)
149
+ self.states[ionized_smiles] = (ionized_mol, ionized_charge)
160
150
  num_added_states += 1
151
+
152
+ # store acid-base pair information for pKa
153
+ if acid_or_base == 'A':
154
+ self.pairs.append((i, smiles, ionized_smiles))
155
+ elif acid_or_base == 'B':
156
+ self.pairs.append((i, ionized_smiles, smiles))
161
157
 
162
158
  return num_added_states
163
-
159
+
164
160
 
165
161
  def ensemble(self) -> None:
162
+ # populate initial states
163
+ self.ionize()
164
+
165
+ # propagate
166
166
  num_added_states = None
167
167
  while num_added_states is None or num_added_states > 0:
168
- states = self.states.copy()
168
+ states = self.states.copy() # dictionary
169
+ # self.ionize(smiles) below will change self.states
170
+ # so we cannot iterate self.states. Instead we will
171
+ # iterate over a copy of the self.states
169
172
  for smiles in states:
170
- num_added_states = self.protonate(smiles)
173
+ num_added_states = self.ionize(smiles)
174
+
175
+
176
+ def count(self) -> int:
177
+ return len(self.states)
178
+
179
+
180
+ def get_sites(self) -> dict:
181
+ return self.sites
182
+
183
+
184
+ def get_smiles(self) -> list[str]:
185
+ return [smiles for smiles in self.states]
186
+
187
+
188
+ def get_rdmol(self) -> list[Chem.Mol]:
189
+ return [romol for smiles, (romol, charge) in self.states.items()]
190
+
191
+
192
+ def get_pairs(self) -> list:
193
+ return self.pairs
194
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rdworks
3
- Version: 0.47.1
3
+ Version: 0.48.1
4
4
  Summary: Routine tasks built on RDKit and other tools
5
5
  Author-email: Sung-Hun Bae <sunghun.bae@gmail.com>
6
6
  Maintainer-email: Sung-Hun Bae <sunghun.bae@gmail.com>
@@ -1,8 +1,8 @@
1
- rdworks/__init__.py,sha256=CkKXxGhqvK8jzCxjOXnj2IOOvdHKpHE6ekjM5ZECSUw,1368
1
+ rdworks/__init__.py,sha256=TjR6uygQrAgURJ2d1zoPHmai8uJLNF0oFmG5s8Q6TJI,1368
2
2
  rdworks/conf.py,sha256=iQLb3Qg3pjGiiMVMJ5-d57BC1id3zxEhEGlhhrLrA_c,34162
3
3
  rdworks/descriptor.py,sha256=34T_dQ6g8v3u-ym8TLKbQtxIIV5TEo-d3pdedq3o-cg,2106
4
4
  rdworks/display.py,sha256=JR0gR26UpH-JCxVOaqXZCUj2MiGZSrx9Me87FncspVI,13469
5
- rdworks/ionized.py,sha256=5oIjMRpkX792RIpEEE2Ir96icfFaN_h21mSihhfQPAw,6713
5
+ rdworks/ionized.py,sha256=LQLApkskhPPMZCmFMbStJXS-ugVTDSzZJD7pQbJTe8E,7425
6
6
  rdworks/matchedseries.py,sha256=A3ON4CUpQV159mu9VqgNiJ8uoQ9ePOry9d3ra4NCAgc,10377
7
7
  rdworks/mol.py,sha256=e62DYV8XeqLL1mBydbS881pSUfJjaeHxalG64CIant8,68133
8
8
  rdworks/mollibr.py,sha256=X4UBO6Ga-QmNS7RwUiaDYAx0Q5hnWs71yTkEpH02Qb4,37696
@@ -66,8 +66,8 @@ rdworks/predefined/misc/reactive-part-3.xml,sha256=LgWHSEbRTVmgBoIO45xbTo1xQJs0X
66
66
  rdworks/predefined/misc/reactive.xml,sha256=syedoQ6VYUfRLnxy99ObuDniJ_a_WhrWAJbTKFfJ6VY,11248
67
67
  rdworks/xtb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
68
  rdworks/xtb/wrapper.py,sha256=NQgkdRKN5YEtv_UPYKWDijzMEs5v2kFrhUWHqiro7bE,22174
69
- rdworks-0.47.1.dist-info/licenses/LICENSE,sha256=UOkJSBqYyQUvtCp7a-vdCANeEcLE2dnTie_eB1By5SY,1074
70
- rdworks-0.47.1.dist-info/METADATA,sha256=w8BCgsWZ6AqzjSjZL1z4ker0-mwca09u5XiBsXa0ycw,1967
71
- rdworks-0.47.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
72
- rdworks-0.47.1.dist-info/top_level.txt,sha256=05C98HbvBK2axUBogC_hAT_CdpOeQYGnQ6vRAgawr8s,8
73
- rdworks-0.47.1.dist-info/RECORD,,
69
+ rdworks-0.48.1.dist-info/licenses/LICENSE,sha256=UOkJSBqYyQUvtCp7a-vdCANeEcLE2dnTie_eB1By5SY,1074
70
+ rdworks-0.48.1.dist-info/METADATA,sha256=op0QbvR5pScMsbe112ZrEUmSoT77msXNuZCd-zhTOF8,1967
71
+ rdworks-0.48.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
72
+ rdworks-0.48.1.dist-info/top_level.txt,sha256=05C98HbvBK2axUBogC_hAT_CdpOeQYGnQ6vRAgawr8s,8
73
+ rdworks-0.48.1.dist-info/RECORD,,