rdworks 0.25.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. rdworks/__init__.py +35 -0
  2. rdworks/autograph/__init__.py +4 -0
  3. rdworks/autograph/autograph.py +184 -0
  4. rdworks/autograph/centroid.py +90 -0
  5. rdworks/autograph/dynamictreecut.py +135 -0
  6. rdworks/autograph/nmrclust.py +123 -0
  7. rdworks/autograph/rckmeans.py +74 -0
  8. rdworks/bitqt/__init__.py +1 -0
  9. rdworks/bitqt/bitqt.py +355 -0
  10. rdworks/conf.py +374 -0
  11. rdworks/descriptor.py +36 -0
  12. rdworks/display.py +206 -0
  13. rdworks/ionized.py +170 -0
  14. rdworks/matchedseries.py +260 -0
  15. rdworks/mol.py +1522 -0
  16. rdworks/mollibr.py +887 -0
  17. rdworks/pka.py +38 -0
  18. rdworks/predefined/Asinex_fragment.xml +20 -0
  19. rdworks/predefined/Astex_RO3.xml +16 -0
  20. rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +52 -0
  21. rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +169 -0
  22. rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +1231 -0
  23. rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +2048 -0
  24. rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +278 -0
  25. rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +83 -0
  26. rdworks/predefined/Baell2010_PAINS/makexml.py +70 -0
  27. rdworks/predefined/Brenk2008_Dundee/makexml.py +21 -0
  28. rdworks/predefined/CNS.xml +18 -0
  29. rdworks/predefined/ChEMBL_Walters/BMS.xml +543 -0
  30. rdworks/predefined/ChEMBL_Walters/Dundee.xml +318 -0
  31. rdworks/predefined/ChEMBL_Walters/Glaxo.xml +168 -0
  32. rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +276 -0
  33. rdworks/predefined/ChEMBL_Walters/LINT.xml +174 -0
  34. rdworks/predefined/ChEMBL_Walters/MLSMR.xml +351 -0
  35. rdworks/predefined/ChEMBL_Walters/PAINS.xml +1446 -0
  36. rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +501 -0
  37. rdworks/predefined/ChEMBL_Walters/makexml.py +40 -0
  38. rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +168 -0
  39. rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +102 -0
  40. rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +6 -0
  41. rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +6 -0
  42. rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +6 -0
  43. rdworks/predefined/Hann1999_Glaxo/makexml.py +83 -0
  44. rdworks/predefined/Kazius2005/Kazius2005.xml +114 -0
  45. rdworks/predefined/Kazius2005/makexml.py +66 -0
  46. rdworks/predefined/ZINC_druglike.xml +24 -0
  47. rdworks/predefined/ZINC_fragment.xml +14 -0
  48. rdworks/predefined/ZINC_leadlike.xml +15 -0
  49. rdworks/predefined/fragment.xml +7 -0
  50. rdworks/predefined/ionized/simple_smarts_pattern.csv +57 -0
  51. rdworks/predefined/ionized/smarts_pattern.csv +107 -0
  52. rdworks/predefined/misc/makexml.py +119 -0
  53. rdworks/predefined/misc/reactive-part-2.xml +104 -0
  54. rdworks/predefined/misc/reactive-part-3.xml +74 -0
  55. rdworks/predefined/misc/reactive.xml +321 -0
  56. rdworks/readin.py +312 -0
  57. rdworks/rgroup.py +2173 -0
  58. rdworks/scaffold.py +520 -0
  59. rdworks/std.py +143 -0
  60. rdworks/stereoisomers.py +127 -0
  61. rdworks/tautomers.py +20 -0
  62. rdworks/units.py +63 -0
  63. rdworks/utils.py +495 -0
  64. rdworks/xml.py +260 -0
  65. rdworks-0.25.7.dist-info/METADATA +37 -0
  66. rdworks-0.25.7.dist-info/RECORD +69 -0
  67. rdworks-0.25.7.dist-info/WHEEL +5 -0
  68. rdworks-0.25.7.dist-info/licenses/LICENSE +21 -0
  69. rdworks-0.25.7.dist-info/top_level.txt +1 -0
rdworks/bitqt/bitqt.py ADDED
@@ -0,0 +1,355 @@
1
+ """
2
+ @author: Roy Gonzalez-Aleman [roy_gonzalez@fq.uh.cu]
3
+ @author: Daniel Platero Rochart [daniel.platero@gmail.com]
4
+ """
5
+ from collections import deque, OrderedDict
6
+ from typing import Tuple, Optional
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+ from bitarray import util as bu
12
+ from bitarray import bitarray as ba
13
+
14
+ from ..autograph.centroid import centroid_medoid
15
+
16
+
17
+
18
def convert_to_bitarr_matrix(rmsdMatrix:np.array, cutoff:float) -> OrderedDict:
    """
    Encode a pairwise RMSD matrix as one bitarray per row.

    Bit ``j`` of ``matrix[i]`` is turned on when ``rmsdMatrix[i, j] <= cutoff``,
    so pairwise similarity is kept in RAM as bits (dict of bitarrays), not
    floats.

    Parameters
    ----------
    rmsdMatrix : numpy.ndarray
        2-D array of pairwise RMSD values; row ``i`` holds the distances from
        item ``i`` to every item.
    cutoff : float
        inclusive RMSD threshold below which two items count as neighbors.

    Returns
    -------
    matrix : collections.OrderedDict
        dict of bitarrays keyed by row index. Every bitarray is zero-padded
        at the end so that its length is a multiple of 8.
    """
    matrix = OrderedDict()
    for i, row in enumerate(rmsdMatrix):
        # Compare against the scalar itself. Casting the cutoff to float32
        # first would round it (e.g. 0.7 -> 0.69999999), so a float64 RMSD
        # exactly equal to the cutoff could be wrongly rejected.
        within_cutoff = np.less_equal(row, cutoff)
        bitarr = ba()
        # pack() maps every non-zero byte of the boolean buffer to one bit.
        bitarr.pack(within_cutoff.tobytes())
        # Pad with zeros up to a whole number of bytes.
        bitarr.fill()
        matrix[i] = bitarr
    return matrix
42
+
43
+
44
+
45
def calc_matrix_degrees(unclustered_bit, matrix):
    """
    Calculate number of neighbors (degree) of unclustered nodes in matrix.

    Parameters
    ----------
    unclustered_bit : bitarray.bitarray
        bitarray with indices of unclustered nodes turned on.
    matrix : collections.OrderedDict
        dict of bitarrays.

    Returns
    -------
    degrees : numpy.ndarray
        int32 array with one entry per bit of ``unclustered_bit``. Nodes whose
        bit is off, and nodes that have no row in ``matrix``, keep degree 0.
    """
    degrees = np.zeros(len(unclustered_bit), dtype=np.int32)
    # ``search`` can be iterated on every bitarray release (it returns a list
    # before 3.0 and an iterator afterwards); ``itersearch`` is no longer
    # available in bitarray 3.0.
    for node in unclustered_bit.search(ba('1')):
        row = matrix.get(node)
        # Bits beyond the last matrix row (byte padding) have no entry.
        if row is not None:
            degrees[node] = row.count()
    return degrees
70
+
71
+
72
def colour_matrix(degrees, matrix):
    """
    Greedy coloring of bit-encoded RMSD matrix.

    Nodes are visited in the order given by ``(-degrees[:len(matrix)]).argsort()``.
    Each not-yet-colored node opens a new color class, which is then extended
    with every uncolored node that is not a neighbor of any node already in
    the class.

    Parameters
    ----------
    degrees : numpy.ndarray
        array used to order the nodes; its size also fixes the length of the
        returned array.
    matrix : collections.OrderedDict
        dict of bitarrays.

    Returns
    -------
    colors : numpy.ndarray
        array of colors (1, 2, ...) assigned to each node of the matrix.
    """
    # Constants ---------------------------------------------------------------
    N = degrees.size
    m = len(matrix)
    one = ba('1')
    xcolor = 0
    # Initialize containers ---------------------------------------------------
    colors = np.zeros(N, dtype=np.int32)
    colored = ba(N)
    colored.setall(0)
    seen = set()
    for max_node in (-degrees[:m]).argsort():
        if max_node in seen:
            continue
        seen.add(max_node)
        xcolor += 1
        # Candidates for this color: not neighbors of max_node, not colored.
        candidates = ~matrix[max_node] & ~colored
        passed = [max_node]
        # ``candidates`` shrinks in place while it is scanned, so look up the
        # next set bit explicitly after every update instead of relying on
        # ``itersearch`` (no longer available in bitarray 3.0).
        candidate = candidates.find(one)
        while candidate != -1:
            passed.append(candidate)
            row = matrix.get(candidate)
            # Indices without a matrix row (byte padding) exclude nobody.
            if row is not None:
                candidates &= ~row
                if not candidates.any():
                    break
            candidate = candidates.find(one, candidate + 1)
        seen.update(passed)
        # Deliver a color class to passed nodes -------------------------------
        colors[passed] = xcolor
        colored = ba()
        colored.pack(colors.astype(np.bool_).tobytes())
        # Stop as soon as every node has a color.
        if colored.count(0) == 0:
            break
    return colors
127
+
128
+
129
def bitarray_to_np(bitarr):
    """
    Expand a bit container into a boolean numpy array.

    Parameters
    ----------
    bitarr : bitarray.bitarray
        a bitarray (anything ``numpy.unpackbits`` accepts as uint8 data).

    Returns
    -------
    numpy.ndarray
        boolean array with one element per bit of the underlying bytes.
    """
    # unpackbits yields one uint8 (0 or 1) per bit; cast those to booleans.
    unpacked = np.unpackbits(bitarr)
    return unpacked.astype(np.bool_)
144
+
145
+
146
def do_bit_cascade(big_node, degrees, colors, matrix, max_):
    """
    Perform successive AND operations between an initial bitarray and
    subsequent bitarray candidates to search for a clique.

    Parameters
    ----------
    big_node : int
        node whose bitarray will start the operations.
    degrees : numpy.ndarray
        array containing each node degree.
    colors : numpy.ndarray
        array of colors assigned to each node of the matrix.
    matrix : collections.OrderedDict
        dict of bitarrays.
    max_ : int
        size to beat: the search is abandoned as soon as the running
        bitarray has ``max_`` or fewer bits turned on.

    Returns
    -------
    None
        when the neighbors of ``big_node`` carry ``max_`` or fewer distinct
        colors, or when the running bitarray drops to ``max_`` or fewer bits.
    (cascade, members) : tuple
        otherwise: the bitarray left after the AND operations and the
        numpy.ndarray of node indices turned on in it.
    """
    cascade = matrix[big_node]
    # .... recovering neighbors and their information .........................
    neighbours = bitarray_to_np(cascade).nonzero()[0]
    neighbour_colors = colors[neighbours]
    # Too few distinct colors among the neighbors: give up early
    # (presumably because a clique needs one color per member).
    if np.unique(neighbour_colors).size <= max_:
        return None
    neighbour_degrees = degrees[neighbours]
    color_counts = np.bincount(neighbour_colors)
    color_freq = color_counts[neighbour_colors]
    # .... ordering neighbors by color frequency -> color -> degree ...........
    order = np.lexsort([-neighbour_degrees, neighbour_colors, color_freq])

    # .... BitCascade considering divergence ..................................
    merged = 0
    used_colors = set()
    for candidate, color in zip(neighbours[order], neighbour_colors[order]):
        # Take at most one node per color, and only nodes still turned on.
        if color in used_colors:
            continue
        if not cascade[candidate]:
            continue
        used_colors.add(color)
        # Build a new bitarray; the rows stored in ``matrix`` stay untouched.
        cascade = matrix[candidate] & cascade
        merged += 1
        remaining = cascade.count()
        if remaining <= max_:
            return None
        if merged >= remaining:
            break
    members = bitarray_to_np(cascade).nonzero()[0]
    return cascade, members
203
+
204
+
205
def set_to_bitarray(set_, N):
    """
    Build a bitarray.bitarray whose turned-on bits are the members of a set.

    Parameters
    ----------
    set_ : set
        a python set of integer indices.
    N : int
        length of the desired bitarray. It must be greater than the maximum
        value of indices present in set.

    Returns
    -------
    bitarr : bitarray.bitarray
        bitarray of length N with indices present in set turned on.
    """
    indices = list(set_)
    flags = np.zeros(N, dtype=np.bool_)
    flags[indices] = 1
    # pack() turns each byte of the boolean buffer into a single bit.
    bitarr = ba()
    bitarr.pack(flags.tobytes())
    return bitarr
227
+
228
+
229
def get_cluster_stats(clusters):
    """
    Summarize cluster sizes and percentages.

    Parameters
    ----------
    clusters : numpy.ndarray
        array of integer cluster IDs, one per clustered item. Negative IDs
        are ignored.

    Returns
    -------
    clusters_df : pandas.DataFrame
        one row per ID from 0 to the largest ID present, with columns
        ``cluster_id``, ``size`` (number of items carrying that ID) and
        ``percent`` (share of all counted items, rounded to 4 decimals).
        An empty frame with the same columns is returned when there is
        nothing to count.
    """
    columns = ['cluster_id', 'size', 'percent']
    labels = np.asarray(clusters)
    labels = labels[labels >= 0]
    # Nothing to count: avoid max()/division on an empty array.
    if labels.size == 0:
        return pd.DataFrame(columns=columns)

    # bincount tallies every ID in a single pass, including unused IDs (0).
    sizes = np.bincount(labels)
    percents = np.round(sizes / sizes.sum() * 100, 4)
    return pd.DataFrame(
        {
            'cluster_id': np.arange(sizes.size),
            'size': sizes,
            'percent': percents,
        },
        columns=columns,
    )
258
+
259
+
260
def _bitqt_promising_nodes(degrees, colors, clique, clique_size, explored):
    """Queue the nodes that could still seed a clique larger than clique_size."""
    # Only nodes with more neighbors than the current best can beat it.
    biggers = degrees > clique_size
    biggers[explored] = False
    # Keep one group of candidates per color not already used by the clique.
    promising_colors = np.setdiff1d(colors[biggers], colors[clique])
    queue = deque()
    for color in promising_colors:
        queue.extend(((colors == color) & biggers).nonzero()[0])
    return queue


def BitQT(rmsdMatrix:np.array, cutoff:float, min_clust_size:int=2, nclust:Optional[int]=None) -> Tuple:
    """BitQT clustering

    Repeatedly searches the bit-encoded neighbor matrix for the largest
    clique it can find, stores it as a new cluster and erases its members
    from the remaining rows.

    Parameters
    ----------
    rmsdMatrix : numpy.ndarray
        square array of pairwise RMSD values.
    cutoff : float
        RMSD threshold below which two items count as neighbors.
    min_clust_size : int
        clustering stops when the best clique found is smaller than this.
    nclust : int, optional
        stop once this many clusters have been assigned. ``None`` (or 0)
        means no limit.

    Returns
    -------
    (cluster_assignment, centroid_indices)
        ``cluster_assignment`` is a list with one cluster ID per row of
        ``rmsdMatrix`` (clusters are numbered from 1; 0 marks items left
        unclustered). ``centroid_indices`` is whatever ``centroid_medoid``
        returns for that assignment and ``rmsdMatrix``.
    """
    matrix = convert_to_bitarr_matrix(rmsdMatrix, cutoff)
    # ++++ Tracking unclustered bits to avoid re-computations +++++++++++++++++
    N = len(matrix[0])  # row length in bits, byte padding included
    m = len(matrix)     # number of real nodes
    one = ba('1')
    unclust_bit = ba(N)
    unclust_bit.setall(1)
    # ++++ Save clusters in an array (1 .. N) +++++++++++++++++++++++++++++++++
    clusters_array = np.zeros(N, dtype=np.int32)
    NCLUSTER = 0
    clustered = set()
    # ++++ Coloring ordered vertices (1 .. N) +++++++++++++++++++++++++++++++++
    degrees = calc_matrix_degrees(unclust_bit, matrix)
    ordered_by_degs = degrees.argsort()[::-1]
    # NOTE(review): colour_matrix documents a degree array but receives the
    # node ordering here. Kept as-is because changing the visiting order may
    # change the coloring and therefore the clusters - confirm intent.
    colors = colour_matrix(ordered_by_degs, matrix)

    # =========================================================================
    # 2. Main algorithm: BitQT !
    # =========================================================================
    while degrees.any():
        NCLUSTER += 1
        # ++++ Find a big clique early ++++++++++++++++++++++++++++++++++++++++
        big_node = degrees.argmax()
        _, big_clique = do_bit_cascade(big_node, degrees, colors, matrix, 0)
        big_clique_size = big_clique.size
        # ++++ Find promising nodes +++++++++++++++++++++++++++++++++++++++++++
        promising_nodes = _bitqt_promising_nodes(
            degrees, colors, big_clique, big_clique_size, big_clique)
        # ++++ Explore all promising nodes ++++++++++++++++++++++++++++++++++++
        cum_found = big_clique
        while promising_nodes:
            node = promising_nodes.popleft()
            found = do_bit_cascade(node, degrees, colors, matrix,
                                   big_clique_size)
            # None means this node cannot beat the current best clique.
            if found is None:
                continue
            clique = found[1]
            # ++++ Cumulative update only if bigger candidates are found ++++++
            if len(clique) > big_clique_size:
                big_clique = clique
                big_clique_size = big_clique.size
                # ++++ Repeat previous condition ++++++++++++++++++++++++++++++
                cum_found = np.concatenate((cum_found, big_clique))
                promising_nodes = _bitqt_promising_nodes(
                    degrees, colors, big_clique, big_clique_size, cum_found)

        if big_clique_size < min_clust_size:
            break

        # ++++ Save new cluster +++++++++++++++++++++++++++++++++++++++++++++++
        clusters_array[big_clique] = NCLUSTER
        # Check the limit only after saving, so that exactly ``nclust``
        # clusters are returned (checking before dropped the last one).
        if nclust and NCLUSTER == nclust:
            break
        # ++++ Update (un)clustered_bit +++++++++++++++++++++++++++++++++++++++
        clustered.update(big_clique)
        clustered_bit = set_to_bitarray(clustered, N)
        unclust_bit = ~clustered_bit
        # ++++ Hard erasing of clustered frames from matrix +++++++++++++++++++
        degrees = np.zeros(N, dtype=np.int32)
        # ``search`` replaces ``itersearch``, which bitarray 3.0 removed.
        for x in unclust_bit[:m].search(one):
            # Degree is taken before the erase below, as in the original
            # flow; it stays an upper bound on the real neighbor count.
            degrees[x] = matrix[x].count()
            if bu.count_and(matrix[x], clustered_bit):
                matrix[x] &= (matrix[x] ^ clustered_bit)

    # =========================================================================
    # 3. Output
    # =========================================================================
    cluster_assignment = list(clusters_array[:m])
    centroid_indices = centroid_medoid(cluster_assignment, rmsdMatrix)

    return cluster_assignment, centroid_indices