biotite 1.1.0__cp311-cp311-macosx_11_0_arm64.whl → 1.3.0__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/application.py +3 -3
- biotite/application/autodock/app.py +1 -1
- biotite/application/blast/webapp.py +1 -1
- biotite/application/clustalo/app.py +1 -1
- biotite/application/localapp.py +2 -2
- biotite/application/msaapp.py +10 -10
- biotite/application/muscle/app3.py +3 -3
- biotite/application/muscle/app5.py +3 -3
- biotite/application/sra/app.py +0 -5
- biotite/application/util.py +21 -1
- biotite/application/viennarna/rnaalifold.py +8 -8
- biotite/application/viennarna/rnaplot.py +10 -8
- biotite/application/viennarna/util.py +1 -1
- biotite/application/webapp.py +1 -1
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +191 -0
- biotite/database/entrez/dbnames.py +10 -0
- biotite/database/entrez/download.py +9 -10
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +5 -4
- biotite/database/pubchem/download.py +6 -6
- biotite/database/pubchem/error.py +10 -0
- biotite/database/pubchem/query.py +12 -23
- biotite/database/rcsb/download.py +3 -2
- biotite/database/rcsb/query.py +2 -3
- biotite/database/uniprot/check.py +2 -2
- biotite/database/uniprot/download.py +2 -5
- biotite/database/uniprot/query.py +3 -4
- biotite/file.py +14 -2
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1226 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/align/__init__.py +0 -4
- biotite/sequence/align/alignment.py +33 -11
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +22 -22
- biotite/sequence/align/cigar.py +2 -2
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +2 -2
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +6 -6
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localgapped.pyx +47 -47
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.pyx +10 -10
- biotite/sequence/align/matrix.py +12 -3
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -2
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/pairwise.pyx +37 -39
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +2 -2
- biotite/sequence/align/statistics.py +1 -1
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/alphabet.py +2 -2
- biotite/sequence/annotation.py +19 -13
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/codon.py +1 -2
- biotite/sequence/graphics/alignment.py +25 -39
- biotite/sequence/graphics/dendrogram.py +4 -2
- biotite/sequence/graphics/features.py +2 -2
- biotite/sequence/graphics/logo.py +10 -12
- biotite/sequence/io/fasta/convert.py +1 -2
- biotite/sequence/io/fasta/file.py +1 -1
- biotite/sequence/io/fastq/file.py +3 -3
- biotite/sequence/io/genbank/file.py +3 -3
- biotite/sequence/io/genbank/sequence.py +2 -0
- biotite/sequence/io/gff/convert.py +1 -1
- biotite/sequence/io/gff/file.py +1 -2
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/sequence/profile.py +19 -25
- biotite/sequence/search.py +0 -1
- biotite/sequence/seqtypes.py +12 -5
- biotite/sequence/sequence.py +1 -2
- biotite/structure/__init__.py +2 -0
- biotite/structure/alphabet/i3d.py +1 -2
- biotite/structure/alphabet/pb.py +1 -2
- biotite/structure/alphabet/unkerasify.py +8 -2
- biotite/structure/atoms.py +35 -27
- biotite/structure/basepairs.py +39 -40
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/bonds.pyx +8 -5
- biotite/structure/box.py +159 -23
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/celllist.pyx +83 -68
- biotite/structure/chains.py +17 -55
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/compare.py +420 -13
- biotite/structure/density.py +1 -1
- biotite/structure/dotbracket.py +31 -32
- biotite/structure/filter.py +8 -8
- biotite/structure/geometry.py +15 -15
- biotite/structure/graphics/rna.py +19 -16
- biotite/structure/hbond.py +18 -21
- biotite/structure/info/atoms.py +11 -2
- biotite/structure/info/ccd.py +0 -2
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +0 -3
- biotite/structure/info/misc.py +0 -1
- biotite/structure/info/radii.py +92 -22
- biotite/structure/info/standardize.py +1 -2
- biotite/structure/integrity.py +4 -6
- biotite/structure/io/general.py +2 -2
- biotite/structure/io/gro/file.py +8 -9
- biotite/structure/io/mol/convert.py +1 -1
- biotite/structure/io/mol/ctab.py +33 -28
- biotite/structure/io/mol/mol.py +1 -1
- biotite/structure/io/mol/sdf.py +39 -13
- biotite/structure/io/pdb/convert.py +86 -5
- biotite/structure/io/pdb/file.py +90 -24
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbqt/file.py +4 -4
- biotite/structure/io/pdbx/bcif.py +22 -7
- biotite/structure/io/pdbx/cif.py +20 -7
- biotite/structure/io/pdbx/component.py +6 -0
- biotite/structure/io/pdbx/compress.py +71 -34
- biotite/structure/io/pdbx/convert.py +429 -77
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +39 -23
- biotite/structure/io/trajfile.py +9 -6
- biotite/structure/io/util.py +38 -0
- biotite/structure/mechanics.py +0 -1
- biotite/structure/molecules.py +0 -15
- biotite/structure/pseudoknots.py +13 -19
- biotite/structure/repair.py +2 -4
- biotite/structure/residues.py +20 -48
- biotite/structure/rings.py +335 -0
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/sasa.pyx +30 -30
- biotite/structure/segments.py +123 -9
- biotite/structure/sequence.py +0 -1
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +0 -2
- biotite/structure/superimpose.py +75 -253
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +232 -26
- biotite/structure/util.py +3 -3
- biotite/version.py +9 -4
- biotite/visualize.py +111 -1
- {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/METADATA +8 -36
- {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/RECORD +160 -138
- {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +3 -1
- {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/celllist.pyx
CHANGED
|
@@ -28,10 +28,10 @@ ctypedef np.uint8_t uint8
|
|
|
28
28
|
cdef class CellList:
|
|
29
29
|
"""
|
|
30
30
|
__init__(atom_array, cell_size, periodic=False, box=None, selection=None)
|
|
31
|
-
|
|
31
|
+
|
|
32
32
|
This class enables the efficient search of atoms in vicinity of a
|
|
33
33
|
defined location.
|
|
34
|
-
|
|
34
|
+
|
|
35
35
|
This class stores the indices of an atom array in virtual "cells",
|
|
36
36
|
each corresponding to a specific coordinate interval.
|
|
37
37
|
If the atoms in vicinity of a specific location are searched, only
|
|
@@ -41,7 +41,7 @@ cdef class CellList:
|
|
|
41
41
|
after the :class:`CellList` has been created.
|
|
42
42
|
Therefore a :class:`CellList` saves calculation time in those
|
|
43
43
|
cases, where vicinity is checked for multiple locations.
|
|
44
|
-
|
|
44
|
+
|
|
45
45
|
Parameters
|
|
46
46
|
----------
|
|
47
47
|
atom_array : AtomArray or ndarray, dtype=float, shape=(n,3)
|
|
@@ -55,7 +55,6 @@ cdef class CellList:
|
|
|
55
55
|
periodic : bool, optional
|
|
56
56
|
If true, the cell list considers periodic copies of atoms.
|
|
57
57
|
The periodicity is based on the `box` attribute of `atom_array`.
|
|
58
|
-
(Default: False)
|
|
59
58
|
box : ndarray, dtype=float, shape=(3,3), optional
|
|
60
59
|
If provided, the periodicity is based on this parameter instead
|
|
61
60
|
of the :attr:`box` attribute of `atom_array`.
|
|
@@ -64,14 +63,14 @@ cdef class CellList:
|
|
|
64
63
|
If provided, only the atoms masked by this array are stored in
|
|
65
64
|
the cell list. However, the indices stored in the cell list
|
|
66
65
|
will still refer to the original unfiltered `atom_array`.
|
|
67
|
-
|
|
66
|
+
|
|
68
67
|
Examples
|
|
69
68
|
--------
|
|
70
|
-
|
|
69
|
+
|
|
71
70
|
>>> cell_list = CellList(atom_array, cell_size=5)
|
|
72
71
|
>>> near_atoms = atom_array[cell_list.get_atoms(np.array([1,2,3]), radius=7.0)]
|
|
73
72
|
"""
|
|
74
|
-
|
|
73
|
+
|
|
75
74
|
# The atom coordinates
|
|
76
75
|
cdef float32[:,:] _coord
|
|
77
76
|
# A boolean mask that covers the selected atoms
|
|
@@ -99,8 +98,8 @@ cdef class CellList:
|
|
|
99
98
|
cdef int _orig_length
|
|
100
99
|
cdef float32[:] _orig_min_coord
|
|
101
100
|
cdef float32[:] _orig_max_coord
|
|
102
|
-
|
|
103
|
-
|
|
101
|
+
|
|
102
|
+
|
|
104
103
|
@cython.initializedcheck(False)
|
|
105
104
|
@cython.boundscheck(False)
|
|
106
105
|
@cython.wraparound(False)
|
|
@@ -119,14 +118,10 @@ cdef class CellList:
|
|
|
119
118
|
# if 'periodic' is true
|
|
120
119
|
self._orig_length = coord.shape[0]
|
|
121
120
|
self._box = None
|
|
122
|
-
if
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
if coord.shape[1] != 3:
|
|
127
|
-
raise ValueError("Coordinates must have form (x,y,z)")
|
|
128
|
-
if np.isnan(coord).any():
|
|
129
|
-
raise ValueError("Coordinates contain NaN values")
|
|
121
|
+
if selection is None:
|
|
122
|
+
_check_coord(coord)
|
|
123
|
+
else:
|
|
124
|
+
_check_coord(coord[selection])
|
|
130
125
|
|
|
131
126
|
if periodic:
|
|
132
127
|
if box is not None:
|
|
@@ -145,7 +140,7 @@ cdef class CellList:
|
|
|
145
140
|
raise ValueError("Box contains NaN values")
|
|
146
141
|
coord = move_inside_box(coord, self._box)
|
|
147
142
|
coord, indices = repeat_box_coord(coord, self._box)
|
|
148
|
-
|
|
143
|
+
|
|
149
144
|
if self._has_initialized_cells():
|
|
150
145
|
raise Exception("Duplicate call of constructor")
|
|
151
146
|
self._cells = None
|
|
@@ -155,23 +150,23 @@ cdef class CellList:
|
|
|
155
150
|
self._coord = coord.astype(np.float32, copy=False)
|
|
156
151
|
self._cellsize = cell_size
|
|
157
152
|
# calculate how many cells are required for each dimension
|
|
158
|
-
min_coord = np.
|
|
159
|
-
max_coord = np.
|
|
153
|
+
min_coord = np.nanmin(coord, axis=0).astype(np.float32)
|
|
154
|
+
max_coord = np.nanmax(coord, axis=0).astype(np.float32)
|
|
160
155
|
self._min_coord = min_coord
|
|
161
156
|
self._max_coord = max_coord
|
|
162
157
|
cell_count = (((max_coord - min_coord) / cell_size) +1).astype(int)
|
|
163
158
|
if self._periodic:
|
|
164
|
-
self._orig_min_coord = np.
|
|
159
|
+
self._orig_min_coord = np.nanmin(coord[:self._orig_length], axis=0) \
|
|
165
160
|
.astype(np.float32)
|
|
166
|
-
self._orig_max_coord = np.
|
|
161
|
+
self._orig_max_coord = np.nanmax(coord[:self._orig_length], axis=0) \
|
|
167
162
|
.astype(np.float32)
|
|
168
|
-
|
|
163
|
+
|
|
169
164
|
# ndarray of pointers to C-arrays
|
|
170
165
|
# containing indices to atom array
|
|
171
166
|
self._cells = np.zeros(cell_count, dtype=np.uint64)
|
|
172
167
|
# Stores the length of the C-arrays
|
|
173
168
|
self._cell_length = np.zeros(cell_count, dtype=np.int32)
|
|
174
|
-
|
|
169
|
+
|
|
175
170
|
# Prepare selection
|
|
176
171
|
if selection is not None:
|
|
177
172
|
self._has_selection = True
|
|
@@ -183,7 +178,7 @@ cdef class CellList:
|
|
|
183
178
|
)
|
|
184
179
|
else:
|
|
185
180
|
self._has_selection = False
|
|
186
|
-
|
|
181
|
+
|
|
187
182
|
# Fill cells
|
|
188
183
|
for atom_array_i in range(self._coord.shape[0]):
|
|
189
184
|
# Only put selected atoms into cell list
|
|
@@ -208,34 +203,34 @@ cdef class CellList:
|
|
|
208
203
|
# Store new cell pointer and length
|
|
209
204
|
self._cell_length[i,j,k] = length
|
|
210
205
|
self._cells[i,j,k] = <ptr> cell_ptr
|
|
211
|
-
|
|
212
|
-
|
|
206
|
+
|
|
207
|
+
|
|
213
208
|
def __dealloc__(self):
|
|
214
209
|
if self._has_initialized_cells():
|
|
215
210
|
deallocate_ptrs(self._cells)
|
|
216
|
-
|
|
217
|
-
|
|
211
|
+
|
|
212
|
+
|
|
218
213
|
@cython.initializedcheck(False)
|
|
219
214
|
@cython.boundscheck(False)
|
|
220
215
|
@cython.wraparound(False)
|
|
221
216
|
def create_adjacency_matrix(self, float32 threshold_distance):
|
|
222
217
|
"""
|
|
223
218
|
create_adjacency_matrix(threshold_distance)
|
|
224
|
-
|
|
219
|
+
|
|
225
220
|
Create an adjacency matrix for the atoms in this cell list.
|
|
226
221
|
|
|
227
222
|
An adjacency matrix depicts which atoms *i* and *j* have
|
|
228
223
|
a distance lower than a given threshold distance.
|
|
229
224
|
The values in the adjacency matrix ``m`` are
|
|
230
225
|
``m[i,j] = 1 if distance(i,j) <= threshold else 0``
|
|
231
|
-
|
|
226
|
+
|
|
232
227
|
Parameters
|
|
233
228
|
----------
|
|
234
229
|
threshold_distance : float
|
|
235
230
|
The threshold distance. All atom pairs that have a distance
|
|
236
231
|
lower than this value are indicated by ``True`` values in
|
|
237
232
|
the resulting matrix.
|
|
238
|
-
|
|
233
|
+
|
|
239
234
|
Returns
|
|
240
235
|
-------
|
|
241
236
|
matrix : ndarray, dtype=bool, shape=(n,n)
|
|
@@ -244,7 +239,7 @@ cdef class CellList:
|
|
|
244
239
|
:class:`CellList`, the rows and columns corresponding to
|
|
245
240
|
atoms, that are not masked by the selection, have all
|
|
246
241
|
elements set to ``False``.
|
|
247
|
-
|
|
242
|
+
|
|
248
243
|
Notes
|
|
249
244
|
-----
|
|
250
245
|
The highest performance is achieved when the the cell size is
|
|
@@ -269,7 +264,7 @@ cdef class CellList:
|
|
|
269
264
|
if threshold_distance < 0:
|
|
270
265
|
raise ValueError("Threshold must be a positive value")
|
|
271
266
|
cdef int i=0
|
|
272
|
-
|
|
267
|
+
|
|
273
268
|
# Get atom position for all original positions
|
|
274
269
|
# (no periodic copies)
|
|
275
270
|
coord = np.asarray(self._coord[:self._orig_length])
|
|
@@ -287,17 +282,17 @@ cdef class CellList:
|
|
|
287
282
|
return matrix
|
|
288
283
|
else:
|
|
289
284
|
return self.get_atoms(coord, threshold_distance, as_mask=True)
|
|
290
|
-
|
|
291
|
-
|
|
285
|
+
|
|
286
|
+
|
|
292
287
|
@cython.initializedcheck(False)
|
|
293
288
|
@cython.boundscheck(False)
|
|
294
289
|
@cython.wraparound(False)
|
|
295
290
|
def get_atoms(self, np.ndarray coord, radius, bint as_mask=False):
|
|
296
291
|
"""
|
|
297
292
|
get_atoms(coord, radius, as_mask=False)
|
|
298
|
-
|
|
293
|
+
|
|
299
294
|
Find atoms with a maximum distance from given coordinates.
|
|
300
|
-
|
|
295
|
+
|
|
301
296
|
Parameters
|
|
302
297
|
----------
|
|
303
298
|
coord : ndarray, dtype=float, shape=(3,) or shape=(m,3)
|
|
@@ -321,7 +316,7 @@ cdef class CellList:
|
|
|
321
316
|
as_mask : bool, optional
|
|
322
317
|
If true, the result is returned as boolean mask, instead
|
|
323
318
|
of an index array.
|
|
324
|
-
|
|
319
|
+
|
|
325
320
|
Returns
|
|
326
321
|
-------
|
|
327
322
|
indices : ndarray, dtype=int32, shape=(p,) or shape=(m,p)
|
|
@@ -335,7 +330,7 @@ cdef class CellList:
|
|
|
335
330
|
The values are true for atoms in the atom array,
|
|
336
331
|
that are in the defined vicinity.
|
|
337
332
|
Only returned with `as_mask` set to true.
|
|
338
|
-
|
|
333
|
+
|
|
339
334
|
See Also
|
|
340
335
|
--------
|
|
341
336
|
get_atoms_in_cells
|
|
@@ -376,7 +371,7 @@ cdef class CellList:
|
|
|
376
371
|
A 18 PRO HA H 2.719 3.181 1.316
|
|
377
372
|
A 18 PRO HB3 H 2.781 3.223 3.618
|
|
378
373
|
A 18 PRO CB C 3.035 4.190 3.187
|
|
379
|
-
|
|
374
|
+
|
|
380
375
|
Get adjacent atoms for mutliple positions:
|
|
381
376
|
|
|
382
377
|
>>> cell_list = CellList(atom_array, 3)
|
|
@@ -403,14 +398,14 @@ cdef class CellList:
|
|
|
403
398
|
cdef float32 sq_radius
|
|
404
399
|
cdef float32[:] sq_radii
|
|
405
400
|
cdef np.ndarray cell_radii
|
|
406
|
-
|
|
401
|
+
|
|
407
402
|
cdef int[:,:] all_indices
|
|
408
403
|
cdef int[:,:] indices
|
|
409
404
|
cdef float32[:,:] coord_v
|
|
410
405
|
|
|
411
406
|
if len(coord) == 0:
|
|
412
407
|
return _empty_result(as_mask)
|
|
413
|
-
|
|
408
|
+
|
|
414
409
|
# Handle periodicity for the input coordinates
|
|
415
410
|
if self._periodic:
|
|
416
411
|
coord = move_inside_box(coord, self._box)
|
|
@@ -437,7 +432,7 @@ cdef class CellList:
|
|
|
437
432
|
)
|
|
438
433
|
# These have to be narrowed down in the next step
|
|
439
434
|
# using the Euclidian distance
|
|
440
|
-
|
|
435
|
+
|
|
441
436
|
# Filter all indices from all_indices
|
|
442
437
|
# where squared distance is smaller than squared radius
|
|
443
438
|
# Using the squared distance is computationally cheaper than
|
|
@@ -464,23 +459,23 @@ cdef class CellList:
|
|
|
464
459
|
array_i += 1
|
|
465
460
|
if array_i > max_array_length:
|
|
466
461
|
max_array_length = array_i
|
|
467
|
-
|
|
462
|
+
|
|
468
463
|
return self._post_process(
|
|
469
464
|
np.asarray(indices)[:, :max_array_length],
|
|
470
465
|
as_mask, is_multi_coord
|
|
471
466
|
)
|
|
472
|
-
|
|
473
|
-
|
|
467
|
+
|
|
468
|
+
|
|
474
469
|
@cython.boundscheck(False)
|
|
475
470
|
@cython.wraparound(False)
|
|
476
471
|
def get_atoms_in_cells(self, np.ndarray coord,
|
|
477
472
|
cell_radius=1, bint as_mask=False):
|
|
478
473
|
"""
|
|
479
474
|
get_atoms_in_cells(coord, cell_radius=1, as_mask=False)
|
|
480
|
-
|
|
475
|
+
|
|
481
476
|
Find atoms with a maximum cell distance from given
|
|
482
477
|
coordinates.
|
|
483
|
-
|
|
478
|
+
|
|
484
479
|
Instead of using the radius as maximum euclidian distance to the
|
|
485
480
|
given coordinates,
|
|
486
481
|
the radius is measured as the amount of cells:
|
|
@@ -489,7 +484,7 @@ cdef class CellList:
|
|
|
489
484
|
that the atoms indices from this cell and the 8 surrounding
|
|
490
485
|
cells are returned and so forth.
|
|
491
486
|
This is more efficient than `get_atoms()`.
|
|
492
|
-
|
|
487
|
+
|
|
493
488
|
Parameters
|
|
494
489
|
----------
|
|
495
490
|
coord : ndarray, dtype=float, shape=(3,) or shape=(m,3)
|
|
@@ -518,7 +513,7 @@ cdef class CellList:
|
|
|
518
513
|
as_mask : bool, optional
|
|
519
514
|
If true, the result is returned as boolean mask, instead
|
|
520
515
|
of an index array.
|
|
521
|
-
|
|
516
|
+
|
|
522
517
|
Returns
|
|
523
518
|
-------
|
|
524
519
|
indices : ndarray, dtype=int32, shape=(p,) or shape=(m,p)
|
|
@@ -563,8 +558,8 @@ cdef class CellList:
|
|
|
563
558
|
coord, cell_radius, is_multi_radius
|
|
564
559
|
)
|
|
565
560
|
return self._post_process(array_indices, as_mask, is_multi_coord)
|
|
566
|
-
|
|
567
|
-
|
|
561
|
+
|
|
562
|
+
|
|
568
563
|
@cython.boundscheck(False)
|
|
569
564
|
@cython.wraparound(False)
|
|
570
565
|
def _get_atoms_in_cells(self,
|
|
@@ -573,7 +568,7 @@ cdef class CellList:
|
|
|
573
568
|
bint is_multi_radius):
|
|
574
569
|
"""
|
|
575
570
|
Get the indices of atoms in `cell_radii` adjacency of `coord`.
|
|
576
|
-
|
|
571
|
+
|
|
577
572
|
Parameters
|
|
578
573
|
----------
|
|
579
574
|
coord : ndarray, dtype=float32, shape=(n,3)
|
|
@@ -583,7 +578,7 @@ cdef class CellList:
|
|
|
583
578
|
is_multi_radius : bool
|
|
584
579
|
True indicates, that all values in `cell_radii` are the
|
|
585
580
|
same.
|
|
586
|
-
|
|
581
|
+
|
|
587
582
|
Returns
|
|
588
583
|
-------
|
|
589
584
|
array_indices : ndarray, dtype=int32, shape=(m,p)
|
|
@@ -607,8 +602,8 @@ cdef class CellList:
|
|
|
607
602
|
cdef int max_array_length \
|
|
608
603
|
= self._find_adjacent_atoms(coord, array_indices, cell_radii)
|
|
609
604
|
return array_indices[:, :max_array_length]
|
|
610
|
-
|
|
611
|
-
|
|
605
|
+
|
|
606
|
+
|
|
612
607
|
@cython.boundscheck(False)
|
|
613
608
|
@cython.wraparound(False)
|
|
614
609
|
cdef int _find_adjacent_atoms(self,
|
|
@@ -618,7 +613,7 @@ cdef class CellList:
|
|
|
618
613
|
"""
|
|
619
614
|
This method fills the given empty index array
|
|
620
615
|
with actual indices of adjacent atoms.
|
|
621
|
-
|
|
616
|
+
|
|
622
617
|
Since the length of 'indices' (second dimension) is
|
|
623
618
|
the worst case assumption, this method returns the actual
|
|
624
619
|
required length, i.e. the highest length of all arrays
|
|
@@ -632,11 +627,17 @@ cdef class CellList:
|
|
|
632
627
|
cdef int pos_i, array_i, cell_i
|
|
633
628
|
cdef int max_array_length = 0
|
|
634
629
|
cdef int cell_r
|
|
635
|
-
|
|
630
|
+
|
|
636
631
|
cdef ptr[:,:,:] cells = self._cells
|
|
637
632
|
cdef int[:,:,:] cell_length = self._cell_length
|
|
633
|
+
cdef uint8[:] finite_mask = (
|
|
634
|
+
np.isfinite(np.asarray(coord)).all(axis=-1).astype(np.uint8, copy=False)
|
|
635
|
+
)
|
|
638
636
|
|
|
639
637
|
for pos_i in range(coord.shape[0]):
|
|
638
|
+
if not finite_mask[pos_i]:
|
|
639
|
+
# For non-finite coordinates, there are no adjacent atoms
|
|
640
|
+
continue
|
|
640
641
|
array_i = 0
|
|
641
642
|
cell_r = cell_radius[pos_i]
|
|
642
643
|
x = coord[pos_i, 0]
|
|
@@ -663,7 +664,7 @@ cdef class CellList:
|
|
|
663
664
|
if array_i > max_array_length:
|
|
664
665
|
max_array_length = array_i
|
|
665
666
|
return max_array_length
|
|
666
|
-
|
|
667
|
+
|
|
667
668
|
|
|
668
669
|
@cython.boundscheck(False)
|
|
669
670
|
@cython.wraparound(False)
|
|
@@ -695,8 +696,8 @@ cdef class CellList:
|
|
|
695
696
|
return indices
|
|
696
697
|
else:
|
|
697
698
|
return indices[0]
|
|
698
|
-
|
|
699
|
-
|
|
699
|
+
|
|
700
|
+
|
|
700
701
|
@cython.initializedcheck(False)
|
|
701
702
|
@cython.boundscheck(False)
|
|
702
703
|
@cython.wraparound(False)
|
|
@@ -706,7 +707,7 @@ cdef class CellList:
|
|
|
706
707
|
i[0] = <int>((x - self._min_coord[0]) / self._cellsize)
|
|
707
708
|
j[0] = <int>((y - self._min_coord[1]) / self._cellsize)
|
|
708
709
|
k[0] = <int>((z - self._min_coord[2]) / self._cellsize)
|
|
709
|
-
|
|
710
|
+
|
|
710
711
|
@cython.initializedcheck(False)
|
|
711
712
|
@cython.boundscheck(False)
|
|
712
713
|
@cython.wraparound(False)
|
|
@@ -718,7 +719,7 @@ cdef class CellList:
|
|
|
718
719
|
if z < self._min_coord[2] or z > self._max_coord[2]:
|
|
719
720
|
return False
|
|
720
721
|
return True
|
|
721
|
-
|
|
722
|
+
|
|
722
723
|
@cython.initializedcheck(False)
|
|
723
724
|
@cython.boundscheck(False)
|
|
724
725
|
@cython.wraparound(False)
|
|
@@ -737,7 +738,7 @@ cdef class CellList:
|
|
|
737
738
|
break
|
|
738
739
|
matrix[i, index] = True
|
|
739
740
|
return np.asarray(matrix, dtype=bool)
|
|
740
|
-
|
|
741
|
+
|
|
741
742
|
cdef inline bint _has_initialized_cells(self):
|
|
742
743
|
# Memoryviews are not initialized on class creation
|
|
743
744
|
# This method checks if the _cells memoryview was initialized
|
|
@@ -751,6 +752,20 @@ cdef class CellList:
|
|
|
751
752
|
return False
|
|
752
753
|
|
|
753
754
|
|
|
755
|
+
def _check_coord(coord):
|
|
756
|
+
"""
|
|
757
|
+
Perform checks on validity of coordinates.
|
|
758
|
+
"""
|
|
759
|
+
if coord.ndim != 2:
|
|
760
|
+
raise ValueError("Coordinates must have shape (n,3)")
|
|
761
|
+
if coord.shape[0] == 0:
|
|
762
|
+
raise ValueError("Coordinates must not be empty")
|
|
763
|
+
if coord.shape[1] != 3:
|
|
764
|
+
raise ValueError("Coordinates must have form (x,y,z)")
|
|
765
|
+
if not np.isfinite(coord).all():
|
|
766
|
+
raise ValueError("Coordinates contain non-finite values")
|
|
767
|
+
|
|
768
|
+
|
|
754
769
|
def _empty_result(as_mask):
|
|
755
770
|
"""
|
|
756
771
|
Create return value for :func:`get_atoms()` and
|
|
@@ -774,11 +789,11 @@ def _prepare_vectorization(np.ndarray coord, radius, radius_dtype):
|
|
|
774
789
|
radii/coordinates were given.
|
|
775
790
|
|
|
776
791
|
The shapes before and after conversion are:
|
|
777
|
-
|
|
792
|
+
|
|
778
793
|
- coord: (3, ), radius: scalar -> coord: (1,3), radius: (1,)
|
|
779
794
|
- coord: (n,3), radius: scalar -> coord: (n,3), radius: (n,)
|
|
780
795
|
- coord: (n,3), radius: (n, ) -> coord: (n,3), radius: (n,)
|
|
781
|
-
|
|
796
|
+
|
|
782
797
|
Thes resulting values have the same dimensionality for all cases and
|
|
783
798
|
can be handeled uniformly by `get_atoms()` and
|
|
784
799
|
`get_atoms_in_cells()`.
|
|
@@ -798,7 +813,7 @@ def _prepare_vectorization(np.ndarray coord, radius, radius_dtype):
|
|
|
798
813
|
raise ValueError(
|
|
799
814
|
f"Invalid shape for input coordinates"
|
|
800
815
|
)
|
|
801
|
-
|
|
816
|
+
|
|
802
817
|
if isinstance(radius, np.ndarray):
|
|
803
818
|
# Multiple radii
|
|
804
819
|
# Check whether amount of coordinates match amount of radii
|
biotite/structure/chains.py
CHANGED
|
@@ -22,23 +22,23 @@ __all__ = [
|
|
|
22
22
|
"chain_iter",
|
|
23
23
|
]
|
|
24
24
|
|
|
25
|
-
import numpy as np
|
|
26
25
|
from biotite.structure.segments import (
|
|
27
26
|
apply_segment_wise,
|
|
28
27
|
get_segment_masks,
|
|
29
28
|
get_segment_positions,
|
|
29
|
+
get_segment_starts,
|
|
30
30
|
get_segment_starts_for,
|
|
31
31
|
segment_iter,
|
|
32
32
|
spread_segment_wise,
|
|
33
33
|
)
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
def get_chain_starts(array, add_exclusive_stop=False):
|
|
36
|
+
def get_chain_starts(array, add_exclusive_stop=False, extra_categories=()):
|
|
37
37
|
"""
|
|
38
38
|
Get the indices in an atom array, which indicates the beginning of
|
|
39
39
|
a new chain.
|
|
40
40
|
|
|
41
|
-
A new chain starts, when the chain ID changes or when the residue ID
|
|
41
|
+
A new chain starts, when the chain or sym ID changes or when the residue ID
|
|
42
42
|
decreases.
|
|
43
43
|
|
|
44
44
|
Parameters
|
|
@@ -49,6 +49,9 @@ def get_chain_starts(array, add_exclusive_stop=False):
|
|
|
49
49
|
If true, the exclusive stop of the input atom array, i.e.
|
|
50
50
|
``array.array_length()``, is added to the returned array of
|
|
51
51
|
start indices as last element.
|
|
52
|
+
extra_categories : tuple of str, optional
|
|
53
|
+
Additional annotation categories that induce the start of a new chain,
|
|
54
|
+
when their value change from one atom to the next.
|
|
52
55
|
|
|
53
56
|
Returns
|
|
54
57
|
-------
|
|
@@ -59,26 +62,16 @@ def get_chain_starts(array, add_exclusive_stop=False):
|
|
|
59
62
|
-----
|
|
60
63
|
This method is internally used by all other chain-related
|
|
61
64
|
functions.
|
|
62
|
-
|
|
63
|
-
See also
|
|
64
|
-
--------
|
|
65
|
-
get_residue_starts
|
|
66
65
|
"""
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
# The first chain is not included yet -> Insert '[0]'
|
|
78
|
-
if add_exclusive_stop:
|
|
79
|
-
return np.concatenate(([0], chain_starts, [array.array_length()]))
|
|
80
|
-
else:
|
|
81
|
-
return np.concatenate(([0], chain_starts))
|
|
66
|
+
categories = ["chain_id"] + list(extra_categories)
|
|
67
|
+
if "sym_id" in array.get_annotation_categories():
|
|
68
|
+
categories.append("sym_id")
|
|
69
|
+
return get_segment_starts(
|
|
70
|
+
array,
|
|
71
|
+
add_exclusive_stop,
|
|
72
|
+
continuous_categories=("res_id",),
|
|
73
|
+
equal_categories=categories,
|
|
74
|
+
)
|
|
82
75
|
|
|
83
76
|
|
|
84
77
|
def apply_chain_wise(array, data, function, axis=None):
|
|
@@ -115,10 +108,6 @@ def apply_chain_wise(array, data, function, axis=None):
|
|
|
115
108
|
Chain-wise evaluation of `data` by `function`. The size of the
|
|
116
109
|
first dimension of this array is equal to the amount of
|
|
117
110
|
chains.
|
|
118
|
-
|
|
119
|
-
See also
|
|
120
|
-
--------
|
|
121
|
-
apply_residue_wise
|
|
122
111
|
"""
|
|
123
112
|
starts = get_chain_starts(array, add_exclusive_stop=True)
|
|
124
113
|
return apply_segment_wise(starts, data, function, axis)
|
|
@@ -148,10 +137,6 @@ def spread_chain_wise(array, input_data):
|
|
|
148
137
|
output_data : ndarray
|
|
149
138
|
Chain-wise spread `input_data`. Length is the same as
|
|
150
139
|
`array_length()` of `array`.
|
|
151
|
-
|
|
152
|
-
See also
|
|
153
|
-
--------
|
|
154
|
-
spread_residue_wise
|
|
155
140
|
"""
|
|
156
141
|
starts = get_chain_starts(array, add_exclusive_stop=True)
|
|
157
142
|
return spread_segment_wise(starts, input_data)
|
|
@@ -177,10 +162,6 @@ def get_chain_masks(array, indices):
|
|
|
177
162
|
Multiple boolean masks, one for each given index in `indices`.
|
|
178
163
|
Each array masks the atoms that belong to the same chain as
|
|
179
164
|
the atom at the given index.
|
|
180
|
-
|
|
181
|
-
See also
|
|
182
|
-
--------
|
|
183
|
-
get_residue_masks
|
|
184
165
|
"""
|
|
185
166
|
starts = get_chain_starts(array, add_exclusive_stop=True)
|
|
186
167
|
return get_segment_masks(starts, indices)
|
|
@@ -205,10 +186,6 @@ def get_chain_starts_for(array, indices):
|
|
|
205
186
|
start_indices : ndarray, dtype=int, shape=(k,)
|
|
206
187
|
The indices that point to the chain starts for the input
|
|
207
188
|
`indices`.
|
|
208
|
-
|
|
209
|
-
See also
|
|
210
|
-
--------
|
|
211
|
-
get_residue_starts_for
|
|
212
189
|
"""
|
|
213
190
|
starts = get_chain_starts(array, add_exclusive_stop=True)
|
|
214
191
|
return get_segment_starts_for(starts, indices)
|
|
@@ -235,10 +212,6 @@ def get_chain_positions(array, indices):
|
|
|
235
212
|
-------
|
|
236
213
|
start_indices : ndarray, dtype=int, shape=(k,)
|
|
237
214
|
The indices that point to the position of the chains.
|
|
238
|
-
|
|
239
|
-
See also
|
|
240
|
-
--------
|
|
241
|
-
get_residue_positions
|
|
242
215
|
"""
|
|
243
216
|
starts = get_chain_starts(array, add_exclusive_stop=True)
|
|
244
217
|
return get_segment_positions(starts, indices)
|
|
@@ -260,10 +233,6 @@ def get_chains(array):
|
|
|
260
233
|
-------
|
|
261
234
|
ids : ndarray, dtype=str
|
|
262
235
|
List of chain IDs.
|
|
263
|
-
|
|
264
|
-
See also
|
|
265
|
-
--------
|
|
266
|
-
get_residues
|
|
267
236
|
"""
|
|
268
237
|
return array.chain_id[get_chain_starts(array)]
|
|
269
238
|
|
|
@@ -284,10 +253,6 @@ def get_chain_count(array):
|
|
|
284
253
|
-------
|
|
285
254
|
count : int
|
|
286
255
|
Amount of chains.
|
|
287
|
-
|
|
288
|
-
See also
|
|
289
|
-
--------
|
|
290
|
-
get_residue_count
|
|
291
256
|
"""
|
|
292
257
|
return len(get_chain_starts(array))
|
|
293
258
|
|
|
@@ -305,10 +270,7 @@ def chain_iter(array):
|
|
|
305
270
|
------
|
|
306
271
|
chain : AtomArray or AtomArrayStack
|
|
307
272
|
A single chain of the input `array`.
|
|
308
|
-
|
|
309
|
-
See also
|
|
310
|
-
--------
|
|
311
|
-
residue_iter
|
|
312
273
|
"""
|
|
313
274
|
starts = get_chain_starts(array, add_exclusive_stop=True)
|
|
314
|
-
|
|
275
|
+
for chain in segment_iter(array, starts):
|
|
276
|
+
yield chain
|
|
Binary file
|