biotite 1.4.0__cp311-cp311-win_amd64.whl → 1.5.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/dssp/app.py +63 -6
- biotite/database/afdb/download.py +12 -6
- biotite/database/rcsb/download.py +1 -0
- biotite/database/rcsb/query.py +2 -2
- biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
- biotite/structure/atoms.py +1 -1
- biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
- biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
- biotite/structure/chains.py +34 -0
- biotite/structure/charges.cp311-win_amd64.pyd +0 -0
- biotite/structure/filter.py +2 -1
- biotite/structure/geometry.py +164 -2
- biotite/structure/info/atoms.py +8 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/io/pdb/convert.py +1 -0
- biotite/structure/io/pdb/file.py +16 -2
- biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/bcif.py +1 -1
- biotite/structure/io/pdbx/cif.py +1 -1
- biotite/structure/io/pdbx/compress.py +13 -9
- biotite/structure/io/pdbx/convert.py +17 -6
- biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/encoding.pyx +39 -8
- biotite/structure/residues.py +173 -1
- biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
- biotite/structure/segments.py +39 -3
- biotite/structure/util.py +14 -22
- biotite/version.py +16 -3
- {biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/METADATA +1 -1
- {biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/RECORD +46 -46
- {biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/WHEEL +0 -0
- {biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -55,6 +55,7 @@ from biotite.structure.io.pdbx.bcif import (
|
|
|
55
55
|
from biotite.structure.io.pdbx.cif import CIFBlock, CIFFile
|
|
56
56
|
from biotite.structure.io.pdbx.component import MaskValue
|
|
57
57
|
from biotite.structure.io.pdbx.encoding import StringArrayEncoding
|
|
58
|
+
from biotite.structure.repair import create_continuous_res_ids
|
|
58
59
|
from biotite.structure.residues import (
|
|
59
60
|
get_residue_count,
|
|
60
61
|
get_residue_positions,
|
|
@@ -496,12 +497,6 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
|
|
|
496
497
|
atom_site, f"{prefix}_asym_id", f"{alt_prefix}_asym_id"
|
|
497
498
|
).as_array(str),
|
|
498
499
|
)
|
|
499
|
-
array.set_annotation(
|
|
500
|
-
"res_id",
|
|
501
|
-
_get_or_fallback(
|
|
502
|
-
atom_site, f"{prefix}_seq_id", f"{alt_prefix}_seq_id"
|
|
503
|
-
).as_array(int, -1),
|
|
504
|
-
)
|
|
505
500
|
array.set_annotation("ins_code", atom_site["pdbx_PDB_ins_code"].as_array(str, ""))
|
|
506
501
|
array.set_annotation(
|
|
507
502
|
"res_name",
|
|
@@ -518,6 +513,22 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
|
|
|
518
513
|
)
|
|
519
514
|
array.set_annotation("element", atom_site["type_symbol"].as_array(str))
|
|
520
515
|
|
|
516
|
+
# Special handling for `res_id`, as the `label_seq_id` is equal (`.`) for all
|
|
517
|
+
# hetero residues, which makes distinguishing subsequent residues from one another
|
|
518
|
+
# difficult (https://github.com/biotite-dev/biotite/issues/553)
|
|
519
|
+
res_id = _get_or_fallback(
|
|
520
|
+
atom_site, f"{prefix}_seq_id", f"{alt_prefix}_seq_id"
|
|
521
|
+
).as_array(int, -1)
|
|
522
|
+
if not use_author_fields and "auth_seq_id" in atom_site:
|
|
523
|
+
# Therefore, the `auth_seq_id` is still used to determine residue starts
|
|
524
|
+
# in `create_continuous_res_ids()`, even if `use_author_fields = False`.
|
|
525
|
+
res_id_for_residue_starts = atom_site["auth_seq_id"].as_array(int, -1)
|
|
526
|
+
array.set_annotation("res_id", res_id_for_residue_starts)
|
|
527
|
+
fallback_res_ids = create_continuous_res_ids(array)
|
|
528
|
+
array.set_annotation("res_id", np.where(res_id == -1, fallback_res_ids, res_id))
|
|
529
|
+
else:
|
|
530
|
+
array.set_annotation("res_id", res_id)
|
|
531
|
+
|
|
521
532
|
if "atom_id" in extra_fields:
|
|
522
533
|
if "id" in atom_site:
|
|
523
534
|
array.set_annotation("atom_id", atom_site["id"].as_array(int))
|
|
Binary file
|
|
@@ -225,9 +225,13 @@ class Encoding(_Component, metaclass=ABCMeta):
|
|
|
225
225
|
-------
|
|
226
226
|
decoded_data : ndarray
|
|
227
227
|
The decoded data.
|
|
228
|
+
|
|
229
|
+
Warnings
|
|
230
|
+
--------
|
|
231
|
+
When overriding this method, do not omit bound checks with
|
|
232
|
+
``@cython.boundscheck(False)`` or ``@cython.wraparound(False)``,
|
|
233
|
+
since the file content may be invalid/malicious.
|
|
228
234
|
"""
|
|
229
|
-
# Important: Do not omit bound checks for decoding,
|
|
230
|
-
# since the file content may be invalid/malicious.
|
|
231
235
|
raise NotImplementedError()
|
|
232
236
|
|
|
233
237
|
def __str__(self):
|
|
@@ -883,17 +887,39 @@ class StringArrayEncoding(Encoding):
|
|
|
883
887
|
else:
|
|
884
888
|
check_present = True
|
|
885
889
|
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
890
|
+
if len(self.strings) > 0:
|
|
891
|
+
string_order = _safe_cast(np.argsort(self.strings), np.int32)
|
|
892
|
+
sorted_strings = self.strings[string_order]
|
|
893
|
+
sorted_indices = np.searchsorted(sorted_strings, data)
|
|
894
|
+
indices = string_order[sorted_indices]
|
|
895
|
+
# `"" not in self.strings` can be quite costly and is only necessary,
|
|
896
|
+
# if the `strings` were given by the user, as otherwise we always
|
|
897
|
+
# include an empty string explicitly when we compute them in this function
|
|
898
|
+
# -> Only run if `check_present` is True
|
|
899
|
+
if check_present and "" not in self.strings:
|
|
900
|
+
# Represent empty strings as -1
|
|
901
|
+
indices[data == ""] = -1
|
|
902
|
+
else:
|
|
903
|
+
# There are no strings -> The indices can only ever be -1 to indicate
|
|
904
|
+
# missing values
|
|
905
|
+
# The check if this is correct is done below
|
|
906
|
+
indices = np.full(data.shape[0], -1, dtype=np.int32)
|
|
907
|
+
|
|
908
|
+
valid_indices_mask = indices != -1
|
|
909
|
+
if check_present and not np.all(
|
|
910
|
+
self.strings[indices[valid_indices_mask]] == data[valid_indices_mask]
|
|
911
|
+
):
|
|
891
912
|
raise ValueError("Data contains strings not present in 'strings'")
|
|
892
913
|
return encode_stepwise(indices, self.data_encoding)
|
|
893
914
|
|
|
894
915
|
def decode(self, data):
|
|
895
916
|
indices = decode_stepwise(data, self.data_encoding)
|
|
896
|
-
|
|
917
|
+
# Initialize with empty strings
|
|
918
|
+
strings = np.zeros(indices.shape[0], dtype=self.strings.dtype)
|
|
919
|
+
# `-1` indices indicate missing values
|
|
920
|
+
valid_indices_mask = indices != -1
|
|
921
|
+
strings[valid_indices_mask] = self.strings[indices[valid_indices_mask]]
|
|
922
|
+
return strings
|
|
897
923
|
|
|
898
924
|
def __eq__(self, other):
|
|
899
925
|
if not isinstance(other, type(self)):
|
|
@@ -1009,6 +1035,11 @@ def decode_stepwise(data, encoding):
|
|
|
1009
1035
|
"""
|
|
1010
1036
|
for enc in reversed(encoding):
|
|
1011
1037
|
data = enc.decode(data)
|
|
1038
|
+
# ByteEncoding may decode in a non-writable array,
|
|
1039
|
+
# as it creates the ndarray cheaply from buffer
|
|
1040
|
+
if not data.flags.writeable:
|
|
1041
|
+
# Make the resulting ndarray writable, by copying the underlying buffer
|
|
1042
|
+
data = data.copy()
|
|
1012
1043
|
return data
|
|
1013
1044
|
|
|
1014
1045
|
|
biotite/structure/residues.py
CHANGED
|
@@ -16,13 +16,17 @@ __all__ = [
|
|
|
16
16
|
"get_residue_masks",
|
|
17
17
|
"get_residue_starts_for",
|
|
18
18
|
"get_residue_positions",
|
|
19
|
+
"get_all_residue_positions",
|
|
19
20
|
"get_residues",
|
|
20
21
|
"get_residue_count",
|
|
21
22
|
"residue_iter",
|
|
23
|
+
"get_atom_name_indices",
|
|
22
24
|
]
|
|
23
25
|
|
|
26
|
+
import numpy as np
|
|
24
27
|
from biotite.structure.segments import (
|
|
25
28
|
apply_segment_wise,
|
|
29
|
+
get_all_segment_positions,
|
|
26
30
|
get_segment_masks,
|
|
27
31
|
get_segment_positions,
|
|
28
32
|
get_segment_starts,
|
|
@@ -72,7 +76,7 @@ def get_residue_starts(array, add_exclusive_stop=False, extra_categories=()):
|
|
|
72
76
|
[ 0 16 35 56 75 92 116 135 157 169 176 183 197 208 219 226 250 264
|
|
73
77
|
278 292 304]
|
|
74
78
|
"""
|
|
75
|
-
categories = ["chain_id", "res_id", "ins_code"
|
|
79
|
+
categories = ["chain_id", "res_id", "ins_code"] + list(extra_categories)
|
|
76
80
|
if "sym_id" in array.get_annotation_categories():
|
|
77
81
|
categories.append("sym_id")
|
|
78
82
|
return get_segment_starts(array, add_exclusive_stop, equal_categories=categories)
|
|
@@ -361,6 +365,11 @@ def get_residue_positions(array, indices):
|
|
|
361
365
|
residue_indices : ndarray, dtype=int, shape=(k,)
|
|
362
366
|
The indices that point to the position of the residues.
|
|
363
367
|
|
|
368
|
+
See Also
|
|
369
|
+
--------
|
|
370
|
+
get_all_residue_positions :
|
|
371
|
+
Similar to this function, but for all atoms in the :class:`struc.AtomArray`.
|
|
372
|
+
|
|
364
373
|
Examples
|
|
365
374
|
--------
|
|
366
375
|
>>> atom_index = [5, 42]
|
|
@@ -380,6 +389,50 @@ def get_residue_positions(array, indices):
|
|
|
380
389
|
return get_segment_positions(starts, indices)
|
|
381
390
|
|
|
382
391
|
|
|
392
|
+
def get_all_residue_positions(array):
|
|
393
|
+
"""
|
|
394
|
+
For each atom, obtain the position of the residue
|
|
395
|
+
corresponding to this atom in the input `array`.
|
|
396
|
+
|
|
397
|
+
For example, the position of the first residue in the atom array is
|
|
398
|
+
``0``, the position of the second residue is ``1``, etc.
|
|
399
|
+
|
|
400
|
+
Parameters
|
|
401
|
+
----------
|
|
402
|
+
array : AtomArray or AtomArrayStack
|
|
403
|
+
The atom array (stack) to determine the residues from.
|
|
404
|
+
|
|
405
|
+
Returns
|
|
406
|
+
-------
|
|
407
|
+
residue_indices : ndarray, dtype=int, shape=(k,)
|
|
408
|
+
The indices that point to the position of the residues.
|
|
409
|
+
|
|
410
|
+
See Also
|
|
411
|
+
--------
|
|
412
|
+
get_residue_positions :
|
|
413
|
+
Similar to this function, but for a given subset of atom indices.
|
|
414
|
+
|
|
415
|
+
Examples
|
|
416
|
+
--------
|
|
417
|
+
>>> print(get_all_residue_positions(atom_array))
|
|
418
|
+
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
|
|
419
|
+
1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2
|
|
420
|
+
2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
|
|
421
|
+
3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5
|
|
422
|
+
5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6
|
|
423
|
+
6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7
|
|
424
|
+
7 7 7 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8
|
|
425
|
+
8 9 9 9 9 9 9 9 10 10 10 10 10 10 10 11 11 11 11 11 11 11 11 11
|
|
426
|
+
11 11 11 11 11 12 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13
|
|
427
|
+
13 13 13 14 14 14 14 14 14 14 15 15 15 15 15 15 15 15 15 15 15 15 15 15
|
|
428
|
+
15 15 15 15 15 15 15 15 15 15 16 16 16 16 16 16 16 16 16 16 16 16 16 16
|
|
429
|
+
17 17 17 17 17 17 17 17 17 17 17 17 17 17 18 18 18 18 18 18 18 18 18 18
|
|
430
|
+
18 18 18 18 19 19 19 19 19 19 19 19 19 19 19 19]
|
|
431
|
+
"""
|
|
432
|
+
starts = get_residue_starts(array, add_exclusive_stop=True)
|
|
433
|
+
return get_all_segment_positions(starts, array.array_length())
|
|
434
|
+
|
|
435
|
+
|
|
383
436
|
def get_residues(array):
|
|
384
437
|
"""
|
|
385
438
|
Get the residue IDs and names of an atom array (stack).
|
|
@@ -542,3 +595,122 @@ def residue_iter(array):
|
|
|
542
595
|
starts = get_residue_starts(array, add_exclusive_stop=True)
|
|
543
596
|
for residue in segment_iter(array, starts):
|
|
544
597
|
yield residue
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
def get_atom_name_indices(atoms, atom_names):
|
|
601
|
+
"""
|
|
602
|
+
For each residue, get the index of the atom with the given atom name.
|
|
603
|
+
|
|
604
|
+
Parameters
|
|
605
|
+
----------
|
|
606
|
+
atoms : AtomArray or AtomArrayStack
|
|
607
|
+
Search for the indices of the given atom names in this structure.
|
|
608
|
+
atom_names : list of str, length=p
|
|
609
|
+
The names of the atoms to get the indices of.
|
|
610
|
+
|
|
611
|
+
Returns
|
|
612
|
+
-------
|
|
613
|
+
indices : ndarray, dtype=int, shape=(k, p)
|
|
614
|
+
For every residue and atom name, the return value contains the atom index in
|
|
615
|
+
the :class:`AtomArray` where the sought atom name is located.
|
|
616
|
+
Where the atom name is not present in a residue, the array is filled with `-1`.
|
|
617
|
+
|
|
618
|
+
Examples
|
|
619
|
+
--------
|
|
620
|
+
|
|
621
|
+
>>> indices = get_atom_name_indices(atom_array, ["CA", "CB"])
|
|
622
|
+
>>> print(indices)
|
|
623
|
+
[[ 1 4]
|
|
624
|
+
[ 17 20]
|
|
625
|
+
[ 36 39]
|
|
626
|
+
[ 57 60]
|
|
627
|
+
[ 76 79]
|
|
628
|
+
[ 93 96]
|
|
629
|
+
[117 120]
|
|
630
|
+
[136 139]
|
|
631
|
+
[158 161]
|
|
632
|
+
[170 -1]
|
|
633
|
+
[177 -1]
|
|
634
|
+
[184 187]
|
|
635
|
+
[198 201]
|
|
636
|
+
[209 212]
|
|
637
|
+
[220 -1]
|
|
638
|
+
[227 230]
|
|
639
|
+
[251 254]
|
|
640
|
+
[265 268]
|
|
641
|
+
[279 282]
|
|
642
|
+
[293 296]]
|
|
643
|
+
>>> for row in indices:
|
|
644
|
+
... for index in row:
|
|
645
|
+
... if index != -1:
|
|
646
|
+
... print(atom_array[index])
|
|
647
|
+
... print()
|
|
648
|
+
A 1 ASN CA C -8.608 3.135 -1.618
|
|
649
|
+
A 1 ASN CB C -9.437 3.396 -2.889
|
|
650
|
+
<BLANKLINE>
|
|
651
|
+
A 2 LEU CA C -4.923 4.002 -2.452
|
|
652
|
+
A 2 LEU CB C -4.411 5.450 -2.619
|
|
653
|
+
<BLANKLINE>
|
|
654
|
+
A 3 TYR CA C -3.690 2.738 0.981
|
|
655
|
+
A 3 TYR CB C -3.964 3.472 2.302
|
|
656
|
+
<BLANKLINE>
|
|
657
|
+
A 4 ILE CA C -5.857 -0.449 0.613
|
|
658
|
+
A 4 ILE CB C -7.386 -0.466 0.343
|
|
659
|
+
<BLANKLINE>
|
|
660
|
+
A 5 GLN CA C -4.122 -1.167 -2.743
|
|
661
|
+
A 5 GLN CB C -4.292 -0.313 -4.013
|
|
662
|
+
<BLANKLINE>
|
|
663
|
+
A 6 TRP CA C -0.716 -0.631 -0.993
|
|
664
|
+
A 6 TRP CB C -0.221 0.703 -0.417
|
|
665
|
+
<BLANKLINE>
|
|
666
|
+
A 7 LEU CA C -1.641 -2.932 1.963
|
|
667
|
+
A 7 LEU CB C -2.710 -2.645 3.033
|
|
668
|
+
<BLANKLINE>
|
|
669
|
+
A 8 LYS CA C -3.024 -5.791 -0.269
|
|
670
|
+
A 8 LYS CB C -4.224 -5.697 -1.232
|
|
671
|
+
<BLANKLINE>
|
|
672
|
+
A 9 ASP CA C 0.466 -6.016 -1.905
|
|
673
|
+
A 9 ASP CB C 1.033 -4.839 -2.724
|
|
674
|
+
<BLANKLINE>
|
|
675
|
+
A 10 GLY CA C 2.060 -6.618 1.593
|
|
676
|
+
<BLANKLINE>
|
|
677
|
+
A 11 GLY CA C 2.626 -2.967 2.723
|
|
678
|
+
<BLANKLINE>
|
|
679
|
+
A 12 PRO CA C 6.333 -2.533 3.806
|
|
680
|
+
A 12 PRO CB C 6.740 -2.387 5.279
|
|
681
|
+
<BLANKLINE>
|
|
682
|
+
A 13 SER CA C 7.049 -6.179 2.704
|
|
683
|
+
A 13 SER CB C 6.458 -7.371 3.472
|
|
684
|
+
<BLANKLINE>
|
|
685
|
+
A 14 SER CA C 6.389 -5.315 -1.015
|
|
686
|
+
A 14 SER CB C 4.914 -4.993 -1.265
|
|
687
|
+
<BLANKLINE>
|
|
688
|
+
A 15 GLY CA C 9.451 -3.116 -1.870
|
|
689
|
+
<BLANKLINE>
|
|
690
|
+
A 16 ARG CA C 7.289 0.084 -2.054
|
|
691
|
+
A 16 ARG CB C 6.110 -0.243 -2.994
|
|
692
|
+
<BLANKLINE>
|
|
693
|
+
A 17 PRO CA C 6.782 3.088 0.345
|
|
694
|
+
A 17 PRO CB C 7.554 4.394 0.119
|
|
695
|
+
<BLANKLINE>
|
|
696
|
+
A 18 PRO CA C 3.287 4.031 1.686
|
|
697
|
+
A 18 PRO CB C 3.035 4.190 3.187
|
|
698
|
+
<BLANKLINE>
|
|
699
|
+
A 19 PRO CA C 1.185 6.543 -0.353
|
|
700
|
+
A 19 PRO CB C 0.048 6.014 -1.229
|
|
701
|
+
<BLANKLINE>
|
|
702
|
+
A 20 SER CA C 0.852 10.027 1.285
|
|
703
|
+
A 20 SER CB C 1.972 11.071 1.284
|
|
704
|
+
<BLANKLINE>
|
|
705
|
+
"""
|
|
706
|
+
residue_indices = get_all_residue_positions(atoms)
|
|
707
|
+
indices = np.full(
|
|
708
|
+
(residue_indices[-1] + 1, len(atom_names)), fill_value=-1, dtype=int
|
|
709
|
+
)
|
|
710
|
+
for i, atom_name in enumerate(atom_names):
|
|
711
|
+
if atom_name is None:
|
|
712
|
+
atom_name_indices = np.where(atoms.hetero)[0]
|
|
713
|
+
else:
|
|
714
|
+
atom_name_indices = np.where(atoms.atom_name == atom_name)[0]
|
|
715
|
+
indices[residue_indices[atom_name_indices], i] = atom_name_indices
|
|
716
|
+
return indices
|
|
Binary file
|
biotite/structure/segments.py
CHANGED
|
@@ -11,6 +11,7 @@ __all__ = [
|
|
|
11
11
|
"get_segment_masks",
|
|
12
12
|
"get_segment_starts_for",
|
|
13
13
|
"get_segment_positions",
|
|
14
|
+
"get_all_segment_positions",
|
|
14
15
|
"segment_iter",
|
|
15
16
|
]
|
|
16
17
|
|
|
@@ -62,13 +63,13 @@ def get_segment_starts(
|
|
|
62
63
|
# Convert mask to indices
|
|
63
64
|
# Add 1, to shift the indices from the end of a segment
|
|
64
65
|
# to the start of a new segment
|
|
65
|
-
|
|
66
|
+
segment_starts = np.where(segment_start_mask)[0] + 1
|
|
66
67
|
|
|
67
68
|
# The first chain is not included yet -> Insert '[0]'
|
|
68
69
|
if add_exclusive_stop:
|
|
69
|
-
return np.concatenate(([0],
|
|
70
|
+
return np.concatenate(([0], segment_starts, [array.array_length()]))
|
|
70
71
|
else:
|
|
71
|
-
return np.concatenate(([0],
|
|
72
|
+
return np.concatenate(([0], segment_starts))
|
|
72
73
|
|
|
73
74
|
|
|
74
75
|
def apply_segment_wise(starts, data, function, axis=None):
|
|
@@ -252,6 +253,11 @@ def get_segment_positions(starts, indices):
|
|
|
252
253
|
-------
|
|
253
254
|
segment_indices : ndarray, shape=(k,)
|
|
254
255
|
The indices that point to the position of the segments.
|
|
256
|
+
|
|
257
|
+
See Also
|
|
258
|
+
--------
|
|
259
|
+
get_all_segment_positions :
|
|
260
|
+
Similar to this function, but for all atoms in the :class:`struc.AtomArray`.
|
|
255
261
|
"""
|
|
256
262
|
indices = np.asarray(indices)
|
|
257
263
|
length = starts[-1]
|
|
@@ -269,6 +275,36 @@ def get_segment_positions(starts, indices):
|
|
|
269
275
|
return np.searchsorted(starts, indices, side="right") - 1
|
|
270
276
|
|
|
271
277
|
|
|
278
|
+
def get_all_segment_positions(starts, length):
|
|
279
|
+
"""
|
|
280
|
+
Generalized version of :func:`get_all_residue_positions()`
|
|
281
|
+
for residues and chains.
|
|
282
|
+
|
|
283
|
+
Parameters
|
|
284
|
+
----------
|
|
285
|
+
starts : ndarray, dtype=int
|
|
286
|
+
The sorted start indices of segments.
|
|
287
|
+
Includes exclusive stop, i.e. the length of the corresponding
|
|
288
|
+
atom array.
|
|
289
|
+
length : int
|
|
290
|
+
The length of the corresponding :class:`struc.AtomArray`.
|
|
291
|
+
|
|
292
|
+
Returns
|
|
293
|
+
-------
|
|
294
|
+
segment_indices : ndarray, shape=(k,)
|
|
295
|
+
For each atom the indices that point to the corresponding position of the
|
|
296
|
+
segments.
|
|
297
|
+
|
|
298
|
+
See Also
|
|
299
|
+
--------
|
|
300
|
+
get_segment_positions :
|
|
301
|
+
Similar to this function, but for a given subset of atom indices.
|
|
302
|
+
"""
|
|
303
|
+
segment_changes = np.zeros(length, dtype=int)
|
|
304
|
+
segment_changes[starts[1:-1]] = 1
|
|
305
|
+
return np.cumsum(segment_changes)
|
|
306
|
+
|
|
307
|
+
|
|
272
308
|
def segment_iter(array, starts):
|
|
273
309
|
"""
|
|
274
310
|
Generalized version of :func:`residue_iter()`
|
biotite/structure/util.py
CHANGED
|
@@ -18,8 +18,9 @@ __all__ = [
|
|
|
18
18
|
|
|
19
19
|
import numpy as np
|
|
20
20
|
from biotite.structure.atoms import AtomArrayStack
|
|
21
|
-
from biotite.structure.
|
|
22
|
-
|
|
21
|
+
from biotite.structure.residues import (
|
|
22
|
+
get_atom_name_indices,
|
|
23
|
+
)
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
def vector_dot(v1, v2):
|
|
@@ -127,42 +128,33 @@ def coord_for_atom_name_per_residue(atoms, atom_names, mask=None):
|
|
|
127
128
|
coord: ndarray, shape=(k, m, r, 3) or shape=(k, r, 3)
|
|
128
129
|
The coordinates of the specified atom for each residue.
|
|
129
130
|
"""
|
|
130
|
-
|
|
131
|
-
residue_starts = get_residue_starts(atoms)
|
|
132
|
-
all_residue_masks = get_residue_masks(atoms, residue_starts)
|
|
131
|
+
atom_name_indices = get_atom_name_indices(atoms, atom_names)
|
|
133
132
|
|
|
133
|
+
is_multi_model = isinstance(atoms, AtomArrayStack)
|
|
134
134
|
if is_multi_model:
|
|
135
135
|
coord = np.full(
|
|
136
|
-
(len(atom_names), atoms.stack_depth(),
|
|
136
|
+
(len(atom_names), atoms.stack_depth(), atom_name_indices.shape[0], 3),
|
|
137
137
|
np.nan,
|
|
138
138
|
dtype=np.float32,
|
|
139
139
|
)
|
|
140
140
|
else:
|
|
141
141
|
coord = np.full(
|
|
142
|
-
(len(atom_names),
|
|
142
|
+
(len(atom_names), atom_name_indices.shape[0], 3),
|
|
143
143
|
np.nan,
|
|
144
144
|
dtype=np.float32,
|
|
145
145
|
)
|
|
146
146
|
|
|
147
|
-
for
|
|
148
|
-
|
|
147
|
+
for atom_name_i, atom_indices in enumerate(atom_name_indices.T):
|
|
148
|
+
valid_mask = atom_indices != -1
|
|
149
149
|
if mask is not None:
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
number_of_specified_atoms_per_residue = np.count_nonzero(
|
|
153
|
-
all_residue_masks_for_specified_atom, axis=-1
|
|
154
|
-
)
|
|
155
|
-
if np.any(number_of_specified_atoms_per_residue > 1):
|
|
156
|
-
raise BadStructureError(f"Multiple '{atom_name}' atoms per residue")
|
|
157
|
-
residues_with_specified_atom = number_of_specified_atoms_per_residue == 1
|
|
158
|
-
coord_of_specified_atoms = atoms.coord[..., specified_atom_mask, :]
|
|
150
|
+
valid_mask &= mask[atom_indices]
|
|
151
|
+
coord_for_atom_name = atoms.coord[..., atom_indices[valid_mask], :]
|
|
159
152
|
if is_multi_model:
|
|
160
153
|
# Swap dimensions due to NumPy's behavior when using advanced indexing
|
|
161
154
|
# (https://numpy.org/devdocs/user/basics.indexing.html#combining-advanced-and-basic-indexing)
|
|
162
|
-
coord[
|
|
163
|
-
|
|
155
|
+
coord[atom_name_i, ..., valid_mask, :] = coord_for_atom_name.transpose(
|
|
156
|
+
1, 0, 2
|
|
164
157
|
)
|
|
165
158
|
else:
|
|
166
|
-
coord[
|
|
167
|
-
|
|
159
|
+
coord[atom_name_i, valid_mask, :] = coord_for_atom_name
|
|
168
160
|
return coord
|
biotite/version.py
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
# file generated by setuptools-scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
3
|
|
|
4
|
-
__all__ = [
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
5
12
|
|
|
6
13
|
TYPE_CHECKING = False
|
|
7
14
|
if TYPE_CHECKING:
|
|
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
|
|
|
9
16
|
from typing import Union
|
|
10
17
|
|
|
11
18
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
12
20
|
else:
|
|
13
21
|
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
14
23
|
|
|
15
24
|
version: str
|
|
16
25
|
__version__: str
|
|
17
26
|
__version_tuple__: VERSION_TUPLE
|
|
18
27
|
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
19
30
|
|
|
20
|
-
__version__ = version = '1.
|
|
21
|
-
__version_tuple__ = version_tuple = (1,
|
|
31
|
+
__version__ = version = '1.5.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (1, 5, 0)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|