biotite 1.0.0__cp312-cp312-macosx_11_0_arm64.whl → 1.1.0__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/dssp/app.py +13 -3
- biotite/application/localapp.py +34 -0
- biotite/application/muscle/app3.py +2 -15
- biotite/application/muscle/app5.py +2 -2
- biotite/application/util.py +1 -1
- biotite/application/viennarna/rnaplot.py +6 -2
- biotite/database/rcsb/query.py +6 -6
- biotite/database/uniprot/check.py +20 -15
- biotite/database/uniprot/download.py +1 -1
- biotite/database/uniprot/query.py +1 -1
- biotite/sequence/align/alignment.py +16 -3
- biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +5 -5
- biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +17 -0
- biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +52 -42
- biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/matrix.py +273 -55
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
- biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
- biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
- biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
- biotite/sequence/alphabet.py +3 -0
- biotite/sequence/codec.cpython-312-darwin.so +0 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
- biotite/sequence/graphics/colorschemes.py +44 -11
- biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
- biotite/sequence/profile.py +86 -4
- biotite/sequence/seqtypes.py +124 -3
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +4 -3
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +110 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +171 -0
- biotite/structure/alphabet/unkerasify.py +122 -0
- biotite/structure/atoms.py +156 -43
- biotite/structure/bonds.cpython-312-darwin.so +0 -0
- biotite/structure/bonds.pyx +72 -21
- biotite/structure/celllist.cpython-312-darwin.so +0 -0
- biotite/structure/charges.cpython-312-darwin.so +0 -0
- biotite/structure/filter.py +1 -1
- biotite/structure/geometry.py +60 -113
- biotite/structure/info/__init__.py +1 -0
- biotite/structure/info/atoms.py +13 -13
- biotite/structure/info/bonds.py +12 -6
- biotite/structure/info/ccd.py +125 -32
- biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
- biotite/structure/info/groups.py +63 -17
- biotite/structure/info/masses.py +9 -6
- biotite/structure/info/misc.py +15 -21
- biotite/structure/info/standardize.py +3 -2
- biotite/structure/io/mol/sdf.py +41 -40
- biotite/structure/io/pdb/convert.py +2 -0
- biotite/structure/io/pdb/file.py +74 -3
- biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +1 -0
- biotite/structure/io/pdbx/bcif.py +32 -8
- biotite/structure/io/pdbx/cif.py +148 -107
- biotite/structure/io/pdbx/component.py +9 -4
- biotite/structure/io/pdbx/compress.py +321 -0
- biotite/structure/io/pdbx/convert.py +227 -68
- biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +98 -17
- biotite/structure/io/trajfile.py +16 -16
- biotite/structure/molecules.py +141 -141
- biotite/structure/sasa.cpython-312-darwin.so +0 -0
- biotite/structure/segments.py +1 -2
- biotite/structure/util.py +73 -1
- biotite/version.py +2 -2
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/METADATA +4 -1
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/RECORD +88 -78
- biotite/structure/info/ccd/README.rst +0 -8
- biotite/structure/info/ccd/amino_acids.txt +0 -1663
- biotite/structure/info/ccd/carbohydrates.txt +0 -1135
- biotite/structure/info/ccd/nucleotides.txt +0 -798
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -24,6 +24,10 @@ from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
|
|
|
24
24
|
from biotite.structure.bonds import BondList, BondType, connect_via_residue_names
|
|
25
25
|
from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
|
|
26
26
|
from biotite.structure.error import BadStructureError
|
|
27
|
+
from biotite.structure.filter import _canonical_aa_list as canonical_aa_list
|
|
28
|
+
from biotite.structure.filter import (
|
|
29
|
+
_canonical_nucleotide_list as canonical_nucleotide_list,
|
|
30
|
+
)
|
|
27
31
|
from biotite.structure.filter import (
|
|
28
32
|
filter_first_altloc,
|
|
29
33
|
filter_highest_occupancy_altloc,
|
|
@@ -36,32 +40,38 @@ from biotite.structure.io.pdbx.bcif import (
|
|
|
36
40
|
from biotite.structure.io.pdbx.cif import CIFBlock, CIFFile
|
|
37
41
|
from biotite.structure.io.pdbx.component import MaskValue
|
|
38
42
|
from biotite.structure.io.pdbx.encoding import StringArrayEncoding
|
|
39
|
-
from biotite.structure.residues import
|
|
43
|
+
from biotite.structure.residues import (
|
|
44
|
+
get_residue_count,
|
|
45
|
+
get_residue_positions,
|
|
46
|
+
get_residue_starts_for,
|
|
47
|
+
)
|
|
40
48
|
from biotite.structure.util import matrix_rotate
|
|
41
49
|
|
|
42
|
-
#
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
"
|
|
47
|
-
"
|
|
48
|
-
"
|
|
49
|
-
"
|
|
50
|
-
"
|
|
51
|
-
"
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
"
|
|
57
|
-
"
|
|
58
|
-
"
|
|
59
|
-
"
|
|
50
|
+
# Bond types in `struct_conn` category that refer to covalent bonds
|
|
51
|
+
PDBX_BOND_TYPE_ID_TO_TYPE = {
|
|
52
|
+
# Although a covalent bond could in theory have a higher bond order,
|
|
53
|
+
# practically inter-residue bonds are always single
|
|
54
|
+
"covale": BondType.SINGLE,
|
|
55
|
+
"covale_base": BondType.SINGLE,
|
|
56
|
+
"covale_phosphate": BondType.SINGLE,
|
|
57
|
+
"covale_sugar": BondType.SINGLE,
|
|
58
|
+
"disulf": BondType.SINGLE,
|
|
59
|
+
"modres": BondType.SINGLE,
|
|
60
|
+
"modres_link": BondType.SINGLE,
|
|
61
|
+
"metalc": BondType.COORDINATION,
|
|
62
|
+
}
|
|
63
|
+
PDBX_BOND_TYPE_TO_TYPE_ID = {
|
|
64
|
+
BondType.ANY: "covale",
|
|
65
|
+
BondType.SINGLE: "covale",
|
|
66
|
+
BondType.DOUBLE: "covale",
|
|
67
|
+
BondType.TRIPLE: "covale",
|
|
68
|
+
BondType.QUADRUPLE: "covale",
|
|
69
|
+
BondType.AROMATIC_SINGLE: "covale",
|
|
70
|
+
BondType.AROMATIC_DOUBLE: "covale",
|
|
71
|
+
BondType.AROMATIC_TRIPLE: "covale",
|
|
72
|
+
BondType.COORDINATION: "metalc",
|
|
60
73
|
}
|
|
61
|
-
# ...and vice versa
|
|
62
74
|
PDBX_BOND_TYPE_TO_ORDER = {
|
|
63
|
-
# 'ANY' is masked later, it is merely added here to avoid a KeyError
|
|
64
|
-
BondType.ANY: "",
|
|
65
75
|
BondType.SINGLE: "sing",
|
|
66
76
|
BondType.DOUBLE: "doub",
|
|
67
77
|
BondType.TRIPLE: "trip",
|
|
@@ -69,6 +79,9 @@ PDBX_BOND_TYPE_TO_ORDER = {
|
|
|
69
79
|
BondType.AROMATIC_SINGLE: "sing",
|
|
70
80
|
BondType.AROMATIC_DOUBLE: "doub",
|
|
71
81
|
BondType.AROMATIC_TRIPLE: "trip",
|
|
82
|
+
# These are masked later; they are merely added here to avoid a KeyError
|
|
83
|
+
BondType.ANY: "",
|
|
84
|
+
BondType.COORDINATION: "",
|
|
72
85
|
}
|
|
73
86
|
# Map 'chem_comp_bond' bond orders and aromaticity to 'BondType'...
|
|
74
87
|
COMP_BOND_ORDER_TO_TYPE = {
|
|
@@ -84,6 +97,7 @@ COMP_BOND_ORDER_TO_TYPE = {
|
|
|
84
97
|
COMP_BOND_TYPE_TO_ORDER = {
|
|
85
98
|
bond_type: order for order, bond_type in COMP_BOND_ORDER_TO_TYPE.items()
|
|
86
99
|
}
|
|
100
|
+
CANONICAL_RESIDUE_LIST = canonical_aa_list + canonical_nucleotide_list
|
|
87
101
|
|
|
88
102
|
_proteinseq_type_list = ["polypeptide(D)", "polypeptide(L)"]
|
|
89
103
|
_nucleotideseq_type_list = [
|
|
@@ -450,7 +464,7 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
|
|
|
450
464
|
"chain_id",
|
|
451
465
|
_get_or_fallback(
|
|
452
466
|
atom_site, f"{prefix}_asym_id", f"{alt_prefix}_asym_id"
|
|
453
|
-
).as_array(
|
|
467
|
+
).as_array(str),
|
|
454
468
|
)
|
|
455
469
|
array.set_annotation(
|
|
456
470
|
"res_id",
|
|
@@ -458,33 +472,70 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
|
|
|
458
472
|
atom_site, f"{prefix}_seq_id", f"{alt_prefix}_seq_id"
|
|
459
473
|
).as_array(int, -1),
|
|
460
474
|
)
|
|
461
|
-
array.set_annotation("ins_code", atom_site["pdbx_PDB_ins_code"].as_array(
|
|
475
|
+
array.set_annotation("ins_code", atom_site["pdbx_PDB_ins_code"].as_array(str, ""))
|
|
462
476
|
array.set_annotation(
|
|
463
477
|
"res_name",
|
|
464
478
|
_get_or_fallback(
|
|
465
479
|
atom_site, f"{prefix}_comp_id", f"{alt_prefix}_comp_id"
|
|
466
|
-
).as_array(
|
|
480
|
+
).as_array(str),
|
|
467
481
|
)
|
|
468
482
|
array.set_annotation("hetero", atom_site["group_PDB"].as_array(str) == "HETATM")
|
|
469
483
|
array.set_annotation(
|
|
470
484
|
"atom_name",
|
|
471
485
|
_get_or_fallback(
|
|
472
486
|
atom_site, f"{prefix}_atom_id", f"{alt_prefix}_atom_id"
|
|
473
|
-
).as_array(
|
|
487
|
+
).as_array(str),
|
|
474
488
|
)
|
|
475
|
-
array.set_annotation("element", atom_site["type_symbol"].as_array(
|
|
489
|
+
array.set_annotation("element", atom_site["type_symbol"].as_array(str))
|
|
476
490
|
|
|
477
491
|
if "atom_id" in extra_fields:
|
|
478
|
-
|
|
492
|
+
if "id" in atom_site:
|
|
493
|
+
array.set_annotation("atom_id", atom_site["id"].as_array(int))
|
|
494
|
+
else:
|
|
495
|
+
warnings.warn(
|
|
496
|
+
"Missing 'id' in 'atom_site' category. 'atom_id' generated automatically.",
|
|
497
|
+
UserWarning,
|
|
498
|
+
)
|
|
499
|
+
array.set_annotation("atom_id", np.arange(array.array_length()))
|
|
479
500
|
extra_fields.remove("atom_id")
|
|
480
501
|
if "b_factor" in extra_fields:
|
|
481
|
-
|
|
502
|
+
if "B_iso_or_equiv" in atom_site:
|
|
503
|
+
array.set_annotation(
|
|
504
|
+
"b_factor", atom_site["B_iso_or_equiv"].as_array(float)
|
|
505
|
+
)
|
|
506
|
+
else:
|
|
507
|
+
warnings.warn(
|
|
508
|
+
"Missing 'B_iso_or_equiv' in 'atom_site' category. 'b_factor' will be set to `nan`.",
|
|
509
|
+
UserWarning,
|
|
510
|
+
)
|
|
511
|
+
array.set_annotation("b_factor", np.full(array.array_length(), np.nan))
|
|
482
512
|
extra_fields.remove("b_factor")
|
|
483
513
|
if "occupancy" in extra_fields:
|
|
484
|
-
|
|
514
|
+
if "occupancy" in atom_site:
|
|
515
|
+
array.set_annotation("occupancy", atom_site["occupancy"].as_array(float))
|
|
516
|
+
else:
|
|
517
|
+
warnings.warn(
|
|
518
|
+
"Missing 'occupancy' in 'atom_site' category. 'occupancy' will be assumed to be 1.0",
|
|
519
|
+
UserWarning,
|
|
520
|
+
)
|
|
521
|
+
array.set_annotation(
|
|
522
|
+
"occupancy", np.ones(array.array_length(), dtype=float)
|
|
523
|
+
)
|
|
485
524
|
extra_fields.remove("occupancy")
|
|
486
525
|
if "charge" in extra_fields:
|
|
487
|
-
|
|
526
|
+
if "pdbx_formal_charge" in atom_site:
|
|
527
|
+
array.set_annotation(
|
|
528
|
+
"charge",
|
|
529
|
+
atom_site["pdbx_formal_charge"].as_array(
|
|
530
|
+
int, 0
|
|
531
|
+
), # masked values are set to 0
|
|
532
|
+
)
|
|
533
|
+
else:
|
|
534
|
+
warnings.warn(
|
|
535
|
+
"Missing 'pdbx_formal_charge' in 'atom_site' category. 'charge' will be set to 0",
|
|
536
|
+
UserWarning,
|
|
537
|
+
)
|
|
538
|
+
array.set_annotation("charge", np.zeros(array.array_length(), dtype=int))
|
|
488
539
|
extra_fields.remove("charge")
|
|
489
540
|
|
|
490
541
|
# Handle all remaining custom fields
|
|
@@ -536,7 +587,8 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
536
587
|
]
|
|
537
588
|
|
|
538
589
|
covale_mask = np.isin(
|
|
539
|
-
struct_conn["conn_type_id"].as_array(str),
|
|
590
|
+
struct_conn["conn_type_id"].as_array(str),
|
|
591
|
+
list(PDBX_BOND_TYPE_ID_TO_TYPE.keys()),
|
|
540
592
|
)
|
|
541
593
|
if "ptnr1_symmetry" in struct_conn:
|
|
542
594
|
covale_mask &= struct_conn["ptnr1_symmetry"].as_array(str, IDENTITY) == IDENTITY
|
|
@@ -576,13 +628,14 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
576
628
|
atoms_indices_1 = atoms_indices_1[mapping_exists_mask]
|
|
577
629
|
atoms_indices_2 = atoms_indices_2[mapping_exists_mask]
|
|
578
630
|
|
|
579
|
-
|
|
580
|
-
bond_order = struct_conn["pdbx_value_order"].as_array("U4", "")
|
|
631
|
+
bond_type_id = struct_conn["conn_type_id"].as_array()
|
|
581
632
|
# Consecutively apply the same masks as applied to the atom indices
|
|
582
633
|
# Logical combination does not work here,
|
|
583
634
|
# as the second mask was created based on already filtered data
|
|
584
|
-
|
|
585
|
-
|
|
635
|
+
bond_type_id = bond_type_id[covale_mask][mapping_exists_mask]
|
|
636
|
+
# The type ID is always present in the dictionary,
|
|
637
|
+
# as it was used to filter the applicable bonds
|
|
638
|
+
bond_types = [PDBX_BOND_TYPE_ID_TO_TYPE[type_id] for type_id in bond_type_id]
|
|
586
639
|
|
|
587
640
|
return BondList(
|
|
588
641
|
atom_site.row_count,
|
|
@@ -593,7 +646,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
593
646
|
def _find_matches(query_arrays, reference_arrays):
|
|
594
647
|
"""
|
|
595
648
|
For each index in the `query_arrays` find the indices in the
|
|
596
|
-
`reference_arrays` where all query values the reference counterpart.
|
|
649
|
+
`reference_arrays` where all query values match the reference counterpart.
|
|
597
650
|
If no match is found for a query, the corresponding index is -1.
|
|
598
651
|
"""
|
|
599
652
|
match_masks_for_all_columns = np.stack(
|
|
@@ -703,7 +756,13 @@ def _get_box(block):
|
|
|
703
756
|
return vectors_from_unitcell(len_a, len_b, len_c, alpha, beta, gamma)
|
|
704
757
|
|
|
705
758
|
|
|
706
|
-
def set_structure(
|
|
759
|
+
def set_structure(
|
|
760
|
+
pdbx_file,
|
|
761
|
+
array,
|
|
762
|
+
data_block=None,
|
|
763
|
+
include_bonds=False,
|
|
764
|
+
extra_fields=[],
|
|
765
|
+
):
|
|
707
766
|
"""
|
|
708
767
|
Set the ``atom_site`` category with atom information from an
|
|
709
768
|
:class:`AtomArray` or :class:`AtomArrayStack`.
|
|
@@ -737,6 +796,10 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
737
796
|
category.
|
|
738
797
|
Inter-residue bonds will be written into the ``struct_conn``
|
|
739
798
|
independent of this parameter.
|
|
799
|
+
extra_fields : list of str, optional
|
|
800
|
+
List of names of additional `array` annotations (beyond the standard ``atom_site`` columns)
|
|
801
|
+
that should be written into the file.
|
|
802
|
+
Default is an empty list.
|
|
740
803
|
|
|
741
804
|
Notes
|
|
742
805
|
-----
|
|
@@ -797,6 +860,32 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
797
860
|
np.where(array.charge == 0, MaskValue.MISSING, MaskValue.PRESENT),
|
|
798
861
|
)
|
|
799
862
|
|
|
863
|
+
# Handle all remaining custom fields
|
|
864
|
+
if len(extra_fields) > 0:
|
|
865
|
+
# ... check to avoid clashes with standard annotations
|
|
866
|
+
_standard_annotations = [
|
|
867
|
+
"hetero",
|
|
868
|
+
"element",
|
|
869
|
+
"atom_name",
|
|
870
|
+
"res_name",
|
|
871
|
+
"chain_id",
|
|
872
|
+
"res_id",
|
|
873
|
+
"ins_code",
|
|
874
|
+
"atom_id",
|
|
875
|
+
"b_factor",
|
|
876
|
+
"occupancy",
|
|
877
|
+
"charge",
|
|
878
|
+
]
|
|
879
|
+
_reserved_annotation_names = list(atom_site.keys()) + _standard_annotations
|
|
880
|
+
|
|
881
|
+
for annot in extra_fields:
|
|
882
|
+
if annot in _reserved_annotation_names:
|
|
883
|
+
raise ValueError(
|
|
884
|
+
f"Annotation name '{annot}' is reserved and cannot be written to as extra field. "
|
|
885
|
+
"Please choose another name."
|
|
886
|
+
)
|
|
887
|
+
atom_site[annot] = np.copy(array.get_annotation(annot))
|
|
888
|
+
|
|
800
889
|
if array.bonds is not None:
|
|
801
890
|
struct_conn = _set_inter_residue_bonds(array, atom_site)
|
|
802
891
|
if struct_conn is not None:
|
|
@@ -964,25 +1053,38 @@ def _set_intra_residue_bonds(array, atom_site):
|
|
|
964
1053
|
aromatic_flag[i] = aromatic
|
|
965
1054
|
any_mask = bond_array[:, 2] == BondType.ANY
|
|
966
1055
|
|
|
967
|
-
|
|
1056
|
+
# Remove already existing residue and atom name combinations
|
|
1057
|
+
# These appear when the structure contains a residue multiple times
|
|
1058
|
+
atom_id_1 = array.atom_name[bond_array[:, 0]]
|
|
1059
|
+
atom_id_2 = array.atom_name[bond_array[:, 1]]
|
|
968
1060
|
# Take the residue name from the first atom index, as the residue
|
|
969
1061
|
# name is the same for both atoms, since we have only intra bonds
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
1062
|
+
comp_id = array.res_name[bond_array[:, 0]]
|
|
1063
|
+
_, unique_indices = np.unique(
|
|
1064
|
+
np.stack([comp_id, atom_id_1, atom_id_2], axis=-1), axis=0, return_index=True
|
|
1065
|
+
)
|
|
1066
|
+
unique_indices.sort()
|
|
1067
|
+
|
|
1068
|
+
chem_comp_bond = Category()
|
|
1069
|
+
n_bonds = len(unique_indices)
|
|
1070
|
+
chem_comp_bond["pdbx_ordinal"] = np.arange(1, n_bonds + 1, dtype=np.int32)
|
|
1071
|
+
chem_comp_bond["comp_id"] = comp_id[unique_indices]
|
|
1072
|
+
chem_comp_bond["atom_id_1"] = atom_id_1[unique_indices]
|
|
1073
|
+
chem_comp_bond["atom_id_2"] = atom_id_2[unique_indices]
|
|
973
1074
|
chem_comp_bond["value_order"] = Column(
|
|
974
|
-
value_order,
|
|
1075
|
+
value_order[unique_indices],
|
|
1076
|
+
np.where(any_mask[unique_indices], MaskValue.MISSING, MaskValue.PRESENT),
|
|
975
1077
|
)
|
|
976
1078
|
chem_comp_bond["pdbx_aromatic_flag"] = Column(
|
|
977
|
-
aromatic_flag,
|
|
1079
|
+
aromatic_flag[unique_indices],
|
|
1080
|
+
np.where(any_mask[unique_indices], MaskValue.MISSING, MaskValue.PRESENT),
|
|
978
1081
|
)
|
|
979
1082
|
# BondList does not contain stereo information
|
|
980
1083
|
# -> all values are missing
|
|
981
1084
|
chem_comp_bond["pdbx_stereo_config"] = Column(
|
|
982
|
-
np.zeros(
|
|
983
|
-
np.full(
|
|
1085
|
+
np.zeros(n_bonds, dtype="U1"),
|
|
1086
|
+
np.full(n_bonds, MaskValue.MISSING),
|
|
984
1087
|
)
|
|
985
|
-
chem_comp_bond["pdbx_ordinal"] = np.arange(1, len(bond_array) + 1, dtype=np.int32)
|
|
986
1088
|
return chem_comp_bond
|
|
987
1089
|
|
|
988
1090
|
|
|
@@ -1007,13 +1109,22 @@ def _set_inter_residue_bonds(array, atom_site):
|
|
|
1007
1109
|
bond_array = _filter_bonds(array, "inter")
|
|
1008
1110
|
if len(bond_array) == 0:
|
|
1009
1111
|
return None
|
|
1112
|
+
|
|
1113
|
+
# Filter out 'standard' links, i.e. backbone bonds between adjacent canonical
|
|
1114
|
+
# nucleotide/amino acid residues
|
|
1115
|
+
bond_array = bond_array[~_filter_canonical_links(array, bond_array)]
|
|
1116
|
+
if len(bond_array) == 0:
|
|
1117
|
+
return None
|
|
1118
|
+
|
|
1010
1119
|
struct_conn = Category()
|
|
1011
1120
|
struct_conn["id"] = np.arange(1, len(bond_array) + 1)
|
|
1012
|
-
struct_conn["conn_type_id"] =
|
|
1121
|
+
struct_conn["conn_type_id"] = [
|
|
1122
|
+
PDBX_BOND_TYPE_TO_TYPE_ID[btype] for btype in bond_array[:, 2]
|
|
1123
|
+
]
|
|
1013
1124
|
struct_conn["pdbx_value_order"] = Column(
|
|
1014
1125
|
np.array([PDBX_BOND_TYPE_TO_ORDER[btype] for btype in bond_array[:, 2]]),
|
|
1015
1126
|
np.where(
|
|
1016
|
-
bond_array[:, 2]
|
|
1127
|
+
np.isin(bond_array[:, 2], (BondType.ANY, BondType.COORDINATION)),
|
|
1017
1128
|
MaskValue.MISSING,
|
|
1018
1129
|
MaskValue.PRESENT,
|
|
1019
1130
|
),
|
|
@@ -1049,6 +1160,27 @@ def _filter_bonds(array, connection):
|
|
|
1049
1160
|
raise ValueError("Invalid 'connection' option")
|
|
1050
1161
|
|
|
1051
1162
|
|
|
1163
|
+
def _filter_canonical_links(array, bond_array):
|
|
1164
|
+
"""
|
|
1165
|
+
Create a mask that is true for backbone bonds between adjacent canonical amino acid or nucleotide residues.
|
|
1166
|
+
"""
|
|
1167
|
+
# Get the residue index for each bonded atom
|
|
1168
|
+
residue_indices = get_residue_positions(array, bond_array[:, :2].flatten()).reshape(
|
|
1169
|
+
-1, 2
|
|
1170
|
+
)
|
|
1171
|
+
|
|
1172
|
+
return (
|
|
1173
|
+
# Must be canonical residues
|
|
1174
|
+
np.isin(array.res_name[bond_array[:, 0]], CANONICAL_RESIDUE_LIST) &
|
|
1175
|
+
np.isin(array.res_name[bond_array[:, 1]], CANONICAL_RESIDUE_LIST) &
|
|
1176
|
+
# Must be backbone bond
|
|
1177
|
+
np.isin(array.atom_name[bond_array[:, 0]], ("C", "O3'")) &
|
|
1178
|
+
np.isin(array.atom_name[bond_array[:, 1]], ("N", "P")) &
|
|
1179
|
+
# Must connect adjacent residues
|
|
1180
|
+
residue_indices[:, 1] - residue_indices[:, 0] == 1
|
|
1181
|
+
) # fmt: skip
|
|
1182
|
+
|
|
1183
|
+
|
|
1052
1184
|
def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
|
|
1053
1185
|
"""
|
|
1054
1186
|
Create an :class:`AtomArray` for a chemical component from the
|
|
@@ -1135,12 +1267,11 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
|
|
|
1135
1267
|
|
|
1136
1268
|
array = AtomArray(atom_category.row_count)
|
|
1137
1269
|
|
|
1138
|
-
array.hetero[
|
|
1139
|
-
array.res_name
|
|
1140
|
-
array.atom_name
|
|
1141
|
-
array.element
|
|
1142
|
-
array.
|
|
1143
|
-
array.charge = atom_category["charge"].as_array(int, 0)
|
|
1270
|
+
array.set_annotation("hetero", np.full(len(atom_category["comp_id"]), True))
|
|
1271
|
+
array.set_annotation("res_name", atom_category["comp_id"].as_array(str))
|
|
1272
|
+
array.set_annotation("atom_name", atom_category["atom_id"].as_array(str))
|
|
1273
|
+
array.set_annotation("element", atom_category["type_symbol"].as_array(str))
|
|
1274
|
+
array.set_annotation("charge", atom_category["charge"].as_array(int, 0))
|
|
1144
1275
|
|
|
1145
1276
|
coord_fields = [f"pdbx_model_Cartn_{dim}_ideal" for dim in ("x", "y", "z")]
|
|
1146
1277
|
alt_coord_fields = [f"model_Cartn_{dim}" for dim in ("x", "y", "z")]
|
|
@@ -1148,17 +1279,28 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
|
|
|
1148
1279
|
# Swap with the fallback option
|
|
1149
1280
|
coord_fields, alt_coord_fields = alt_coord_fields, coord_fields
|
|
1150
1281
|
try:
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1282
|
+
array.coord = _parse_component_coordinates(
|
|
1283
|
+
[atom_category[field] for field in coord_fields]
|
|
1284
|
+
)
|
|
1285
|
+
except Exception as err:
|
|
1286
|
+
if isinstance(err, KeyError):
|
|
1287
|
+
key = err.args[0]
|
|
1288
|
+
warnings.warn(
|
|
1289
|
+
f"Attribute '{key}' not found within 'chem_comp_atom' category. "
|
|
1290
|
+
f"The fallback coordinates will be used instead",
|
|
1291
|
+
UserWarning,
|
|
1292
|
+
)
|
|
1293
|
+
elif isinstance(err, ValueError):
|
|
1294
|
+
warnings.warn(
|
|
1295
|
+
"The coordinates are missing for some atoms. "
|
|
1296
|
+
"The fallback coordinates will be used instead",
|
|
1297
|
+
UserWarning,
|
|
1298
|
+
)
|
|
1299
|
+
else:
|
|
1300
|
+
raise
|
|
1301
|
+
array.coord = _parse_component_coordinates(
|
|
1302
|
+
[atom_category[field] for field in alt_coord_fields]
|
|
1159
1303
|
)
|
|
1160
|
-
for i, field in enumerate(alt_coord_fields):
|
|
1161
|
-
array.coord[:, i] = atom_category[field].as_array(np.float32)
|
|
1162
1304
|
|
|
1163
1305
|
try:
|
|
1164
1306
|
bond_category = block["chem_comp_bond"]
|
|
@@ -1188,6 +1330,17 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
|
|
|
1188
1330
|
return array
|
|
1189
1331
|
|
|
1190
1332
|
|
|
1333
|
+
def _parse_component_coordinates(coord_columns):
|
|
1334
|
+
coord = np.zeros((len(coord_columns[0]), 3), dtype=np.float32)
|
|
1335
|
+
for i, column in enumerate(coord_columns):
|
|
1336
|
+
if column.mask is not None and column.mask.array.any():
|
|
1337
|
+
raise ValueError(
|
|
1338
|
+
"Missing coordinates for some atoms",
|
|
1339
|
+
)
|
|
1340
|
+
coord[:, i] = column.as_array(np.float32)
|
|
1341
|
+
return coord
|
|
1342
|
+
|
|
1343
|
+
|
|
1191
1344
|
def set_component(pdbx_file, array, data_block=None):
|
|
1192
1345
|
"""
|
|
1193
1346
|
Set the ``chem_comp_atom`` and, if bonds are available,
|
|
@@ -1404,7 +1557,10 @@ def get_assembly(
|
|
|
1404
1557
|
Returns
|
|
1405
1558
|
-------
|
|
1406
1559
|
assembly : AtomArray or AtomArrayStack
|
|
1407
|
-
The assembly.
|
|
1560
|
+
The assembly.
|
|
1561
|
+
The return type depends on the `model` parameter.
|
|
1562
|
+
Contains the `sym_id` annotation, which enumerates the copies of the asymmetric
|
|
1563
|
+
unit in the assembly.
|
|
1408
1564
|
|
|
1409
1565
|
Examples
|
|
1410
1566
|
--------
|
|
@@ -1493,7 +1649,6 @@ def _apply_transformations(structure, transformation_dict, operations):
|
|
|
1493
1649
|
"""
|
|
1494
1650
|
# Additional first dimension for 'structure.repeat()'
|
|
1495
1651
|
assembly_coord = np.zeros((len(operations),) + structure.coord.shape)
|
|
1496
|
-
|
|
1497
1652
|
# Apply corresponding transformation for each copy in the assembly
|
|
1498
1653
|
for i, operation in enumerate(operations):
|
|
1499
1654
|
coord = structure.coord
|
|
@@ -1507,7 +1662,11 @@ def _apply_transformations(structure, transformation_dict, operations):
|
|
|
1507
1662
|
coord += translation_vector
|
|
1508
1663
|
assembly_coord[i] = coord
|
|
1509
1664
|
|
|
1510
|
-
|
|
1665
|
+
assembly = repeat(structure, assembly_coord)
|
|
1666
|
+
assembly.set_annotation(
|
|
1667
|
+
"sym_id", np.repeat(np.arange(len(operations)), structure.array_length())
|
|
1668
|
+
)
|
|
1669
|
+
return assembly
|
|
1511
1670
|
|
|
1512
1671
|
|
|
1513
1672
|
def _get_transformations(struct_oper):
|
|
Binary file
|