biotite 1.0.1__cp312-cp312-win_amd64.whl → 1.1.0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/dssp/app.py +13 -3
- biotite/application/localapp.py +34 -0
- biotite/application/muscle/app3.py +2 -15
- biotite/application/muscle/app5.py +2 -2
- biotite/application/util.py +1 -1
- biotite/application/viennarna/rnaplot.py +6 -2
- biotite/database/rcsb/query.py +6 -6
- biotite/database/uniprot/check.py +20 -15
- biotite/database/uniprot/download.py +1 -1
- biotite/database/uniprot/query.py +1 -1
- biotite/sequence/align/alignment.py +16 -3
- biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/banded.pyx +5 -5
- biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +17 -0
- biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +52 -42
- biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/matrix.py +273 -55
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +3 -0
- biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
- biotite/sequence/graphics/colorschemes.py +44 -11
- biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +86 -4
- biotite/sequence/seqtypes.py +124 -3
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +4 -3
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +110 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +171 -0
- biotite/structure/alphabet/unkerasify.py +122 -0
- biotite/structure/atoms.py +129 -40
- biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +72 -21
- biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
- biotite/structure/charges.cp312-win_amd64.pyd +0 -0
- biotite/structure/geometry.py +60 -113
- biotite/structure/info/__init__.py +1 -0
- biotite/structure/info/atoms.py +13 -13
- biotite/structure/info/bonds.py +12 -6
- biotite/structure/info/ccd.py +125 -32
- biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
- biotite/structure/info/groups.py +63 -17
- biotite/structure/info/masses.py +9 -6
- biotite/structure/info/misc.py +15 -21
- biotite/structure/info/standardize.py +3 -2
- biotite/structure/io/mol/sdf.py +41 -40
- biotite/structure/io/pdb/convert.py +2 -0
- biotite/structure/io/pdb/file.py +74 -3
- biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +1 -0
- biotite/structure/io/pdbx/bcif.py +32 -8
- biotite/structure/io/pdbx/cif.py +72 -59
- biotite/structure/io/pdbx/component.py +9 -4
- biotite/structure/io/pdbx/compress.py +321 -0
- biotite/structure/io/pdbx/convert.py +194 -48
- biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/encoding.pyx +98 -17
- biotite/structure/molecules.py +141 -141
- biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
- biotite/structure/segments.py +1 -2
- biotite/structure/util.py +73 -1
- biotite/version.py +2 -2
- {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/METADATA +3 -1
- {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/RECORD +86 -76
- biotite/structure/info/ccd/README.rst +0 -8
- biotite/structure/info/ccd/amino_acids.txt +0 -1663
- biotite/structure/info/ccd/carbohydrates.txt +0 -1135
- biotite/structure/info/ccd/nucleotides.txt +0 -798
- {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
- {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -24,6 +24,10 @@ from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
|
|
|
24
24
|
from biotite.structure.bonds import BondList, BondType, connect_via_residue_names
|
|
25
25
|
from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
|
|
26
26
|
from biotite.structure.error import BadStructureError
|
|
27
|
+
from biotite.structure.filter import _canonical_aa_list as canonical_aa_list
|
|
28
|
+
from biotite.structure.filter import (
|
|
29
|
+
_canonical_nucleotide_list as canonical_nucleotide_list,
|
|
30
|
+
)
|
|
27
31
|
from biotite.structure.filter import (
|
|
28
32
|
filter_first_altloc,
|
|
29
33
|
filter_highest_occupancy_altloc,
|
|
@@ -36,32 +40,38 @@ from biotite.structure.io.pdbx.bcif import (
|
|
|
36
40
|
from biotite.structure.io.pdbx.cif import CIFBlock, CIFFile
|
|
37
41
|
from biotite.structure.io.pdbx.component import MaskValue
|
|
38
42
|
from biotite.structure.io.pdbx.encoding import StringArrayEncoding
|
|
39
|
-
from biotite.structure.residues import
|
|
43
|
+
from biotite.structure.residues import (
|
|
44
|
+
get_residue_count,
|
|
45
|
+
get_residue_positions,
|
|
46
|
+
get_residue_starts_for,
|
|
47
|
+
)
|
|
40
48
|
from biotite.structure.util import matrix_rotate
|
|
41
49
|
|
|
42
|
-
#
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
"
|
|
47
|
-
"
|
|
48
|
-
"
|
|
49
|
-
"
|
|
50
|
-
"
|
|
51
|
-
"
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
"
|
|
57
|
-
"
|
|
58
|
-
"
|
|
59
|
-
"
|
|
50
|
+
# Bond types in `struct_conn` category that refer to covalent bonds
|
|
51
|
+
PDBX_BOND_TYPE_ID_TO_TYPE = {
|
|
52
|
+
# Although a covalent bond could in theory have a higher bond order,
|
|
53
|
+
# practically inter-residue bonds are always single
|
|
54
|
+
"covale": BondType.SINGLE,
|
|
55
|
+
"covale_base": BondType.SINGLE,
|
|
56
|
+
"covale_phosphate": BondType.SINGLE,
|
|
57
|
+
"covale_sugar": BondType.SINGLE,
|
|
58
|
+
"disulf": BondType.SINGLE,
|
|
59
|
+
"modres": BondType.SINGLE,
|
|
60
|
+
"modres_link": BondType.SINGLE,
|
|
61
|
+
"metalc": BondType.COORDINATION,
|
|
62
|
+
}
|
|
63
|
+
PDBX_BOND_TYPE_TO_TYPE_ID = {
|
|
64
|
+
BondType.ANY: "covale",
|
|
65
|
+
BondType.SINGLE: "covale",
|
|
66
|
+
BondType.DOUBLE: "covale",
|
|
67
|
+
BondType.TRIPLE: "covale",
|
|
68
|
+
BondType.QUADRUPLE: "covale",
|
|
69
|
+
BondType.AROMATIC_SINGLE: "covale",
|
|
70
|
+
BondType.AROMATIC_DOUBLE: "covale",
|
|
71
|
+
BondType.AROMATIC_TRIPLE: "covale",
|
|
72
|
+
BondType.COORDINATION: "metalc",
|
|
60
73
|
}
|
|
61
|
-
# ...and vice versa
|
|
62
74
|
PDBX_BOND_TYPE_TO_ORDER = {
|
|
63
|
-
# 'ANY' is masked later, it is merely added here to avoid a KeyError
|
|
64
|
-
BondType.ANY: "",
|
|
65
75
|
BondType.SINGLE: "sing",
|
|
66
76
|
BondType.DOUBLE: "doub",
|
|
67
77
|
BondType.TRIPLE: "trip",
|
|
@@ -69,6 +79,9 @@ PDBX_BOND_TYPE_TO_ORDER = {
|
|
|
69
79
|
BondType.AROMATIC_SINGLE: "sing",
|
|
70
80
|
BondType.AROMATIC_DOUBLE: "doub",
|
|
71
81
|
BondType.AROMATIC_TRIPLE: "trip",
|
|
82
|
+
# These are masked later; they are merely added here to avoid a KeyError
|
|
83
|
+
BondType.ANY: "",
|
|
84
|
+
BondType.COORDINATION: "",
|
|
72
85
|
}
|
|
73
86
|
# Map 'chem_comp_bond' bond orders and aromaticity to 'BondType'...
|
|
74
87
|
COMP_BOND_ORDER_TO_TYPE = {
|
|
@@ -84,6 +97,7 @@ COMP_BOND_ORDER_TO_TYPE = {
|
|
|
84
97
|
COMP_BOND_TYPE_TO_ORDER = {
|
|
85
98
|
bond_type: order for order, bond_type in COMP_BOND_ORDER_TO_TYPE.items()
|
|
86
99
|
}
|
|
100
|
+
CANONICAL_RESIDUE_LIST = canonical_aa_list + canonical_nucleotide_list
|
|
87
101
|
|
|
88
102
|
_proteinseq_type_list = ["polypeptide(D)", "polypeptide(L)"]
|
|
89
103
|
_nucleotideseq_type_list = [
|
|
@@ -475,16 +489,53 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
|
|
|
475
489
|
array.set_annotation("element", atom_site["type_symbol"].as_array(str))
|
|
476
490
|
|
|
477
491
|
if "atom_id" in extra_fields:
|
|
478
|
-
|
|
492
|
+
if "id" in atom_site:
|
|
493
|
+
array.set_annotation("atom_id", atom_site["id"].as_array(int))
|
|
494
|
+
else:
|
|
495
|
+
warnings.warn(
|
|
496
|
+
"Missing 'id' in 'atom_site' category. 'atom_id' generated automatically.",
|
|
497
|
+
UserWarning,
|
|
498
|
+
)
|
|
499
|
+
array.set_annotation("atom_id", np.arange(array.array_length()))
|
|
479
500
|
extra_fields.remove("atom_id")
|
|
480
501
|
if "b_factor" in extra_fields:
|
|
481
|
-
|
|
502
|
+
if "B_iso_or_equiv" in atom_site:
|
|
503
|
+
array.set_annotation(
|
|
504
|
+
"b_factor", atom_site["B_iso_or_equiv"].as_array(float)
|
|
505
|
+
)
|
|
506
|
+
else:
|
|
507
|
+
warnings.warn(
|
|
508
|
+
"Missing 'B_iso_or_equiv' in 'atom_site' category. 'b_factor' will be set to `nan`.",
|
|
509
|
+
UserWarning,
|
|
510
|
+
)
|
|
511
|
+
array.set_annotation("b_factor", np.full(array.array_length(), np.nan))
|
|
482
512
|
extra_fields.remove("b_factor")
|
|
483
513
|
if "occupancy" in extra_fields:
|
|
484
|
-
|
|
514
|
+
if "occupancy" in atom_site:
|
|
515
|
+
array.set_annotation("occupancy", atom_site["occupancy"].as_array(float))
|
|
516
|
+
else:
|
|
517
|
+
warnings.warn(
|
|
518
|
+
"Missing 'occupancy' in 'atom_site' category. 'occupancy' will be assumed to be 1.0",
|
|
519
|
+
UserWarning,
|
|
520
|
+
)
|
|
521
|
+
array.set_annotation(
|
|
522
|
+
"occupancy", np.ones(array.array_length(), dtype=float)
|
|
523
|
+
)
|
|
485
524
|
extra_fields.remove("occupancy")
|
|
486
525
|
if "charge" in extra_fields:
|
|
487
|
-
|
|
526
|
+
if "pdbx_formal_charge" in atom_site:
|
|
527
|
+
array.set_annotation(
|
|
528
|
+
"charge",
|
|
529
|
+
atom_site["pdbx_formal_charge"].as_array(
|
|
530
|
+
int, 0
|
|
531
|
+
), # masked values are set to 0
|
|
532
|
+
)
|
|
533
|
+
else:
|
|
534
|
+
warnings.warn(
|
|
535
|
+
"Missing 'pdbx_formal_charge' in 'atom_site' category. 'charge' will be set to 0",
|
|
536
|
+
UserWarning,
|
|
537
|
+
)
|
|
538
|
+
array.set_annotation("charge", np.zeros(array.array_length(), dtype=int))
|
|
488
539
|
extra_fields.remove("charge")
|
|
489
540
|
|
|
490
541
|
# Handle all remaining custom fields
|
|
@@ -536,7 +587,8 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
536
587
|
]
|
|
537
588
|
|
|
538
589
|
covale_mask = np.isin(
|
|
539
|
-
struct_conn["conn_type_id"].as_array(str),
|
|
590
|
+
struct_conn["conn_type_id"].as_array(str),
|
|
591
|
+
list(PDBX_BOND_TYPE_ID_TO_TYPE.keys()),
|
|
540
592
|
)
|
|
541
593
|
if "ptnr1_symmetry" in struct_conn:
|
|
542
594
|
covale_mask &= struct_conn["ptnr1_symmetry"].as_array(str, IDENTITY) == IDENTITY
|
|
@@ -576,13 +628,14 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
576
628
|
atoms_indices_1 = atoms_indices_1[mapping_exists_mask]
|
|
577
629
|
atoms_indices_2 = atoms_indices_2[mapping_exists_mask]
|
|
578
630
|
|
|
579
|
-
|
|
580
|
-
bond_order = struct_conn["pdbx_value_order"].as_array(str, "")
|
|
631
|
+
bond_type_id = struct_conn["conn_type_id"].as_array()
|
|
581
632
|
# Consecutively apply the same masks as applied to the atom indices
|
|
582
633
|
# Logical combination does not work here,
|
|
583
634
|
# as the second mask was created based on already filtered data
|
|
584
|
-
|
|
585
|
-
|
|
635
|
+
bond_type_id = bond_type_id[covale_mask][mapping_exists_mask]
|
|
636
|
+
# The type ID is always present in the dictionary,
|
|
637
|
+
# as it was used to filter the applicable bonds
|
|
638
|
+
bond_types = [PDBX_BOND_TYPE_ID_TO_TYPE[type_id] for type_id in bond_type_id]
|
|
586
639
|
|
|
587
640
|
return BondList(
|
|
588
641
|
atom_site.row_count,
|
|
@@ -593,7 +646,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
593
646
|
def _find_matches(query_arrays, reference_arrays):
|
|
594
647
|
"""
|
|
595
648
|
For each index in the `query_arrays` find the indices in the
|
|
596
|
-
`reference_arrays` where all query values the reference counterpart.
|
|
649
|
+
`reference_arrays` where all query values match the reference counterpart.
|
|
597
650
|
If no match is found for a query, the corresponding index is -1.
|
|
598
651
|
"""
|
|
599
652
|
match_masks_for_all_columns = np.stack(
|
|
@@ -703,7 +756,13 @@ def _get_box(block):
|
|
|
703
756
|
return vectors_from_unitcell(len_a, len_b, len_c, alpha, beta, gamma)
|
|
704
757
|
|
|
705
758
|
|
|
706
|
-
def set_structure(
|
|
759
|
+
def set_structure(
|
|
760
|
+
pdbx_file,
|
|
761
|
+
array,
|
|
762
|
+
data_block=None,
|
|
763
|
+
include_bonds=False,
|
|
764
|
+
extra_fields=[],
|
|
765
|
+
):
|
|
707
766
|
"""
|
|
708
767
|
Set the ``atom_site`` category with atom information from an
|
|
709
768
|
:class:`AtomArray` or :class:`AtomArrayStack`.
|
|
@@ -737,6 +796,10 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
737
796
|
category.
|
|
738
797
|
Inter-residue bonds will be written into the ``struct_conn``
|
|
739
798
|
independent of this parameter.
|
|
799
|
+
extra_fields : list of str, optional
|
|
800
|
+
List of additional fields from the ``atom_site`` category
|
|
801
|
+
that should be written into the file.
|
|
802
|
+
Default is an empty list.
|
|
740
803
|
|
|
741
804
|
Notes
|
|
742
805
|
-----
|
|
@@ -797,6 +860,32 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
797
860
|
np.where(array.charge == 0, MaskValue.MISSING, MaskValue.PRESENT),
|
|
798
861
|
)
|
|
799
862
|
|
|
863
|
+
# Handle all remaining custom fields
|
|
864
|
+
if len(extra_fields) > 0:
|
|
865
|
+
# ... check to avoid clashes with standard annotations
|
|
866
|
+
_standard_annotations = [
|
|
867
|
+
"hetero",
|
|
868
|
+
"element",
|
|
869
|
+
"atom_name",
|
|
870
|
+
"res_name",
|
|
871
|
+
"chain_id",
|
|
872
|
+
"res_id",
|
|
873
|
+
"ins_code",
|
|
874
|
+
"atom_id",
|
|
875
|
+
"b_factor",
|
|
876
|
+
"occupancy",
|
|
877
|
+
"charge",
|
|
878
|
+
]
|
|
879
|
+
_reserved_annotation_names = list(atom_site.keys()) + _standard_annotations
|
|
880
|
+
|
|
881
|
+
for annot in extra_fields:
|
|
882
|
+
if annot in _reserved_annotation_names:
|
|
883
|
+
raise ValueError(
|
|
884
|
+
f"Annotation name '{annot}' is reserved and cannot be written to as extra field. "
|
|
885
|
+
"Please choose another name."
|
|
886
|
+
)
|
|
887
|
+
atom_site[annot] = np.copy(array.get_annotation(annot))
|
|
888
|
+
|
|
800
889
|
if array.bonds is not None:
|
|
801
890
|
struct_conn = _set_inter_residue_bonds(array, atom_site)
|
|
802
891
|
if struct_conn is not None:
|
|
@@ -1021,13 +1110,21 @@ def _set_inter_residue_bonds(array, atom_site):
|
|
|
1021
1110
|
if len(bond_array) == 0:
|
|
1022
1111
|
return None
|
|
1023
1112
|
|
|
1113
|
+
# Filter out 'standard' links, i.e. backbone bonds between adjacent canonical
|
|
1114
|
+
# nucleotide/amino acid residues
|
|
1115
|
+
bond_array = bond_array[~_filter_canonical_links(array, bond_array)]
|
|
1116
|
+
if len(bond_array) == 0:
|
|
1117
|
+
return None
|
|
1118
|
+
|
|
1024
1119
|
struct_conn = Category()
|
|
1025
1120
|
struct_conn["id"] = np.arange(1, len(bond_array) + 1)
|
|
1026
|
-
struct_conn["conn_type_id"] =
|
|
1121
|
+
struct_conn["conn_type_id"] = [
|
|
1122
|
+
PDBX_BOND_TYPE_TO_TYPE_ID[btype] for btype in bond_array[:, 2]
|
|
1123
|
+
]
|
|
1027
1124
|
struct_conn["pdbx_value_order"] = Column(
|
|
1028
1125
|
np.array([PDBX_BOND_TYPE_TO_ORDER[btype] for btype in bond_array[:, 2]]),
|
|
1029
1126
|
np.where(
|
|
1030
|
-
bond_array[:, 2]
|
|
1127
|
+
np.isin(bond_array[:, 2], (BondType.ANY, BondType.COORDINATION)),
|
|
1031
1128
|
MaskValue.MISSING,
|
|
1032
1129
|
MaskValue.PRESENT,
|
|
1033
1130
|
),
|
|
@@ -1063,6 +1160,27 @@ def _filter_bonds(array, connection):
|
|
|
1063
1160
|
raise ValueError("Invalid 'connection' option")
|
|
1064
1161
|
|
|
1065
1162
|
|
|
1163
|
+
def _filter_canonical_links(array, bond_array):
|
|
1164
|
+
"""
|
|
1165
|
+
Create a mask selecting backbone bonds between adjacent canonical amino acid or nucleotide residues.
|
|
1166
|
+
"""
|
|
1167
|
+
# Get the residue index for each bonded atom
|
|
1168
|
+
residue_indices = get_residue_positions(array, bond_array[:, :2].flatten()).reshape(
|
|
1169
|
+
-1, 2
|
|
1170
|
+
)
|
|
1171
|
+
|
|
1172
|
+
return (
|
|
1173
|
+
# Must be canonical residues
|
|
1174
|
+
np.isin(array.res_name[bond_array[:, 0]], CANONICAL_RESIDUE_LIST) &
|
|
1175
|
+
np.isin(array.res_name[bond_array[:, 1]], CANONICAL_RESIDUE_LIST) &
|
|
1176
|
+
# Must be backbone bond
|
|
1177
|
+
np.isin(array.atom_name[bond_array[:, 0]], ("C", "O3'")) &
|
|
1178
|
+
np.isin(array.atom_name[bond_array[:, 1]], ("N", "P")) &
|
|
1179
|
+
# Must connect adjacent residues
|
|
1180
|
+
residue_indices[:, 1] - residue_indices[:, 0] == 1
|
|
1181
|
+
) # fmt: skip
|
|
1182
|
+
|
|
1183
|
+
|
|
1066
1184
|
def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
|
|
1067
1185
|
"""
|
|
1068
1186
|
Create an :class:`AtomArray` for a chemical component from the
|
|
@@ -1161,17 +1279,28 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
|
|
|
1161
1279
|
# Swap with the fallback option
|
|
1162
1280
|
coord_fields, alt_coord_fields = alt_coord_fields, coord_fields
|
|
1163
1281
|
try:
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1282
|
+
array.coord = _parse_component_coordinates(
|
|
1283
|
+
[atom_category[field] for field in coord_fields]
|
|
1284
|
+
)
|
|
1285
|
+
except Exception as err:
|
|
1286
|
+
if isinstance(err, KeyError):
|
|
1287
|
+
key = err.args[0]
|
|
1288
|
+
warnings.warn(
|
|
1289
|
+
f"Attribute '{key}' not found within 'chem_comp_atom' category. "
|
|
1290
|
+
f"The fallback coordinates will be used instead",
|
|
1291
|
+
UserWarning,
|
|
1292
|
+
)
|
|
1293
|
+
elif isinstance(err, ValueError):
|
|
1294
|
+
warnings.warn(
|
|
1295
|
+
"The coordinates are missing for some atoms. "
|
|
1296
|
+
"The fallback coordinates will be used instead",
|
|
1297
|
+
UserWarning,
|
|
1298
|
+
)
|
|
1299
|
+
else:
|
|
1300
|
+
raise
|
|
1301
|
+
array.coord = _parse_component_coordinates(
|
|
1302
|
+
[atom_category[field] for field in alt_coord_fields]
|
|
1172
1303
|
)
|
|
1173
|
-
for i, field in enumerate(alt_coord_fields):
|
|
1174
|
-
array.coord[:, i] = atom_category[field].as_array(np.float32)
|
|
1175
1304
|
|
|
1176
1305
|
try:
|
|
1177
1306
|
bond_category = block["chem_comp_bond"]
|
|
@@ -1201,6 +1330,17 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
|
|
|
1201
1330
|
return array
|
|
1202
1331
|
|
|
1203
1332
|
|
|
1333
|
+
def _parse_component_coordinates(coord_columns):
|
|
1334
|
+
coord = np.zeros((len(coord_columns[0]), 3), dtype=np.float32)
|
|
1335
|
+
for i, column in enumerate(coord_columns):
|
|
1336
|
+
if column.mask is not None and column.mask.array.any():
|
|
1337
|
+
raise ValueError(
|
|
1338
|
+
"Missing coordinates for some atoms",
|
|
1339
|
+
)
|
|
1340
|
+
coord[:, i] = column.as_array(np.float32)
|
|
1341
|
+
return coord
|
|
1342
|
+
|
|
1343
|
+
|
|
1204
1344
|
def set_component(pdbx_file, array, data_block=None):
|
|
1205
1345
|
"""
|
|
1206
1346
|
Set the ``chem_comp_atom`` and, if bonds are available,
|
|
@@ -1417,7 +1557,10 @@ def get_assembly(
|
|
|
1417
1557
|
Returns
|
|
1418
1558
|
-------
|
|
1419
1559
|
assembly : AtomArray or AtomArrayStack
|
|
1420
|
-
The assembly.
|
|
1560
|
+
The assembly.
|
|
1561
|
+
The return type depends on the `model` parameter.
|
|
1562
|
+
Contains the `sym_id` annotation, which enumerates the copies of the asymmetric
|
|
1563
|
+
unit in the assembly.
|
|
1421
1564
|
|
|
1422
1565
|
Examples
|
|
1423
1566
|
--------
|
|
@@ -1506,7 +1649,6 @@ def _apply_transformations(structure, transformation_dict, operations):
|
|
|
1506
1649
|
"""
|
|
1507
1650
|
# Additional first dimension for 'structure.repeat()'
|
|
1508
1651
|
assembly_coord = np.zeros((len(operations),) + structure.coord.shape)
|
|
1509
|
-
|
|
1510
1652
|
# Apply corresponding transformation for each copy in the assembly
|
|
1511
1653
|
for i, operation in enumerate(operations):
|
|
1512
1654
|
coord = structure.coord
|
|
@@ -1520,7 +1662,11 @@ def _apply_transformations(structure, transformation_dict, operations):
|
|
|
1520
1662
|
coord += translation_vector
|
|
1521
1663
|
assembly_coord[i] = coord
|
|
1522
1664
|
|
|
1523
|
-
|
|
1665
|
+
assembly = repeat(structure, assembly_coord)
|
|
1666
|
+
assembly.set_annotation(
|
|
1667
|
+
"sym_id", np.repeat(np.arange(len(operations)), structure.array_length())
|
|
1668
|
+
)
|
|
1669
|
+
return assembly
|
|
1524
1670
|
|
|
1525
1671
|
|
|
1526
1672
|
def _get_transformations(struct_oper):
|
|
Binary file
|
|
@@ -287,7 +287,8 @@ class FixedPointEncoding(Encoding):
|
|
|
287
287
|
The data type of the array to be encoded.
|
|
288
288
|
Either a NumPy dtype or a *BinaryCIF* type code is accepted.
|
|
289
289
|
The dtype must be a float type.
|
|
290
|
-
If omitted,
|
|
290
|
+
If omitted, the data type is taken from the data the
|
|
291
|
+
first time :meth:`encode()` is called.
|
|
291
292
|
|
|
292
293
|
Attributes
|
|
293
294
|
----------
|
|
@@ -304,7 +305,7 @@ class FixedPointEncoding(Encoding):
|
|
|
304
305
|
[987 654]
|
|
305
306
|
"""
|
|
306
307
|
factor: ...
|
|
307
|
-
src_type: ... =
|
|
308
|
+
src_type: ... = None
|
|
308
309
|
|
|
309
310
|
def __post_init__(self):
|
|
310
311
|
if self.src_type is not None:
|
|
@@ -315,6 +316,14 @@ class FixedPointEncoding(Encoding):
|
|
|
315
316
|
)
|
|
316
317
|
|
|
317
318
|
def encode(self, data):
|
|
319
|
+
# If not given in constructor, it is determined from the data
|
|
320
|
+
if self.src_type is None:
|
|
321
|
+
self.src_type = TypeCode.from_dtype(data.dtype)
|
|
322
|
+
if self.src_type not in (TypeCode.FLOAT32, TypeCode.FLOAT64):
|
|
323
|
+
raise ValueError(
|
|
324
|
+
"Only floating point types are supported"
|
|
325
|
+
)
|
|
326
|
+
|
|
318
327
|
# Round to avoid wrong values due to floating point inaccuracies
|
|
319
328
|
return np.round(data * self.factor).astype(np.int32)
|
|
320
329
|
|
|
@@ -340,7 +349,8 @@ class IntervalQuantizationEncoding(Encoding):
|
|
|
340
349
|
The data type of the array to be encoded.
|
|
341
350
|
Either a NumPy dtype or a *BinaryCIF* type code is accepted.
|
|
342
351
|
The dtype must be a float type.
|
|
343
|
-
If omitted,
|
|
352
|
+
If omitted, the data type is taken from the data the
|
|
353
|
+
first time :meth:`encode()` is called.
|
|
344
354
|
|
|
345
355
|
Attributes
|
|
346
356
|
----------
|
|
@@ -367,13 +377,17 @@ class IntervalQuantizationEncoding(Encoding):
|
|
|
367
377
|
min: ...
|
|
368
378
|
max: ...
|
|
369
379
|
num_steps: ...
|
|
370
|
-
src_type: ... =
|
|
380
|
+
src_type: ... = None
|
|
371
381
|
|
|
372
382
|
def __post_init__(self):
|
|
373
383
|
if self.src_type is not None:
|
|
374
384
|
self.src_type = TypeCode.from_dtype(self.src_type)
|
|
375
385
|
|
|
376
386
|
def encode(self, data):
|
|
387
|
+
# If not given in constructor, it is determined from the data
|
|
388
|
+
if self.src_type is None:
|
|
389
|
+
self.src_type = TypeCode.from_dtype(data.dtype)
|
|
390
|
+
|
|
377
391
|
steps = np.linspace(
|
|
378
392
|
self.min, self.max, self.num_steps, dtype=data.dtype
|
|
379
393
|
)
|
|
@@ -524,7 +538,8 @@ class DeltaEncoding(Encoding):
|
|
|
524
538
|
first time :meth:`encode()` is called.
|
|
525
539
|
origin : int, optional
|
|
526
540
|
The starting value from which the differences are calculated.
|
|
527
|
-
If omitted, the
|
|
541
|
+
If omitted, the value is taken from the first array element the
|
|
542
|
+
first time :meth:`encode()` is called.
|
|
528
543
|
|
|
529
544
|
Attributes
|
|
530
545
|
----------
|
|
@@ -535,11 +550,14 @@ class DeltaEncoding(Encoding):
|
|
|
535
550
|
--------
|
|
536
551
|
|
|
537
552
|
>>> data = np.array([1, 1, 2, 3, 5, 8])
|
|
538
|
-
>>>
|
|
539
|
-
|
|
553
|
+
>>> encoding = DeltaEncoding()
|
|
554
|
+
>>> print(encoding.encode(data))
|
|
555
|
+
[0 0 1 1 2 3]
|
|
556
|
+
>>> print(encoding.origin)
|
|
557
|
+
1
|
|
540
558
|
"""
|
|
541
559
|
src_type: ... = None
|
|
542
|
-
origin: ... =
|
|
560
|
+
origin: ... = None
|
|
543
561
|
|
|
544
562
|
def __post_init__(self):
|
|
545
563
|
if self.src_type is not None:
|
|
@@ -549,6 +567,8 @@ class DeltaEncoding(Encoding):
|
|
|
549
567
|
# If not given in constructor, it is determined from the data
|
|
550
568
|
if self.src_type is None:
|
|
551
569
|
self.src_type = TypeCode.from_dtype(data.dtype)
|
|
570
|
+
if self.origin is None:
|
|
571
|
+
self.origin = data[0]
|
|
552
572
|
|
|
553
573
|
data = data - self.origin
|
|
554
574
|
return np.diff(data, prepend=0).astype(np.int32, copy=False)
|
|
@@ -582,7 +602,8 @@ class IntegerPackingEncoding(Encoding):
|
|
|
582
602
|
is_unsigned : bool, optional
|
|
583
603
|
Whether the values should be packed into signed or unsigned
|
|
584
604
|
integers.
|
|
585
|
-
If omitted,
|
|
605
|
+
If omitted, the first time :meth:`encode()` is called it is determined whether
|
|
606
|
+
the values fit into unsigned integers.
|
|
586
607
|
|
|
587
608
|
Attributes
|
|
588
609
|
----------
|
|
@@ -601,7 +622,7 @@ class IntegerPackingEncoding(Encoding):
|
|
|
601
622
|
"""
|
|
602
623
|
byte_count: ...
|
|
603
624
|
src_size: ... = None
|
|
604
|
-
is_unsigned: ... =
|
|
625
|
+
is_unsigned: ... = None
|
|
605
626
|
|
|
606
627
|
def encode(self, data):
|
|
607
628
|
if self.src_size is None:
|
|
@@ -610,6 +631,9 @@ class IntegerPackingEncoding(Encoding):
|
|
|
610
631
|
raise IndexError(
|
|
611
632
|
"Given source size does not match actual data size"
|
|
612
633
|
)
|
|
634
|
+
if self.is_unsigned is None:
|
|
635
|
+
# Only positive values -> use unsigned integers
|
|
636
|
+
self.is_unsigned = data.min().item() >= 0
|
|
613
637
|
|
|
614
638
|
data = data.astype(np.int32, copy=False)
|
|
615
639
|
return self._encode(
|
|
@@ -672,7 +696,7 @@ class IntegerPackingEncoding(Encoding):
|
|
|
672
696
|
# Get length of output array
|
|
673
697
|
# by summing up required length of each element
|
|
674
698
|
cdef int number
|
|
675
|
-
cdef
|
|
699
|
+
cdef long length = 0
|
|
676
700
|
for i in range(data.shape[0]):
|
|
677
701
|
number = data[i]
|
|
678
702
|
if number < 0:
|
|
@@ -750,7 +774,7 @@ class StringArrayEncoding(Encoding):
|
|
|
750
774
|
If omitted, the unique strings are determined from the data the
|
|
751
775
|
first time :meth:`encode()` is called.
|
|
752
776
|
data_encoding : list of Encoding, optional
|
|
753
|
-
The encodings that are applied to the
|
|
777
|
+
The encodings that are applied to the index array.
|
|
754
778
|
If omitted, the array is directly encoded into bytes without
|
|
755
779
|
further compression.
|
|
756
780
|
offset_encoding : list of Encoding, optional
|
|
@@ -837,8 +861,11 @@ class StringArrayEncoding(Encoding):
|
|
|
837
861
|
raise TypeError("Data must be of string type")
|
|
838
862
|
|
|
839
863
|
if self.strings is None:
|
|
840
|
-
# 'unique()' already sorts the strings
|
|
841
|
-
|
|
864
|
+
# 'unique()' already sorts the strings, but this is not necessarily
|
|
865
|
+
# desired, as this makes efficient encoding of the indices more difficult
|
|
866
|
+
# -> Bring into the original order
|
|
867
|
+
_, unique_indices = np.unique(data, return_index=True)
|
|
868
|
+
self.strings = data[np.sort(unique_indices)]
|
|
842
869
|
check_present = False
|
|
843
870
|
else:
|
|
844
871
|
check_present = True
|
|
@@ -888,6 +915,19 @@ _encoding_classes_kinds = {
|
|
|
888
915
|
|
|
889
916
|
|
|
890
917
|
def deserialize_encoding(content):
|
|
918
|
+
"""
|
|
919
|
+
Create a :class:`Encoding` by deserializing the given *BinaryCIF* content.
|
|
920
|
+
|
|
921
|
+
Parameters
|
|
922
|
+
----------
|
|
923
|
+
content : dict
|
|
924
|
+
The encoding represented as *BinaryCIF* dictionary.
|
|
925
|
+
|
|
926
|
+
Returns
|
|
927
|
+
-------
|
|
928
|
+
encoding : Encoding
|
|
929
|
+
The deserialized encoding.
|
|
930
|
+
"""
|
|
891
931
|
try:
|
|
892
932
|
encoding_class = _encoding_classes[content["kind"]]
|
|
893
933
|
except KeyError:
|
|
@@ -898,28 +938,69 @@ def deserialize_encoding(content):
|
|
|
898
938
|
|
|
899
939
|
|
|
900
940
|
def create_uncompressed_encoding(array):
|
|
901
|
-
|
|
941
|
+
"""
|
|
942
|
+
Create a simple encoding for the given array that does not compress the data.
|
|
902
943
|
|
|
903
|
-
|
|
944
|
+
Parameters
|
|
945
|
+
----------
|
|
946
|
+
array : ndarray
|
|
947
|
+
The array to create the encoding for.
|
|
948
|
+
|
|
949
|
+
Returns
|
|
950
|
+
-------
|
|
951
|
+
encoding : list of Encoding
|
|
952
|
+
The encoding for the data.
|
|
953
|
+
"""
|
|
954
|
+
if np.issubdtype(array.dtype, np.str_):
|
|
904
955
|
return [StringArrayEncoding()]
|
|
905
956
|
else:
|
|
906
957
|
return [ByteArrayEncoding()]
|
|
907
958
|
|
|
908
959
|
|
|
909
960
|
def encode_stepwise(data, encoding):
|
|
961
|
+
"""
|
|
962
|
+
Apply a list of encodings stepwise to the given data.
|
|
963
|
+
|
|
964
|
+
Parameters
|
|
965
|
+
----------
|
|
966
|
+
data : ndarray
|
|
967
|
+
The data to be encoded.
|
|
968
|
+
encoding : list of Encoding
|
|
969
|
+
The encodings to be applied.
|
|
970
|
+
|
|
971
|
+
Returns
|
|
972
|
+
-------
|
|
973
|
+
encoded_data : ndarray or bytes
|
|
974
|
+
The encoded data.
|
|
975
|
+
"""
|
|
910
976
|
for encoding in encoding:
|
|
911
977
|
data = encoding.encode(data)
|
|
912
978
|
return data
|
|
913
979
|
|
|
914
980
|
|
|
915
981
|
def decode_stepwise(data, encoding):
|
|
982
|
+
"""
|
|
983
|
+
Apply a list of encodings stepwise, in reverse order, to decode the given data.
|
|
984
|
+
|
|
985
|
+
Parameters
|
|
986
|
+
----------
|
|
987
|
+
data : ndarray or bytes
|
|
988
|
+
The data to be decoded.
|
|
989
|
+
encoding : list of Encoding
|
|
990
|
+
The encodings to be applied.
|
|
991
|
+
|
|
992
|
+
Returns
|
|
993
|
+
-------
|
|
994
|
+
decoded_data : ndarray
|
|
995
|
+
The decoded data.
|
|
996
|
+
"""
|
|
916
997
|
for enc in reversed(encoding):
|
|
917
998
|
data = enc.decode(data)
|
|
918
999
|
return data
|
|
919
1000
|
|
|
920
1001
|
|
|
921
1002
|
def _camel_to_snake_case(attribute_name):
|
|
922
|
-
return
|
|
1003
|
+
return CAMEL_CASE_PATTERN.sub("_", attribute_name).lower()
|
|
923
1004
|
|
|
924
1005
|
|
|
925
1006
|
def _snake_to_camel_case(attribute_name):
|