biotite 1.1.0__cp313-cp313-win_amd64.whl → 1.2.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/application.py +3 -3
- biotite/application/autodock/app.py +1 -1
- biotite/application/blast/webapp.py +1 -1
- biotite/application/clustalo/app.py +1 -1
- biotite/application/localapp.py +2 -2
- biotite/application/msaapp.py +10 -10
- biotite/application/muscle/app3.py +3 -3
- biotite/application/muscle/app5.py +3 -3
- biotite/application/sra/app.py +0 -5
- biotite/application/util.py +21 -1
- biotite/application/viennarna/rnaalifold.py +8 -8
- biotite/application/viennarna/rnaplot.py +3 -1
- biotite/application/viennarna/util.py +1 -1
- biotite/application/webapp.py +1 -1
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +191 -0
- biotite/database/entrez/dbnames.py +10 -0
- biotite/database/entrez/download.py +9 -10
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +5 -4
- biotite/database/pubchem/download.py +6 -6
- biotite/database/pubchem/error.py +10 -0
- biotite/database/pubchem/query.py +12 -23
- biotite/database/rcsb/download.py +3 -2
- biotite/database/rcsb/query.py +2 -3
- biotite/database/uniprot/check.py +2 -2
- biotite/database/uniprot/download.py +2 -5
- biotite/database/uniprot/query.py +3 -4
- biotite/file.py +14 -2
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +16 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +198 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1226 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +15 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +71 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/align/__init__.py +0 -4
- biotite/sequence/align/alignment.py +33 -11
- biotite/sequence/align/banded.cp313-win_amd64.pyd +0 -0
- biotite/sequence/align/banded.pyx +21 -21
- biotite/sequence/align/cigar.py +2 -2
- biotite/sequence/align/kmeralphabet.cp313-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +2 -2
- biotite/sequence/align/kmersimilarity.cp313-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp313-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +6 -6
- biotite/sequence/align/localgapped.cp313-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.pyx +47 -47
- biotite/sequence/align/localungapped.cp313-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.pyx +10 -10
- biotite/sequence/align/matrix.py +12 -3
- biotite/sequence/align/multiple.cp313-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.cp313-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.pyx +35 -35
- biotite/sequence/align/permutation.cp313-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp313-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +2 -2
- biotite/sequence/align/statistics.py +1 -1
- biotite/sequence/align/tracetable.cp313-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +2 -2
- biotite/sequence/annotation.py +19 -13
- biotite/sequence/codec.cp313-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +1 -2
- biotite/sequence/graphics/alignment.py +25 -39
- biotite/sequence/graphics/dendrogram.py +4 -2
- biotite/sequence/graphics/features.py +2 -2
- biotite/sequence/graphics/logo.py +10 -12
- biotite/sequence/io/fasta/convert.py +1 -2
- biotite/sequence/io/fasta/file.py +1 -1
- biotite/sequence/io/fastq/file.py +3 -3
- biotite/sequence/io/genbank/file.py +3 -3
- biotite/sequence/io/genbank/sequence.py +2 -0
- biotite/sequence/io/gff/convert.py +1 -1
- biotite/sequence/io/gff/file.py +1 -2
- biotite/sequence/phylo/nj.cp313-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp313-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp313-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +19 -25
- biotite/sequence/search.py +0 -1
- biotite/sequence/seqtypes.py +12 -5
- biotite/sequence/sequence.py +1 -2
- biotite/structure/__init__.py +2 -0
- biotite/structure/alphabet/i3d.py +1 -2
- biotite/structure/alphabet/pb.py +1 -2
- biotite/structure/alphabet/unkerasify.py +8 -2
- biotite/structure/atoms.py +35 -27
- biotite/structure/basepairs.py +26 -26
- biotite/structure/bonds.cp313-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +8 -5
- biotite/structure/box.py +19 -21
- biotite/structure/celllist.cp313-win_amd64.pyd +0 -0
- biotite/structure/celllist.pyx +83 -67
- biotite/structure/chains.py +5 -37
- biotite/structure/charges.cp313-win_amd64.pyd +0 -0
- biotite/structure/compare.py +420 -13
- biotite/structure/density.py +1 -1
- biotite/structure/dotbracket.py +27 -28
- biotite/structure/filter.py +8 -8
- biotite/structure/geometry.py +15 -15
- biotite/structure/hbond.py +17 -19
- biotite/structure/info/atoms.py +11 -2
- biotite/structure/info/ccd.py +0 -2
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +0 -3
- biotite/structure/info/misc.py +0 -1
- biotite/structure/info/radii.py +92 -22
- biotite/structure/info/standardize.py +1 -2
- biotite/structure/integrity.py +4 -6
- biotite/structure/io/general.py +2 -2
- biotite/structure/io/gro/file.py +8 -9
- biotite/structure/io/mol/convert.py +1 -1
- biotite/structure/io/mol/ctab.py +33 -28
- biotite/structure/io/mol/mol.py +1 -1
- biotite/structure/io/mol/sdf.py +39 -13
- biotite/structure/io/pdb/convert.py +2 -3
- biotite/structure/io/pdb/file.py +11 -22
- biotite/structure/io/pdb/hybrid36.cp313-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/file.py +4 -4
- biotite/structure/io/pdbx/bcif.py +22 -7
- biotite/structure/io/pdbx/cif.py +20 -7
- biotite/structure/io/pdbx/component.py +6 -0
- biotite/structure/io/pdbx/compress.py +2 -2
- biotite/structure/io/pdbx/convert.py +222 -33
- biotite/structure/io/pdbx/encoding.cp313-win_amd64.pyd +0 -0
- biotite/structure/io/trajfile.py +9 -6
- biotite/structure/io/util.py +38 -0
- biotite/structure/mechanics.py +0 -1
- biotite/structure/molecules.py +0 -15
- biotite/structure/pseudoknots.py +7 -13
- biotite/structure/repair.py +2 -4
- biotite/structure/residues.py +13 -24
- biotite/structure/rings.py +335 -0
- biotite/structure/sasa.cp313-win_amd64.pyd +0 -0
- biotite/structure/sasa.pyx +2 -1
- biotite/structure/segments.py +68 -9
- biotite/structure/sequence.py +0 -1
- biotite/structure/sse.py +0 -2
- biotite/structure/superimpose.py +74 -62
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +12 -25
- biotite/structure/util.py +3 -3
- biotite/version.py +9 -4
- biotite/visualize.py +111 -1
- {biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/METADATA +5 -3
- {biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/RECORD +155 -135
- {biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
- {biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# information.
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite.structure.io.pdbx"
|
|
6
|
-
__author__ = "Fabrice Allain, Patrick Kunzmann"
|
|
6
|
+
__author__ = "Fabrice Allain, Patrick Kunzmann, Cheyenne Ziegler"
|
|
7
7
|
__all__ = [
|
|
8
8
|
"get_sequence",
|
|
9
9
|
"get_model_count",
|
|
@@ -13,6 +13,7 @@ __all__ = [
|
|
|
13
13
|
"set_component",
|
|
14
14
|
"list_assemblies",
|
|
15
15
|
"get_assembly",
|
|
16
|
+
"get_sse",
|
|
16
17
|
]
|
|
17
18
|
|
|
18
19
|
import itertools
|
|
@@ -81,6 +82,7 @@ PDBX_BOND_TYPE_TO_ORDER = {
|
|
|
81
82
|
BondType.AROMATIC_TRIPLE: "trip",
|
|
82
83
|
# These are masked later, it is merely added here to avoid a KeyError
|
|
83
84
|
BondType.ANY: "",
|
|
85
|
+
BondType.AROMATIC: "",
|
|
84
86
|
BondType.COORDINATION: "",
|
|
85
87
|
}
|
|
86
88
|
# Map 'chem_comp_bond' bond orders and aromaticity to 'BondType'...
|
|
@@ -92,12 +94,19 @@ COMP_BOND_ORDER_TO_TYPE = {
|
|
|
92
94
|
("SING", "Y"): BondType.AROMATIC_SINGLE,
|
|
93
95
|
("DOUB", "Y"): BondType.AROMATIC_DOUBLE,
|
|
94
96
|
("TRIP", "Y"): BondType.AROMATIC_TRIPLE,
|
|
97
|
+
("AROM", "Y"): BondType.AROMATIC,
|
|
95
98
|
}
|
|
96
99
|
# ...and vice versa
|
|
97
100
|
COMP_BOND_TYPE_TO_ORDER = {
|
|
98
101
|
bond_type: order for order, bond_type in COMP_BOND_ORDER_TO_TYPE.items()
|
|
99
102
|
}
|
|
100
103
|
CANONICAL_RESIDUE_LIST = canonical_aa_list + canonical_nucleotide_list
|
|
104
|
+
# it was observed that when the number or rows in `atom_site` and `struct_conn`
|
|
105
|
+
# exceed a certain threshold,
|
|
106
|
+
# a dictionary approach is less computation and memory intensive than the dense
|
|
107
|
+
# vectorized approach.
|
|
108
|
+
# https://github.com/biotite-dev/biotite/pull/765#issuecomment-2708867357
|
|
109
|
+
FIND_MATCHES_SWITCH_THRESHOLD = 4000000
|
|
101
110
|
|
|
102
111
|
_proteinseq_type_list = ["polypeptide(D)", "polypeptide(L)"]
|
|
103
112
|
_nucleotideseq_type_list = [
|
|
@@ -160,8 +169,8 @@ def get_sequence(pdbx_file, data_block=None):
|
|
|
160
169
|
-------
|
|
161
170
|
sequence_dict : Dictionary of Sequences
|
|
162
171
|
Dictionary keys are derived from ``entity_poly.pdbx_strand_id``
|
|
163
|
-
(
|
|
164
|
-
|
|
172
|
+
(equivalent to ``atom_site.auth_asym_id``).
|
|
173
|
+
Dictionary values are sequences.
|
|
165
174
|
|
|
166
175
|
Notes
|
|
167
176
|
-----
|
|
@@ -217,9 +226,7 @@ def get_model_count(pdbx_file, data_block=None):
|
|
|
217
226
|
The number of models.
|
|
218
227
|
"""
|
|
219
228
|
block = _get_block(pdbx_file, data_block)
|
|
220
|
-
return len(
|
|
221
|
-
_get_model_starts(block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32))
|
|
222
|
-
)
|
|
229
|
+
return len(np.unique((block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32))))
|
|
223
230
|
|
|
224
231
|
|
|
225
232
|
def get_structure(
|
|
@@ -310,7 +317,6 @@ def get_structure(
|
|
|
310
317
|
>>> arr = get_structure(file, model=1)
|
|
311
318
|
>>> print(len(arr))
|
|
312
319
|
304
|
|
313
|
-
|
|
314
320
|
"""
|
|
315
321
|
block = _get_block(pdbx_file, data_block)
|
|
316
322
|
|
|
@@ -321,13 +327,12 @@ def get_structure(
|
|
|
321
327
|
raise InvalidFileError("Missing 'atom_site' category in file")
|
|
322
328
|
|
|
323
329
|
models = atom_site["pdbx_PDB_model_num"].as_array(np.int32)
|
|
324
|
-
|
|
325
|
-
model_count = len(model_starts)
|
|
330
|
+
model_count = len(np.unique(models))
|
|
326
331
|
atom_count = len(models)
|
|
327
332
|
|
|
328
333
|
if model is None:
|
|
329
334
|
# For a stack, the annotations are derived from the first model
|
|
330
|
-
model_atom_site = _filter_model(atom_site,
|
|
335
|
+
model_atom_site = _filter_model(atom_site, 1)
|
|
331
336
|
# Any field of the category would work here to get the length
|
|
332
337
|
model_length = model_atom_site.row_count
|
|
333
338
|
atoms = AtomArrayStack(model_count, model_length)
|
|
@@ -373,7 +378,7 @@ def get_structure(
|
|
|
373
378
|
f"the given model {model} does not exist"
|
|
374
379
|
)
|
|
375
380
|
|
|
376
|
-
model_atom_site = _filter_model(atom_site,
|
|
381
|
+
model_atom_site = _filter_model(atom_site, model)
|
|
377
382
|
# Any field of the category would work here to get the length
|
|
378
383
|
model_length = model_atom_site.row_count
|
|
379
384
|
atoms = AtomArray(model_length)
|
|
@@ -649,6 +654,17 @@ def _find_matches(query_arrays, reference_arrays):
|
|
|
649
654
|
`reference_arrays` where all query values match the reference counterpart.
|
|
650
655
|
If no match is found for a query, the corresponding index is -1.
|
|
651
656
|
"""
|
|
657
|
+
if (
|
|
658
|
+
query_arrays[0].shape[0] * reference_arrays[0].shape[0]
|
|
659
|
+
<= FIND_MATCHES_SWITCH_THRESHOLD
|
|
660
|
+
):
|
|
661
|
+
match_indices = _find_matches_by_dense_array(query_arrays, reference_arrays)
|
|
662
|
+
else:
|
|
663
|
+
match_indices = _find_matches_by_dict(query_arrays, reference_arrays)
|
|
664
|
+
return match_indices
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def _find_matches_by_dense_array(query_arrays, reference_arrays):
|
|
652
668
|
match_masks_for_all_columns = np.stack(
|
|
653
669
|
[
|
|
654
670
|
query[:, np.newaxis] == reference[np.newaxis, :]
|
|
@@ -676,6 +692,38 @@ def _find_matches(query_arrays, reference_arrays):
|
|
|
676
692
|
return match_indices
|
|
677
693
|
|
|
678
694
|
|
|
695
|
+
def _find_matches_by_dict(query_arrays, reference_arrays):
|
|
696
|
+
# Convert reference arrays to a dictionary for O(1) lookups
|
|
697
|
+
reference_dict = {}
|
|
698
|
+
ambiguous_keys = set()
|
|
699
|
+
for ref_idx, ref_row in enumerate(zip(*reference_arrays)):
|
|
700
|
+
ref_key = tuple(ref_row)
|
|
701
|
+
if ref_key in reference_dict:
|
|
702
|
+
ambiguous_keys.add(ref_key)
|
|
703
|
+
continue
|
|
704
|
+
reference_dict[ref_key] = ref_idx
|
|
705
|
+
|
|
706
|
+
match_indices = []
|
|
707
|
+
for query_idx, query_row in enumerate(zip(*query_arrays)):
|
|
708
|
+
query_key = tuple(query_row)
|
|
709
|
+
occurrence = reference_dict.get(query_key)
|
|
710
|
+
|
|
711
|
+
if occurrence is None:
|
|
712
|
+
# -1 indicates that no match was found in the reference
|
|
713
|
+
match_indices.append(-1)
|
|
714
|
+
elif query_key in ambiguous_keys:
|
|
715
|
+
# The query cannot be uniquely matched to an atom in the reference
|
|
716
|
+
raise InvalidFileError(
|
|
717
|
+
f"The covalent bond in the 'struct_conn' category at index "
|
|
718
|
+
f"{query_idx} cannot be unambiguously assigned to atoms in "
|
|
719
|
+
f"the 'atom_site' category"
|
|
720
|
+
)
|
|
721
|
+
else:
|
|
722
|
+
match_indices.append(occurrence)
|
|
723
|
+
|
|
724
|
+
return np.array(match_indices)
|
|
725
|
+
|
|
726
|
+
|
|
679
727
|
def _get_struct_conn_col_name(col_name, partner):
|
|
680
728
|
"""
|
|
681
729
|
For a column name in ``atom_site`` get the corresponding column name
|
|
@@ -714,21 +762,26 @@ def _filter_altloc(array, atom_site, altloc):
|
|
|
714
762
|
raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
|
|
715
763
|
|
|
716
764
|
|
|
717
|
-
def
|
|
718
|
-
"""
|
|
719
|
-
Get the start index for each model in the arrays of the
|
|
720
|
-
``atom_site`` category.
|
|
721
|
-
"""
|
|
722
|
-
_, indices = np.unique(model_array, return_index=True)
|
|
723
|
-
indices.sort()
|
|
724
|
-
return indices
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
def _filter_model(atom_site, model_starts, model):
|
|
765
|
+
def _filter_model(atom_site, model):
|
|
728
766
|
"""
|
|
729
767
|
Reduce the ``atom_site`` category to the values for the given
|
|
730
768
|
model.
|
|
769
|
+
|
|
770
|
+
Parameters
|
|
771
|
+
----------
|
|
772
|
+
atom_site : CIFCategory or BinaryCIFCategory
|
|
773
|
+
``atom_site`` category containing all models.
|
|
774
|
+
model : int
|
|
775
|
+
The model to be selected.
|
|
776
|
+
|
|
777
|
+
Returns
|
|
778
|
+
-------
|
|
779
|
+
atom_site : CIFCategory or BinaryCIFCategory
|
|
780
|
+
The ``atom_site`` category containing only the selected model.
|
|
731
781
|
"""
|
|
782
|
+
models = atom_site["pdbx_PDB_model_num"].as_array(np.int32)
|
|
783
|
+
_, model_starts = np.unique(models, return_index=True)
|
|
784
|
+
model_starts.sort()
|
|
732
785
|
# Append exclusive stop
|
|
733
786
|
model_starts = np.append(model_starts, [atom_site.row_count])
|
|
734
787
|
# Indexing starts at 0, but model number starts at 1
|
|
@@ -815,7 +868,6 @@ def set_structure(
|
|
|
815
868
|
>>> file = CIFFile()
|
|
816
869
|
>>> set_structure(file, atom_array)
|
|
817
870
|
>>> file.write(os.path.join(path_to_directory, "structure.cif"))
|
|
818
|
-
|
|
819
871
|
"""
|
|
820
872
|
_check_non_empty(array)
|
|
821
873
|
|
|
@@ -836,7 +888,11 @@ def set_structure(
|
|
|
836
888
|
)
|
|
837
889
|
atom_site["label_comp_id"] = np.copy(array.res_name)
|
|
838
890
|
atom_site["label_asym_id"] = np.copy(array.chain_id)
|
|
839
|
-
atom_site["label_entity_id"] =
|
|
891
|
+
atom_site["label_entity_id"] = (
|
|
892
|
+
np.copy(array.label_entity_id)
|
|
893
|
+
if "label_entity_id" in array.get_annotation_categories()
|
|
894
|
+
else _determine_entity_id(array.chain_id)
|
|
895
|
+
)
|
|
840
896
|
atom_site["label_seq_id"] = np.copy(array.res_id)
|
|
841
897
|
atom_site["pdbx_PDB_ins_code"] = Column(
|
|
842
898
|
np.copy(array.ins_code),
|
|
@@ -1181,7 +1237,13 @@ def _filter_canonical_links(array, bond_array):
|
|
|
1181
1237
|
) # fmt: skip
|
|
1182
1238
|
|
|
1183
1239
|
|
|
1184
|
-
def get_component(
|
|
1240
|
+
def get_component(
|
|
1241
|
+
pdbx_file,
|
|
1242
|
+
data_block=None,
|
|
1243
|
+
use_ideal_coord=True,
|
|
1244
|
+
res_name=None,
|
|
1245
|
+
allow_missing_coord=False,
|
|
1246
|
+
):
|
|
1185
1247
|
"""
|
|
1186
1248
|
Create an :class:`AtomArray` for a chemical component from the
|
|
1187
1249
|
``chem_comp_atom`` and, if available, the ``chem_comp_bond``
|
|
@@ -1209,6 +1271,11 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
|
|
|
1209
1271
|
In this case, the component with the given residue name is
|
|
1210
1272
|
read.
|
|
1211
1273
|
By default, all rows would be read in this case.
|
|
1274
|
+
allow_missing_coord : bool, optional
|
|
1275
|
+
Whether to allow missing coordinate values in components.
|
|
1276
|
+
If ``True``, these will be represented as ``nan`` values.
|
|
1277
|
+
If ``False``, a ``ValueError`` is raised when missing coordinates
|
|
1278
|
+
are encountered.
|
|
1212
1279
|
|
|
1213
1280
|
Returns
|
|
1214
1281
|
-------
|
|
@@ -1299,7 +1366,8 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
|
|
|
1299
1366
|
else:
|
|
1300
1367
|
raise
|
|
1301
1368
|
array.coord = _parse_component_coordinates(
|
|
1302
|
-
[atom_category[field] for field in alt_coord_fields]
|
|
1369
|
+
[atom_category[field] for field in alt_coord_fields],
|
|
1370
|
+
allow_missing=allow_missing_coord,
|
|
1303
1371
|
)
|
|
1304
1372
|
|
|
1305
1373
|
try:
|
|
@@ -1310,7 +1378,7 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
|
|
|
1310
1378
|
)
|
|
1311
1379
|
except KeyError:
|
|
1312
1380
|
warnings.warn(
|
|
1313
|
-
"Category 'chem_comp_bond' not found.
|
|
1381
|
+
"Category 'chem_comp_bond' not found. No bonds will be parsed",
|
|
1314
1382
|
UserWarning,
|
|
1315
1383
|
)
|
|
1316
1384
|
else:
|
|
@@ -1330,14 +1398,20 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
|
|
|
1330
1398
|
return array
|
|
1331
1399
|
|
|
1332
1400
|
|
|
1333
|
-
def _parse_component_coordinates(coord_columns):
|
|
1401
|
+
def _parse_component_coordinates(coord_columns, allow_missing=False):
|
|
1334
1402
|
coord = np.zeros((len(coord_columns[0]), 3), dtype=np.float32)
|
|
1335
1403
|
for i, column in enumerate(coord_columns):
|
|
1336
1404
|
if column.mask is not None and column.mask.array.any():
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1405
|
+
if allow_missing:
|
|
1406
|
+
warnings.warn(
|
|
1407
|
+
"Missing coordinates for some atoms. Those will be set to nan",
|
|
1408
|
+
UserWarning,
|
|
1409
|
+
)
|
|
1410
|
+
else:
|
|
1411
|
+
raise ValueError(
|
|
1412
|
+
"Missing coordinates for some atoms",
|
|
1413
|
+
)
|
|
1414
|
+
coord[:, i] = column.as_array(np.float32, masked_value=np.nan)
|
|
1341
1415
|
return coord
|
|
1342
1416
|
|
|
1343
1417
|
|
|
@@ -1445,6 +1519,7 @@ def list_assemblies(pdbx_file, data_block=None):
|
|
|
1445
1519
|
|
|
1446
1520
|
Examples
|
|
1447
1521
|
--------
|
|
1522
|
+
|
|
1448
1523
|
>>> import os.path
|
|
1449
1524
|
>>> file = CIFFile.read(os.path.join(path_to_structures, "1f2n.cif"))
|
|
1450
1525
|
>>> assembly_ids = list_assemblies(file)
|
|
@@ -1742,4 +1817,118 @@ def _convert_string_to_sequence(string, stype):
|
|
|
1742
1817
|
elif stype in _other_type_list:
|
|
1743
1818
|
return None
|
|
1744
1819
|
else:
|
|
1745
|
-
raise InvalidFileError("mmCIF _entity_poly.type unsupported
|
|
1820
|
+
raise InvalidFileError("mmCIF _entity_poly.type unsupported type: " + stype)
|
|
1821
|
+
|
|
1822
|
+
|
|
1823
|
+
def get_sse(pdbx_file, data_block=None, match_model=None):
|
|
1824
|
+
"""
|
|
1825
|
+
Get the secondary structure from a PDBx file.
|
|
1826
|
+
|
|
1827
|
+
Parameters
|
|
1828
|
+
----------
|
|
1829
|
+
pdbx_file : CIFFile or CIFBlock or BinaryCIFFile or BinaryCIFBlock
|
|
1830
|
+
The file object.
|
|
1831
|
+
The following categories are required:
|
|
1832
|
+
|
|
1833
|
+
- ``entity_poly``
|
|
1834
|
+
- ``struct_conf`` (if alpha-helices are present)
|
|
1835
|
+
- ``struct_sheet_range`` (if beta-strands are present)
|
|
1836
|
+
- ``atom_site`` (if `match_model` is set)
|
|
1837
|
+
|
|
1838
|
+
data_block : str, optional
|
|
1839
|
+
The name of the data block.
|
|
1840
|
+
Default is the first (and most times only) data block of the
|
|
1841
|
+
file.
|
|
1842
|
+
If the data block object is passed directly to `pdbx_file`,
|
|
1843
|
+
this parameter is ignored.
|
|
1844
|
+
match_model : None, optional
|
|
1845
|
+
If a model number is given, only secondary structure elements for residues are
|
|
1846
|
+
kept, that are resolved in the given model.
|
|
1847
|
+
This means secondary structure elements for residues that would not appear
|
|
1848
|
+
in a corresponding :class:`AtomArray` from :func:`get_structure()` are removed.
|
|
1849
|
+
By default, all residues in the sequence are kept.
|
|
1850
|
+
|
|
1851
|
+
Returns
|
|
1852
|
+
-------
|
|
1853
|
+
sse_dict : dict of str -> ndarray, dtype=str
|
|
1854
|
+
The dictionary maps the chain ID (derived from ``auth_asym_id``) to the
|
|
1855
|
+
secondary structure of the respective chain.
|
|
1856
|
+
|
|
1857
|
+
- ``"a"``: alpha-helix
|
|
1858
|
+
- ``"b"``: beta-strand
|
|
1859
|
+
- ``"c"``: coil or not an amino acid
|
|
1860
|
+
|
|
1861
|
+
Each secondary structure element corresponds to the ``label_seq_id`` of the
|
|
1862
|
+
``atom_site`` category.
|
|
1863
|
+
This means that the 0-th position of the array corresponds to the residue
|
|
1864
|
+
in ``atom_site`` with ``label_seq_id`` ``1``.
|
|
1865
|
+
|
|
1866
|
+
Examples
|
|
1867
|
+
--------
|
|
1868
|
+
|
|
1869
|
+
>>> import os.path
|
|
1870
|
+
>>> file = CIFFile.read(os.path.join(path_to_structures, "1aki.cif"))
|
|
1871
|
+
>>> sse = get_sse(file, match_model=1)
|
|
1872
|
+
>>> print(sse)
|
|
1873
|
+
{'A': array(['c', 'c', 'c', 'c', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
|
|
1874
|
+
'a', 'c', 'c', 'c', 'c', 'c', 'a', 'a', 'a', 'c', 'c', 'a', 'a',
|
|
1875
|
+
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'c', 'c',
|
|
1876
|
+
'c', 'c', 'c', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'c', 'b', 'b',
|
|
1877
|
+
'b', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c',
|
|
1878
|
+
'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c',
|
|
1879
|
+
'c', 'a', 'a', 'a', 'a', 'a', 'c', 'c', 'c', 'c', 'a', 'a', 'a',
|
|
1880
|
+
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'c', 'a',
|
|
1881
|
+
'a', 'a', 'a', 'c', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'c', 'c',
|
|
1882
|
+
'c', 'c', 'a', 'a', 'a', 'a', 'c', 'c', 'c', 'c', 'c', 'c'],
|
|
1883
|
+
dtype='<U1')}
|
|
1884
|
+
|
|
1885
|
+
If only secondary structure elements for resolved residues are requested, the length
|
|
1886
|
+
of the returned array matches the number of peptide residues in the structure.
|
|
1887
|
+
|
|
1888
|
+
>>> file = CIFFile.read(os.path.join(path_to_structures, "3o5r.cif"))
|
|
1889
|
+
>>> print(len(get_sse(file, match_model=1)["A"]))
|
|
1890
|
+
128
|
|
1891
|
+
>>> atoms = get_structure(file, model=1)
|
|
1892
|
+
>>> atoms = atoms[filter_amino_acids(atoms) & (atoms.chain_id == "A")]
|
|
1893
|
+
>>> print(get_residue_count(atoms))
|
|
1894
|
+
128
|
|
1895
|
+
"""
|
|
1896
|
+
block = _get_block(pdbx_file, data_block)
|
|
1897
|
+
|
|
1898
|
+
# Init all chains with "c" for coil
|
|
1899
|
+
sse_dict = {
|
|
1900
|
+
chain_id: np.repeat("c", len(sequence))
|
|
1901
|
+
for chain_id, sequence in get_sequence(block).items()
|
|
1902
|
+
}
|
|
1903
|
+
|
|
1904
|
+
# Populate SSE arrays with helices and strands
|
|
1905
|
+
for sse_symbol, category_name in [
|
|
1906
|
+
("a", "struct_conf"),
|
|
1907
|
+
("b", "struct_sheet_range"),
|
|
1908
|
+
]:
|
|
1909
|
+
if category_name in block:
|
|
1910
|
+
category = block[category_name]
|
|
1911
|
+
chains = category["beg_auth_asym_id"].as_array(str)
|
|
1912
|
+
start_positions = category["beg_label_seq_id"].as_array(int)
|
|
1913
|
+
end_positions = category["end_label_seq_id"].as_array(int)
|
|
1914
|
+
|
|
1915
|
+
# set alpha helix positions
|
|
1916
|
+
for chain, start, end in zip(chains, start_positions, end_positions):
|
|
1917
|
+
# Translate the 1-based positions from PDBx into 0-based array indices
|
|
1918
|
+
sse_dict[chain][start - 1 : end] = sse_symbol
|
|
1919
|
+
|
|
1920
|
+
if match_model is not None:
|
|
1921
|
+
model_atom_site = _filter_model(block["atom_site"], match_model)
|
|
1922
|
+
chain_ids = model_atom_site["auth_asym_id"].as_array(str)
|
|
1923
|
+
res_ids = model_atom_site["label_seq_id"].as_array(int, masked_value=-1)
|
|
1924
|
+
# Filter out masked residues, i.e. residues not part of a chain
|
|
1925
|
+
mask = res_ids != -1
|
|
1926
|
+
chain_ids = chain_ids[mask]
|
|
1927
|
+
res_ids = res_ids[mask]
|
|
1928
|
+
for chain_id, sse in sse_dict.items():
|
|
1929
|
+
res_ids_in_chain = res_ids[chain_ids == chain_id]
|
|
1930
|
+
# Transform from 1-based residue ID to 0-based index
|
|
1931
|
+
indices = np.unique(res_ids_in_chain) - 1
|
|
1932
|
+
sse_dict[chain_id] = sse[indices]
|
|
1933
|
+
|
|
1934
|
+
return sse_dict
|
|
Binary file
|
biotite/structure/io/trajfile.py
CHANGED
|
@@ -187,9 +187,11 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
|
|
|
187
187
|
time : float or ndarray, dtype=float32, shape=(n,) or None
|
|
188
188
|
The simulation time of the current frame or stack in *ps*.
|
|
189
189
|
|
|
190
|
-
See
|
|
190
|
+
See Also
|
|
191
191
|
--------
|
|
192
|
-
read_iter_structure
|
|
192
|
+
read_iter_structure :
|
|
193
|
+
Get an :class:`AtomArray` for each frame or an :class:`AtomArrayStack`
|
|
194
|
+
for each chunk of frames instead.
|
|
193
195
|
|
|
194
196
|
Notes
|
|
195
197
|
-----
|
|
@@ -315,9 +317,10 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
|
|
|
315
317
|
If `stack_size` is set, multiple frames are returned as
|
|
316
318
|
:class:`AtomArrayStack`.
|
|
317
319
|
|
|
318
|
-
See
|
|
320
|
+
See Also
|
|
319
321
|
--------
|
|
320
|
-
read_iter
|
|
322
|
+
read_iter :
|
|
323
|
+
Get an the raw data for each frame or for each chunk of frames instead.
|
|
321
324
|
|
|
322
325
|
Notes
|
|
323
326
|
-----
|
|
@@ -480,7 +483,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
|
|
|
480
483
|
|
|
481
484
|
Parameters
|
|
482
485
|
----------
|
|
483
|
-
|
|
486
|
+
box : ndarray, dtype=float, shape=(m,3,3)
|
|
484
487
|
The box vectors to be set.
|
|
485
488
|
"""
|
|
486
489
|
self._check_model_count(box)
|
|
@@ -546,7 +549,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
|
|
|
546
549
|
------
|
|
547
550
|
NotImplementedError
|
|
548
551
|
"""
|
|
549
|
-
raise NotImplementedError("Copying is not implemented
|
|
552
|
+
raise NotImplementedError("Copying is not implemented for trajectory files")
|
|
550
553
|
|
|
551
554
|
@classmethod
|
|
552
555
|
@abc.abstractmethod
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Common functions used by a number of subpackages.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.structure.io"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
__all__ = ["number_of_integer_digits"]
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def number_of_integer_digits(values):
|
|
17
|
+
"""
|
|
18
|
+
Get the maximum number of characters needed to represent the
|
|
19
|
+
pre-decimal positions of the given numeric values.
|
|
20
|
+
|
|
21
|
+
Parameters
|
|
22
|
+
----------
|
|
23
|
+
values : ndarray, dtype=float
|
|
24
|
+
The values to be checked.
|
|
25
|
+
|
|
26
|
+
Returns
|
|
27
|
+
-------
|
|
28
|
+
n_digits : int
|
|
29
|
+
The maximum number of characters needed to represent the
|
|
30
|
+
pre-decimal positions of the given numeric values.
|
|
31
|
+
"""
|
|
32
|
+
if len(values) == 0:
|
|
33
|
+
return 0
|
|
34
|
+
values = values.astype(int, copy=False)
|
|
35
|
+
n_digits = 0
|
|
36
|
+
n_digits = max(n_digits, len(str(np.min(values))))
|
|
37
|
+
n_digits = max(n_digits, len(str(np.max(values))))
|
|
38
|
+
return n_digits
|
biotite/structure/mechanics.py
CHANGED
biotite/structure/molecules.py
CHANGED
|
@@ -39,11 +39,6 @@ def get_molecule_indices(array):
|
|
|
39
39
|
Consequently, the length of this list is equal to the number of
|
|
40
40
|
molecules in the input `array`.
|
|
41
41
|
|
|
42
|
-
See also
|
|
43
|
-
--------
|
|
44
|
-
get_molecule_masks
|
|
45
|
-
molecule_iter
|
|
46
|
-
|
|
47
42
|
Examples
|
|
48
43
|
--------
|
|
49
44
|
Get an :class:`AtomArray` for ATP and show that it is a single
|
|
@@ -157,11 +152,6 @@ def get_molecule_masks(array):
|
|
|
157
152
|
Consequently, the length of this list is equal to the number of
|
|
158
153
|
molecules in the input `array`.
|
|
159
154
|
|
|
160
|
-
See also
|
|
161
|
-
--------
|
|
162
|
-
get_molecule_indices
|
|
163
|
-
molecule_iter
|
|
164
|
-
|
|
165
155
|
Examples
|
|
166
156
|
--------
|
|
167
157
|
Get an :class:`AtomArray` for ATP and show that it is a single
|
|
@@ -270,11 +260,6 @@ def molecule_iter(array):
|
|
|
270
260
|
molecule : AtomArray or AtomArrayStack
|
|
271
261
|
A single molecule of the input `array`.
|
|
272
262
|
|
|
273
|
-
See also
|
|
274
|
-
--------
|
|
275
|
-
get_molecule_indices
|
|
276
|
-
get_molecule_masks
|
|
277
|
-
|
|
278
263
|
Examples
|
|
279
264
|
--------
|
|
280
265
|
Get an :class:`AtomArray` for ATP and break it into two molecules
|
biotite/structure/pseudoknots.py
CHANGED
|
@@ -69,6 +69,11 @@ def pseudoknots(base_pairs, scores=None, max_pseudoknot_order=None):
|
|
|
69
69
|
Therefore, there are no pseudoknots between base pairs with the same
|
|
70
70
|
pseudoknot order.
|
|
71
71
|
|
|
72
|
+
References
|
|
73
|
+
----------
|
|
74
|
+
|
|
75
|
+
.. footbibliography::
|
|
76
|
+
|
|
72
77
|
Examples
|
|
73
78
|
--------
|
|
74
79
|
Remove the pseudoknotted base pair for the sequence *ABCbac*, where
|
|
@@ -102,17 +107,6 @@ def pseudoknots(base_pairs, scores=None, max_pseudoknot_order=None):
|
|
|
102
107
|
[[0 0 1]]
|
|
103
108
|
>>> print(dot_bracket(basepairs, 6)[0])
|
|
104
109
|
(([))]
|
|
105
|
-
|
|
106
|
-
See Also
|
|
107
|
-
--------
|
|
108
|
-
base_pairs
|
|
109
|
-
dot_bracket
|
|
110
|
-
|
|
111
|
-
References
|
|
112
|
-
----------
|
|
113
|
-
|
|
114
|
-
.. footbibliography::
|
|
115
|
-
|
|
116
110
|
"""
|
|
117
111
|
if len(base_pairs) == 0:
|
|
118
112
|
# No base pairs -> empty pseudoknot order array
|
|
@@ -149,9 +143,9 @@ class _Region:
|
|
|
149
143
|
|
|
150
144
|
Parameters
|
|
151
145
|
----------
|
|
152
|
-
base_pairs: ndarray, shape=(n,2), dtype=int
|
|
146
|
+
base_pairs : ndarray, shape=(n,2), dtype=int
|
|
153
147
|
All base pairs of the structure the region is a subset for.
|
|
154
|
-
region_pairs: ndarray, dtype=int
|
|
148
|
+
region_pairs : ndarray, dtype=int
|
|
155
149
|
The indices of the base pairs in ``base_pairs`` that are part of
|
|
156
150
|
the region.
|
|
157
151
|
scores : ndarray, dtype=int, shape=(n,) (default: None)
|
biotite/structure/repair.py
CHANGED
|
@@ -48,7 +48,6 @@ def create_continuous_res_ids(atoms, restart_each_chain=True):
|
|
|
48
48
|
>>> res_ids, _ = get_residues(atom_array)
|
|
49
49
|
>>> print(res_ids)
|
|
50
50
|
[ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]
|
|
51
|
-
|
|
52
51
|
"""
|
|
53
52
|
res_ids_diff = np.zeros(atoms.array_length(), dtype=int)
|
|
54
53
|
res_starts = get_residue_starts(atoms)
|
|
@@ -80,7 +79,7 @@ def infer_elements(atoms):
|
|
|
80
79
|
|
|
81
80
|
See Also
|
|
82
81
|
--------
|
|
83
|
-
create_atoms_names : The opposite of this function
|
|
82
|
+
create_atoms_names : The opposite of this function.
|
|
84
83
|
|
|
85
84
|
Examples
|
|
86
85
|
--------
|
|
@@ -89,7 +88,6 @@ def infer_elements(atoms):
|
|
|
89
88
|
['N' 'C' 'C' 'O' 'C' 'C' 'O' 'N' 'H' 'H']
|
|
90
89
|
>>> print(infer_elements(["CA", "C", "C1", "OD1", "HD21", "1H", "FE"]))
|
|
91
90
|
['C' 'C' 'C' 'O' 'H' 'H' 'FE']
|
|
92
|
-
|
|
93
91
|
"""
|
|
94
92
|
if isinstance(atoms, (AtomArray, AtomArrayStack)):
|
|
95
93
|
atom_names = atoms.atom_name
|
|
@@ -117,7 +115,7 @@ def create_atom_names(atoms):
|
|
|
117
115
|
|
|
118
116
|
See Also
|
|
119
117
|
--------
|
|
120
|
-
infer_elements : The opposite of this function
|
|
118
|
+
infer_elements : The opposite of this function.
|
|
121
119
|
|
|
122
120
|
Notes
|
|
123
121
|
-----
|