biotite 1.0.1__cp312-cp312-win_amd64.whl → 1.2.0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/application.py +3 -3
- biotite/application/autodock/app.py +1 -1
- biotite/application/blast/webapp.py +1 -1
- biotite/application/clustalo/app.py +1 -1
- biotite/application/dssp/app.py +13 -3
- biotite/application/localapp.py +36 -2
- biotite/application/msaapp.py +10 -10
- biotite/application/muscle/app3.py +5 -18
- biotite/application/muscle/app5.py +5 -5
- biotite/application/sra/app.py +0 -5
- biotite/application/util.py +22 -2
- biotite/application/viennarna/rnaalifold.py +8 -8
- biotite/application/viennarna/rnaplot.py +9 -3
- biotite/application/viennarna/util.py +1 -1
- biotite/application/webapp.py +1 -1
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +191 -0
- biotite/database/entrez/dbnames.py +10 -0
- biotite/database/entrez/download.py +9 -10
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +5 -4
- biotite/database/pubchem/download.py +6 -6
- biotite/database/pubchem/error.py +10 -0
- biotite/database/pubchem/query.py +12 -23
- biotite/database/rcsb/download.py +3 -2
- biotite/database/rcsb/query.py +8 -9
- biotite/database/uniprot/check.py +22 -17
- biotite/database/uniprot/download.py +3 -6
- biotite/database/uniprot/query.py +4 -5
- biotite/file.py +14 -2
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +16 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +198 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1226 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +15 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +71 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/align/__init__.py +0 -4
- biotite/sequence/align/alignment.py +49 -14
- biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/banded.pyx +26 -26
- biotite/sequence/align/cigar.py +2 -2
- biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +19 -2
- biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +58 -48
- biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.pyx +47 -47
- biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.pyx +10 -10
- biotite/sequence/align/matrix.py +284 -57
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.pyx +35 -35
- biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +2 -2
- biotite/sequence/align/statistics.py +1 -1
- biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +5 -2
- biotite/sequence/annotation.py +19 -13
- biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +1 -2
- biotite/sequence/graphics/alignment.py +25 -39
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
- biotite/sequence/graphics/colorschemes.py +44 -11
- biotite/sequence/graphics/dendrogram.py +4 -2
- biotite/sequence/graphics/features.py +2 -2
- biotite/sequence/graphics/logo.py +10 -12
- biotite/sequence/io/fasta/convert.py +1 -2
- biotite/sequence/io/fasta/file.py +1 -1
- biotite/sequence/io/fastq/file.py +3 -3
- biotite/sequence/io/genbank/file.py +3 -3
- biotite/sequence/io/genbank/sequence.py +2 -0
- biotite/sequence/io/gff/convert.py +1 -1
- biotite/sequence/io/gff/file.py +1 -2
- biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +105 -29
- biotite/sequence/search.py +0 -1
- biotite/sequence/seqtypes.py +136 -8
- biotite/sequence/sequence.py +1 -2
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +6 -3
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +163 -66
- biotite/structure/basepairs.py +26 -26
- biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +79 -25
- biotite/structure/box.py +19 -21
- biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
- biotite/structure/celllist.pyx +83 -67
- biotite/structure/chains.py +5 -37
- biotite/structure/charges.cp312-win_amd64.pyd +0 -0
- biotite/structure/compare.py +420 -13
- biotite/structure/density.py +1 -1
- biotite/structure/dotbracket.py +27 -28
- biotite/structure/filter.py +8 -8
- biotite/structure/geometry.py +74 -127
- biotite/structure/hbond.py +17 -19
- biotite/structure/info/__init__.py +1 -0
- biotite/structure/info/atoms.py +24 -15
- biotite/structure/info/bonds.py +12 -6
- biotite/structure/info/ccd.py +125 -34
- biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
- biotite/structure/info/groups.py +62 -19
- biotite/structure/info/masses.py +9 -6
- biotite/structure/info/misc.py +15 -22
- biotite/structure/info/radii.py +92 -22
- biotite/structure/info/standardize.py +4 -4
- biotite/structure/integrity.py +4 -6
- biotite/structure/io/general.py +2 -2
- biotite/structure/io/gro/file.py +8 -9
- biotite/structure/io/mol/convert.py +1 -1
- biotite/structure/io/mol/ctab.py +33 -28
- biotite/structure/io/mol/mol.py +1 -1
- biotite/structure/io/mol/sdf.py +80 -53
- biotite/structure/io/pdb/convert.py +4 -3
- biotite/structure/io/pdb/file.py +85 -25
- biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/file.py +36 -36
- biotite/structure/io/pdbx/__init__.py +1 -0
- biotite/structure/io/pdbx/bcif.py +54 -15
- biotite/structure/io/pdbx/cif.py +92 -66
- biotite/structure/io/pdbx/component.py +15 -4
- biotite/structure/io/pdbx/compress.py +321 -0
- biotite/structure/io/pdbx/convert.py +410 -75
- biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/encoding.pyx +98 -17
- biotite/structure/io/trajfile.py +9 -6
- biotite/structure/io/util.py +38 -0
- biotite/structure/mechanics.py +0 -1
- biotite/structure/molecules.py +141 -156
- biotite/structure/pseudoknots.py +7 -13
- biotite/structure/repair.py +2 -4
- biotite/structure/residues.py +13 -24
- biotite/structure/rings.py +335 -0
- biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
- biotite/structure/sasa.pyx +2 -1
- biotite/structure/segments.py +69 -11
- biotite/structure/sequence.py +0 -1
- biotite/structure/sse.py +0 -2
- biotite/structure/superimpose.py +74 -62
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +12 -25
- biotite/structure/util.py +76 -4
- biotite/version.py +9 -4
- biotite/visualize.py +111 -1
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
- biotite/structure/info/ccd/README.rst +0 -8
- biotite/structure/info/ccd/amino_acids.txt +0 -1663
- biotite/structure/info/ccd/carbohydrates.txt +0 -1135
- biotite/structure/info/ccd/nucleotides.txt +0 -798
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/sequence/seqtypes.py
CHANGED
|
@@ -4,10 +4,22 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite.sequence"
|
|
6
6
|
__author__ = "Patrick Kunzmann", "Thomas Nevolianis"
|
|
7
|
-
__all__ = [
|
|
8
|
-
|
|
7
|
+
__all__ = [
|
|
8
|
+
"GeneralSequence",
|
|
9
|
+
"NucleotideSequence",
|
|
10
|
+
"ProteinSequence",
|
|
11
|
+
"PositionalSequence",
|
|
12
|
+
"PurePositionalSequence",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass, field
|
|
9
16
|
import numpy as np
|
|
10
|
-
from biotite.sequence.alphabet import
|
|
17
|
+
from biotite.sequence.alphabet import (
|
|
18
|
+
Alphabet,
|
|
19
|
+
AlphabetError,
|
|
20
|
+
AlphabetMapper,
|
|
21
|
+
LetterAlphabet,
|
|
22
|
+
)
|
|
11
23
|
from biotite.sequence.sequence import Sequence
|
|
12
24
|
|
|
13
25
|
|
|
@@ -188,7 +200,6 @@ class NucleotideSequence(Sequence):
|
|
|
188
200
|
TGCGAA
|
|
189
201
|
>>> print(dna_seq.reverse().complement())
|
|
190
202
|
AAGCGT
|
|
191
|
-
|
|
192
203
|
"""
|
|
193
204
|
# Interpreting the sequence code of this object in the
|
|
194
205
|
# complementary alphabet gives the complementary symbols
|
|
@@ -214,7 +225,7 @@ class NucleotideSequence(Sequence):
|
|
|
214
225
|
complete : bool, optional
|
|
215
226
|
If true, the complete sequence is translated. In this case
|
|
216
227
|
the sequence length must be a multiple of 3.
|
|
217
|
-
Otherwise all ORFs are translated.
|
|
228
|
+
Otherwise all ORFs are translated.
|
|
218
229
|
codon_table : CodonTable, optional
|
|
219
230
|
The codon table to be used. By default the default table
|
|
220
231
|
will be used
|
|
@@ -224,7 +235,6 @@ class NucleotideSequence(Sequence):
|
|
|
224
235
|
even if the start codon codes for another amino acid.
|
|
225
236
|
Otherwise the translation starts with the amino acid
|
|
226
237
|
the codon codes for. Only applies, if `complete` is false.
|
|
227
|
-
(Default: False)
|
|
228
238
|
|
|
229
239
|
Returns
|
|
230
240
|
-------
|
|
@@ -254,7 +264,6 @@ class NucleotideSequence(Sequence):
|
|
|
254
264
|
... print(seq)
|
|
255
265
|
MML*
|
|
256
266
|
ML*
|
|
257
|
-
|
|
258
267
|
"""
|
|
259
268
|
if self._alphabet != NucleotideSequence.alphabet_unamb:
|
|
260
269
|
raise AlphabetError("Translation requires unambiguous alphabet")
|
|
@@ -574,6 +583,11 @@ class ProteinSequence(Sequence):
|
|
|
574
583
|
in the protein and the average isotopic mass of one water
|
|
575
584
|
molecule.
|
|
576
585
|
|
|
586
|
+
Parameters
|
|
587
|
+
----------
|
|
588
|
+
monoisotopic : bool
|
|
589
|
+
Use the mass of the most common isotope.
|
|
590
|
+
|
|
577
591
|
Returns
|
|
578
592
|
-------
|
|
579
593
|
weight : float
|
|
@@ -587,6 +601,120 @@ class ProteinSequence(Sequence):
|
|
|
587
601
|
|
|
588
602
|
if np.isnan(weight):
|
|
589
603
|
raise ValueError(
|
|
590
|
-
"Sequence contains ambiguous amino acids,
|
|
604
|
+
"Sequence contains ambiguous amino acids, cannot calculate weight"
|
|
591
605
|
)
|
|
592
606
|
return weight
|
|
607
|
+
|
|
608
|
+
|
|
609
|
+
class PositionalSequence(Sequence):
    """
    A sequence in which every symbol is tied to its position.

    Each position of the given sequence is represented by its own
    :class:`PositionalSequence.Symbol` and all of these symbols together form a
    custom alphabet that is specific to this sequence.
    As a result, the code of a symbol is its position in the sequence.
    A typical use case is aligning sequences based on a position-specific
    substitution matrix.

    Parameters
    ----------
    original_sequence : seq.Sequence
        The sequence the positional sequence is derived from.
    """

    @dataclass(frozen=True)
    class Symbol:
        """
        A symbol paired with its position in a sequence.

        Attributes
        ----------
        original_alphabet : Alphabet
            The alphabet the symbol originates from.
        original_code : int
            The code of the symbol in `original_alphabet`.
        position : int
            The 0-based position of the symbol in the sequence.
        symbol : object
            The symbol obtained by decoding `original_code` with
            `original_alphabet`.

        See Also
        --------
        PositionalSequence
            The sequence type containing :class:`PositionalSequence.Symbol`
            objects.
        """

        original_alphabet: ...
        original_code: ...
        position: ...
        # Not a constructor parameter: it is derived in `__post_init__()`
        symbol: ... = field(init=False)

        def __post_init__(self):
            decoded = self.original_alphabet.decode(self.original_code)
            # The dataclass is frozen, so a plain attribute assignment would raise
            # -> set the attribute via the `__setattr__()` of the base class
            object.__setattr__(self, "symbol", decoded)

        def __str__(self):
            return str(self.symbol)

    def __init__(self, original_sequence):
        self._orig_alphabet = original_sequence.get_alphabet()
        # One alphabet entry per sequence position, in sequence order
        positional_symbols = [
            PositionalSequence.Symbol(self._orig_alphabet, orig_code, index)
            for index, orig_code in enumerate(original_sequence.code)
        ]
        self._alphabet = Alphabet(positional_symbols)
        code_dtype = Sequence.dtype(len(self._alphabet))
        self.code = np.arange(len(original_sequence), dtype=code_dtype)

    def reconstruct(self):
        """
        Rebuild the original sequence from this positional sequence.

        Returns
        -------
        original_sequence : GeneralSequence
            The original sequence.
            The returned object is always a :class:`GeneralSequence`,
            but its alphabet and symbols are equal to those of the original
            sequence.
        """
        # Each symbol of the positional alphabet remembers its original code
        orig_codes = [sym.original_code for sym in self._alphabet]
        restored = GeneralSequence(self._orig_alphabet)
        restored.code = np.array(orig_codes)
        return restored

    def get_alphabet(self):
        return self._alphabet

    def __str__(self) -> str:
        return "".join(str(sym) for sym in self.symbols)

    def __repr__(self):
        return f"PositionalSequence({self.reconstruct()!r})"
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
class PurePositionalSequence(Sequence):
    """
    A 'placeholder' sequence whose symbols are simply the positions in the
    sequence itself.

    In contrast to :class:`PositionalSequence`, the symbols do not stem from an
    original sequence: each symbol is the bare position.
    Consequently, neither the sequence nor its symbols have a meaningful string
    representation.

    Parameters
    ----------
    length : int
        The length of the sequence.
    """

    def __init__(self, length):
        # The alphabet consists of the positions ``0`` to ``length - 1``
        positions = range(length)
        self._alphabet = Alphabet(positions)
        code_dtype = Sequence.dtype(length)
        self.code = np.arange(length, dtype=code_dtype)

    def get_alphabet(self):
        return self._alphabet

    def __repr__(self):
        return f"PurePositionalSequence({len(self)})"
|
biotite/sequence/sequence.py
CHANGED
|
@@ -139,7 +139,6 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
139
139
|
>>> dna_seq_concat = dna_seq + dna_seq_rev
|
|
140
140
|
>>> print(dna_seq_concat)
|
|
141
141
|
ACGTAATGCA
|
|
142
|
-
|
|
143
142
|
"""
|
|
144
143
|
|
|
145
144
|
def __init__(self, sequence=()):
|
|
@@ -354,7 +353,7 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
354
353
|
|
|
355
354
|
Parameters
|
|
356
355
|
----------
|
|
357
|
-
|
|
356
|
+
alphabet_size : int
|
|
358
357
|
The size of the alphabet.
|
|
359
358
|
|
|
360
359
|
Returns
|
biotite/setup_ccd.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
__author__ = "Patrick Kunzmann"
|
|
2
|
+
__all__ = []
|
|
3
|
+
|
|
4
|
+
import gzip
|
|
5
|
+
import logging
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from io import StringIO
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import numpy as np
|
|
10
|
+
import requests
|
|
11
|
+
from biotite.structure.io.pdbx import *
|
|
12
|
+
|
|
13
|
+
OUTPUT_CCD = Path(__file__).parent / "structure" / "info" / "components.bcif"
|
|
14
|
+
CCD_URL = "https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def concatenate_ccd(categories=None):
    """
    Create the CCD in BinaryCIF format, with each category containing the
    data of all blocks.

    Parameters
    ----------
    categories : list of str, optional
        The names of the categories to include.
        By default, all categories from the CCD are included.

    Returns
    -------
    compressed_file : BinaryCIFFile
        The compressed CCD in BinaryCIF format.

    Raises
    ------
    requests.HTTPError
        If the server answers the download request with an error status.
    """

    logging.info("Download and read CCD...")
    # Without a timeout a stalled connection would block the script forever
    response = requests.get(CCD_URL, timeout=60)
    # Fail with the actual HTTP error instead of a misleading gzip error
    # that would be raised when trying to decompress an error page
    response.raise_for_status()
    ccd_cif_text = gzip.decompress(response.content).decode()
    ccd_file = CIFFile.read(StringIO(ccd_cif_text))

    compressed_block = BinaryCIFBlock()
    if categories is None:
        categories = _list_all_category_names(ccd_file)
    for category_name in categories:
        logging.info(f"Concatenate and compress '{category_name}' category...")
        compressed_block[category_name] = compress(
            _concatenate_blocks_into_category(ccd_file, category_name)
        )

    logging.info("Write concatenated CCD into BinaryCIF...")
    # All concatenated categories end up in a single block named 'components'
    compressed_file = BinaryCIFFile()
    compressed_file["components"] = compressed_block
    return compressed_file
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _concatenate_blocks_into_category(pdbx_file, category_name):
    """
    Merge one category across all blocks of a file into a single category.

    Parameters
    ----------
    pdbx_file : PDBxFile
        The PDBx file whose blocks are merged.
    category_name : str
        The name of the category to merge.

    Returns
    -------
    category : BinaryCIFCategory
        The category containing the rows of all blocks.
    """
    columns_names = _list_all_column_names(pdbx_file, category_name)
    data_parts = {name: [] for name in columns_names}
    mask_parts = {name: [] for name in columns_names}
    for block in pdbx_file.values():
        if category_name not in block:
            continue
        category = block[category_name]
        for name in columns_names:
            if name not in category:
                # The column is absent in this block
                # -> fill in placeholder data masked as 'missing'
                # For now all arrays are strings anyway, as they are read from
                # a CIF file, hence the placeholder is a string as well
                data_parts[name].append(
                    np.full(category.row_count, "", dtype="U1")
                )
                mask_parts[name].append(
                    np.full(category.row_count, MaskValue.MISSING, dtype=np.uint8)
                )
                continue
            column = category[name]
            data_parts[name].append(column.data.array)
            if column.mask is None:
                # The column has no mask -> mark every value as present
                mask_parts[name].append(
                    np.full(category.row_count, MaskValue.PRESENT, dtype=np.uint8)
                )
            else:
                mask_parts[name].append(column.mask.array)

    merged_columns = {}
    for name in columns_names:
        mask = np.concatenate(mask_parts[name])
        data = _into_fitting_type(np.concatenate(data_parts[name]), mask)
        # A mask is only stored, if it masks at least one value
        if np.all(mask == MaskValue.PRESENT):
            mask = None
        merged_columns[name] = BinaryCIFColumn(data, mask)
    return BinaryCIFCategory(merged_columns)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _list_all_column_names(pdbx_file, category_name):
|
|
111
|
+
"""
|
|
112
|
+
Get all columns that exist in any block for a given category.
|
|
113
|
+
|
|
114
|
+
Parameters
|
|
115
|
+
----------
|
|
116
|
+
pdbx_file : PDBxFile
|
|
117
|
+
The PDBx file to search in for the columns.
|
|
118
|
+
category_name : str
|
|
119
|
+
The name of the category to search in.
|
|
120
|
+
|
|
121
|
+
Returns
|
|
122
|
+
-------
|
|
123
|
+
columns_names : list of str
|
|
124
|
+
The names of the columns.
|
|
125
|
+
"""
|
|
126
|
+
columns_names = set()
|
|
127
|
+
for block in pdbx_file.values():
|
|
128
|
+
if category_name in block:
|
|
129
|
+
columns_names.update(block[category_name].keys())
|
|
130
|
+
return sorted(columns_names)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _list_all_category_names(pdbx_file):
|
|
134
|
+
"""
|
|
135
|
+
Get all categories that exist in any block.
|
|
136
|
+
|
|
137
|
+
Parameters
|
|
138
|
+
----------
|
|
139
|
+
pdbx_file : PDBxFile
|
|
140
|
+
The PDBx file to search in for the columns.
|
|
141
|
+
|
|
142
|
+
Returns
|
|
143
|
+
-------
|
|
144
|
+
columns_names : list of str
|
|
145
|
+
The names of the columns.
|
|
146
|
+
"""
|
|
147
|
+
category_names = set()
|
|
148
|
+
for block in pdbx_file.values():
|
|
149
|
+
category_names.update(block.keys())
|
|
150
|
+
return sorted(category_names)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _into_fitting_type(string_array, mask):
    """
    Convert a string ndarray into a numeric ndarray, if possible.

    Parameters
    ----------
    string_array : ndarray, dtype=string
        The array to convert.
    mask : ndarray, dtype=uint8
        Only the values in `string_array` where the mask is
        ``MaskValue.PRESENT`` are taken into account for the type conversion.

    Returns
    -------
    array : ndarray
        The array converted into an appropriate dtype.
        Elements where the mask is not ``MaskValue.PRESENT`` are zero-initialized.
    """
    is_present = mask == MaskValue.PRESENT
    # Masked-out elements are ignored when searching for a fitting dtype
    present_values = string_array[is_present]
    # Try integer first and fall back to float
    for numeric_type in (int, float):
        try:
            present_values = present_values.astype(numeric_type)
        except ValueError:
            continue
        break
    # If neither conversion succeeded, the values keep their string type
    converted = np.zeros(string_array.shape, dtype=present_values.dtype)
    converted[is_present] = present_values
    return converted
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def main():
    """
    Create the BinaryCIF file of the CCD at `OUTPUT_CCD`, restricted to the
    ``chem_comp``, ``chem_comp_atom`` and ``chem_comp_bond`` categories.
    """
    logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(message)s")
    # Make sure the target directory exists before the file is written
    OUTPUT_CCD.parent.mkdir(parents=True, exist_ok=True)

    categories = ["chem_comp", "chem_comp_atom", "chem_comp_bond"]
    concatenate_ccd(categories).write(OUTPUT_CCD)


if __name__ == "__main__":
    main()
|
biotite/structure/__init__.py
CHANGED
|
@@ -57,14 +57,15 @@ The annotation arrays can be accessed either via the method
|
|
|
57
57
|
The following annotation categories are optionally used by some
|
|
58
58
|
functions:
|
|
59
59
|
|
|
60
|
-
========= =========== =================
|
|
60
|
+
========= =========== ================= =========================================
|
|
61
61
|
Category Type Examples Description
|
|
62
|
-
========= =========== =================
|
|
62
|
+
========= =========== ================= =========================================
|
|
63
63
|
atom_id int 1,2,3, ... Atom serial number
|
|
64
64
|
b_factor float 0.9, 12.3, ... Temperature factor
|
|
65
65
|
occupancy float .1, .3, .9, ... Occupancy
|
|
66
66
|
charge int -2,-1,0,1,2, ... Electric charge of the atom
|
|
67
|
-
|
|
67
|
+
sym_id string '1','2','3', ... Symmetry ID for assemblies/symmetry mates
|
|
68
|
+
========= =========== ================= =========================================
|
|
68
69
|
|
|
69
70
|
For each type, the attributes can be accessed directly.
|
|
70
71
|
Both :class:`AtomArray` and :class:`AtomArrayStack` support
|
|
@@ -124,9 +125,11 @@ from .pseudoknots import *
|
|
|
124
125
|
from .rdf import *
|
|
125
126
|
from .repair import *
|
|
126
127
|
from .residues import *
|
|
128
|
+
from .rings import *
|
|
127
129
|
from .sasa import *
|
|
128
130
|
from .sequence import *
|
|
129
131
|
from .sse import *
|
|
130
132
|
from .superimpose import *
|
|
133
|
+
from .tm import *
|
|
131
134
|
from .transform import *
|
|
132
135
|
# util and segments are used internally
|
|
biotite/structure/alphabet/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for converting structures to structural alphabet sequences.
|
|
7
|
+
|
|
8
|
+
Structural alphabets represent the local geometry of each residue in a structure as
|
|
9
|
+
a symbol in a sequence.
|
|
10
|
+
This allows using sequence-based functionality from :mod:`biotite.sequence` on
|
|
11
|
+
structural data.
|
|
12
|
+
|
|
13
|
+
For each supported structural alphabet, this subpackage provides a conversion function
|
|
14
|
+
that converts each chain of a given structure into a :class:`Sequence` object from the
|
|
15
|
+
respective structural alphabet.
|
|
16
|
+
|
|
17
|
+
Note that the structural alphabets use lower-case letters as symbols, in order to
|
|
18
|
+
distinguish them better from the nucleotide and amino acid alphabets.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
__name__ = "biotite.structure.alphabet"
|
|
22
|
+
__author__ = "Martin Larralde, Patrick Kunzmann"
|
|
23
|
+
|
|
24
|
+
from .i3d import *
|
|
25
|
+
from .pb import *
|