biotite 1.0.1__cp312-cp312-macosx_11_0_arm64.whl → 1.2.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (177) hide show
  1. biotite/application/application.py +3 -3
  2. biotite/application/autodock/app.py +1 -1
  3. biotite/application/blast/webapp.py +1 -1
  4. biotite/application/clustalo/app.py +1 -1
  5. biotite/application/dssp/app.py +13 -3
  6. biotite/application/localapp.py +36 -2
  7. biotite/application/msaapp.py +10 -10
  8. biotite/application/muscle/app3.py +5 -18
  9. biotite/application/muscle/app5.py +5 -5
  10. biotite/application/sra/app.py +0 -5
  11. biotite/application/util.py +22 -2
  12. biotite/application/viennarna/rnaalifold.py +8 -8
  13. biotite/application/viennarna/rnaplot.py +9 -3
  14. biotite/application/viennarna/util.py +1 -1
  15. biotite/application/webapp.py +1 -1
  16. biotite/database/afdb/__init__.py +12 -0
  17. biotite/database/afdb/download.py +191 -0
  18. biotite/database/entrez/dbnames.py +10 -0
  19. biotite/database/entrez/download.py +9 -10
  20. biotite/database/entrez/key.py +1 -1
  21. biotite/database/entrez/query.py +5 -4
  22. biotite/database/pubchem/download.py +6 -6
  23. biotite/database/pubchem/error.py +10 -0
  24. biotite/database/pubchem/query.py +12 -23
  25. biotite/database/rcsb/download.py +3 -2
  26. biotite/database/rcsb/query.py +8 -9
  27. biotite/database/uniprot/check.py +22 -17
  28. biotite/database/uniprot/download.py +3 -6
  29. biotite/database/uniprot/query.py +4 -5
  30. biotite/file.py +14 -2
  31. biotite/interface/__init__.py +19 -0
  32. biotite/interface/openmm/__init__.py +16 -0
  33. biotite/interface/openmm/state.py +93 -0
  34. biotite/interface/openmm/system.py +227 -0
  35. biotite/interface/pymol/__init__.py +198 -0
  36. biotite/interface/pymol/cgo.py +346 -0
  37. biotite/interface/pymol/convert.py +185 -0
  38. biotite/interface/pymol/display.py +267 -0
  39. biotite/interface/pymol/object.py +1226 -0
  40. biotite/interface/pymol/shapes.py +178 -0
  41. biotite/interface/pymol/startup.py +169 -0
  42. biotite/interface/rdkit/__init__.py +15 -0
  43. biotite/interface/rdkit/mol.py +490 -0
  44. biotite/interface/version.py +71 -0
  45. biotite/interface/warning.py +19 -0
  46. biotite/sequence/align/__init__.py +0 -4
  47. biotite/sequence/align/alignment.py +49 -14
  48. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  49. biotite/sequence/align/banded.pyx +26 -26
  50. biotite/sequence/align/cigar.py +2 -2
  51. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  52. biotite/sequence/align/kmeralphabet.pyx +19 -2
  53. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  54. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  55. biotite/sequence/align/kmertable.pyx +58 -48
  56. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  57. biotite/sequence/align/localgapped.pyx +47 -47
  58. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  59. biotite/sequence/align/localungapped.pyx +10 -10
  60. biotite/sequence/align/matrix.py +284 -57
  61. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  62. biotite/sequence/align/matrix_data/PB.license +21 -0
  63. biotite/sequence/align/matrix_data/PB.mat +18 -0
  64. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  65. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  66. biotite/sequence/align/pairwise.pyx +35 -35
  67. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  68. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  69. biotite/sequence/align/selector.pyx +2 -2
  70. biotite/sequence/align/statistics.py +1 -1
  71. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  72. biotite/sequence/alphabet.py +5 -2
  73. biotite/sequence/annotation.py +19 -13
  74. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  75. biotite/sequence/codon.py +1 -2
  76. biotite/sequence/graphics/alignment.py +25 -39
  77. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  78. biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
  79. biotite/sequence/graphics/colorschemes.py +44 -11
  80. biotite/sequence/graphics/dendrogram.py +4 -2
  81. biotite/sequence/graphics/features.py +2 -2
  82. biotite/sequence/graphics/logo.py +10 -12
  83. biotite/sequence/io/fasta/convert.py +1 -2
  84. biotite/sequence/io/fasta/file.py +1 -1
  85. biotite/sequence/io/fastq/file.py +3 -3
  86. biotite/sequence/io/genbank/file.py +3 -3
  87. biotite/sequence/io/genbank/sequence.py +2 -0
  88. biotite/sequence/io/gff/convert.py +1 -1
  89. biotite/sequence/io/gff/file.py +1 -2
  90. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  91. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  92. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  93. biotite/sequence/profile.py +105 -29
  94. biotite/sequence/search.py +0 -1
  95. biotite/sequence/seqtypes.py +136 -8
  96. biotite/sequence/sequence.py +1 -2
  97. biotite/setup_ccd.py +197 -0
  98. biotite/structure/__init__.py +6 -3
  99. biotite/structure/alphabet/__init__.py +25 -0
  100. biotite/structure/alphabet/encoder.py +332 -0
  101. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  102. biotite/structure/alphabet/i3d.py +109 -0
  103. biotite/structure/alphabet/layers.py +86 -0
  104. biotite/structure/alphabet/pb.license +21 -0
  105. biotite/structure/alphabet/pb.py +170 -0
  106. biotite/structure/alphabet/unkerasify.py +128 -0
  107. biotite/structure/atoms.py +163 -66
  108. biotite/structure/basepairs.py +26 -26
  109. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  110. biotite/structure/bonds.pyx +79 -25
  111. biotite/structure/box.py +19 -21
  112. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  113. biotite/structure/celllist.pyx +83 -67
  114. biotite/structure/chains.py +5 -37
  115. biotite/structure/charges.cpython-312-darwin.so +0 -0
  116. biotite/structure/compare.py +420 -13
  117. biotite/structure/density.py +1 -1
  118. biotite/structure/dotbracket.py +27 -28
  119. biotite/structure/filter.py +8 -8
  120. biotite/structure/geometry.py +74 -127
  121. biotite/structure/hbond.py +17 -19
  122. biotite/structure/info/__init__.py +1 -0
  123. biotite/structure/info/atoms.py +24 -15
  124. biotite/structure/info/bonds.py +12 -6
  125. biotite/structure/info/ccd.py +125 -34
  126. biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
  127. biotite/structure/info/groups.py +62 -19
  128. biotite/structure/info/masses.py +9 -6
  129. biotite/structure/info/misc.py +15 -22
  130. biotite/structure/info/radii.py +92 -22
  131. biotite/structure/info/standardize.py +4 -4
  132. biotite/structure/integrity.py +4 -6
  133. biotite/structure/io/general.py +2 -2
  134. biotite/structure/io/gro/file.py +8 -9
  135. biotite/structure/io/mol/convert.py +1 -1
  136. biotite/structure/io/mol/ctab.py +33 -28
  137. biotite/structure/io/mol/mol.py +1 -1
  138. biotite/structure/io/mol/sdf.py +80 -53
  139. biotite/structure/io/pdb/convert.py +4 -3
  140. biotite/structure/io/pdb/file.py +85 -25
  141. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  142. biotite/structure/io/pdbqt/file.py +36 -36
  143. biotite/structure/io/pdbx/__init__.py +1 -0
  144. biotite/structure/io/pdbx/bcif.py +54 -15
  145. biotite/structure/io/pdbx/cif.py +92 -66
  146. biotite/structure/io/pdbx/component.py +15 -4
  147. biotite/structure/io/pdbx/compress.py +321 -0
  148. biotite/structure/io/pdbx/convert.py +410 -75
  149. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  150. biotite/structure/io/pdbx/encoding.pyx +98 -17
  151. biotite/structure/io/trajfile.py +9 -6
  152. biotite/structure/io/util.py +38 -0
  153. biotite/structure/mechanics.py +0 -1
  154. biotite/structure/molecules.py +141 -156
  155. biotite/structure/pseudoknots.py +7 -13
  156. biotite/structure/repair.py +2 -4
  157. biotite/structure/residues.py +13 -24
  158. biotite/structure/rings.py +335 -0
  159. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  160. biotite/structure/sasa.pyx +2 -1
  161. biotite/structure/segments.py +69 -11
  162. biotite/structure/sequence.py +0 -1
  163. biotite/structure/sse.py +0 -2
  164. biotite/structure/superimpose.py +74 -62
  165. biotite/structure/tm.py +581 -0
  166. biotite/structure/transform.py +12 -25
  167. biotite/structure/util.py +76 -4
  168. biotite/version.py +9 -4
  169. biotite/visualize.py +111 -1
  170. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
  171. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
  172. biotite/structure/info/ccd/README.rst +0 -8
  173. biotite/structure/info/ccd/amino_acids.txt +0 -1663
  174. biotite/structure/info/ccd/carbohydrates.txt +0 -1135
  175. biotite/structure/info/ccd/nucleotides.txt +0 -798
  176. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
  177. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -4,10 +4,22 @@
4
4
 
5
5
  __name__ = "biotite.sequence"
6
6
  __author__ = "Patrick Kunzmann", "Thomas Nevolianis"
7
- __all__ = ["GeneralSequence", "NucleotideSequence", "ProteinSequence"]
8
-
7
+ __all__ = [
8
+ "GeneralSequence",
9
+ "NucleotideSequence",
10
+ "ProteinSequence",
11
+ "PositionalSequence",
12
+ "PurePositionalSequence",
13
+ ]
14
+
15
+ from dataclasses import dataclass, field
9
16
  import numpy as np
10
- from biotite.sequence.alphabet import AlphabetError, AlphabetMapper, LetterAlphabet
17
+ from biotite.sequence.alphabet import (
18
+ Alphabet,
19
+ AlphabetError,
20
+ AlphabetMapper,
21
+ LetterAlphabet,
22
+ )
11
23
  from biotite.sequence.sequence import Sequence
12
24
 
13
25
 
@@ -188,7 +200,6 @@ class NucleotideSequence(Sequence):
188
200
  TGCGAA
189
201
  >>> print(dna_seq.reverse().complement())
190
202
  AAGCGT
191
-
192
203
  """
193
204
  # Interpreting the sequence code of this object in the
194
205
  # complementary alphabet gives the complementary symbols
@@ -214,7 +225,7 @@ class NucleotideSequence(Sequence):
214
225
  complete : bool, optional
215
226
  If true, the complete sequence is translated. In this case
216
227
  the sequence length must be a multiple of 3.
217
- Otherwise all ORFs are translated. (Default: False)
228
+ Otherwise all ORFs are translated.
218
229
  codon_table : CodonTable, optional
219
230
  The codon table to be used. By default the default table
220
231
  will be used
@@ -224,7 +235,6 @@ class NucleotideSequence(Sequence):
224
235
  even if the start codon codes for another amino acid.
225
236
  Otherwise the translation starts with the amino acid
226
237
  the codon codes for. Only applies, if `complete` is false.
227
- (Default: False)
228
238
 
229
239
  Returns
230
240
  -------
@@ -254,7 +264,6 @@ class NucleotideSequence(Sequence):
254
264
  ... print(seq)
255
265
  MML*
256
266
  ML*
257
-
258
267
  """
259
268
  if self._alphabet != NucleotideSequence.alphabet_unamb:
260
269
  raise AlphabetError("Translation requires unambiguous alphabet")
@@ -574,6 +583,11 @@ class ProteinSequence(Sequence):
574
583
  in the protein and the average isotopic mass of one water
575
584
  molecule.
576
585
 
586
+ Parameters
587
+ ----------
588
+ monoisotopic : bool
589
+ Use the mass of the most common isotope.
590
+
577
591
  Returns
578
592
  -------
579
593
  weight : float
@@ -587,6 +601,120 @@ class ProteinSequence(Sequence):
587
601
 
588
602
  if np.isnan(weight):
589
603
  raise ValueError(
590
- "Sequence contains ambiguous amino acids, " "cannot calculate weight"
604
+ "Sequence contains ambiguous amino acids, cannot calculate weight"
591
605
  )
592
606
  return weight
607
+
608
+
609
+ class PositionalSequence(Sequence):
610
+ """
611
+ A sequence where each symbol is associated with a position.
612
+
613
+ For each individual position the sequence contains a separate
614
+ :class:`PositionalSequence.Symbol`, encoded by a custom alphabet for this sequence.
615
+ In consequence the symbol code is the position in the sequence itself.
616
+ This is useful for aligning sequences based on a position-specific
617
+ substitution matrix.
618
+
619
+ Parameters
620
+ ----------
621
+ original_sequence : seq.Sequence
622
+ The original sequence to create the positional sequence from.
623
+ """
624
+
625
+ @dataclass(frozen=True)
626
+ class Symbol:
627
+ """
628
+ Combination of a symbol and its position in a sequence.
629
+
630
+ Attributes
631
+ ----------
632
+ original_alphabet : Alphabet
633
+ The original alphabet, where the symbol stems from.
634
+ original_code : int
635
+ The code of the original symbol in the original alphabet.
636
+ position : int
637
+ The 0-based position of the symbol in the sequence.
638
+ symbol : object
639
+ The symbol from the original alphabet.
640
+
641
+ See Also
642
+ --------
643
+ PositionalSequence
644
+ The sequence type containing :class:`PositionalSequence.Symbol` objects.
645
+ """
646
+
647
+ original_alphabet: ...
648
+ original_code: ...
649
+ position: ...
650
+ symbol: ... = field(init=False)
651
+
652
+ def __post_init__(self):
653
+ sym = self.original_alphabet.decode(self.original_code)
654
+ super().__setattr__("symbol", sym)
655
+
656
+ def __str__(self):
657
+ return str(self.symbol)
658
+
659
+ def __init__(self, original_sequence):
660
+ self._orig_alphabet = original_sequence.get_alphabet()
661
+ self._alphabet = Alphabet(
662
+ [
663
+ PositionalSequence.Symbol(self._orig_alphabet, code, pos)
664
+ for pos, code in enumerate(original_sequence.code)
665
+ ]
666
+ )
667
+ self.code = np.arange(
668
+ len(original_sequence), dtype=Sequence.dtype(len(self._alphabet))
669
+ )
670
+
671
+ def reconstruct(self):
672
+ """
673
+ Reconstruct the original sequence from the positional sequence.
674
+
675
+ Returns
676
+ -------
677
+ original_sequence : GeneralSequence
678
+ The original sequence.
679
+ Although the actual type of the returned sequence is always a
680
+ :class:`GeneralSequence`, the alphabet and the symbols of the returned
681
+ sequence are equal to the original sequence.
682
+ """
683
+ original_sequence = GeneralSequence(self._orig_alphabet)
684
+ original_sequence.code = np.array([sym.original_code for sym in self._alphabet])
685
+ return original_sequence
686
+
687
+ def get_alphabet(self):
688
+ return self._alphabet
689
+
690
+ def __str__(self) -> str:
691
+ return "".join([str(sym) for sym in self.symbols])
692
+
693
+ def __repr__(self):
694
+ return f"PositionalSequence({self.reconstruct()!r})"
695
+
696
+
697
+ class PurePositionalSequence(Sequence):
698
+ """
699
+ An object of this class is a 'placeholder' sequence, where each symbol is the
700
+ position in the sequence itself.
701
+
702
+ This class is similar to :class:`PositionalSequence`, but the symbols are not
703
+ derived from an original sequence, but are the pure position.
704
+ Hence, there is no meaningful string representation of the sequence and its symbols.
705
+
706
+ Parameters
707
+ ----------
708
+ length : int
709
+ The length of the sequence.
710
+ """
711
+
712
+ def __init__(self, length):
713
+ self._alphabet = Alphabet(range(length))
714
+ self.code = np.arange(length, dtype=Sequence.dtype(length))
715
+
716
+ def get_alphabet(self):
717
+ return self._alphabet
718
+
719
+ def __repr__(self):
720
+ return f"PurePositionalSequence({len(self)})"
@@ -139,7 +139,6 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
139
139
  >>> dna_seq_concat = dna_seq + dna_seq_rev
140
140
  >>> print(dna_seq_concat)
141
141
  ACGTAATGCA
142
-
143
142
  """
144
143
 
145
144
  def __init__(self, sequence=()):
@@ -354,7 +353,7 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
354
353
 
355
354
  Parameters
356
355
  ----------
357
- alpahabet_size : int
356
+ alphabet_size : int
358
357
  The size of the alphabet.
359
358
 
360
359
  Returns
biotite/setup_ccd.py ADDED
@@ -0,0 +1,197 @@
1
+ __author__ = "Patrick Kunzmann"
2
+ __all__ = []
3
+
4
+ import gzip
5
+ import logging
6
+ from collections import defaultdict
7
+ from io import StringIO
8
+ from pathlib import Path
9
+ import numpy as np
10
+ import requests
11
+ from biotite.structure.io.pdbx import *
12
+
13
+ OUTPUT_CCD = Path(__file__).parent / "structure" / "info" / "components.bcif"
14
+ CCD_URL = "https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
15
+
16
+
17
+ def concatenate_ccd(categories=None):
18
+ """
19
+ Create the CCD in BinaryCIF format, where each category contains the
20
+ data of all blocks.
21
+
22
+ Parameters
23
+ ----------
24
+ categories : list of str, optional
25
+ The names of the categories to include.
26
+ By default, all categories from the CCD are included.
27
+
28
+ Returns
29
+ -------
30
+ compressed_file : BinaryCIFFile
31
+ The compressed CCD in BinaryCIF format.
32
+ """
33
+
34
+ logging.info("Download and read CCD...")
35
+ ccd_cif_text = gzip.decompress(requests.get(CCD_URL).content).decode()
36
+ ccd_file = CIFFile.read(StringIO(ccd_cif_text))
37
+
38
+ compressed_block = BinaryCIFBlock()
39
+ if categories is None:
40
+ categories = _list_all_category_names(ccd_file)
41
+ for category_name in categories:
42
+ logging.info(f"Concatenate and compress '{category_name}' category...")
43
+ compressed_block[category_name] = compress(
44
+ _concatenate_blocks_into_category(ccd_file, category_name)
45
+ )
46
+
47
+ logging.info("Write concatenated CCD into BinaryCIF...")
48
+ compressed_file = BinaryCIFFile()
49
+ compressed_file["components"] = compressed_block
50
+ return compressed_file
51
+
52
+
53
+ def _concatenate_blocks_into_category(pdbx_file, category_name):
54
+ """
55
+ Concatenate the given category from all blocks into a single
56
+ category.
57
+
58
+ Parameters
59
+ ----------
60
+ pdbx_file : PDBxFile
61
+ The PDBx file, whose blocks should be concatenated.
62
+ category_name : str
63
+ The name of the category to concatenate.
64
+
65
+ Returns
66
+ -------
67
+ category : BinaryCIFCategory
68
+ The concatenated category.
69
+ """
70
+ columns_names = _list_all_column_names(pdbx_file, category_name)
71
+ data_chunks = defaultdict(list)
72
+ mask_chunks = defaultdict(list)
73
+ for block in pdbx_file.values():
74
+ if category_name not in block:
75
+ continue
76
+ category = block[category_name]
77
+ for column_name in columns_names:
78
+ if column_name in category:
79
+ column = category[column_name]
80
+ data_chunks[column_name].append(column.data.array)
81
+ if column.mask is not None:
82
+ mask_chunks[column_name].append(column.mask.array)
83
+ else:
84
+ mask_chunks[column_name].append(
85
+ np.full(category.row_count, MaskValue.PRESENT, dtype=np.uint8)
86
+ )
87
+ else:
88
+ # Column is missing in this block
89
+ # -> handle it as data masked as 'missing'
90
+ data_chunks[column_name].append(
91
+ # For now all arrays are of type string anyway,
92
+ # as they are read from a CIF file
93
+ np.full(category.row_count, "", dtype="U1")
94
+ )
95
+ mask_chunks[column_name].append(
96
+ np.full(category.row_count, MaskValue.MISSING, dtype=np.uint8)
97
+ )
98
+
99
+ bcif_columns = {}
100
+ for col_name in columns_names:
101
+ data = np.concatenate(data_chunks[col_name])
102
+ mask = np.concatenate(mask_chunks[col_name])
103
+ data = _into_fitting_type(data, mask)
104
+ if np.all(mask == MaskValue.PRESENT):
105
+ mask = None
106
+ bcif_columns[col_name] = BinaryCIFColumn(data, mask)
107
+ return BinaryCIFCategory(bcif_columns)
108
+
109
+
110
+ def _list_all_column_names(pdbx_file, category_name):
111
+ """
112
+ Get all columns that exist in any block for a given category.
113
+
114
+ Parameters
115
+ ----------
116
+ pdbx_file : PDBxFile
117
+ The PDBx file to search in for the columns.
118
+ category_name : str
119
+ The name of the category to search in.
120
+
121
+ Returns
122
+ -------
123
+ columns_names : list of str
124
+ The names of the columns.
125
+ """
126
+ columns_names = set()
127
+ for block in pdbx_file.values():
128
+ if category_name in block:
129
+ columns_names.update(block[category_name].keys())
130
+ return sorted(columns_names)
131
+
132
+
133
+ def _list_all_category_names(pdbx_file):
134
+ """
135
+ Get all categories that exist in any block.
136
+
137
+ Parameters
138
+ ----------
139
+ pdbx_file : PDBxFile
140
+ The PDBx file to search in for the categories.
141
+
142
+ Returns
143
+ -------
144
+ category_names : list of str
144
+ The names of the categories.
146
+ """
147
+ category_names = set()
148
+ for block in pdbx_file.values():
149
+ category_names.update(block.keys())
150
+ return sorted(category_names)
151
+
152
+
153
+ def _into_fitting_type(string_array, mask):
154
+ """
155
+ Try to find a numeric type for a string ndarray, if possible.
156
+
157
+ Parameters
158
+ ----------
159
+ string_array : ndarray, dtype=string
160
+ The array to convert.
161
+ mask : ndarray, dtype=uint8
162
+ Only values in `string_array` where the mask is ``MaskValue.PRESENT`` are
163
+ considered for type conversion.
164
+
165
+ Returns
166
+ -------
167
+ array : ndarray
168
+ The array converted into an appropriate dtype.
169
+ """
170
+ mask = mask == MaskValue.PRESENT
171
+ # Only try to find an appropriate dtype for unmasked values
172
+ values = string_array[mask]
173
+ try:
174
+ # Try to fit into integer type
175
+ values = values.astype(int)
176
+ except ValueError:
177
+ try:
178
+ # Try to fit into float type
179
+ values = values.astype(float)
180
+ except ValueError:
181
+ # Keep string type
182
+ pass
183
+ array = np.zeros(string_array.shape, dtype=values.dtype)
184
+ array[mask] = values
185
+ return array
186
+
187
+
188
+ def main():
189
+ logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(message)s")
190
+ OUTPUT_CCD.parent.mkdir(parents=True, exist_ok=True)
191
+
192
+ compressed_ccd = concatenate_ccd(["chem_comp", "chem_comp_atom", "chem_comp_bond"])
193
+ compressed_ccd.write(OUTPUT_CCD)
194
+
195
+
196
+ if __name__ == "__main__":
197
+ main()
@@ -57,14 +57,15 @@ The annotation arrays can be accessed either via the method
57
57
  The following annotation categories are optionally used by some
58
58
  functions:
59
59
 
60
- ========= =========== ================= ============================
60
+ ========= =========== ================= =========================================
61
61
  Category Type Examples Description
62
- ========= =========== ================= ============================
62
+ ========= =========== ================= =========================================
63
63
  atom_id int 1,2,3, ... Atom serial number
64
64
  b_factor float 0.9, 12.3, ... Temperature factor
65
65
  occupancy float .1, .3, .9, ... Occupancy
66
66
  charge int -2,-1,0,1,2, ... Electric charge of the atom
67
- ========= =========== ================= ============================
67
+ sym_id string '1','2','3', ... Symmetry ID for assemblies/symmetry mates
68
+ ========= =========== ================= =========================================
68
69
 
69
70
  For each type, the attributes can be accessed directly.
70
71
  Both :class:`AtomArray` and :class:`AtomArrayStack` support
@@ -124,9 +125,11 @@ from .pseudoknots import *
124
125
  from .rdf import *
125
126
  from .repair import *
126
127
  from .residues import *
128
+ from .rings import *
127
129
  from .sasa import *
128
130
  from .sequence import *
129
131
  from .sse import *
130
132
  from .superimpose import *
133
+ from .tm import *
131
134
  from .transform import *
132
135
  # util and segments are used internally
@@ -0,0 +1,25 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for converting structures to structural alphabet sequences.
7
+
8
+ Structural alphabets represent the local geometry of each residue in a structure as
9
+ a symbol in a sequence.
10
+ This allows using sequence-based functionality from :mod:`biotite.sequence` on
11
+ structural data.
12
+
13
+ For each supported structural alphabet, this subpackage provides a conversion function
14
+ that converts each chain of a given structure into a :class:`Sequence` object from the
15
+ respective structural alphabet.
16
+
17
+ Note that the structural alphabets use lower-case letters as symbols, in order to
18
+ distinguish them better from the nucleotide and amino acid alphabets.
19
+ """
20
+
21
+ __name__ = "biotite.structure.alphabet"
22
+ __author__ = "Martin Larralde, Patrick Kunzmann"
23
+
24
+ from .i3d import *
25
+ from .pb import *