biotite 0.41.2__cp312-cp312-win_amd64.whl → 1.0.1__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205)
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +1 -1
  3. biotite/application/application.py +20 -10
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +3 -3
  52. biotite/sequence/align/__init__.py +2 -2
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
  60. biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
  63. biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
  66. biotite/sequence/align/multiple.pyx +1 -1
  67. biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
  68. biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
  74. biotite/sequence/alphabet.py +51 -65
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
  102. biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
  103. biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +15 -17
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +246 -236
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
  112. biotite/structure/bonds.pyx +29 -32
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cp312-win_amd64.pyd +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +83 -78
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +4 -5
  130. biotite/structure/info/groups.py +1 -3
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -44
  159. biotite/structure/io/pdbx/cif.py +140 -110
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +260 -258
  162. biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
  163. biotite/structure/io/trajfile.py +90 -107
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
  184. biotite-1.0.1.dist-info/RECORD +322 -0
  185. biotite/structure/io/ctab.py +0 -72
  186. biotite/structure/io/mmtf/__init__.py +0 -21
  187. biotite/structure/io/mmtf/assembly.py +0 -214
  188. biotite/structure/io/mmtf/convertarray.cp312-win_amd64.pyd +0 -0
  189. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  190. biotite/structure/io/mmtf/convertfile.cp312-win_amd64.pyd +0 -0
  191. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  192. biotite/structure/io/mmtf/decode.cp312-win_amd64.pyd +0 -0
  193. biotite/structure/io/mmtf/decode.pyx +0 -152
  194. biotite/structure/io/mmtf/encode.cp312-win_amd64.pyd +0 -0
  195. biotite/structure/io/mmtf/encode.pyx +0 -183
  196. biotite/structure/io/mmtf/file.py +0 -233
  197. biotite/structure/io/npz/__init__.py +0 -20
  198. biotite/structure/io/npz/file.py +0 -152
  199. biotite/structure/io/pdbx/legacy.py +0 -267
  200. biotite/structure/io/tng/__init__.py +0 -13
  201. biotite/structure/io/tng/file.py +0 -46
  202. biotite/temp.py +0 -86
  203. biotite-0.41.2.dist-info/RECORD +0 -340
  204. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
  205. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
@@ -6,31 +6,22 @@ __name__ = "biotite.application.muscle"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["Muscle5App"]
8
8
 
9
- import numbers
10
- import warnings
11
- from tempfile import NamedTemporaryFile
12
- from ..localapp import cleanup_tempfile
13
- from ..msaapp import MSAApp
14
- from ..application import AppState, VersionError, requires_state
15
- from ...sequence.sequence import Sequence
16
- from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
17
- from ...sequence.align.matrix import SubstitutionMatrix
18
- from ...sequence.align.alignment import Alignment
19
- from ...sequence.phylo.tree import Tree
20
- from .app3 import get_version
9
+ from biotite.application.application import AppState, VersionError, requires_state
10
+ from biotite.application.msaapp import MSAApp
11
+ from biotite.application.muscle.app3 import get_version
21
12
 
22
13
 
23
14
  class Muscle5App(MSAApp):
24
15
  """
25
16
  Perform a multiple sequence alignment using MUSCLE version 5.
26
-
17
+
27
18
  Parameters
28
19
  ----------
29
20
  sequences : list of Sequence
30
21
  The sequences to be aligned.
31
22
  bin_path : str, optional
32
23
  Path of the MUSCLE binary.
33
-
24
+
34
25
  See also
35
26
  --------
36
27
  MuscleApp
@@ -38,7 +29,7 @@ class Muscle5App(MSAApp):
38
29
  Notes
39
30
  -----
40
31
  Alignment ensemble generation is not supported, yet.
41
-
32
+
42
33
  Examples
43
34
  --------
44
35
 
@@ -56,14 +47,14 @@ class Muscle5App(MSAApp):
56
47
  BI-SMITE
57
48
  -I-QLITE
58
49
  """
59
-
50
+
60
51
  def __init__(self, sequences, bin_path="muscle"):
61
52
  major_version = get_version(bin_path)[0]
62
53
  if major_version < 5:
63
54
  raise VersionError(
64
55
  f"At least Muscle 5 is required, got version {major_version}"
65
56
  )
66
-
57
+
67
58
  super().__init__(sequences, bin_path)
68
59
  self._mode = "align"
69
60
  self._consiters = None
@@ -86,7 +77,7 @@ class Muscle5App(MSAApp):
86
77
  self._consiters = consistency
87
78
  if refinement is not None:
88
79
  self._refineiters = refinement
89
-
80
+
90
81
  @requires_state(AppState.CREATED)
91
82
  def set_thread_number(self, number):
92
83
  """
@@ -110,48 +101,49 @@ class Muscle5App(MSAApp):
110
101
  args = [
111
102
  f"-{self._mode}",
112
103
  self.get_input_file_path(),
113
- "-output", self.get_output_file_path(),
104
+ "-output",
105
+ self.get_output_file_path(),
114
106
  ]
115
107
  if self.get_seqtype() == "protein":
116
108
  args += ["-amino"]
117
109
  else:
118
110
  args += ["-nt"]
119
111
  if self._n_threads is not None:
120
- args += ["-threads", str(self._n_threads)]
112
+ args += ["-threads", str(self._n_threads)]
121
113
  if self._consiters is not None:
122
- args += ["-consiters", str(self._consiters)]
114
+ args += ["-consiters", str(self._consiters)]
123
115
  if self._refineiters is not None:
124
- args += ["-refineiters", str(self._refineiters)]
116
+ args += ["-refineiters", str(self._refineiters)]
125
117
  self.set_arguments(args)
126
118
  super().run()
127
-
119
+
128
120
  def clean_up(self):
129
121
  super().clean_up()
130
-
122
+
131
123
  @staticmethod
132
124
  def supports_nucleotide():
133
125
  return True
134
-
126
+
135
127
  @staticmethod
136
128
  def supports_protein():
137
129
  return True
138
-
130
+
139
131
  @staticmethod
140
132
  def supports_custom_nucleotide_matrix():
141
133
  return False
142
-
134
+
143
135
  @staticmethod
144
136
  def supports_custom_protein_matrix():
145
137
  return False
146
-
138
+
147
139
  @classmethod
148
140
  def align(cls, sequences, bin_path="muscle"):
149
141
  """
150
142
  Perform a multiple sequence alignment.
151
-
143
+
152
144
  This is a convenience function, that wraps the :class:`Muscle5App`
153
145
  execution.
154
-
146
+
155
147
  Parameters
156
148
  ----------
157
149
  sequences : iterable object of Sequence
@@ -159,7 +151,7 @@ class Muscle5App(MSAApp):
159
151
  bin_path : str, optional
160
152
  Path of the MSA software binary. By default, the default path
161
153
  will be used.
162
-
154
+
163
155
  Returns
164
156
  -------
165
157
  alignment : Alignment
@@ -15,4 +15,4 @@ writes sequence reads into FASTA format.
15
15
  __name__ = "biotite.application.sra"
16
16
  __author__ = "Patrick Kunzmann"
17
17
 
18
- from .app import *
18
+ from .app import *
@@ -7,17 +7,21 @@ __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["FastaDumpApp", "FastqDumpApp"]
8
8
 
9
9
  import abc
10
- from os.path import join
11
- from subprocess import Popen, SubprocessError, PIPE, TimeoutExpired
12
10
  import glob
11
+ from os.path import join
12
+ from subprocess import PIPE, Popen, SubprocessError, TimeoutExpired
13
13
  from tempfile import TemporaryDirectory
14
- from ..application import Application, AppState, AppStateError, \
15
- requires_state
16
- from ...sequence.seqtypes import NucleotideSequence
17
- from ...sequence.io.fastq.file import FastqFile
18
- from ...sequence.io.fasta.file import FastaFile
19
- from ...sequence.io.fastq.convert import get_sequences as get_sequences_and_scores
20
- from ...sequence.io.fasta.convert import get_sequences
14
+ from biotite.application.application import (
15
+ Application,
16
+ AppState,
17
+ AppStateError,
18
+ requires_state,
19
+ )
20
+ from biotite.sequence.io.fasta.convert import get_sequences
21
+ from biotite.sequence.io.fasta.file import FastaFile
22
+ from biotite.sequence.io.fastq.convert import get_sequences as get_sequences_and_scores
23
+ from biotite.sequence.io.fastq.file import FastqFile
24
+ from biotite.sequence.seqtypes import NucleotideSequence
21
25
 
22
26
 
23
27
  # Do not use LocalApp, as two programs are executed
@@ -48,8 +52,13 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
48
52
  the score format.
49
53
  """
50
54
 
51
- def __init__(self, uid, output_path_prefix=None,
52
- prefetch_path="prefetch", fasterq_dump_path="fasterq-dump"):
55
+ def __init__(
56
+ self,
57
+ uid,
58
+ output_path_prefix=None,
59
+ prefetch_path="prefetch",
60
+ fasterq_dump_path="fasterq-dump",
61
+ ):
53
62
  super().__init__()
54
63
  self._prefetch_path = prefetch_path
55
64
  self._fasterq_dump_path = fasterq_dump_path
@@ -62,21 +71,16 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
62
71
  self._prefetch_process = None
63
72
  self._fasterq_dump_process = None
64
73
 
65
-
66
74
  @requires_state(AppState.RUNNING | AppState.FINISHED)
67
75
  def join(self, timeout=None):
68
76
  # Override method as repetitive calls of 'is_finished()'
69
77
  # are not necessary as 'communicate()' already waits for the
70
78
  # finished application
71
79
  try:
72
- _, self._stderr = self._process.communicate(
73
- timeout=timeout
74
- )
80
+ _, self._stderr = self._process.communicate(timeout=timeout)
75
81
  except TimeoutExpired:
76
82
  self.cancel()
77
- raise TimeoutError(
78
- f"The application expired its timeout ({timeout:.1f} s)"
79
- )
83
+ raise TimeoutError(f"The application expired its timeout ({timeout:.1f} s)")
80
84
  self._state = AppState.FINISHED
81
85
 
82
86
  try:
@@ -90,7 +94,6 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
90
94
  self._state = AppState.JOINED
91
95
  self.clean_up()
92
96
 
93
-
94
97
  def run(self):
95
98
  # Prefetch into a temp directory with file name equaling UID
96
99
  # This ensures that the ID in the header is not the temp prefix
@@ -105,16 +108,14 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
105
108
  command, stdout=PIPE, stderr=PIPE, shell=True, encoding="UTF-8"
106
109
  )
107
110
 
108
-
109
111
  def is_finished(self):
110
112
  code = self._process.poll()
111
- if code == None:
113
+ if code is None:
112
114
  return False
113
115
  else:
114
- _, self._stderr = self._process.communicate()
116
+ _, self._stderr = self._process.communicate()
115
117
  return True
116
118
 
117
-
118
119
  def evaluate(self):
119
120
  super().evaluate()
120
121
  # Check if application terminated correctly
@@ -128,26 +129,24 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
128
129
 
129
130
  self._file_names = (
130
131
  # For entries with one read per spot
131
- glob.glob(self._prefix + ".fastq") +
132
+ glob.glob(self._prefix + ".fastq")
133
+ +
132
134
  # For entries with multiple reads per spot
133
135
  glob.glob(self._prefix + "_*.fastq")
134
136
  )
135
137
  # Only load FASTQ files into memory when needed
136
138
  self._fastq_files = None
137
139
 
138
-
139
140
  def wait_interval(self):
140
141
  # Not used in this implementation of 'join()'
141
142
  raise NotImplementedError()
142
143
 
143
-
144
144
  def clean_up(self):
145
145
  if self.get_app_state() == AppState.CANCELLED:
146
146
  self._process.kill()
147
147
  # Directory with temp files does not need to be deleted,
148
148
  # as temp dir is automatically deleted upon object destruction
149
149
 
150
-
151
150
  @requires_state(AppState.CREATED)
152
151
  def get_prefetch_options(self):
153
152
  """
@@ -176,7 +175,6 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
176
175
  """
177
176
  return ""
178
177
 
179
-
180
178
  @requires_state(AppState.JOINED)
181
179
  def get_file_paths(self):
182
180
  """
@@ -189,7 +187,6 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
189
187
  """
190
188
  return self._file_names
191
189
 
192
-
193
190
  @requires_state(AppState.JOINED)
194
191
  @abc.abstractmethod
195
192
  def get_sequences(self):
@@ -236,15 +233,18 @@ class FastqDumpApp(_DumpApp):
236
233
  the score format.
237
234
  """
238
235
 
239
- def __init__(self, uid, output_path_prefix=None, prefetch_path="prefetch",
240
- fasterq_dump_path="fasterq-dump", offset="Sanger"):
241
- super().__init__(
242
- uid, output_path_prefix, prefetch_path, fasterq_dump_path
243
- )
236
+ def __init__(
237
+ self,
238
+ uid,
239
+ output_path_prefix=None,
240
+ prefetch_path="prefetch",
241
+ fasterq_dump_path="fasterq-dump",
242
+ offset="Sanger",
243
+ ):
244
+ super().__init__(uid, output_path_prefix, prefetch_path, fasterq_dump_path)
244
245
  self._offset = offset
245
246
  self._fastq_files = None
246
247
 
247
-
248
248
  @requires_state(AppState.JOINED)
249
249
  def get_fastq(self):
250
250
  """
@@ -265,20 +265,16 @@ class FastqDumpApp(_DumpApp):
265
265
  ]
266
266
  return self._fastq_files
267
267
 
268
-
269
268
  @requires_state(AppState.JOINED)
270
269
  def get_sequences(self):
271
270
  return [
272
271
  {
273
- header: NucleotideSequence(
274
- seq_str.replace("U","T").replace("X","N")
275
- )
272
+ header: NucleotideSequence(seq_str.replace("U", "T").replace("X", "N"))
276
273
  for header, (seq_str, _) in fastq_file.items()
277
274
  }
278
275
  for fastq_file in self.get_fastq()
279
276
  ]
280
277
 
281
-
282
278
  @requires_state(AppState.JOINED)
283
279
  def get_sequences_and_scores(self):
284
280
  """
@@ -294,15 +290,17 @@ class FastqDumpApp(_DumpApp):
294
290
  Each item in the list is a dictionary mapping identifiers to its
295
291
  corresponding sequence and score values.
296
292
  """
297
- return [
298
- get_sequences_and_scores(fastq_file)
299
- for fastq_file in self.get_fastq()
300
- ]
301
-
293
+ return [get_sequences_and_scores(fastq_file) for fastq_file in self.get_fastq()]
302
294
 
303
295
  @classmethod
304
- def fetch(cls, uid, output_path_prefix=None, prefetch_path="prefetch",
305
- fasterq_dump_path="fasterq-dump", offset="Sanger"):
296
+ def fetch(
297
+ cls,
298
+ uid,
299
+ output_path_prefix=None,
300
+ prefetch_path="prefetch",
301
+ fasterq_dump_path="fasterq-dump",
302
+ offset="Sanger",
303
+ ):
306
304
  """
307
305
  Get the sequences belonging to the UID from the
308
306
  *NCBI sequence read archive* (SRA).
@@ -338,9 +336,7 @@ class FastqDumpApp(_DumpApp):
338
336
  Each item in the list is a dictionary mapping identifiers to its
339
337
  corresponding sequence.
340
338
  """
341
- app = cls(
342
- uid, output_path_prefix, prefetch_path, fasterq_dump_path, offset
343
- )
339
+ app = cls(uid, output_path_prefix, prefetch_path, fasterq_dump_path, offset)
344
340
  app.start()
345
341
  app.join()
346
342
  return app.get_sequences()
@@ -368,14 +364,16 @@ class FastaDumpApp(_DumpApp):
368
364
  respectively.
369
365
  """
370
366
 
371
- def __init__(self, uid, output_path_prefix=None, prefetch_path="prefetch",
372
- fasterq_dump_path="fasterq-dump"):
373
- super().__init__(
374
- uid, output_path_prefix, prefetch_path, fasterq_dump_path
375
- )
367
+ def __init__(
368
+ self,
369
+ uid,
370
+ output_path_prefix=None,
371
+ prefetch_path="prefetch",
372
+ fasterq_dump_path="fasterq-dump",
373
+ ):
374
+ super().__init__(uid, output_path_prefix, prefetch_path, fasterq_dump_path)
376
375
  self._fasta_files = None
377
376
 
378
-
379
377
  @requires_state(AppState.CREATED)
380
378
  def get_prefetch_options(self):
381
379
  return
@@ -383,12 +381,10 @@ class FastaDumpApp(_DumpApp):
383
381
  # when https://github.com/ncbi/sra-tools/issues/883 is resolved
384
382
  # return "--eliminate-quals"
385
383
 
386
-
387
384
  @requires_state(AppState.CREATED)
388
385
  def get_fastq_dump_options(self):
389
386
  return "--fasta"
390
387
 
391
-
392
388
  @requires_state(AppState.JOINED)
393
389
  def get_fasta(self):
394
390
  """
@@ -404,20 +400,22 @@ class FastaDumpApp(_DumpApp):
404
400
  """
405
401
  if self._fasta_files is None:
406
402
  self._fasta_files = [
407
- FastaFile.read(file_name)
408
- for file_name in self.get_file_paths()
403
+ FastaFile.read(file_name) for file_name in self.get_file_paths()
409
404
  ]
410
405
  return self._fasta_files
411
406
 
412
-
413
407
  @requires_state(AppState.JOINED)
414
408
  def get_sequences(self):
415
409
  return [get_sequences(fasta_file) for fasta_file in self.get_fasta()]
416
410
 
417
-
418
411
  @classmethod
419
- def fetch(cls, uid, output_path_prefix=None, prefetch_path="prefetch",
420
- fasterq_dump_path="fasterq-dump"):
412
+ def fetch(
413
+ cls,
414
+ uid,
415
+ output_path_prefix=None,
416
+ prefetch_path="prefetch",
417
+ fasterq_dump_path="fasterq-dump",
418
+ ):
421
419
  """
422
420
  Get the sequences belonging to the UID from the
423
421
  *NCBI sequence read archive* (SRA).
@@ -448,9 +446,7 @@ class FastaDumpApp(_DumpApp):
448
446
  Each item in the list is a dictionary mapping identifiers to its
449
447
  corresponding sequence.
450
448
  """
451
- app = cls(
452
- uid, output_path_prefix, prefetch_path, fasterq_dump_path
453
- )
449
+ app = cls(uid, output_path_prefix, prefetch_path, fasterq_dump_path)
454
450
  app.start()
455
451
  app.join()
456
- return app.get_sequences()
452
+ return app.get_sequences()
@@ -9,4 +9,4 @@ A subpackage for masking sequence regions using the *tantan* software.
9
9
  __name__ = "biotite.application.tantan"
10
10
  __author__ = "Patrick Kunzmann"
11
11
 
12
- from .app import *
12
+ from .app import *
@@ -6,17 +6,15 @@ __name__ = "biotite.application.tantan"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["TantanApp"]
8
8
 
9
- from collections.abc import Sequence as SequenceABC
10
9
  import io
10
+ from collections.abc import Sequence as SequenceABC
11
11
  from tempfile import NamedTemporaryFile
12
12
  import numpy as np
13
- from ..localapp import LocalApp, cleanup_tempfile
14
- from ..application import AppState, requires_state
15
- from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
16
- from ...sequence.alphabet import common_alphabet
17
- from ...sequence.io.fasta.file import FastaFile
18
- from ..util import map_sequence, map_matrix
19
-
13
+ from biotite.application.application import AppState, requires_state
14
+ from biotite.application.localapp import LocalApp, cleanup_tempfile
15
+ from biotite.sequence.alphabet import common_alphabet
16
+ from biotite.sequence.io.fasta.file import FastaFile
17
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
20
18
 
21
19
  MASKING_LETTER = "!"
22
20
 
@@ -43,7 +41,7 @@ class TantanApp(LocalApp):
43
41
 
44
42
  References
45
43
  ----------
46
-
44
+
47
45
  .. footbibliography::
48
46
 
49
47
  Examples
@@ -59,10 +57,10 @@ class TantanApp(LocalApp):
59
57
  True True True True True True True True False False False False
60
58
  False]
61
59
  >>> print(sequence, "\n" + "".join(["^" if e else " " for e in repeat_mask]))
62
- GGCATCGATATATATATATAGTCAA
63
- ^^^^^^^^^^^
60
+ GGCATCGATATATATATATAGTCAA
61
+ ^^^^^^^^^^^
64
62
  """
65
-
63
+
66
64
  def __init__(self, sequence, matrix=None, bin_path="tantan"):
67
65
  super().__init__(bin_path)
68
66
 
@@ -93,59 +91,43 @@ class TantanApp(LocalApp):
93
91
  )
94
92
  self._is_protein = True
95
93
  else:
96
- raise TypeError(
97
- "A NucleotideSequence or ProteinSequence is required"
98
- )
99
-
94
+ raise TypeError("A NucleotideSequence or ProteinSequence is required")
95
+
100
96
  if matrix is None:
101
97
  self._matrix_file = None
102
98
  else:
103
- common_alph = common_alphabet(
104
- (seq.alphabet for seq in self._sequences)
105
- )
99
+ common_alph = common_alphabet((seq.alphabet for seq in self._sequences))
106
100
  if common_alph is None:
107
- raise ValueError(
108
- "There is no common alphabet within the sequences"
109
- )
101
+ raise ValueError("There is no common alphabet within the sequences")
110
102
  if not matrix.get_alphabet1().extends(common_alph):
111
103
  raise ValueError(
112
104
  "The alphabet of the sequence(s) do not fit the matrix"
113
105
  )
114
106
  if not matrix.is_symmetric():
115
107
  raise ValueError("A symmetric matrix is required")
116
- self._matrix_file = NamedTemporaryFile(
117
- "w", suffix=".mat", delete=False
118
- )
108
+ self._matrix_file = NamedTemporaryFile("w", suffix=".mat", delete=False)
119
109
  self._matrix = matrix
120
-
121
- self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False)
122
110
 
111
+ self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False)
123
112
 
124
113
  def run(self):
125
114
  FastaFile.write_iter(
126
115
  self._in_file,
127
- (
128
- (f"sequence_{i:d}", str(seq))
129
- for i, seq in enumerate(self._sequences)
130
- )
116
+ ((f"sequence_{i:d}", str(seq)) for i, seq in enumerate(self._sequences)),
131
117
  )
132
118
  self._in_file.flush()
133
119
  if self._matrix is not None:
134
120
  self._matrix_file.write(str(self._matrix))
135
121
  self._matrix_file.flush()
136
-
122
+
137
123
  args = []
138
124
  if self._matrix is not None:
139
125
  args += ["-m", self._matrix_file.name]
140
126
  if self._is_protein:
141
- args += ["-p"]
142
- args += [
143
- "-x", MASKING_LETTER,
144
- self._in_file.name
145
- ]
127
+ args += ["-p"]
128
+ args += ["-x", MASKING_LETTER, self._in_file.name]
146
129
  self.set_arguments(args)
147
130
  super().run()
148
-
149
131
 
150
132
  def evaluate(self):
151
133
  super().evaluate()
@@ -154,18 +136,14 @@ class TantanApp(LocalApp):
154
136
  self._masks = []
155
137
  encoded_masking_letter = MASKING_LETTER.encode("ASCII")[0]
156
138
  for _, masked_seq_string in FastaFile.read_iter(out_file):
157
- array = np.frombuffer(
158
- masked_seq_string.encode("ASCII"), dtype=np.ubyte
159
- )
139
+ array = np.frombuffer(masked_seq_string.encode("ASCII"), dtype=np.ubyte)
160
140
  self._masks.append(array == encoded_masking_letter)
161
-
162
141
 
163
142
  def clean_up(self):
164
143
  super().clean_up()
165
144
  cleanup_tempfile(self._in_file)
166
145
  if self._matrix_file is not None:
167
146
  cleanup_tempfile(self._matrix_file)
168
-
169
147
 
170
148
  @requires_state(AppState.JOINED)
171
149
  def get_mask(self):
@@ -186,7 +164,6 @@ class TantanApp(LocalApp):
186
164
  else:
187
165
  return self._masks[0]
188
166
 
189
-
190
167
  @staticmethod
191
168
  def mask_repeats(sequence, matrix=None, bin_path="tantan"):
192
169
  """
@@ -219,4 +196,4 @@ class TantanApp(LocalApp):
219
196
  app = TantanApp(sequence, matrix, bin_path)
220
197
  app.start()
221
198
  app.join()
222
- return app.get_mask()
199
+ return app.get_mask()
@@ -8,15 +8,15 @@ __all__ = ["map_sequence", "map_matrix"]
8
8
 
9
9
 
10
10
  import numpy as np
11
- from ..sequence.seqtypes import ProteinSequence
12
- from ..sequence.align.matrix import SubstitutionMatrix
11
+ from biotite.sequence.align.matrix import SubstitutionMatrix
12
+ from biotite.sequence.seqtypes import ProteinSequence
13
13
 
14
14
 
15
15
  def map_sequence(sequence):
16
16
  """
17
17
  Map a sequence with an arbitrary alphabet into a
18
18
  :class:`ProteinSequence`, in order to support arbitrary sequence
19
- types in software that can handle protein sequences.
19
+ types in software that can handle protein sequences.
20
20
  """
21
21
  if len(sequence.alphabet) > len(ProteinSequence.alphabet):
22
22
  # Cannot map into a protein sequence if the alphabet
@@ -39,12 +39,11 @@ def map_matrix(matrix):
39
39
  Map a :class:`SubstitutionMatrix` with an arbitrary alphabet into a
40
40
  :class:`SubstitutionMatrix` for protein sequences, in order to support
41
41
  arbitrary sequence types in software that can handle protein
42
- sequences.
42
+ sequences.
43
43
  """
44
44
  if matrix is None:
45
45
  raise TypeError(
46
- "A substitution matrix must be provided for custom "
47
- "sequence types"
46
+ "A substitution matrix must be provided for custom " "sequence types"
48
47
  )
49
48
  # Create a protein substitution matrix with the values taken
50
49
  # from the original matrix
@@ -54,6 +53,5 @@ def map_matrix(matrix):
54
53
  new_score_matrix = np.zeros((new_length, new_length))
55
54
  new_score_matrix[:old_length, :old_length] = matrix.score_matrix()
56
55
  return SubstitutionMatrix(
57
- ProteinSequence.alphabet, ProteinSequence.alphabet,
58
- new_score_matrix
59
- )
56
+ ProteinSequence.alphabet, ProteinSequence.alphabet, new_score_matrix
57
+ )