biotite 0.41.2__cp310-cp310-win_amd64.whl → 1.0.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205)
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +1 -1
  3. biotite/application/application.py +20 -10
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +3 -3
  52. biotite/sequence/align/__init__.py +2 -2
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
  60. biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
  63. biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
  66. biotite/sequence/align/multiple.pyx +1 -1
  67. biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
  68. biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
  74. biotite/sequence/alphabet.py +51 -65
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
  102. biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
  103. biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +15 -17
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +246 -236
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
  112. biotite/structure/bonds.pyx +29 -32
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cp310-win_amd64.pyd +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +83 -78
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +4 -5
  130. biotite/structure/info/groups.py +1 -3
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -44
  159. biotite/structure/io/pdbx/cif.py +140 -110
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +260 -258
  162. biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
  163. biotite/structure/io/trajfile.py +90 -107
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
  184. biotite-1.0.1.dist-info/RECORD +322 -0
  185. biotite/structure/io/ctab.py +0 -72
  186. biotite/structure/io/mmtf/__init__.py +0 -21
  187. biotite/structure/io/mmtf/assembly.py +0 -214
  188. biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
  189. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  190. biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
  191. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  192. biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
  193. biotite/structure/io/mmtf/decode.pyx +0 -152
  194. biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
  195. biotite/structure/io/mmtf/encode.pyx +0 -183
  196. biotite/structure/io/mmtf/file.py +0 -233
  197. biotite/structure/io/npz/__init__.py +0 -20
  198. biotite/structure/io/npz/file.py +0 -152
  199. biotite/structure/io/pdbx/legacy.py +0 -267
  200. biotite/structure/io/tng/__init__.py +0 -13
  201. biotite/structure/io/tng/file.py +0 -46
  202. biotite/temp.py +0 -86
  203. biotite-0.41.2.dist-info/RECORD +0 -340
  204. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
  205. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
@@ -18,5 +18,5 @@ getting/setting directly :class:`Sequence` objects, rather than strings.
18
18
  __name__ = "biotite.sequence.io.fasta"
19
19
  __author__ = "Patrick Kunzmann"
20
20
 
21
+ from .convert import *
21
22
  from .file import *
22
- from .convert import *
@@ -7,13 +7,18 @@ __author__ = "Patrick Kunzmann"
7
7
 
8
8
  import warnings
9
9
  from collections import OrderedDict
10
- from ...sequence import Sequence
11
- from ...alphabet import AlphabetError, LetterAlphabet
12
- from ...seqtypes import NucleotideSequence, ProteinSequence
13
- from ...align.alignment import Alignment
10
+ from biotite.sequence.align.alignment import Alignment
11
+ from biotite.sequence.alphabet import AlphabetError, LetterAlphabet
12
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
14
13
 
15
- __all__ = ["get_sequence", "get_sequences", "set_sequence", "set_sequences",
16
- "get_alignment", "set_alignment"]
14
+ __all__ = [
15
+ "get_sequence",
16
+ "get_sequences",
17
+ "set_sequence",
18
+ "set_sequences",
19
+ "get_alignment",
20
+ "set_alignment",
21
+ ]
17
22
 
18
23
 
19
24
  def get_sequence(fasta_file, header=None, seq_type=None):
@@ -180,8 +185,10 @@ def get_alignment(fasta_file, additional_gap_chars=("_",), seq_type=None):
180
185
  for i, seq_str in enumerate(seq_strings):
181
186
  seq_strings[i] = seq_str.replace(char, "-")
182
187
  # Remove gaps for creation of sequences
183
- sequences = [_convert_to_sequence(seq_str.replace("-",""), seq_type)
184
- for seq_str in seq_strings]
188
+ sequences = [
189
+ _convert_to_sequence(seq_str.replace("-", ""), seq_type)
190
+ for seq_str in seq_strings
191
+ ]
185
192
  trace = Alignment.trace_from_strings(seq_strings)
186
193
  return Alignment(sequences, trace, score=None)
187
194
 
@@ -212,44 +219,29 @@ def set_alignment(fasta_file, alignment, seq_names):
212
219
 
213
220
 
214
221
  def _convert_to_sequence(seq_str, seq_type=None):
215
-
216
- # Define preprocessing of preimplemented sequence types
217
-
218
- # Replace selenocysteine with cysteine
219
- # and pyrrolysine with lysine
220
- process_protein_sequence = (
221
- lambda x : x.upper().replace("U", "C").replace("O", "K")
222
- )
223
- # For nucleotides uracil is represented by thymine and there is only
224
- # one letter for completely unknown nucleotides
225
- process_nucleotide_sequence = (
226
- lambda x : x.upper().replace("U","T").replace("X","N")
227
- )
228
-
229
222
  # Set manually selected sequence type
230
-
231
223
  if seq_type is not None:
232
224
  # Do preprocessing as done without manual selection
233
225
  if seq_type == NucleotideSequence:
234
- seq_str = process_nucleotide_sequence(seq_str)
226
+ seq_str = _process_nucleotide_sequence(seq_str)
235
227
  elif seq_type == ProteinSequence:
236
228
  if "U" in seq_str:
237
229
  warnings.warn(
238
230
  "ProteinSequence objects do not support selenocysteine "
239
231
  "(U), occurrences were substituted by cysteine (C)"
240
232
  )
241
- seq_str = process_protein_sequence(seq_str)
233
+ seq_str = _process_protein_sequence(seq_str)
242
234
  # Return the converted sequence
243
235
  return seq_type(seq_str)
244
236
 
245
237
  # Attempt to automatically determine sequence type
246
238
 
247
239
  try:
248
- return NucleotideSequence(process_nucleotide_sequence(seq_str))
240
+ return NucleotideSequence(_process_nucleotide_sequence(seq_str))
249
241
  except AlphabetError:
250
242
  pass
251
243
  try:
252
- prot_seq = ProteinSequence(process_protein_sequence(seq_str))
244
+ prot_seq = ProteinSequence(_process_protein_sequence(seq_str))
253
245
  # Raise Warning after conversion into 'ProteinSequence'
254
246
  # to wait for potential 'AlphabetError'
255
247
  if "U" in seq_str:
@@ -259,15 +251,34 @@ def _convert_to_sequence(seq_str, seq_type=None):
259
251
  )
260
252
  return prot_seq
261
253
  except AlphabetError:
262
- raise ValueError("FASTA data cannot be converted either to "
263
- "'NucleotideSequence' nor to 'ProteinSequence'")
254
+ raise ValueError(
255
+ "FASTA data cannot be converted either to "
256
+ "'NucleotideSequence' nor to 'ProteinSequence'"
257
+ )
258
+
259
+
260
+ def _process_protein_sequence(x):
261
+ """
262
+ Replace selenocysteine with cysteine and pyrrolysine with lysine.
263
+ """
264
+ return x.upper().replace("U", "C").replace("O", "K")
265
+
266
+
267
+ def _process_nucleotide_sequence(x):
268
+ """
269
+ For nucleotides uracil is represented by thymine and there is only
270
+ one letter for completely unknown nucleotides
271
+ """
272
+ return x.upper().replace("U", "T").replace("X", "N")
264
273
 
265
274
 
266
275
  def _convert_to_string(sequence, as_rna):
267
276
  if not isinstance(sequence.get_alphabet(), LetterAlphabet):
268
- raise ValueError("Only sequences using single letter alphabets "
269
- "can be stored in a FASTA file")
277
+ raise ValueError(
278
+ "Only sequences using single letter alphabets "
279
+ "can be stored in a FASTA file"
280
+ )
270
281
  if isinstance(sequence, NucleotideSequence) and as_rna:
271
- return(str(sequence).replace("T", "U"))
282
+ return str(sequence).replace("T", "U")
272
283
  else:
273
- return(str(sequence))
284
+ return str(sequence)
@@ -6,21 +6,21 @@ __name__ = "biotite.sequence.io.fasta"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["FastaFile"]
8
8
 
9
- from ....file import TextFile, InvalidFileError, wrap_string
10
9
  from collections import OrderedDict
11
10
  from collections.abc import MutableMapping
11
+ from biotite.file import InvalidFileError, TextFile, wrap_string
12
12
 
13
13
 
14
14
  class FastaFile(TextFile, MutableMapping):
15
15
  """
16
16
  This class represents a file in FASTA format.
17
-
17
+
18
18
  A FASTA file contains so called *header* lines, beginning with
19
19
  ``>``, that describe following sequence.
20
20
  The corresponding sequence starts at the line after the header line
21
21
  and ends at the next header line or at the end of file.
22
22
  The header along with its sequence forms an entry.
23
-
23
+
24
24
  This class is used in a dictionary like manner, implementing the
25
25
  :class:`MutableMapping` interface:
26
26
  Headers (without the leading ``>``) are used as keys,
@@ -35,10 +35,10 @@ class FastaFile(TextFile, MutableMapping):
35
35
  after which a line break is inserted.
36
36
  Only relevant, when adding sequences to a file.
37
37
  Default is 80.
38
-
38
+
39
39
  Examples
40
40
  --------
41
-
41
+
42
42
  >>> import os.path
43
43
  >>> file = FastaFile()
44
44
  >>> file["seq1"] = "ATACT"
@@ -61,17 +61,17 @@ class FastaFile(TextFile, MutableMapping):
61
61
  {'seq2': 'AAAATT'}
62
62
  >>> file.write(os.path.join(path_to_directory, "test.fasta"))
63
63
  """
64
-
64
+
65
65
  def __init__(self, chars_per_line=80):
66
66
  super().__init__()
67
67
  self._chars_per_line = chars_per_line
68
68
  self._entries = OrderedDict()
69
-
69
+
70
70
  @classmethod
71
71
  def read(cls, file, chars_per_line=80):
72
72
  """
73
73
  Read a FASTA file.
74
-
74
+
75
75
  Parameters
76
76
  ----------
77
77
  file : file-like object or str
@@ -82,7 +82,7 @@ class FastaFile(TextFile, MutableMapping):
82
82
  after which a line break is inserted.
83
83
  Only relevant, when adding sequences to a file.
84
84
  Default is 80.
85
-
85
+
86
86
  Returns
87
87
  -------
88
88
  file_object : FastaFile
@@ -90,24 +90,23 @@ class FastaFile(TextFile, MutableMapping):
90
90
  """
91
91
  file = super().read(file, chars_per_line)
92
92
  # Filter out empty and comment lines
93
- file.lines = [line for line in file.lines
94
- if len(line.strip()) != 0 and line[0] != ";"]
93
+ file.lines = [
94
+ line for line in file.lines if len(line.strip()) != 0 and line[0] != ";"
95
+ ]
95
96
  if len(file.lines) == 0:
96
97
  raise InvalidFileError("File is empty or contains only comments")
97
98
  file._find_entries()
98
99
  return file
99
-
100
+
100
101
  def __setitem__(self, header, seq_str):
101
102
  if not isinstance(header, str):
102
- raise IndexError(
103
- "'FastaFile' only supports header strings as keys"
104
- )
103
+ raise IndexError("'FastaFile' only supports header strings as keys")
105
104
  if not isinstance(seq_str, str):
106
- raise TypeError("'FastaFile' only supports sequence strings "
107
- "as values")
105
+ raise TypeError("'FastaFile' only supports sequence strings " "as values")
108
106
  # Create lines for new header and sequence (with line breaks)
109
- new_lines = [">" + header.replace("\n","").strip()] + \
110
- wrap_string(seq_str, width=self._chars_per_line)
107
+ new_lines = [">" + header.replace("\n", "").strip()] + wrap_string(
108
+ seq_str, width=self._chars_per_line
109
+ )
111
110
  if header in self:
112
111
  # Delete lines of entry corresponding to the header,
113
112
  # if existing
@@ -118,83 +117,75 @@ class FastaFile(TextFile, MutableMapping):
118
117
  # Simply append lines
119
118
  # Add entry in a more efficient way than '_find_entries()'
120
119
  # for this simple case
121
- self._entries[header] = (
122
- len(self.lines),
123
- len(self.lines) + len(new_lines)
124
- )
120
+ self._entries[header] = (len(self.lines), len(self.lines) + len(new_lines))
125
121
  self.lines += new_lines
126
-
122
+
127
123
  def __getitem__(self, header):
128
124
  if not isinstance(header, str):
129
- raise IndexError(
130
- "'FastaFile' only supports header strings as keys"
131
- )
125
+ raise IndexError("'FastaFile' only supports header strings as keys")
132
126
  start, stop = self._entries[header]
133
127
  # Concatenate sequence string from following lines
134
- seq_string = "".join(
135
- [line.strip() for line in self.lines[start+1 : stop]]
136
- )
128
+ seq_string = "".join([line.strip() for line in self.lines[start + 1 : stop]])
137
129
  return seq_string
138
-
130
+
139
131
  def __delitem__(self, header):
140
132
  start, stop = self._entries[header]
141
133
  del self.lines[start:stop]
142
134
  del self._entries[header]
143
135
  self._find_entries()
144
-
136
+
145
137
  def __len__(self):
146
138
  return len(self._entries)
147
-
139
+
148
140
  def __iter__(self):
149
141
  return self._entries.__iter__()
150
-
142
+
151
143
  def __contains__(self, identifer):
152
144
  return identifer in self._entries
153
-
145
+
154
146
  def _find_entries(self):
155
147
  if len(self.lines) > 0 and self.lines[0][0] != ">":
156
148
  raise InvalidFileError(
157
149
  f"File starts with '{self.lines[0][0]}' instead of '>'"
158
150
  )
159
-
151
+
160
152
  header_i = []
161
153
  for i, line in enumerate(self.lines):
162
154
  if line[0] == ">":
163
155
  header_i.append(i)
164
-
156
+
165
157
  self._entries = OrderedDict()
166
158
  for j in range(len(header_i)):
167
159
  # Remove leading '>' from header
168
160
  header = self.lines[header_i[j]].strip()[1:]
169
161
  start = header_i[j]
170
- if j < len(header_i) -1:
162
+ if j < len(header_i) - 1:
171
163
  # Header in mid or start of file
172
164
  # -> stop is start of next header
173
- stop = header_i[j+1]
165
+ stop = header_i[j + 1]
174
166
  else:
175
167
  # Last header -> entry stops at end of file
176
168
  stop = len(self.lines)
177
169
  self._entries[header] = (start, stop)
178
170
 
179
-
180
171
  @staticmethod
181
172
  def read_iter(file):
182
173
  """
183
174
  Create an iterator over each sequence of the given FASTA file.
184
-
175
+
185
176
  Parameters
186
177
  ----------
187
178
  file : file-like object or str
188
179
  The file to be read.
189
180
  Alternatively a file path can be supplied.
190
-
181
+
191
182
  Yields
192
183
  ------
193
184
  header : str
194
185
  The header of the current sequence.
195
186
  seq_str : str
196
187
  The current sequence as string.
197
-
188
+
198
189
  Notes
199
190
  -----
200
191
  This approach gives the same results as
@@ -221,7 +212,6 @@ class FastaFile(TextFile, MutableMapping):
221
212
  # Yield final entry
222
213
  if header is not None:
223
214
  yield header, "".join(seq_str_list)
224
-
225
215
 
226
216
  @staticmethod
227
217
  def write_iter(file, items, chars_per_line=80):
@@ -235,7 +225,7 @@ class FastaFile(TextFile, MutableMapping):
235
225
  Hence, this static method may save a large amount of memory if
236
226
  a large file should be written, especially if the `items`
237
227
  are provided as generator.
238
-
228
+
239
229
  Parameters
240
230
  ----------
241
231
  file : file-like object or str
@@ -256,23 +246,20 @@ class FastaFile(TextFile, MutableMapping):
256
246
  This method does not test, whether the given identifiers are
257
247
  unambiguous.
258
248
  """
249
+
259
250
  def line_generator():
260
251
  for item in items:
261
252
  header, seq_str = item
262
253
  if not isinstance(header, str):
263
- raise IndexError(
264
- "'FastaFile' only supports header strings"
265
- )
254
+ raise IndexError("'FastaFile' only supports header strings")
266
255
  if not isinstance(seq_str, str):
267
- raise TypeError(
268
- "'FastaFile' only supports sequence strings"
269
- )
270
-
256
+ raise TypeError("'FastaFile' only supports sequence strings")
257
+
271
258
  # Yield header line
272
- yield ">" + header.replace("\n","").strip()
259
+ yield ">" + header.replace("\n", "").strip()
273
260
 
274
261
  # Yield sequence line(s)
275
262
  for line in wrap_string(seq_str, width=chars_per_line):
276
263
  yield line
277
-
278
- TextFile.write_iter(file, line_generator())
264
+
265
+ TextFile.write_iter(file, line_generator())
@@ -15,5 +15,5 @@ values.
15
15
  __name__ = "biotite.sequence.io.fastq"
16
16
  __author__ = "Patrick Kunzmann"
17
17
 
18
+ from .convert import *
18
19
  from .file import *
19
- from .convert import *
@@ -6,10 +6,7 @@ __name__ = "biotite.sequence.io.fastq"
6
6
  __author__ = "Patrick Kunzmann"
7
7
 
8
8
  from collections import OrderedDict
9
- from ...sequence import Sequence
10
- from ...alphabet import AlphabetError, LetterAlphabet
11
- from ...seqtypes import NucleotideSequence
12
- from ...align.alignment import Alignment
9
+ from biotite.sequence.seqtypes import NucleotideSequence
13
10
 
14
11
  __all__ = ["get_sequence", "get_sequences", "set_sequence", "set_sequences"]
15
12
 
@@ -17,7 +14,7 @@ __all__ = ["get_sequence", "get_sequences", "set_sequence", "set_sequences"]
17
14
  def get_sequence(fastq_file, header=None):
18
15
  """
19
16
  Get a sequence and quality scores from a `FastqFile` instance.
20
-
17
+
21
18
  Parameters
22
19
  ----------
23
20
  fastq_file : FastqFile
@@ -25,7 +22,7 @@ def get_sequence(fastq_file, header=None):
25
22
  header : str, optional
26
23
  The identifier to get the sequence and scores from.
27
24
  By default, the first sequence of the file is returned.
28
-
25
+
29
26
  Returns
30
27
  -------
31
28
  sequence : NucleotideSequence
@@ -43,7 +40,7 @@ def get_sequence(fastq_file, header=None):
43
40
  break
44
41
  if seq_str is None:
45
42
  raise ValueError("File does not contain any sequences")
46
- processed_seq_str = seq_str.replace("U","T").replace("X","N")
43
+ processed_seq_str = seq_str.replace("U", "T").replace("X", "N")
47
44
  return NucleotideSequence(processed_seq_str), scores
48
45
 
49
46
 
@@ -51,12 +48,12 @@ def get_sequences(fastq_file):
51
48
  """
52
49
  Get a dictionary from a `FastqFile` instance,
53
50
  where identifiers are keys and sequence-score-tuples are values.
54
-
51
+
55
52
  Parameters
56
53
  ----------
57
54
  fastq_file : FastqFile
58
55
  The `Fastqile` to be accessed.
59
-
56
+
60
57
  Returns
61
58
  -------
62
59
  seq_dict : dict
@@ -65,7 +62,7 @@ def get_sequences(fastq_file):
65
62
  """
66
63
  seq_dict = OrderedDict()
67
64
  for header, (seq_str, scores) in fastq_file.items():
68
- processed_seq_str = seq_str.replace("U","T").replace("X","N")
65
+ processed_seq_str = seq_str.replace("U", "T").replace("X", "N")
69
66
  seq_dict[header] = NucleotideSequence(processed_seq_str), scores
70
67
  return seq_dict
71
68
 
@@ -73,7 +70,7 @@ def get_sequences(fastq_file):
73
70
  def set_sequence(fastq_file, sequence, scores, header=None, as_rna=False):
74
71
  """
75
72
  Set a sequence and a quality score array in a `FastqFile` instance.
76
-
73
+
77
74
  Parameters
78
75
  ----------
79
76
  fastq_file : FastqFile
@@ -96,7 +93,7 @@ def set_sequence(fastq_file, sequence, scores, header=None, as_rna=False):
96
93
  def set_sequences(fastq_file, sequence_dict, as_rna=False):
97
94
  """
98
95
  Set sequences in a `FastqFile` instance from a dictionary.
99
-
96
+
100
97
  Parameters
101
98
  ----------
102
99
  fastq_file : FastqFile
@@ -115,6 +112,6 @@ def set_sequences(fastq_file, sequence_dict, as_rna=False):
115
112
 
116
113
  def _convert_to_string(sequence, as_rna):
117
114
  if as_rna:
118
- return(str(sequence).replace("T", "U"))
115
+ return str(sequence).replace("T", "U")
119
116
  else:
120
- return(str(sequence))
117
+ return str(sequence)