biotite 0.41.2__cp310-cp310-macosx_11_0_arm64.whl → 1.0.0__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205) hide show
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +1 -1
  3. biotite/application/application.py +20 -10
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +3 -3
  52. biotite/sequence/align/__init__.py +2 -2
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  60. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  66. biotite/sequence/align/multiple.pyx +1 -1
  67. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  68. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  74. biotite/sequence/alphabet.py +51 -65
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  102. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  103. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +15 -17
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +221 -235
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  112. biotite/structure/bonds.pyx +29 -32
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cpython-310-darwin.so +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +82 -77
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +4 -5
  130. biotite/structure/info/groups.py +1 -3
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -44
  159. biotite/structure/io/pdbx/cif.py +64 -62
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +235 -246
  162. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  163. biotite/structure/io/trajfile.py +76 -93
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
  184. biotite-1.0.0.dist-info/RECORD +322 -0
  185. biotite/structure/io/ctab.py +0 -72
  186. biotite/structure/io/mmtf/__init__.py +0 -21
  187. biotite/structure/io/mmtf/assembly.py +0 -214
  188. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  189. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  190. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  191. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  192. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  193. biotite/structure/io/mmtf/decode.pyx +0 -152
  194. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  195. biotite/structure/io/mmtf/encode.pyx +0 -183
  196. biotite/structure/io/mmtf/file.py +0 -233
  197. biotite/structure/io/npz/__init__.py +0 -20
  198. biotite/structure/io/npz/file.py +0 -152
  199. biotite/structure/io/pdbx/legacy.py +0 -267
  200. biotite/structure/io/tng/__init__.py +0 -13
  201. biotite/structure/io/tng/file.py +0 -46
  202. biotite/temp.py +0 -86
  203. biotite-0.41.2.dist-info/RECORD +0 -340
  204. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
  205. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -5,23 +5,21 @@
5
5
  __name__ = "biotite.sequence.io.fastq"
6
6
  __author__ = "Patrick Kunzmann"
7
7
 
8
- import warnings
9
- from numbers import Integral
10
8
  from collections import OrderedDict
11
9
  from collections.abc import MutableMapping
10
+ from numbers import Integral
12
11
  import numpy as np
13
- from ....file import TextFile, InvalidFileError, wrap_string
14
- from ...seqtypes import NucleotideSequence
12
+ from biotite.file import InvalidFileError, TextFile, wrap_string
15
13
 
16
14
  __all__ = ["FastqFile"]
17
15
 
18
16
 
19
17
  _OFFSETS = {
20
- "Sanger" : 33,
21
- "Solexa" : 64,
22
- "Illumina-1.3" : 64,
23
- "Illumina-1.5" : 64,
24
- "Illumina-1.8" : 33,
18
+ "Sanger": 33,
19
+ "Solexa": 64,
20
+ "Illumina-1.3": 64,
21
+ "Illumina-1.5": 64,
22
+ "Illumina-1.8": 33,
25
23
  }
26
24
 
27
25
 
@@ -47,7 +45,7 @@ class FastqFile(TextFile, MutableMapping):
47
45
  An identifier string (without the leading ``@``) is used as index
48
46
  to get and set the corresponding sequence and quality.
49
47
  ``del`` removes an entry in the file.
50
-
48
+
51
49
  Parameters
52
50
  ----------
53
51
  offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}
@@ -61,10 +59,10 @@ class FastqFile(TextFile, MutableMapping):
61
59
  Only relevant, when adding sequences to a file.
62
60
  By default each sequence (and score string)
63
61
  is put into one line.
64
-
62
+
65
63
  Examples
66
64
  --------
67
-
65
+
68
66
  >>> import os.path
69
67
  >>> file = FastqFile(offset="Sanger")
70
68
  >>> file["seq1"] = str(NucleotideSequence("ATACT")), [0,3,10,7,12]
@@ -91,18 +89,18 @@ class FastqFile(TextFile, MutableMapping):
91
89
  0.96=GD
92
90
  >>> file.write(os.path.join(path_to_directory, "test.fastq"))
93
91
  """
94
-
92
+
95
93
  def __init__(self, offset, chars_per_line=None):
96
94
  super().__init__()
97
95
  self._chars_per_line = chars_per_line
98
96
  self._entries = OrderedDict()
99
97
  self._offset = _convert_offset(offset)
100
-
98
+
101
99
  @classmethod
102
100
  def read(cls, file, offset, chars_per_line=None):
103
101
  """
104
102
  Read a FASTQ file.
105
-
103
+
106
104
  Parameters
107
105
  ----------
108
106
  file : file-like object or str
@@ -119,7 +117,7 @@ class FastqFile(TextFile, MutableMapping):
119
117
  Only relevant, when adding sequences to a file.
120
118
  By default each sequence (and score string)
121
119
  is put into one line.
122
-
120
+
123
121
  Returns
124
122
  -------
125
123
  file_object : FastqFile
@@ -134,31 +132,7 @@ class FastqFile(TextFile, MutableMapping):
134
132
  raise InvalidFileError("File is empty")
135
133
  file._find_entries()
136
134
  return file
137
-
138
- def get_sequence(self, identifier):
139
- """
140
- Get the sequence for the specified identifier.
141
-
142
- DEPRECATED: Use :meth:`get_seq_string()` or
143
- :func:`get_sequence()` instead.
144
135
 
145
- Parameters
146
- ----------
147
- identifier : str
148
- The identifier of the sequence.
149
-
150
- Returns
151
- -------
152
- sequence : NucleotideSequence
153
- The sequence corresponding to the identifier.
154
- """
155
- warnings.warn(
156
- "'get_sequence()' is deprecated, use the 'get_seq_string()'"
157
- "method or 'fasta.get_sequence()' function instead",
158
- DeprecationWarning
159
- )
160
- return NucleotideSequence(self.get_seq_string(identifier))
161
-
162
136
  def get_seq_string(self, identifier):
163
137
  """
164
138
  Get the string representing the sequence for the specified
@@ -168,22 +142,19 @@ class FastqFile(TextFile, MutableMapping):
168
142
  ----------
169
143
  identifier : str
170
144
  The identifier of the sequence.
171
-
145
+
172
146
  Returns
173
147
  -------
174
148
  sequence : str
175
149
  The sequence corresponding to the identifier.
176
150
  """
177
151
  if not isinstance(identifier, str):
178
- raise IndexError(
179
- "'FastqFile' only supports identifier strings as keys"
180
- )
181
- seq_start, seq_stop, score_start, score_stop \
182
- = self._entries[identifier]
152
+ raise IndexError("'FastqFile' only supports identifier strings as keys")
153
+ seq_start, seq_stop, score_start, score_stop = self._entries[identifier]
183
154
  # Concatenate sequence string from the sequence lines
184
- seq_str = "".join(self.lines[seq_start : seq_stop])
155
+ seq_str = "".join(self.lines[seq_start:seq_stop])
185
156
  return seq_str
186
-
157
+
187
158
  def get_quality(self, identifier):
188
159
  """
189
160
  Get the quality scores for the specified identifier.
@@ -192,24 +163,20 @@ class FastqFile(TextFile, MutableMapping):
192
163
  ----------
193
164
  identifier : str
194
165
  The identifier of the quality scores.
195
-
166
+
196
167
  Returns
197
168
  -------
198
169
  scores : ndarray, dtype=int
199
170
  The quality scores corresponding to the identifier.
200
171
  """
201
172
  if not isinstance(identifier, str):
202
- raise IndexError(
203
- "'FastqFile' only supports identifier strings as keys"
204
- )
205
- seq_start, seq_stop, score_start, score_stop \
206
- = self._entries[identifier]
173
+ raise IndexError("'FastqFile' only supports identifier strings as keys")
174
+ seq_start, seq_stop, score_start, score_stop = self._entries[identifier]
207
175
  # Concatenate sequence string from the score lines
208
176
  return _score_str_to_scores(
209
- "".join(self.lines[score_start : score_stop]),
210
- self._offset
177
+ "".join(self.lines[score_start:score_stop]), self._offset
211
178
  )
212
-
179
+
213
180
  def __setitem__(self, identifier, item):
214
181
  sequence, scores = item
215
182
  if len(sequence) != len(scores):
@@ -218,24 +185,22 @@ class FastqFile(TextFile, MutableMapping):
218
185
  f"but score length is {len(scores)}"
219
186
  )
220
187
  if not isinstance(identifier, str):
221
- raise IndexError(
222
- "'FastqFile' only supports strings as identifier"
223
- )
188
+ raise IndexError("'FastqFile' only supports strings as identifier")
224
189
  # Delete lines of entry corresponding to the identifier,
225
190
  # if already existing
226
191
  if identifier in self:
227
192
  del self[identifier]
228
-
193
+
229
194
  # Create new lines
230
195
  # Start with identifier line
231
- new_lines = ["@" + identifier.replace("\n","").strip()]
196
+ new_lines = ["@" + identifier.replace("\n", "").strip()]
232
197
  # Append new lines with sequence string (with line breaks)
233
198
  seq_start_i = len(new_lines)
234
199
  if self._chars_per_line is None:
235
200
  new_lines.append(str(sequence))
236
201
  else:
237
202
  new_lines += wrap_string(sequence, width=self._chars_per_line)
238
- seq_stop_i =len(new_lines)
203
+ seq_stop_i = len(new_lines)
239
204
  # Append sequence-score separator
240
205
  new_lines += ["+"]
241
206
  # Append scores
@@ -261,29 +226,28 @@ class FastqFile(TextFile, MutableMapping):
261
226
  len(self.lines) + seq_start_i,
262
227
  len(self.lines) + seq_stop_i,
263
228
  len(self.lines) + score_start_i,
264
- len(self.lines) + score_stop_i
229
+ len(self.lines) + score_stop_i,
265
230
  )
266
231
  self.lines += new_lines
267
-
232
+
268
233
  def __getitem__(self, identifier):
269
234
  return self.get_seq_string(identifier), self.get_quality(identifier)
270
-
235
+
271
236
  def __delitem__(self, identifier):
272
- seq_start, seq_stop, score_start, score_stop \
273
- = self._entries[identifier]
274
- del self.lines[seq_start-1 : score_stop]
237
+ seq_start, seq_stop, score_start, score_stop = self._entries[identifier]
238
+ del self.lines[seq_start - 1 : score_stop]
275
239
  del self._entries[identifier]
276
240
  self._find_entries()
277
-
241
+
278
242
  def __len__(self):
279
243
  return len(self._entries)
280
-
244
+
281
245
  def __iter__(self):
282
246
  return self._entries.__iter__()
283
-
247
+
284
248
  def __contains__(self, identifer):
285
249
  return identifer in self._entries
286
-
250
+
287
251
  def _find_entries(self):
288
252
  self._entries = OrderedDict()
289
253
  in_sequence = False
@@ -302,7 +266,7 @@ class FastqFile(TextFile, MutableMapping):
302
266
  if not in_scores and not in_sequence and line[0] == "@":
303
267
  # Identifier line
304
268
  identifier = line[1:]
305
- seq_start_i = i+1
269
+ seq_start_i = i + 1
306
270
  # Next line is sequence
307
271
  in_sequence = True
308
272
  # Reset
@@ -314,7 +278,7 @@ class FastqFile(TextFile, MutableMapping):
314
278
  in_sequence = False
315
279
  in_scores = True
316
280
  seq_stop_i = i
317
- score_start_i = i+1
281
+ score_start_i = i + 1
318
282
  else:
319
283
  # Still in sequence
320
284
  seq_len += len(line)
@@ -330,9 +294,12 @@ class FastqFile(TextFile, MutableMapping):
330
294
  in_scores = False
331
295
  # Record this entry
332
296
  self._entries[identifier] = (
333
- seq_start_i, seq_stop_i, score_start_i, score_stop_i
297
+ seq_start_i,
298
+ seq_stop_i,
299
+ score_start_i,
300
+ score_stop_i,
334
301
  )
335
- else: # score_len > seq_len
302
+ else: # score_len > seq_len
336
303
  raise InvalidFileError(
337
304
  f"The amount of scores is not equal to the sequence "
338
305
  f"length for the sequence in line {seq_start_i+1} "
@@ -343,14 +310,13 @@ class FastqFile(TextFile, MutableMapping):
343
310
  # must have properly ended
344
311
  if in_sequence or in_scores:
345
312
  raise InvalidFileError("The last entry in the file is incomplete")
346
-
347
313
 
348
314
  @staticmethod
349
315
  def read_iter(file, offset):
350
316
  """
351
317
  Create an iterator over each sequence (and corresponding scores)
352
318
  of the given FASTQ file.
353
-
319
+
354
320
  Parameters
355
321
  ----------
356
322
  file : file-like object or str
@@ -361,7 +327,7 @@ class FastqFile(TextFile, MutableMapping):
361
327
  ASCII code.
362
328
  Can either be directly the value, or a string that indicates
363
329
  the score format.
364
-
330
+
365
331
  Yields
366
332
  ------
367
333
  identifier : str
@@ -369,7 +335,7 @@ class FastqFile(TextFile, MutableMapping):
369
335
  sequence : tuple(str, ndarray)
370
336
  The current sequence as string and its corresponding quality
371
337
  scores as :class:`ndarray`.
372
-
338
+
373
339
  Notes
374
340
  -----
375
341
  This approach gives the same results as
@@ -377,7 +343,7 @@ class FastqFile(TextFile, MutableMapping):
377
343
  and much more memory efficient.
378
344
  """
379
345
  offset = _convert_offset(offset)
380
-
346
+
381
347
  identifier = None
382
348
  seq_str_list = []
383
349
  score_str_list = []
@@ -391,7 +357,7 @@ class FastqFile(TextFile, MutableMapping):
391
357
  # Ignore empty lines
392
358
  if len(line) == 0:
393
359
  continue
394
-
360
+
395
361
  if not in_scores and not in_sequence and line[0] == "@":
396
362
  # Track new entry
397
363
  identifier = line[1:]
@@ -401,7 +367,7 @@ class FastqFile(TextFile, MutableMapping):
401
367
  score_len = 0
402
368
  seq_str_list = []
403
369
  score_str_list = []
404
-
370
+
405
371
  elif in_sequence:
406
372
  if line[0] == "+":
407
373
  # End of sequence start of scores
@@ -411,7 +377,7 @@ class FastqFile(TextFile, MutableMapping):
411
377
  # Still in sequence
412
378
  seq_len += len(line)
413
379
  seq_str_list.append(line)
414
-
380
+
415
381
  elif in_scores:
416
382
  score_len += len(line)
417
383
  score_str_list.append(line)
@@ -422,20 +388,15 @@ class FastqFile(TextFile, MutableMapping):
422
388
  # -> End of entry
423
389
  in_scores = False
424
390
  # yield this entry
425
- scores = _score_str_to_scores(
426
- "".join(score_str_list),
427
- offset
428
- )
391
+ scores = _score_str_to_scores("".join(score_str_list), offset)
429
392
  yield identifier, ("".join(seq_str_list), scores)
430
- else: # score_len > seq_len
393
+ else: # score_len > seq_len
431
394
  raise InvalidFileError(
432
- f"The amount of scores is not equal to the sequence "
433
- f"length"
395
+ "The amount of scores is not equal to the sequence " "length"
434
396
  )
435
-
397
+
436
398
  else:
437
- raise InvalidFileError(f"FASTQ file is invalid")
438
-
399
+ raise InvalidFileError("FASTQ file is invalid")
439
400
 
440
401
  @staticmethod
441
402
  def write_iter(file, items, offset, chars_per_line=None):
@@ -449,7 +410,7 @@ class FastqFile(TextFile, MutableMapping):
449
410
  Hence, this static method may save a large amount of memory if
450
411
  a large file should be written, especially if the `items`
451
412
  are provided as generator.
452
-
413
+
453
414
  Parameters
454
415
  ----------
455
416
  file : file-like object or str
@@ -487,12 +448,10 @@ class FastqFile(TextFile, MutableMapping):
487
448
  f"but score length is {len(scores)}"
488
449
  )
489
450
  if not isinstance(identifier, str):
490
- raise IndexError(
491
- "'FastqFile' only supports strings as identifier"
492
- )
493
-
451
+ raise IndexError("'FastqFile' only supports strings as identifier")
452
+
494
453
  # Yield identifier line
495
- yield "@" + identifier.replace("\n","").strip()
454
+ yield "@" + identifier.replace("\n", "").strip()
496
455
 
497
456
  # Yield sequence line(s)
498
457
  if chars_per_line is None:
@@ -500,10 +459,10 @@ class FastqFile(TextFile, MutableMapping):
500
459
  else:
501
460
  for line in wrap_string(sequence, width=chars_per_line):
502
461
  yield line
503
-
462
+
504
463
  # Yield separator
505
464
  yield "+"
506
-
465
+
507
466
  # Yield scores
508
467
  score_chars = _scores_to_score_str(scores, offset)
509
468
  if chars_per_line is None:
@@ -511,7 +470,7 @@ class FastqFile(TextFile, MutableMapping):
511
470
  else:
512
471
  for line in wrap_string(score_chars, width=chars_per_line):
513
472
  yield line
514
-
473
+
515
474
  TextFile.write_iter(file, line_generator())
516
475
 
517
476
 
@@ -519,15 +478,11 @@ def _score_str_to_scores(score_str, offset):
519
478
  """
520
479
  Convert an ASCII string into actual score values.
521
480
  """
522
- scores = np.frombuffer(
523
- bytearray(
524
- score_str, encoding="ascii"
525
- ),
526
- dtype=np.int8
527
- )
481
+ scores = np.frombuffer(bytearray(score_str, encoding="ascii"), dtype=np.int8)
528
482
  scores -= offset
529
483
  return scores
530
484
 
485
+
531
486
  def _scores_to_score_str(scores, offset):
532
487
  """
533
488
  Convert score values into an ASCII string.
@@ -535,6 +490,7 @@ def _scores_to_score_str(scores, offset):
535
490
  scores = np.asarray(scores) + offset
536
491
  return scores.astype(np.int8, copy=False).tobytes().decode("ascii")
537
492
 
493
+
538
494
  def _convert_offset(offset_val_or_string):
539
495
  """
540
496
  If the given offset is a string return the corresponding numerical
@@ -543,9 +499,9 @@ def _convert_offset(offset_val_or_string):
543
499
  if isinstance(offset_val_or_string, Integral):
544
500
  return offset_val_or_string
545
501
  elif isinstance(offset_val_or_string, str):
546
- return _OFFSETS[offset_val_or_string]
502
+ return _OFFSETS[offset_val_or_string]
547
503
  else:
548
504
  raise TypeError(
549
505
  f"The offset must be either an integer or a string "
550
506
  f"indicating the format, not {type(offset_val_or_string).__name__}"
551
- )
507
+ )
@@ -11,7 +11,7 @@ and *GenPept* format.
11
11
  __name__ = "biotite.sequence.io.genbank"
12
12
  __author__ = "Patrick Kunzmann"
13
13
 
14
- from .file import *
15
14
  from .annotation import *
15
+ from .file import *
16
+ from .metadata import *
16
17
  from .sequence import *
17
- from .metadata import *
@@ -12,10 +12,8 @@ __all__ = ["get_annotation", "set_annotation"]
12
12
 
13
13
  import re
14
14
  import warnings
15
- from ....file import InvalidFileError
16
- from ...annotation import Annotation, Feature, Location
17
- from .file import GenBankFile
18
-
15
+ from biotite.file import InvalidFileError
16
+ from biotite.sequence.annotation import Annotation, Feature, Location
19
17
 
20
18
  _KEY_START = 5
21
19
  _QUAL_START = 21
@@ -46,7 +44,6 @@ def get_annotation(gb_file, include_only=None):
46
44
  raise InvalidFileError("File has multiple 'FEATURES' fields")
47
45
  lines, _ = fields[0]
48
46
 
49
-
50
47
  ### Parse all lines to create an index of features,
51
48
  # i.e. pairs of the feature key
52
49
  # and the text belonging to the respective feature
@@ -60,13 +57,12 @@ def get_annotation(gb_file, include_only=None):
60
57
  # Store old feature key and value
61
58
  feature_list.append((feature_key, feature_value))
62
59
  # Track new key
63
- feature_key = line[_KEY_START : _QUAL_START-1].strip()
60
+ feature_key = line[_KEY_START : _QUAL_START - 1].strip()
64
61
  feature_value = ""
65
62
  feature_value += line[_QUAL_START:] + " "
66
63
  # Store last feature key and value (loop already exited)
67
64
  feature_list.append((feature_key, feature_value))
68
65
 
69
-
70
66
  ### Process only relevant features and put them into an Annotation
71
67
  annotation = Annotation()
72
68
  # Regex to separate qualifiers from each other
@@ -92,7 +88,7 @@ def get_annotation(gb_file, include_only=None):
92
88
  loc_string = qualifier_parts.pop(0).strip()
93
89
  try:
94
90
  locs = _parse_locs(loc_string)
95
- except:
91
+ except Exception:
96
92
  warnings.warn(
97
93
  f"'{loc_string}' is an unsupported location identifier, "
98
94
  f"skipping feature"
@@ -114,7 +110,7 @@ def get_annotation(gb_file, include_only=None):
114
110
  # -> split at whitespaces,
115
111
  # as keys do not contain whitespaces
116
112
  for subpart in part.split():
117
- if not "=" in subpart:
113
+ if "=" not in subpart:
118
114
  # Qualifier without value, e.g. '/pseudo'
119
115
  # -> store immediately
120
116
  # Remove "/" -> subpart[1:]
@@ -147,11 +143,11 @@ def get_annotation(gb_file, include_only=None):
147
143
  def _parse_locs(loc_str):
148
144
  locs = []
149
145
  if loc_str.startswith(("join", "order")):
150
- str_list = loc_str[loc_str.index("(")+1:loc_str.rindex(")")].split(",")
146
+ str_list = loc_str[loc_str.index("(") + 1 : loc_str.rindex(")")].split(",")
151
147
  for s in str_list:
152
148
  locs.extend(_parse_locs(s.strip()))
153
149
  elif loc_str.startswith("complement"):
154
- compl_str = loc_str[loc_str.index("(")+1:loc_str.rindex(")")]
150
+ compl_str = loc_str[loc_str.index("(") + 1 : loc_str.rindex(")")]
155
151
  compl_locs = [
156
152
  Location(loc.first, loc.last, Location.Strand.REVERSE, loc.defect)
157
153
  for loc in _parse_locs(compl_str)
@@ -214,8 +210,6 @@ def _set_qual(qual_dict, key, val):
214
210
  qual_dict[key] = val
215
211
 
216
212
 
217
-
218
-
219
213
  def set_annotation(gb_file, annotation):
220
214
  """
221
215
  Set the *FEATURES* field of a GenBank file with an annotation.
@@ -236,12 +230,12 @@ def set_annotation(gb_file, annotation):
236
230
  for key, values in feature.qual.items():
237
231
  if values is None:
238
232
  line = " " * _QUAL_START
239
- line += f'/{key}'
233
+ line += f"/{key}"
240
234
  lines.append(line)
241
235
  else:
242
236
  for val in values.split("\n"):
243
237
  line = " " * _QUAL_START
244
- line += f'/{key}="{val}"'
238
+ line += f'/{key}="{val}"'
245
239
  lines.append(line)
246
240
  gb_file.set_field("FEATURES", lines)
247
241
 
@@ -254,11 +248,11 @@ def _convert_to_loc_string(locs):
254
248
  if len(locs) == 1:
255
249
  loc = list(locs)[0]
256
250
  loc_first_str = str(loc.first)
257
- loc_last_str = str(loc.last)
251
+ loc_last_str = str(loc.last)
258
252
  if loc.defect & Location.Defect.BEYOND_LEFT:
259
253
  loc_first_str = "<" + loc_first_str
260
254
  if loc.defect & Location.Defect.BEYOND_RIGHT:
261
- loc_last_str = ">" + loc_last_str
255
+ loc_last_str = ">" + loc_last_str
262
256
  if loc.first == loc.last:
263
257
  loc_string = loc_first_str
264
258
  elif loc.defect & Location.Defect.UNK_LOC:
@@ -270,8 +264,6 @@ def _convert_to_loc_string(locs):
270
264
  if loc.strand == Location.Strand.REVERSE:
271
265
  loc_string = f"complement({loc_string})"
272
266
  else:
273
- loc_string = ",".join(
274
- [_convert_to_loc_string([loc]) for loc in locs]
275
- )
267
+ loc_string = ",".join([_convert_to_loc_string([loc]) for loc in locs])
276
268
  loc_string = f"join({loc_string})"
277
269
  return loc_string