biotite 0.41.2__cp310-cp310-win_amd64.whl → 1.0.0__cp310-cp310-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205)
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +1 -1
  3. biotite/application/application.py +20 -10
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +3 -3
  52. biotite/sequence/align/__init__.py +2 -2
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
  60. biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
  63. biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
  66. biotite/sequence/align/multiple.pyx +1 -1
  67. biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
  68. biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
  74. biotite/sequence/alphabet.py +51 -65
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
  102. biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
  103. biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +15 -17
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +221 -235
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
  112. biotite/structure/bonds.pyx +29 -32
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cp310-win_amd64.pyd +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +82 -77
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +4 -5
  130. biotite/structure/info/groups.py +1 -3
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -44
  159. biotite/structure/io/pdbx/cif.py +64 -62
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +235 -246
  162. biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
  163. biotite/structure/io/trajfile.py +76 -93
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
  184. biotite-1.0.0.dist-info/RECORD +322 -0
  185. biotite/structure/io/ctab.py +0 -72
  186. biotite/structure/io/mmtf/__init__.py +0 -21
  187. biotite/structure/io/mmtf/assembly.py +0 -214
  188. biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
  189. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  190. biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
  191. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  192. biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
  193. biotite/structure/io/mmtf/decode.pyx +0 -152
  194. biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
  195. biotite/structure/io/mmtf/encode.pyx +0 -183
  196. biotite/structure/io/mmtf/file.py +0 -233
  197. biotite/structure/io/npz/__init__.py +0 -20
  198. biotite/structure/io/npz/file.py +0 -152
  199. biotite/structure/io/pdbx/legacy.py +0 -267
  200. biotite/structure/io/tng/__init__.py +0 -13
  201. biotite/structure/io/tng/file.py +0 -46
  202. biotite/temp.py +0 -86
  203. biotite-0.41.2.dist-info/RECORD +0 -340
  204. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
  205. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -6,14 +6,16 @@ __name__ = "biotite.sequence.io.genbank"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["GenBankFile", "MultiFile"]
8
8
 
9
- #import textwrap
9
+ # import textwrap
10
10
  import copy
11
- #import re
11
+
12
+ # import re
12
13
  import io
13
- from ....file import TextFile, InvalidFileError
14
14
  from collections import OrderedDict
15
- #from ...annotation import Location, Feature, Annotation, AnnotatedSequence
16
- #from ...seqtypes import NucleotideSequence, ProteinSequence
15
+ from biotite.file import InvalidFileError, TextFile
16
+
17
+ # from ...annotation import Location, Feature, Annotation, AnnotatedSequence
18
+ # from ...seqtypes import NucleotideSequence, ProteinSequence
17
19
 
18
20
 
19
21
  class GenBankFile(TextFile):
@@ -33,7 +35,7 @@ class GenBankFile(TextFile):
33
35
  Some fields may occur multiple times, e.g. the *REFERENCE* field.
34
36
  A sample GenBank file can be viewed at
35
37
  `<https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html>`_.
36
-
38
+
37
39
  This class provides a low-level interface for parsing, editing and
38
40
  writing GenBank files.
39
41
  It works like a list of field entries, where a field consists of the
@@ -47,7 +49,7 @@ class GenBankFile(TextFile):
47
49
  The subfields are represented by a dictionary, with subfield names
48
50
  being keys and the corresponding lines being values.
49
51
  The *FEATURES* and *ORIGIN* fields have no subfields.
50
-
52
+
51
53
  Every entry can be obtained, set and deleted via the index operator.
52
54
 
53
55
  Notes
@@ -55,7 +57,7 @@ class GenBankFile(TextFile):
55
57
  This class does not support location identifiers with references
56
58
  to other Entrez database entries, e.g.
57
59
  ``join(1..100,J00194.1:100..202)``.
58
-
60
+
59
61
  Examples
60
62
  --------
61
63
  Create a GenBank file from scratch:
@@ -79,9 +81,9 @@ class GenBankFile(TextFile):
79
81
  ['One line', 'A second line']
80
82
  >>> print(subfields)
81
83
  OrderedDict([('SUBFIELD1', ['Single Line']), ('SUBFIELD2', ['Two', 'lines'])])
82
-
84
+
83
85
  Adding an additional field:
84
-
86
+
85
87
  >>> file.insert(0, "OTHERFIELD", ["Another line"])
86
88
  >>> print(len(file))
87
89
  2
@@ -174,18 +176,18 @@ class GenBankFile(TextFile):
174
176
  # and names of categories
175
177
  self._field_pos = []
176
178
  self._find_field_indices()
177
-
179
+
178
180
  @classmethod
179
181
  def read(cls, file):
180
182
  """
181
183
  Read a GenBank file.
182
-
184
+
183
185
  Parameters
184
186
  ----------
185
187
  file : file-like object or str
186
188
  The file to be read.
187
189
  Alternatively a file path can be supplied.
188
-
190
+
189
191
  Returns
190
192
  -------
191
193
  file_object : GenBankFile
@@ -194,16 +196,16 @@ class GenBankFile(TextFile):
194
196
  file = super().read(file)
195
197
  file._find_field_indices()
196
198
  return file
197
-
199
+
198
200
  def get_fields(self, name):
199
201
  """
200
202
  Get all *GenBank* fields associated with a given field name.
201
-
203
+
202
204
  Parameters
203
205
  ----------
204
206
  name : str
205
207
  The field name.
206
-
208
+
207
209
  Returns
208
210
  -------
209
211
  fields : list of (list of str, OrderedDict of str -> str)
@@ -218,17 +220,17 @@ class GenBankFile(TextFile):
218
220
  indices = self.get_indices(name)
219
221
  # Omit the field name
220
222
  return [self[i][1:] for i in indices]
221
-
223
+
222
224
  def get_indices(self, name):
223
225
  """
224
226
  Get the indices to all *GenBank* fields associated with a given
225
227
  field name.
226
-
228
+
227
229
  Parameters
228
230
  ----------
229
231
  name : str
230
232
  The field name.
231
-
233
+
232
234
  Returns
233
235
  -------
234
236
  fields : list of int
@@ -242,7 +244,7 @@ class GenBankFile(TextFile):
242
244
  if fname == name:
243
245
  indices.append(i)
244
246
  return indices
245
-
247
+
246
248
  def set_field(self, name, content, subfield_dict=None):
247
249
  """
248
250
  Set a *GenBank* field with the given content.
@@ -250,7 +252,7 @@ class GenBankFile(TextFile):
250
252
  If the field already exists in the file, the field is
251
253
  overwritten, otherwise a new field is created at the end of
252
254
  the file.
253
-
255
+
254
256
  Parameters
255
257
  ----------
256
258
  name : str
@@ -261,7 +263,7 @@ class GenBankFile(TextFile):
261
263
  The subfields of the field.
262
264
  The dictionary maps subfield names to the content lines of
263
265
  the respective subfield.
264
-
266
+
265
267
  Raises
266
268
  ------
267
269
  InvalidFileError
@@ -283,13 +285,13 @@ class GenBankFile(TextFile):
283
285
  def __getitem__(self, index):
284
286
  index = self._translate_idx(index)
285
287
  start, stop, name = self._field_pos[index]
286
-
288
+
287
289
  if name in ["FEATURES", "ORIGIN"]:
288
290
  # For those two fields return the complete lines,
289
291
  # beginning with the line after the field name
290
- content = self._get_field_content(start+1, stop, indent=0)
292
+ content = self._get_field_content(start + 1, stop, indent=0)
291
293
  subfield_dict = OrderedDict()
292
-
294
+
293
295
  else:
294
296
  # For all metadata fields use the
295
297
  # standard GenBank indentation (=12)
@@ -297,11 +299,11 @@ class GenBankFile(TextFile):
297
299
  subfield_dict = OrderedDict()
298
300
  subfield_start = None
299
301
  first_subfield_start = None
300
- for i in range(start+1, stop):
302
+ header = None
303
+ for i in range(start + 1, stop):
301
304
  line = self.lines[i]
302
- # Check if line contains a new subfield
303
- # (Header beginning from first column)
304
305
  if len(line) != 0 and line[:12].strip() != "":
306
+ # New header -> new subfield
305
307
  if first_subfield_start is None:
306
308
  first_subfield_start = i
307
309
  # Store previous subfield
@@ -320,12 +322,10 @@ class GenBankFile(TextFile):
320
322
  # that are not part of a subfield
321
323
  if first_subfield_start is not None:
322
324
  stop = first_subfield_start
323
- content = self._get_field_content(
324
- start, stop, indent=12
325
- )
326
-
325
+ content = self._get_field_content(start, stop, indent=12)
326
+
327
327
  return name, content, subfield_dict
328
-
328
+
329
329
  def __setitem__(self, index, item):
330
330
  index = self._translate_idx(index)
331
331
  if not isinstance(item, tuple):
@@ -342,7 +342,7 @@ class GenBankFile(TextFile):
342
342
  "Expected a tuple of name, content and optionally subfields"
343
343
  )
344
344
  inserted_lines = self._to_lines(name, content, subfields)
345
-
345
+
346
346
  # Stop of field to be replaced is start of new field
347
347
  start, old_stop, _ = self._field_pos[index]
348
348
  # If not the last element is set,
@@ -355,12 +355,12 @@ class GenBankFile(TextFile):
355
355
  # Shift the start/stop indices of the following fields
356
356
  # by the amount of created fields
357
357
  shift = len(inserted_lines) - (old_stop - start)
358
- for i in range(index+1, len(self._field_pos)):
358
+ for i in range(index + 1, len(self._field_pos)):
359
359
  old_start, old_stop, fname = self._field_pos[i]
360
- self._field_pos[i] = old_start+shift, old_stop+shift, fname
360
+ self._field_pos[i] = old_start + shift, old_stop + shift, fname
361
361
  # Add new entry
362
- self._field_pos[index] = start, start+len(inserted_lines), name.upper()
363
-
362
+ self._field_pos[index] = start, start + len(inserted_lines), name.upper()
363
+
364
364
  def __delitem__(self, index):
365
365
  index = self._translate_idx(index)
366
366
  start, stop, _ = self._field_pos[index]
@@ -369,17 +369,17 @@ class GenBankFile(TextFile):
369
369
  shift = stop - start
370
370
  for i in range(index, len(self._field_pos)):
371
371
  old_start, old_stop, name = self._field_pos[i]
372
- self._field_pos[i] = old_start-shift, old_stop-shift, name
373
- del self.lines[start : stop]
372
+ self._field_pos[i] = old_start - shift, old_stop - shift, name
373
+ del self.lines[start:stop]
374
374
  del self._field_pos[index]
375
-
375
+
376
376
  def __len__(self):
377
377
  return len(self._field_pos)
378
378
 
379
379
  def insert(self, index, name, content, subfields=None):
380
380
  """
381
381
  Insert a *GenBank* field at the given position.
382
-
382
+
383
383
  Parameters
384
384
  ----------
385
385
  index : int
@@ -398,12 +398,12 @@ class GenBankFile(TextFile):
398
398
  """
399
399
  index = self._translate_idx(index, length_exclusive=False)
400
400
  inserted_lines = self._to_lines(name, content, subfields)
401
-
401
+
402
402
  # Stop of previous field is start of new field
403
403
  if index == 0:
404
404
  start = 0
405
405
  else:
406
- _, start, _ = self._field_pos[index-1]
406
+ _, start, _ = self._field_pos[index - 1]
407
407
  # If the new lines are not inserted at the end,
408
408
  # the following lines need to be added, too
409
409
  if start is not len(self.lines):
@@ -416,17 +416,16 @@ class GenBankFile(TextFile):
416
416
  shift = len(inserted_lines)
417
417
  for i in range(index, len(self._field_pos)):
418
418
  old_start, old_stop, fname = self._field_pos[i]
419
- self._field_pos[i] = old_start+shift, old_stop+shift, fname
419
+ self._field_pos[i] = old_start + shift, old_stop + shift, fname
420
420
  # Add new entry
421
421
  self._field_pos.insert(
422
- index,
423
- (start, start+len(inserted_lines), name.upper())
422
+ index, (start, start + len(inserted_lines), name.upper())
424
423
  )
425
-
424
+
426
425
  def append(self, name, content, subfields=None):
427
426
  """
428
427
  Create a new *GenBank* field at the end of the file.
429
-
428
+
430
429
  Parameters
431
430
  ----------
432
431
  name : str
@@ -440,7 +439,6 @@ class GenBankFile(TextFile):
440
439
  """
441
440
  self.insert(len(self), name, content, subfields)
442
441
 
443
-
444
442
  def _find_field_indices(self):
445
443
  """
446
444
  Identify the start and exclusive stop indices of lines
@@ -469,10 +467,10 @@ class GenBankFile(TextFile):
469
467
 
470
468
  def _get_field_content(self, start, stop, indent):
471
469
  if indent == 0:
472
- return self.lines[start : stop]
470
+ return self.lines[start:stop]
473
471
  else:
474
- return [line[12:] for line in self.lines[start : stop]]
475
-
472
+ return [line[12:] for line in self.lines[start:stop]]
473
+
476
474
  def _to_lines(self, name, content, subfields):
477
475
  """
478
476
  Convert the field name, field content und subfield dictionary
@@ -480,22 +478,22 @@ class GenBankFile(TextFile):
480
478
  """
481
479
  if subfields is None:
482
480
  subfields = {}
483
-
481
+
484
482
  name = name.strip().upper()
485
483
  if len(name) == 0:
486
- raise ValueError(f"Must give a non emtpy name")
487
- subfields = OrderedDict({
488
- subfield_name.upper().strip() : subfield_lines
489
- for subfield_name, subfield_lines in subfields.items()
490
- })
491
-
484
+ raise ValueError("Must give a non emtpy name")
485
+ subfields = OrderedDict(
486
+ {
487
+ subfield_name.upper().strip(): subfield_lines
488
+ for subfield_name, subfield_lines in subfields.items()
489
+ }
490
+ )
491
+
492
492
  # Create lines for new field
493
493
  if name == "FEATURES":
494
494
  # Header line plus all actual feature lines
495
495
  lines = copy.copy(content)
496
- lines.insert(
497
- 0, "FEATURES" + " "*13 + "Location/Qualifiers"
498
- )
496
+ lines.insert(0, "FEATURES" + " " * 13 + "Location/Qualifiers")
499
497
  elif name == "ORIGIN":
500
498
  # Header line plus all actual sequence lines
501
499
  lines = copy.copy(content)
@@ -504,19 +502,19 @@ class GenBankFile(TextFile):
504
502
  name_column = []
505
503
  content_column = []
506
504
  # Create a line for the field name and empty lines
507
- # for each additional line required by the content
508
- name_column += [name] + [""] * (len(content)-1)
505
+ # for each additional line required by the content
506
+ name_column += [name] + [""] * (len(content) - 1)
509
507
  content_column += content
510
508
  for subfield_name, subfield_lines in subfields.items():
511
- name_column += [" " + subfield_name] \
512
- + [""] * (len(subfield_lines)-1)
509
+ name_column += [" " + subfield_name] + [""] * (len(subfield_lines) - 1)
513
510
  content_column += subfield_lines
514
- lines = [f"{n_col:12}{c_col}" for n_col, c_col
515
- in zip(name_column, content_column)]
516
-
511
+ lines = [
512
+ f"{n_col:12}{c_col}"
513
+ for n_col, c_col in zip(name_column, content_column)
514
+ ]
515
+
517
516
  return lines
518
517
 
519
-
520
518
  def _translate_idx(self, index, length_exclusive=True):
521
519
  """
522
520
  Check index boundaries and convert negative index to positive
@@ -539,15 +537,15 @@ class MultiFile(TextFile):
539
537
  """
540
538
  This class represents a file in *GenBank* or *GenPept* format,
541
539
  that contains multiple entries, for more than one UID.
542
-
540
+
543
541
  The information for each UID are appended to each other in such a
544
542
  file.
545
543
  Objects of this class can be iterated to obtain a
546
544
  :class:`GenBankFile` for each entry in the file.
547
-
545
+
548
546
  Examples
549
547
  --------
550
-
548
+
551
549
  >>> import os.path
552
550
  >>> file_name = fetch_single_file(
553
551
  ... ["1L2Y_A", "3O5R_A", "5UGO_A"],
@@ -568,8 +566,8 @@ class MultiFile(TextFile):
568
566
  line = self.lines[i]
569
567
  if line.strip() == "//":
570
568
  # Create file with lines corresponding to that file
571
- file_content = "\n".join(self.lines[start_i : i+1])
569
+ file_content = "\n".join(self.lines[start_i : i + 1])
572
570
  file = GenBankFile.read(io.StringIO(file_content))
573
571
  # Reset file start index
574
572
  start_i = i
575
- yield file
573
+ yield file
@@ -8,17 +8,24 @@ Functions for obtaining metadata fields of a GenBank file.
8
8
 
9
9
  __name__ = "biotite.sequence.io.genbank"
10
10
  __author__ = "Patrick Kunzmann, Natasha Jaffe"
11
- __all__ = ["get_locus", "get_definition", "get_accession", "get_version",
12
- "get_gi", "get_db_link", "get_source",
13
- "set_locus"]
11
+ __all__ = [
12
+ "get_locus",
13
+ "get_definition",
14
+ "get_accession",
15
+ "get_version",
16
+ "get_gi",
17
+ "get_db_link",
18
+ "get_source",
19
+ "set_locus",
20
+ ]
21
+
22
+ from biotite.file import InvalidFileError
14
23
 
15
- from ....file import InvalidFileError
16
- from .file import GenBankFile
17
24
 
18
25
  def get_locus(gb_file):
19
26
  """
20
27
  Parse the *LOCUS* field of a GenBank or GenPept file.
21
-
28
+
22
29
  Parameters
23
30
  ----------
24
31
  gb_file : GenBankFile
@@ -39,10 +46,10 @@ def get_locus(gb_file):
39
46
  The GenBank division to which the file belongs.
40
47
  date : str, optional
41
48
  The date of last modification.
42
-
49
+
43
50
  Examples
44
51
  --------
45
-
52
+
46
53
  >>> import os.path
47
54
  >>> file = GenBankFile.read(os.path.join(path_to_sequences, "ec_bl21.gb"))
48
55
  >>> name, length, mol_type, is_circular, division, date = get_locus(file)
@@ -68,59 +75,57 @@ def get_locus(gb_file):
68
75
  # The first field will always be the ID
69
76
  name = fields[0]
70
77
 
71
- # The second field will always be the length followed
78
+ # The second field will always be the length followed
72
79
  # by units (eg 1224 aa)
73
80
  length = int(fields[1])
74
81
 
75
- # The third field *should* be the molecular type
82
+ # The third field *should* be the molecular type
76
83
  # but sometimes this is missing. This gets tricky
77
84
  # because sometimes the next field, circular/linear,
78
85
  # is missing, too. The field after that, division,
79
86
  # is a 3 letter all caps token. Unfortunately, mol_type
80
- # is also often a 3 letter all caps token (eg DNA)!
87
+ # is also often a 3 letter all caps token (eg DNA)!
81
88
  # Fortunately, GenBank publishes the set list of divisions
82
89
  # here: https://www.ncbi.nlm.nih.gov/genbank/samplerecord ,
83
90
  # so we can check against that set when determining whether
84
91
  # the current token represents the molecular type.
85
92
  divisions = (
86
- 'PRI', # primate sequences
87
- 'ROD', # rodent sequences
88
- 'MAM', # other mammalian sequences
89
- 'VRT', # other vertebrate sequences
90
- 'INV', # invertebrate sequences
91
- 'PLN', # plant, fungal, and algal sequences
92
- 'BCT', # bacterial sequences
93
- 'VRL', # viral sequences
94
- 'PHG', # bacteriophage sequences
95
- 'SYN', # synthetic sequences
96
- 'UNA', # unannotated sequences
97
- 'EST', # EST sequences (expressed sequence tags)
98
- 'PAT', # patent sequences
99
- 'STS', # STS sequences (sequence tagged sites)
100
- 'GSS', # GSS sequences (genome survey sequences)
101
- 'HTG', # HTG sequences (high-throughput genomic sequences)
102
- 'HTC', # unfinished high-throughput cDNA sequencing
103
- 'ENV', # environmental sampling sequences
104
- 'CON',
93
+ "PRI", # primate sequences
94
+ "ROD", # rodent sequences
95
+ "MAM", # other mammalian sequences
96
+ "VRT", # other vertebrate sequences
97
+ "INV", # invertebrate sequences
98
+ "PLN", # plant, fungal, and algal sequences
99
+ "BCT", # bacterial sequences
100
+ "VRL", # viral sequences
101
+ "PHG", # bacteriophage sequences
102
+ "SYN", # synthetic sequences
103
+ "UNA", # unannotated sequences
104
+ "EST", # EST sequences (expressed sequence tags)
105
+ "PAT", # patent sequences
106
+ "STS", # STS sequences (sequence tagged sites)
107
+ "GSS", # GSS sequences (genome survey sequences)
108
+ "HTG", # HTG sequences (high-throughput genomic sequences)
109
+ "HTC", # unfinished high-throughput cDNA sequencing
110
+ "ENV", # environmental sampling sequences
111
+ "CON",
105
112
  )
106
113
 
107
- # NOTE: Remember that fields[2] is the unit for length,
114
+ # NOTE: Remember that fields[2] is the unit for length,
108
115
  # eg bp or aa, so we move to fields[3] here.
109
- if fields[3] not in ('linear', 'circular') \
110
- and fields[3] not in divisions:
116
+ if fields[3] not in ("linear", "circular") and fields[3] not in divisions:
111
117
  mol_type = fields[3]
112
118
  next_idx = 4
113
119
  else:
114
120
  mol_type = None
115
121
  next_idx = 3
116
122
 
117
-
118
- # The next field should be the token 'linear' or 'circular',
123
+ # The next field should be the token 'linear' or 'circular',
119
124
  # but sometimes this is missing
120
- if 'linear' == fields[next_idx]:
125
+ if "linear" == fields[next_idx]:
121
126
  is_circular = False
122
127
  next_idx += 1
123
- elif 'circular' == fields[next_idx]:
128
+ elif "circular" == fields[next_idx]:
124
129
  is_circular = True
125
130
  next_idx += 1
126
131
  else:
@@ -136,23 +141,24 @@ def get_locus(gb_file):
136
141
 
137
142
  return name, length, mol_type, is_circular, division, date
138
143
 
144
+
139
145
  def get_definition(gb_file):
140
146
  """
141
147
  Parse the *DEFINITION* field of a GenBank or GenPept file.
142
-
148
+
143
149
  Parameters
144
150
  ----------
145
151
  gb_file : GenBankFile
146
152
  The GenBank file to read the *DEFINITION* field from.
147
-
153
+
148
154
  Returns
149
155
  -------
150
156
  definition : str
151
157
  Content of the *DEFINITION* field.
152
-
158
+
153
159
  Examples
154
160
  --------
155
-
161
+
156
162
  >>> import os.path
157
163
  >>> file = GenBankFile.read(os.path.join(path_to_sequences, "ec_bl21.gb"))
158
164
  >>> print(get_definition(file))
@@ -161,23 +167,24 @@ def get_definition(gb_file):
161
167
  lines, _ = _expect_single_field(gb_file, "DEFINITION")
162
168
  return " ".join([line.strip() for line in lines])
163
169
 
170
+
164
171
  def get_accession(gb_file):
165
172
  """
166
173
  Parse the *ACCESSION* field of a GenBank or GenPept file.
167
-
174
+
168
175
  Parameters
169
176
  ----------
170
177
  gb_file : GenBankFile
171
178
  The GenBank file to read the *ACCESSION* field from.
172
-
179
+
173
180
  Returns
174
181
  -------
175
182
  accession : str
176
183
  The accession ID of the file.
177
-
184
+
178
185
  Examples
179
186
  --------
180
-
187
+
181
188
  >>> import os.path
182
189
  >>> file = GenBankFile.read(os.path.join(path_to_sequences, "ec_bl21.gb"))
183
190
  >>> print(get_accession(file))
@@ -187,16 +194,17 @@ def get_accession(gb_file):
187
194
  # 'ACCESSION' field has only one line
188
195
  return lines[0]
189
196
 
197
+
190
198
  def get_version(gb_file):
191
199
  """
192
200
  Parse the version from the *VERSION* field of a GenBank or GenPept
193
201
  file.
194
-
202
+
195
203
  Parameters
196
204
  ----------
197
205
  gb_file : GenBankFile
198
206
  The GenBank file to read the *VERSION* field from.
199
-
207
+
200
208
  Returns
201
209
  -------
202
210
  version : str
@@ -206,16 +214,17 @@ def get_version(gb_file):
206
214
  # 'VERSION' field has only one line
207
215
  return lines[0].split()[0]
208
216
 
217
+
209
218
  def get_gi(gb_file):
210
219
  """
211
220
  Parse the GI from the *VERSION* field of a GenBank or GenPept
212
221
  file.
213
-
222
+
214
223
  Parameters
215
224
  ----------
216
225
  gb_file : GenBankFile
217
226
  The GenBank file to read the *VERSION* field from.
218
-
227
+
219
228
  Returns
220
229
  -------
221
230
  gi : str
@@ -229,24 +238,25 @@ def get_gi(gb_file):
229
238
  # Truncate GI
230
239
  return int(version_info[1][3:])
231
240
 
241
+
232
242
  def get_db_link(gb_file):
233
243
  """
234
244
  Parse the *DBLINK* field of a GenBank or GenPept file.
235
-
245
+
236
246
  Parameters
237
247
  ----------
238
248
  gb_file : GenBankFile
239
249
  The GenBank file to read the *DBLINK* field from.
240
-
250
+
241
251
  Returns
242
252
  -------
243
253
  link_dict : dict
244
254
  A dictionary storing the database links, with the database
245
255
  name as key, and the corresponding ID as value.
246
-
256
+
247
257
  Examples
248
258
  --------
249
-
259
+
250
260
  >>> import os.path
251
261
  >>> file = GenBankFile.read(os.path.join(path_to_sequences, "ec_bl21.gb"))
252
262
  >>> for key, val in get_db_link(file).items():
@@ -265,12 +275,12 @@ def get_db_link(gb_file):
265
275
  def get_source(gb_file):
266
276
  """
267
277
  Parse the *SOURCE* field of a GenBank or GenPept file.
268
-
278
+
269
279
  Parameters
270
280
  ----------
271
281
  gb_file : GenBankFile
272
282
  The GenBank file to read the *SOURCE* field from.
273
-
283
+
274
284
  Returns
275
285
  -------
276
286
  accession : str
@@ -290,12 +300,12 @@ def _expect_single_field(gb_file, name):
290
300
  return fields[0]
291
301
 
292
302
 
293
-
294
- def set_locus(gb_file, name, length, mol_type=None, is_circular=False,
295
- division=None, date=None):
303
+ def set_locus(
304
+ gb_file, name, length, mol_type=None, is_circular=False, division=None, date=None
305
+ ):
296
306
  """
297
307
  Set the *LOCUS* field of a GenBank file.
298
-
308
+
299
309
  Parameters
300
310
  ----------
301
311
  gb_file : GenBankFile
@@ -319,6 +329,8 @@ def set_locus(gb_file, name, length, mol_type=None, is_circular=False,
319
329
  circularity = "circular" if is_circular else "linear"
320
330
  division = "" if division is None else division
321
331
  date = "" if date is None else date
322
- line = f"{name:18} {length:>9} {restype_abbr} {mol_type:^10} " \
323
- f"{circularity:8} {division:3} {date:11}"
324
- gb_file.set_field("LOCUS", [line])
332
+ line = (
333
+ f"{name:18} {length:>9} {restype_abbr} {mol_type:^10} "
334
+ f"{circularity:8} {division:3} {date:11}"
335
+ )
336
+ gb_file.set_field("LOCUS", [line])