biotite 0.41.2__cp311-cp311-win_amd64.whl → 1.0.1__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205) hide show
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +1 -1
  3. biotite/application/application.py +20 -10
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +3 -3
  52. biotite/sequence/align/__init__.py +2 -2
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  60. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  63. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  66. biotite/sequence/align/multiple.pyx +1 -1
  67. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  68. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  74. biotite/sequence/alphabet.py +51 -65
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  102. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  103. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +15 -17
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +246 -236
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  112. biotite/structure/bonds.pyx +29 -32
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +83 -78
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +4 -5
  130. biotite/structure/info/groups.py +1 -3
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -44
  159. biotite/structure/io/pdbx/cif.py +140 -110
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +260 -258
  162. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  163. biotite/structure/io/trajfile.py +90 -107
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
  184. biotite-1.0.1.dist-info/RECORD +322 -0
  185. biotite/structure/io/ctab.py +0 -72
  186. biotite/structure/io/mmtf/__init__.py +0 -21
  187. biotite/structure/io/mmtf/assembly.py +0 -214
  188. biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
  189. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  190. biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
  191. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  192. biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
  193. biotite/structure/io/mmtf/decode.pyx +0 -152
  194. biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
  195. biotite/structure/io/mmtf/encode.pyx +0 -183
  196. biotite/structure/io/mmtf/file.py +0 -233
  197. biotite/structure/io/npz/__init__.py +0 -20
  198. biotite/structure/io/npz/file.py +0 -152
  199. biotite/structure/io/pdbx/legacy.py +0 -267
  200. biotite/structure/io/tng/__init__.py +0 -13
  201. biotite/structure/io/tng/file.py +0 -46
  202. biotite/temp.py +0 -86
  203. biotite-0.41.2.dist-info/RECORD +0 -340
  204. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
  205. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
@@ -8,16 +8,19 @@ Functions for converting a sequence from/to a GenBank file.
8
8
 
9
9
  __name__ = "biotite.sequence.io.genbank"
10
10
  __author__ = "Patrick Kunzmann"
11
- __all__ = ["get_raw_sequence", "get_sequence", "get_annotated_sequence",
12
- "set_sequence", "set_annotated_sequence"]
11
+ __all__ = [
12
+ "get_raw_sequence",
13
+ "get_sequence",
14
+ "get_annotated_sequence",
15
+ "set_sequence",
16
+ "set_annotated_sequence",
17
+ ]
13
18
 
14
19
  import re
15
- from ....file import InvalidFileError
16
- from ...seqtypes import ProteinSequence, NucleotideSequence
17
- from ...annotation import AnnotatedSequence
18
- from .file import GenBankFile
19
- from .annotation import get_annotation, set_annotation
20
-
20
+ from biotite.file import InvalidFileError
21
+ from biotite.sequence.annotation import AnnotatedSequence
22
+ from biotite.sequence.io.genbank.annotation import get_annotation, set_annotation
23
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
21
24
 
22
25
  _SYMBOLS_PER_CHUNK = 10
23
26
  _SEQ_CHUNKS_PER_LINE = 6
@@ -112,7 +115,7 @@ def _convert_seq_str(seq_str, format):
112
115
  if len(seq_str) == 0:
113
116
  raise InvalidFileError("The file's 'ORIGIN' field is empty")
114
117
  if format == "gb":
115
- return NucleotideSequence(seq_str.replace("U","T").replace("X","N"))
118
+ return NucleotideSequence(seq_str.replace("U", "T").replace("X", "N"))
116
119
  elif format == "gp":
117
120
  return ProteinSequence(seq_str.replace("U", "C").replace("O", "K"))
118
121
  else:
@@ -125,8 +128,6 @@ def _get_seq_start(origin_content):
125
128
  return int(origin_content[0].split()[0])
126
129
 
127
130
 
128
-
129
-
130
131
  def set_sequence(gb_file, sequence, sequence_start=1):
131
132
  """
132
133
  Set the *ORIGIN* field of a GenBank file with a sequence.
@@ -167,6 +168,4 @@ def set_annotated_sequence(gb_file, annot_sequence):
167
168
  The annotated sequence that is put into the GenBank file.
168
169
  """
169
170
  set_annotation(gb_file, annot_sequence.annotation)
170
- set_sequence(
171
- gb_file, annot_sequence.sequence, annot_sequence.sequence_start
172
- )
171
+ set_sequence(gb_file, annot_sequence.sequence, annot_sequence.sequence_start)
@@ -9,31 +9,27 @@ general sequence files.
9
9
 
10
10
  __name__ = "biotite.sequence.io"
11
11
  __author__ = "Patrick Kunzmann"
12
- __all__ = ["load_sequence", "save_sequence",
13
- "load_sequences", "save_sequences"]
12
+ __all__ = ["load_sequence", "save_sequence", "load_sequences", "save_sequences"]
14
13
 
15
- import itertools
16
14
  import os.path
17
- import io
18
15
  from collections import OrderedDict
19
16
  import numpy as np
20
- from ..seqtypes import NucleotideSequence, ProteinSequence
21
- from ..alphabet import Alphabet
17
+ from biotite.sequence.seqtypes import NucleotideSequence
22
18
 
23
19
 
24
20
  def load_sequence(file_path):
25
21
  """
26
22
  Load a sequence from a sequence file without the need
27
23
  to manually instantiate a :class:`File` object.
28
-
24
+
29
25
  Internally this function uses a :class:`File` object, based on the
30
26
  file extension.
31
-
27
+
32
28
  Parameters
33
29
  ----------
34
30
  file_path : str
35
31
  The path to the sequence file.
36
-
32
+
37
33
  Returns
38
34
  -------
39
35
  sequence : Sequence
@@ -42,11 +38,13 @@ def load_sequence(file_path):
42
38
  # We only need the suffix here
43
39
  filename, suffix = os.path.splitext(file_path)
44
40
  if suffix in [".fasta", ".fa", ".mpfa", ".fna", ".fsa"]:
45
- from .fasta import FastaFile, get_sequence
41
+ from biotite.sequence.io.fasta import FastaFile, get_sequence
42
+
46
43
  file = FastaFile.read(file_path)
47
44
  return get_sequence(file)
48
45
  elif suffix in [".fastq", ".fq"]:
49
- from .fastq import FastqFile
46
+ from biotite.sequence.io.fastq import FastqFile
47
+
50
48
  # Quality scores are irrelevant for this function
51
49
  # -> Offset is irrelevant
52
50
  file = FastqFile.read(file_path, offset="Sanger")
@@ -56,7 +54,8 @@ def load_sequence(file_path):
56
54
  break
57
55
  return sequence
58
56
  elif suffix in [".gb", ".gbk", ".gp"]:
59
- from .genbank import GenBankFile, get_sequence
57
+ from biotite.sequence.io.genbank import GenBankFile, get_sequence
58
+
60
59
  format = "gp" if suffix == ".gp" else "gb"
61
60
  file = GenBankFile.read(file_path)
62
61
  return get_sequence(file, format)
@@ -68,10 +67,10 @@ def save_sequence(file_path, sequence):
68
67
  """
69
68
  Save a sequence into a sequence file without the need
70
69
  to manually instantiate a :class:`File` object.
71
-
70
+
72
71
  Internally this function uses a :class:`File` object, based on the
73
72
  given file extension.
74
-
73
+
75
74
  Parameters
76
75
  ----------
77
76
  file_path : str
@@ -82,12 +81,14 @@ def save_sequence(file_path, sequence):
82
81
  # We only need the suffix here
83
82
  filename, suffix = os.path.splitext(file_path)
84
83
  if suffix in [".fasta", ".fa", ".mpfa", ".fna", ".fsa"]:
85
- from .fasta import FastaFile, set_sequence
84
+ from biotite.sequence.io.fasta import FastaFile, set_sequence
85
+
86
86
  file = FastaFile()
87
87
  set_sequence(file, sequence)
88
88
  file.write(file_path)
89
89
  elif suffix in [".fastq", ".fq"]:
90
- from .fastq import FastqFile
90
+ from biotite.sequence.io.fastq import FastqFile
91
+
91
92
  # Quality scores are irrelevant for this function
92
93
  # -> Offset is irrelevant
93
94
  file = FastqFile(offset="Sanger")
@@ -96,7 +97,8 @@ def save_sequence(file_path, sequence):
96
97
  file["sequence"] = str(sequence), scores
97
98
  file.write(file_path)
98
99
  elif suffix in [".gb", ".gbk", ".gp"]:
99
- from .genbank import GenBankFile, set_locus, set_sequence
100
+ from biotite.sequence.io.genbank import GenBankFile, set_locus, set_sequence
101
+
100
102
  file = GenBankFile()
101
103
  set_locus(file, "sequence", len(sequence))
102
104
  set_sequence(file, sequence)
@@ -109,37 +111,42 @@ def load_sequences(file_path):
109
111
  """
110
112
  Load multiple sequences from a sequence file without the need
111
113
  to manually instantiate a :class:`File` object.
112
-
114
+
113
115
  Internally this function uses a :class:`File` object, based on the
114
116
  file extension.
115
-
117
+
116
118
  Parameters
117
119
  ----------
118
120
  file_path : str
119
121
  The path to the sequence file.
120
-
122
+
121
123
  Returns
122
124
  -------
123
125
  sequences : dict of (str, Sequence)
124
126
  The sequences in the file.
125
127
  This dictionary maps each header name to
126
- the respective sequence.
128
+ the respective sequence.
127
129
  """
128
130
  # We only need the suffix here
129
131
  filename, suffix = os.path.splitext(file_path)
130
132
  if suffix in [".fasta", ".fa", ".mpfa", ".fna", ".fsa"]:
131
- from .fasta import FastaFile, get_sequences
133
+ from biotite.sequence.io.fasta import FastaFile, get_sequences
134
+
132
135
  file = FastaFile.read(file_path)
133
136
  return get_sequences(file)
134
137
  elif suffix in [".fastq", ".fq"]:
135
- from .fastq import FastqFile
138
+ from biotite.sequence.io.fastq import FastqFile
139
+
136
140
  # Quality scores are irrelevant for this function
137
141
  # -> Offset is irrelevant
138
142
  file = FastqFile.read(file_path, offset="Sanger")
139
- return {identifier : NucleotideSequence(seq_str)
140
- for identifier, (seq_str, scores) in file.items()}
143
+ return {
144
+ identifier: NucleotideSequence(seq_str)
145
+ for identifier, (seq_str, scores) in file.items()
146
+ }
141
147
  elif suffix in [".gb", ".gbk", ".gp"]:
142
- from .genbank import MultiFile, get_definition, get_sequence
148
+ from biotite.sequence.io.genbank import MultiFile, get_definition, get_sequence
149
+
143
150
  file = MultiFile.read(file_path)
144
151
  format = "gp" if suffix == ".gp" else "gb"
145
152
  sequences = OrderedDict()
@@ -154,10 +161,10 @@ def save_sequences(file_path, sequences):
154
161
  """
155
162
  Save multiple sequences into a sequence file without the need
156
163
  to manually instantiate a :class:`File` object.
157
-
164
+
158
165
  Internally this function uses a :class:`File` object, based on the
159
166
  given file extension.
160
-
167
+
161
168
  Parameters
162
169
  ----------
163
170
  file_path : str
@@ -169,12 +176,14 @@ def save_sequences(file_path, sequences):
169
176
  # We only need the suffix here
170
177
  filename, suffix = os.path.splitext(file_path)
171
178
  if suffix in [".fasta", ".fa", ".mpfa", ".fna", ".fsa"]:
172
- from .fasta import FastaFile, set_sequences
179
+ from biotite.sequence.io.fasta import FastaFile, set_sequences
180
+
173
181
  file = FastaFile()
174
182
  set_sequences(file, sequences)
175
183
  file.write(file_path)
176
184
  elif suffix in [".fastq", ".fq"]:
177
- from .fastq import FastqFile
185
+ from biotite.sequence.io.fastq import FastqFile
186
+
178
187
  # Quality scores are irrelevant for this function
179
188
  # -> Offset is irrelevant
180
189
  file = FastqFile(offset="Sanger")
@@ -14,7 +14,7 @@ interface to this format, and high-level functions for extracting
14
14
  GFF 3 files. This means, that you cannot directly access the the
15
15
  parent or child of a feature.
16
16
  However, the ``Id`` and ``Name`` attributes are stored in the
17
- qualifiers of the created :class:`Feature` objects.
17
+ qualifiers of the created :class:`Feature` objects.
18
18
  Hence, it is possible to implement such a data structure from this
19
19
  information.
20
20
  """
@@ -22,5 +22,5 @@ interface to this format, and high-level functions for extracting
22
22
  __name__ = "biotite.sequence.io.gff"
23
23
  __author__ = "Patrick Kunzmann"
24
24
 
25
+ from .convert import *
25
26
  from .file import *
26
- from .convert import *
@@ -6,7 +6,7 @@ __name__ = "biotite.sequence.io.gff"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["get_annotation", "set_annotation"]
8
8
 
9
- from ...annotation import Location, Feature, Annotation
9
+ from biotite.sequence.annotation import Annotation, Feature, Location
10
10
 
11
11
 
12
12
  def get_annotation(gff_file):
@@ -22,12 +22,12 @@ def get_annotation(gff_file):
22
22
  Thus, for entries with the same ``ID``, the *type* and *attributes*
23
23
  are only parsed once and the locations are aggregated from each
24
24
  entry.
25
-
25
+
26
26
  Parameters
27
27
  ----------
28
28
  gff_file : GFFFile
29
29
  The file tro extract the :class:`Annotation` object from.
30
-
30
+
31
31
  Returns
32
32
  -------
33
33
  annotation : Annotation
@@ -45,9 +45,7 @@ def get_annotation(gff_file):
45
45
  # (beginning of the file)
46
46
  if current_key is not None:
47
47
  # Beginning of new feature -> Save previous feature
48
- annot.add_feature(
49
- Feature(current_key, current_locs, current_qual)
50
- )
48
+ annot.add_feature(Feature(current_key, current_locs, current_qual))
51
49
  # Track new feature
52
50
  current_key = type
53
51
  current_locs = [Location(start, end, strand)]
@@ -61,15 +59,14 @@ def get_annotation(gff_file):
61
59
  return annot
62
60
 
63
61
 
64
- def set_annotation(gff_file, annotation,
65
- seqid=None, source=None, is_stranded=True):
62
+ def set_annotation(gff_file, annotation, seqid=None, source=None, is_stranded=True):
66
63
  """
67
64
  Write an :class:`Annotation` object into a GFF3 file.
68
65
 
69
66
  Each feature will get one entry for each location it has.
70
67
  :class:`Feature` objects with multiple locations require the ``ID``
71
68
  qualifier in its :attr:`Feature.qual` attribute.
72
-
69
+
73
70
  Parameters
74
71
  ----------
75
72
  gff_file : GFFFile
@@ -87,14 +84,13 @@ def set_annotation(gff_file, annotation,
87
84
  for feature in sorted(annotation):
88
85
  if len(feature.locs) > 1 and "ID" not in feature.qual:
89
86
  raise ValueError(
90
- "The 'Id' qualifier is required "
91
- "for features with multiple locations"
87
+ "The 'Id' qualifier is required " "for features with multiple locations"
92
88
  )
93
89
  ## seqid ##
94
90
  if seqid is not None and " " in seqid:
95
91
  raise ValueError("The 'seqid' must not contain whitespaces")
96
92
  ## source ##
97
- #Nothing to be done
93
+ # Nothing to be done
98
94
  ## type ##
99
95
  type = feature.key
100
96
  ## strand ##
@@ -128,6 +124,5 @@ def set_annotation(gff_file, annotation,
128
124
  else:
129
125
  phase = None
130
126
  gff_file.append(
131
- seqid, source, type, start, end,
132
- score, strand, phase, attributes
133
- )
127
+ seqid, source, type, start, end, score, strand, phase, attributes
128
+ )
@@ -6,19 +6,17 @@ __name__ = "biotite.sequence.io.gff"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["GFFFile"]
8
8
 
9
- import copy
10
9
  import string
11
- from urllib.parse import quote, unquote
12
10
  import warnings
13
- from ....file import TextFile, InvalidFileError
14
- from ...annotation import Location
15
-
11
+ from urllib.parse import quote, unquote
12
+ from biotite.file import InvalidFileError, TextFile
13
+ from biotite.sequence.annotation import Location
16
14
 
17
15
  # All punctuation characters except
18
16
  # percent, semicolon, equals, ampersand, comma
19
- _NOT_QUOTED = "".join(
20
- [char for char in string.punctuation if char not in "%;=&,"]
21
- ) + " "
17
+ _NOT_QUOTED = (
18
+ "".join([char for char in string.punctuation if char not in "%;=&,"]) + " "
19
+ )
22
20
 
23
21
 
24
22
  class GFFFile(TextFile):
@@ -61,7 +59,7 @@ class GFFFile(TextFile):
61
59
  The content after the ``##FASTA`` directive is simply ignored.
62
60
  Please provide the sequence via a separate file or read the FASTA
63
61
  data directly via the :attr:`lines` attribute:
64
-
62
+
65
63
  >>> import os.path
66
64
  >>> from io import StringIO
67
65
  >>> gff_file = GFFFile.read(os.path.join(path_to_sequences, "indexing_test.gff3"))
@@ -121,7 +119,7 @@ class GFFFile(TextFile):
121
119
  ##Example directive param1 param2
122
120
  SomeSeqID Biotite CDS 1 99 . + 0 ID=FeatureID;product=A protein
123
121
  """
124
-
122
+
125
123
  def __init__(self):
126
124
  super().__init__()
127
125
  # Maps entry indices to line indices
@@ -132,18 +130,18 @@ class GFFFile(TextFile):
132
130
  self._has_fasta = None
133
131
  self._index_entries()
134
132
  self.append_directive("gff-version", "3")
135
-
133
+
136
134
  @classmethod
137
135
  def read(cls, file):
138
136
  """
139
137
  Read a GFF3 file.
140
-
138
+
141
139
  Parameters
142
140
  ----------
143
141
  file : file-like object or str
144
142
  The file to be read.
145
143
  Alternatively a file path can be supplied.
146
-
144
+
147
145
  Returns
148
146
  -------
149
147
  file_object : GFFFile
@@ -152,18 +150,29 @@ class GFFFile(TextFile):
152
150
  file = super().read(file)
153
151
  file._index_entries()
154
152
  return file
155
-
156
- def insert(self, index, seqid, source, type, start, end,
157
- score, strand, phase, attributes=None):
153
+
154
+ def insert(
155
+ self,
156
+ index,
157
+ seqid,
158
+ source,
159
+ type,
160
+ start,
161
+ end,
162
+ score,
163
+ strand,
164
+ phase,
165
+ attributes=None,
166
+ ):
158
167
  """
159
168
  Insert an entry at the given index.
160
-
169
+
161
170
  Parameters
162
171
  ----------
163
172
  index : int
164
173
  Index where the entry is inserted.
165
174
  If the index is equal to the length of the file, the entry
166
- is appended at the end of the file.
175
+ is appended at the end of the file.
167
176
  seqid : str
168
177
  The ID of the reference sequence.
169
178
  source : str
@@ -184,22 +193,23 @@ class GFFFile(TextFile):
184
193
  Additional properties of the feature.
185
194
  """
186
195
  if index == len(self):
187
- self.append(seqid, source, type, start, end,
188
- score, strand, phase, attributes)
196
+ self.append(
197
+ seqid, source, type, start, end, score, strand, phase, attributes
198
+ )
189
199
  else:
190
200
  line_index = self._entries[index]
191
201
  line = GFFFile._create_line(
192
- seqid, source, type, start, end,
193
- score, strand, phase, attributes
202
+ seqid, source, type, start, end, score, strand, phase, attributes
194
203
  )
195
204
  self.lines.insert(line_index, line)
196
205
  self._index_entries()
197
-
198
- def append(self, seqid, source, type, start, end,
199
- score, strand, phase, attributes=None):
206
+
207
+ def append(
208
+ self, seqid, source, type, start, end, score, strand, phase, attributes=None
209
+ ):
200
210
  """
201
211
  Append an entry to the end of the file.
202
-
212
+
203
213
  Parameters
204
214
  ----------
205
215
  seqid : str
@@ -232,11 +242,11 @@ class GFFFile(TextFile):
232
242
  self.lines.append(line)
233
243
  # Fast update of entry index by adding last line
234
244
  self._entries.append(len(self.lines) - 1)
235
-
245
+
236
246
  def append_directive(self, directive, *args):
237
247
  """
238
248
  Append a directive line to the end of the file.
239
-
249
+
240
250
  Parameters
241
251
  ----------
242
252
  directive : str
@@ -245,13 +255,13 @@ class GFFFile(TextFile):
245
255
  Optional parameters for the directive.
246
256
  Each argument is simply appended to the directive, separated
247
257
  by a single space character.
248
-
258
+
249
259
  Raises
250
260
  ------
251
261
  NotImplementedError
252
262
  If the ``##FASTA`` directive is used, which is not
253
263
  supported.
254
-
264
+
255
265
  Examples
256
266
  --------
257
267
 
@@ -262,17 +272,15 @@ class GFFFile(TextFile):
262
272
  ##Example directive param1 param2
263
273
  """
264
274
  if directive.startswith("FASTA"):
265
- raise NotImplementedError(
266
- "Adding FASTA information is not supported"
267
- )
275
+ raise NotImplementedError("Adding FASTA information is not supported")
268
276
  directive_line = "##" + directive + " " + " ".join(args)
269
277
  self._directives.append((directive_line[2:], len(self.lines)))
270
278
  self.lines.append(directive_line)
271
-
279
+
272
280
  def directives(self):
273
281
  """
274
282
  Get the directives in the file.
275
-
283
+
276
284
  Returns
277
285
  -------
278
286
  directives : list of tuple(str, int)
@@ -283,7 +291,7 @@ class GFFFile(TextFile):
283
291
  """
284
292
  # Sort in line order
285
293
  return sorted(self._directives, key=lambda directive: directive[1])
286
-
294
+
287
295
  def __setitem__(self, index, item):
288
296
  seqid, source, type, start, end, score, strand, phase, attrib = item
289
297
  line = GFFFile._create_line(
@@ -292,15 +300,13 @@ class GFFFile(TextFile):
292
300
  line_index = self._entries[index]
293
301
  self.lines[line_index] = line
294
302
 
295
-
296
303
  def __getitem__(self, index):
297
- if (index >= 0 and index >= len(self)) or \
298
- (index < 0 and -index > len(self)):
299
- raise IndexError(
300
- f"Index {index} is out of range for GFFFile with "
301
- f"{len(self)} entries"
302
- )
303
-
304
+ if (index >= 0 and index >= len(self)) or (index < 0 and -index > len(self)):
305
+ raise IndexError(
306
+ f"Index {index} is out of range for GFFFile with "
307
+ f"{len(self)} entries"
308
+ )
309
+
304
310
  line_index = self._entries[index]
305
311
  # Columns are tab separated
306
312
  s = self.lines[line_index].strip().split("\t")
@@ -324,15 +330,15 @@ class GFFFile(TextFile):
324
330
  attrib = GFFFile._parse_attributes(attrib)
325
331
 
326
332
  return seqid, source, type, start, end, score, strand, phase, attrib
327
-
333
+
328
334
  def __delitem__(self, index):
329
335
  line_index = self._entries[index]
330
336
  del self.lines[line_index]
331
337
  self._index_entries()
332
-
338
+
333
339
  def __len__(self):
334
340
  return len(self._entries)
335
-
341
+
336
342
  def _index_entries(self):
337
343
  """
338
344
  Parse the file for comment and directive lines.
@@ -374,15 +380,12 @@ class GFFFile(TextFile):
374
380
  self._entries = self._entries[:entry_counter]
375
381
 
376
382
  @staticmethod
377
- def _create_line(seqid, source, type, start, end,
378
- score, strand, phase, attributes):
383
+ def _create_line(seqid, source, type, start, end, score, strand, phase, attributes):
379
384
  """
380
385
  Create a line for a newly created entry.
381
386
  """
382
- seqid = quote(seqid.strip(), safe=_NOT_QUOTED) \
383
- if seqid is not None else "."
384
- source = quote(source.strip(), safe=_NOT_QUOTED) \
385
- if source is not None else "."
387
+ seqid = quote(seqid.strip(), safe=_NOT_QUOTED) if seqid is not None else "."
388
+ source = quote(source.strip(), safe=_NOT_QUOTED) if source is not None else "."
386
389
  type = type.strip()
387
390
 
388
391
  # Perform checks
@@ -394,7 +397,7 @@ class GFFFile(TextFile):
394
397
  raise ValueError("'type' must not be empty")
395
398
  if seqid[0] == ">":
396
399
  raise ValueError("'seqid' must not start with '>'")
397
-
400
+
398
401
  score = str(score) if score is not None else "."
399
402
  if strand == Location.Strand.FORWARD:
400
403
  strand = "+"
@@ -403,16 +406,31 @@ class GFFFile(TextFile):
403
406
  else:
404
407
  strand = "."
405
408
  phase = str(phase) if phase is not None else "."
406
- attributes = ";".join(
407
- [quote(key, safe=_NOT_QUOTED) + "=" + quote(val, safe=_NOT_QUOTED)
408
- for key, val in attributes.items()]
409
- ) if attributes is not None and len(attributes) > 0 else "."
409
+ attributes = (
410
+ ";".join(
411
+ [
412
+ quote(key, safe=_NOT_QUOTED) + "=" + quote(val, safe=_NOT_QUOTED)
413
+ for key, val in attributes.items()
414
+ ]
415
+ )
416
+ if attributes is not None and len(attributes) > 0
417
+ else "."
418
+ )
410
419
 
411
420
  return "\t".join(
412
- [seqid, source, type, str(start), str(end),
413
- str(score), strand, phase, attributes]
421
+ [
422
+ seqid,
423
+ source,
424
+ type,
425
+ str(start),
426
+ str(end),
427
+ str(score),
428
+ strand,
429
+ phase,
430
+ attributes,
431
+ ]
414
432
  )
415
-
433
+
416
434
  @staticmethod
417
435
  def _parse_attributes(attributes):
418
436
  """
@@ -426,9 +444,7 @@ class GFFFile(TextFile):
426
444
  for entry in attrib_entries:
427
445
  compounds = entry.split("=")
428
446
  if len(compounds) != 2:
429
- raise InvalidFileError(
430
- f"Attribute entry '{entry}' is invalid"
431
- )
447
+ raise InvalidFileError(f"Attribute entry '{entry}' is invalid")
432
448
  key, val = compounds
433
449
  attrib_dict[unquote(key)] = unquote(val)
434
- return attrib_dict
450
+ return attrib_dict
@@ -31,6 +31,6 @@ popular *UPGMA* (:func:`upgma()`) and *Neighbor-Joining*
31
31
  __name__ = "biotite.sequence.phylo"
32
32
  __author__ = "Patrick Kunzmann"
33
33
 
34
+ from .nj import *
34
35
  from .tree import *
35
36
  from .upgma import *
36
- from .nj import *