biotite 1.0.0__cp312-cp312-macosx_11_0_arm64.whl → 1.1.0__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/dssp/app.py +13 -3
- biotite/application/localapp.py +34 -0
- biotite/application/muscle/app3.py +2 -15
- biotite/application/muscle/app5.py +2 -2
- biotite/application/util.py +1 -1
- biotite/application/viennarna/rnaplot.py +6 -2
- biotite/database/rcsb/query.py +6 -6
- biotite/database/uniprot/check.py +20 -15
- biotite/database/uniprot/download.py +1 -1
- biotite/database/uniprot/query.py +1 -1
- biotite/sequence/align/alignment.py +16 -3
- biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +5 -5
- biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +17 -0
- biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +52 -42
- biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/matrix.py +273 -55
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
- biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
- biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
- biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
- biotite/sequence/alphabet.py +3 -0
- biotite/sequence/codec.cpython-312-darwin.so +0 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
- biotite/sequence/graphics/colorschemes.py +44 -11
- biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
- biotite/sequence/profile.py +86 -4
- biotite/sequence/seqtypes.py +124 -3
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +4 -3
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +110 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +171 -0
- biotite/structure/alphabet/unkerasify.py +122 -0
- biotite/structure/atoms.py +156 -43
- biotite/structure/bonds.cpython-312-darwin.so +0 -0
- biotite/structure/bonds.pyx +72 -21
- biotite/structure/celllist.cpython-312-darwin.so +0 -0
- biotite/structure/charges.cpython-312-darwin.so +0 -0
- biotite/structure/filter.py +1 -1
- biotite/structure/geometry.py +60 -113
- biotite/structure/info/__init__.py +1 -0
- biotite/structure/info/atoms.py +13 -13
- biotite/structure/info/bonds.py +12 -6
- biotite/structure/info/ccd.py +125 -32
- biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
- biotite/structure/info/groups.py +63 -17
- biotite/structure/info/masses.py +9 -6
- biotite/structure/info/misc.py +15 -21
- biotite/structure/info/standardize.py +3 -2
- biotite/structure/io/mol/sdf.py +41 -40
- biotite/structure/io/pdb/convert.py +2 -0
- biotite/structure/io/pdb/file.py +74 -3
- biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +1 -0
- biotite/structure/io/pdbx/bcif.py +32 -8
- biotite/structure/io/pdbx/cif.py +148 -107
- biotite/structure/io/pdbx/component.py +9 -4
- biotite/structure/io/pdbx/compress.py +321 -0
- biotite/structure/io/pdbx/convert.py +227 -68
- biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +98 -17
- biotite/structure/io/trajfile.py +16 -16
- biotite/structure/molecules.py +141 -141
- biotite/structure/sasa.cpython-312-darwin.so +0 -0
- biotite/structure/segments.py +1 -2
- biotite/structure/util.py +73 -1
- biotite/version.py +2 -2
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/METADATA +4 -1
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/RECORD +88 -78
- biotite/structure/info/ccd/README.rst +0 -8
- biotite/structure/info/ccd/amino_acids.txt +0 -1663
- biotite/structure/info/ccd/carbohydrates.txt +0 -1135
- biotite/structure/info/ccd/nucleotides.txt +0 -798
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/application/dssp/app.py
CHANGED
|
@@ -6,10 +6,11 @@ __name__ = "biotite.application.dssp"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["DsspApp"]
|
|
8
8
|
|
|
9
|
+
from subprocess import SubprocessError
|
|
9
10
|
from tempfile import NamedTemporaryFile
|
|
10
11
|
import numpy as np
|
|
11
12
|
from biotite.application.application import AppState, requires_state
|
|
12
|
-
from biotite.application.localapp import LocalApp, cleanup_tempfile
|
|
13
|
+
from biotite.application.localapp import LocalApp, cleanup_tempfile, get_version
|
|
13
14
|
from biotite.structure.io.pdbx.cif import CIFFile
|
|
14
15
|
from biotite.structure.io.pdbx.convert import set_structure
|
|
15
16
|
|
|
@@ -72,7 +73,13 @@ class DsspApp(LocalApp):
|
|
|
72
73
|
self._array.set_annotation(
|
|
73
74
|
"occupancy", np.ones(self._array.array_length(), dtype=float)
|
|
74
75
|
)
|
|
75
|
-
|
|
76
|
+
try:
|
|
77
|
+
# The parameters have changed in version 4
|
|
78
|
+
self._new_cli = get_version(bin_path)[0] >= 4
|
|
79
|
+
except SubprocessError:
|
|
80
|
+
# In older versions, no version is returned with `--version`
|
|
81
|
+
# -> a SubprocessError is raised
|
|
82
|
+
self._new_cli = False
|
|
76
83
|
self._in_file = NamedTemporaryFile("w", suffix=".cif", delete=False)
|
|
77
84
|
self._out_file = NamedTemporaryFile("r", suffix=".dssp", delete=False)
|
|
78
85
|
|
|
@@ -81,7 +88,10 @@ class DsspApp(LocalApp):
|
|
|
81
88
|
set_structure(in_file, self._array)
|
|
82
89
|
in_file.write(self._in_file)
|
|
83
90
|
self._in_file.flush()
|
|
84
|
-
|
|
91
|
+
if self._new_cli:
|
|
92
|
+
self.set_arguments([self._in_file.name, self._out_file.name])
|
|
93
|
+
else:
|
|
94
|
+
self.set_arguments(["-i", self._in_file.name, "-o", self._out_file.name])
|
|
85
95
|
super().run()
|
|
86
96
|
|
|
87
97
|
def evaluate(self):
|
biotite/application/localapp.py
CHANGED
|
@@ -8,7 +8,10 @@ __all__ = ["LocalApp"]
|
|
|
8
8
|
|
|
9
9
|
import abc
|
|
10
10
|
import copy
|
|
11
|
+
import re
|
|
12
|
+
import subprocess
|
|
11
13
|
from os import chdir, getcwd, remove
|
|
14
|
+
from pathlib import Path
|
|
12
15
|
from subprocess import PIPE, Popen, SubprocessError, TimeoutExpired
|
|
13
16
|
from biotite.application.application import (
|
|
14
17
|
Application,
|
|
@@ -306,3 +309,34 @@ def cleanup_tempfile(temp_file):
|
|
|
306
309
|
except FileNotFoundError:
|
|
307
310
|
# File was already deleted, e.g. due to `TemporaryFile(delete=True)`
|
|
308
311
|
pass
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def get_version(bin_path, version_option="--version"):
|
|
315
|
+
"""
|
|
316
|
+
Get the version of a locally installed application.
|
|
317
|
+
|
|
318
|
+
Parameters
|
|
319
|
+
----------
|
|
320
|
+
bin_path : str or Path
|
|
321
|
+
Path of the application.
|
|
322
|
+
version_option : str, optional
|
|
323
|
+
The command line option to get the version.
|
|
324
|
+
|
|
325
|
+
Returns
|
|
326
|
+
-------
|
|
327
|
+
major, minor : int
|
|
328
|
+
The major and minor version number.
|
|
329
|
+
"""
|
|
330
|
+
output = subprocess.run(
|
|
331
|
+
[bin_path, version_option], capture_output=True, text=True
|
|
332
|
+
).stdout
|
|
333
|
+
# Find matches for version string containing major and minor version
|
|
334
|
+
match = re.search(r"\d+\.\d+", output)
|
|
335
|
+
if match is None:
|
|
336
|
+
raise subprocess.SubprocessError(
|
|
337
|
+
f"Could not determine '{Path(bin_path).name}' version "
|
|
338
|
+
f"from the string '{output}'"
|
|
339
|
+
)
|
|
340
|
+
version_string = match.group(0)
|
|
341
|
+
splitted = version_string.split(".")
|
|
342
|
+
return int(splitted[0]), int(splitted[1])
|
|
@@ -7,13 +7,11 @@ __author__ = "Patrick Kunzmann"
|
|
|
7
7
|
__all__ = ["MuscleApp"]
|
|
8
8
|
|
|
9
9
|
import numbers
|
|
10
|
-
import re
|
|
11
|
-
import subprocess
|
|
12
10
|
import warnings
|
|
13
11
|
from collections.abc import Sequence
|
|
14
12
|
from tempfile import NamedTemporaryFile
|
|
15
13
|
from biotite.application.application import AppState, VersionError, requires_state
|
|
16
|
-
from biotite.application.localapp import cleanup_tempfile
|
|
14
|
+
from biotite.application.localapp import cleanup_tempfile, get_version
|
|
17
15
|
from biotite.application.msaapp import MSAApp
|
|
18
16
|
from biotite.sequence.phylo.tree import Tree
|
|
19
17
|
|
|
@@ -54,7 +52,7 @@ class MuscleApp(MSAApp):
|
|
|
54
52
|
"""
|
|
55
53
|
|
|
56
54
|
def __init__(self, sequences, bin_path="muscle", matrix=None):
|
|
57
|
-
major_version = get_version(bin_path)[0]
|
|
55
|
+
major_version = get_version(bin_path, "-version")[0]
|
|
58
56
|
if major_version != 3:
|
|
59
57
|
raise VersionError(f"Muscle 3 is required, got version {major_version}")
|
|
60
58
|
|
|
@@ -227,14 +225,3 @@ class MuscleApp(MSAApp):
|
|
|
227
225
|
app.start()
|
|
228
226
|
app.join()
|
|
229
227
|
return app.get_alignment()
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
def get_version(bin_path="muscle"):
|
|
233
|
-
output = subprocess.run([bin_path, "-version"], capture_output=True, text=True)
|
|
234
|
-
# Find matches for version string containing major and minor version
|
|
235
|
-
match = re.search(r"\d+\.\d+", output.stdout)
|
|
236
|
-
if match is None:
|
|
237
|
-
raise subprocess.SubprocessError("Could not determine Muscle version")
|
|
238
|
-
version_string = match.group(0)
|
|
239
|
-
splitted = version_string.split(".")
|
|
240
|
-
return int(splitted[0]), int(splitted[1])
|
|
@@ -7,8 +7,8 @@ __author__ = "Patrick Kunzmann"
|
|
|
7
7
|
__all__ = ["Muscle5App"]
|
|
8
8
|
|
|
9
9
|
from biotite.application.application import AppState, VersionError, requires_state
|
|
10
|
+
from biotite.application.localapp import get_version
|
|
10
11
|
from biotite.application.msaapp import MSAApp
|
|
11
|
-
from biotite.application.muscle.app3 import get_version
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class Muscle5App(MSAApp):
|
|
@@ -49,7 +49,7 @@ class Muscle5App(MSAApp):
|
|
|
49
49
|
"""
|
|
50
50
|
|
|
51
51
|
def __init__(self, sequences, bin_path="muscle"):
|
|
52
|
-
major_version = get_version(bin_path)[0]
|
|
52
|
+
major_version = get_version(bin_path, "-version")[0]
|
|
53
53
|
if major_version < 5:
|
|
54
54
|
raise VersionError(
|
|
55
55
|
f"At least Muscle 5 is required, got version {major_version}"
|
biotite/application/util.py
CHANGED
|
@@ -50,7 +50,7 @@ def map_matrix(matrix):
|
|
|
50
50
|
# All trailing symbols are filled with zeros
|
|
51
51
|
old_length = len(matrix.get_alphabet1())
|
|
52
52
|
new_length = len(ProteinSequence.alphabet)
|
|
53
|
-
new_score_matrix = np.zeros((new_length, new_length))
|
|
53
|
+
new_score_matrix = np.zeros((new_length, new_length), dtype=np.int32)
|
|
54
54
|
new_score_matrix[:old_length, :old_length] = matrix.score_matrix()
|
|
55
55
|
return SubstitutionMatrix(
|
|
56
56
|
ProteinSequence.alphabet, ProteinSequence.alphabet, new_score_matrix
|
|
@@ -99,8 +99,12 @@ class RNAplotApp(LocalApp):
|
|
|
99
99
|
self._in_file.write(self._dot_bracket)
|
|
100
100
|
self._in_file.flush()
|
|
101
101
|
self.set_arguments(
|
|
102
|
-
[
|
|
103
|
-
|
|
102
|
+
[
|
|
103
|
+
"-i", self._in_file.name,
|
|
104
|
+
"--output-format", "xrna",
|
|
105
|
+
"-t", self._layout_type,
|
|
106
|
+
]
|
|
107
|
+
) # fmt: skip
|
|
104
108
|
super().run()
|
|
105
109
|
|
|
106
110
|
def evaluate(self):
|
biotite/database/rcsb/query.py
CHANGED
|
@@ -146,9 +146,9 @@ class BasicQuery(SingleQuery):
|
|
|
146
146
|
Examples
|
|
147
147
|
--------
|
|
148
148
|
|
|
149
|
-
>>> query = BasicQuery("
|
|
149
|
+
>>> query = BasicQuery("Miniprotein Construct")
|
|
150
150
|
>>> print(sorted(search(query)))
|
|
151
|
-
['1L2Y'
|
|
151
|
+
['1L2Y']
|
|
152
152
|
"""
|
|
153
153
|
|
|
154
154
|
def __init__(self, term):
|
|
@@ -346,9 +346,9 @@ class SequenceQuery(SingleQuery):
|
|
|
346
346
|
--------
|
|
347
347
|
|
|
348
348
|
>>> sequence = "NLYIQWLKDGGPSSGRPPPS"
|
|
349
|
-
>>> query = SequenceQuery(sequence, scope="protein", min_identity=0.
|
|
349
|
+
>>> query = SequenceQuery(sequence, scope="protein", min_identity=0.95)
|
|
350
350
|
>>> print(sorted(search(query)))
|
|
351
|
-
['1L2Y', '
|
|
351
|
+
['1L2Y', '2LDJ', '9G22', '9G2N', '9G2O', '9G31', '9G32', '9GDL', '9GDN', '9GDT', '9GDU', '9GE1']
|
|
352
352
|
"""
|
|
353
353
|
|
|
354
354
|
def __init__(self, sequence, scope, min_identity=0.0, max_expect_value=10000000.0):
|
|
@@ -441,7 +441,7 @@ class StructureQuery(SingleQuery):
|
|
|
441
441
|
|
|
442
442
|
>>> query = StructureQuery("1L2Y", chain="A")
|
|
443
443
|
>>> print(sorted(search(query)))
|
|
444
|
-
['1L2Y', '1RIJ', '2JOF', '2LDJ', '2M7D', '7MQS']
|
|
444
|
+
['1L2Y', '1RIJ', '2JOF', '2LDJ', '2M7D', '7MQS', '9DPF']
|
|
445
445
|
"""
|
|
446
446
|
|
|
447
447
|
def __init__(self, pdb_id, chain=None, assembly=None, strict=True):
|
|
@@ -868,7 +868,7 @@ def search(
|
|
|
868
868
|
... query, return_type="polymer_entity", return_groups=True,
|
|
869
869
|
... group_by=UniprotGrouping(sort_by="rcsb_accession_info.initial_release_date"),
|
|
870
870
|
... ))
|
|
871
|
-
|
|
871
|
+
{'P24297': ['5NW3_1'], 'P27707': ['4JLJ_1'], 'P80176': ['5D8V_1'], 'O29777': ['7R0H_1'], 'P01542': ['3NIR_1', '1EJG_1']}
|
|
872
872
|
"""
|
|
873
873
|
query_dict = _initialize_query_dict(query, return_type, group_by, content_types)
|
|
874
874
|
|
|
@@ -10,7 +10,7 @@ from biotite.database.error import RequestError
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
# Taken from https://www.uniprot.org/help/api_retrieve_entries
|
|
13
|
-
def assert_valid_response(
|
|
13
|
+
def assert_valid_response(response):
|
|
14
14
|
"""
|
|
15
15
|
Checks whether the response is valid.
|
|
16
16
|
|
|
@@ -19,17 +19,22 @@ def assert_valid_response(response_status_code):
|
|
|
19
19
|
response_status_code: int
|
|
20
20
|
Status code of request.get.
|
|
21
21
|
"""
|
|
22
|
-
if
|
|
23
|
-
raise RequestError("
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
"
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
22
|
+
if len(response.content) == 0:
|
|
23
|
+
raise RequestError("No content returned")
|
|
24
|
+
match response.status_code:
|
|
25
|
+
case 400:
|
|
26
|
+
raise RequestError("Bad request. There is a problem with your input.")
|
|
27
|
+
case 404:
|
|
28
|
+
raise RequestError("Not found. The resource you requested doesn't exist.")
|
|
29
|
+
case 410:
|
|
30
|
+
raise RequestError("Gone. The resource you requested was removed.")
|
|
31
|
+
case 500:
|
|
32
|
+
raise RequestError(
|
|
33
|
+
"Internal server error. "
|
|
34
|
+
"Most likely a temporary problem, "
|
|
35
|
+
"but if the problem persists please contact UniProt team."
|
|
36
|
+
)
|
|
37
|
+
case 503:
|
|
38
|
+
raise RequestError(
|
|
39
|
+
"Service not available. The server is being updated, try again later."
|
|
40
|
+
)
|
|
@@ -111,7 +111,7 @@ def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
111
111
|
if format in ["fasta", "gff", "txt", "xml", "rdf", "tab"]:
|
|
112
112
|
r = requests.get(_fetch_url + db_name + "/" + id + "." + format)
|
|
113
113
|
content = r.text
|
|
114
|
-
assert_valid_response(r
|
|
114
|
+
assert_valid_response(r)
|
|
115
115
|
else:
|
|
116
116
|
raise ValueError(f"Format '{format}' is not supported")
|
|
117
117
|
if file is None:
|
|
@@ -289,5 +289,5 @@ def search(query, number=500):
|
|
|
289
289
|
params = {"query": str(query), "format": "list", "size": str(number)}
|
|
290
290
|
r = requests.get(_base_url, params=params)
|
|
291
291
|
content = r.text
|
|
292
|
-
assert_valid_response(r
|
|
292
|
+
assert_valid_response(r)
|
|
293
293
|
return content.split("\n")[:-1]
|
|
@@ -9,7 +9,6 @@ import numbers
|
|
|
9
9
|
import textwrap
|
|
10
10
|
from collections.abc import Sequence
|
|
11
11
|
import numpy as np
|
|
12
|
-
from biotite.sequence.alphabet import LetterAlphabet
|
|
13
12
|
|
|
14
13
|
__all__ = [
|
|
15
14
|
"Alignment",
|
|
@@ -111,7 +110,7 @@ class Alignment(object):
|
|
|
111
110
|
for i in range(len(self.trace)):
|
|
112
111
|
j = self.trace[i][seq_index]
|
|
113
112
|
if j != -1:
|
|
114
|
-
seq_str += self.sequences[seq_index][j]
|
|
113
|
+
seq_str += str(self.sequences[seq_index][j])
|
|
115
114
|
else:
|
|
116
115
|
seq_str += "-"
|
|
117
116
|
return seq_str
|
|
@@ -133,7 +132,7 @@ class Alignment(object):
|
|
|
133
132
|
# has an non-single letter alphabet
|
|
134
133
|
all_single_letter = True
|
|
135
134
|
for seq in self.sequences:
|
|
136
|
-
if not
|
|
135
|
+
if not _is_single_letter(seq.alphabet):
|
|
137
136
|
all_single_letter = False
|
|
138
137
|
if all_single_letter:
|
|
139
138
|
# First dimension: sequence number,
|
|
@@ -665,3 +664,17 @@ def remove_terminal_gaps(alignment):
|
|
|
665
664
|
"no overlap and the resulting alignment would be empty"
|
|
666
665
|
)
|
|
667
666
|
return alignment[start:stop]
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _is_single_letter(alphabet):
|
|
670
|
+
"""
|
|
671
|
+
More relaxed version of :func:`biotite.sequence.alphabet.is_letter_alphabet()`:
|
|
672
|
+
It is sufficient that the string representation of each symbol is only
|
|
673
|
+
a single character.
|
|
674
|
+
"""
|
|
675
|
+
if alphabet.is_letter_alphabet():
|
|
676
|
+
return True
|
|
677
|
+
for symbol in alphabet:
|
|
678
|
+
if len(str(symbol)) != 1:
|
|
679
|
+
return False
|
|
680
|
+
return True
|
|
Binary file
|
|
@@ -214,9 +214,6 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
214
214
|
else:
|
|
215
215
|
is_swapped = False
|
|
216
216
|
lower_diag, upper_diag = min(band), max(band)
|
|
217
|
-
band_width = upper_diag - lower_diag + 1
|
|
218
|
-
if band_width < 1:
|
|
219
|
-
raise ValueError("The width of the band is 0")
|
|
220
217
|
if len(seq1) + upper_diag <= 0 or lower_diag >= len(seq2):
|
|
221
218
|
raise ValueError(
|
|
222
219
|
"Alignment band is out of range, the band allows no overlap "
|
|
@@ -226,6 +223,9 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
226
223
|
# covers the search space of an unbanded alignment
|
|
227
224
|
lower_diag = max(lower_diag, -len(seq1)+1)
|
|
228
225
|
upper_diag = min(upper_diag, len(seq2)-1)
|
|
226
|
+
band_width = upper_diag - lower_diag + 1
|
|
227
|
+
if band_width < 1:
|
|
228
|
+
raise ValueError("The width of the band is 0")
|
|
229
229
|
|
|
230
230
|
# This implementation uses transposed tables in comparison
|
|
231
231
|
# to the common visualization
|
|
@@ -249,12 +249,12 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
249
249
|
###############
|
|
250
250
|
|
|
251
251
|
# A score value that signals that the respective direction in the
|
|
252
|
-
# dynamic programming matrix should not be used since
|
|
252
|
+
# dynamic programming matrix should not be used, since it would be
|
|
253
253
|
# outside the band
|
|
254
254
|
# It is the 'worst' score available, so the trace table will never
|
|
255
255
|
# include such a direction
|
|
256
256
|
neg_inf = np.iinfo(np.int32).min
|
|
257
|
-
# Correct the 'negative infinity' integer, by making it more
|
|
257
|
+
# Correct the 'negative infinity' integer, by making it more positive
|
|
258
258
|
# This prevents an integer underflow when the gap penalty or
|
|
259
259
|
# match score is added to this value
|
|
260
260
|
neg_inf -= min(gap_penalty) if affine_penalty else gap_penalty
|
|
Binary file
|
|
@@ -568,6 +568,23 @@ class KmerAlphabet(Alphabet):
|
|
|
568
568
|
return int(len(self._base_alph) ** self._k)
|
|
569
569
|
|
|
570
570
|
|
|
571
|
+
def __iter__(self):
|
|
572
|
+
# Creating all symbols is expensive
|
|
573
|
+
# -> Use a generator instead
|
|
574
|
+
if isinstance(self._base_alph, LetterAlphabet):
|
|
575
|
+
return ("".join(self.decode(code)) for code in range(len(self)))
|
|
576
|
+
else:
|
|
577
|
+
return (list(self.decode(code)) for code in range(len(self)))
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
def __contains__(self, symbol):
|
|
581
|
+
try:
|
|
582
|
+
self.fuse(self._base_alph.encode_multiple(symbol))
|
|
583
|
+
return True
|
|
584
|
+
except AlphabetError:
|
|
585
|
+
return False
|
|
586
|
+
|
|
587
|
+
|
|
571
588
|
def _to_array_form(model_string):
|
|
572
589
|
"""
|
|
573
590
|
Convert the common string representation of a *k-mer* spacing
|
|
Binary file
|
|
Binary file
|
|
@@ -1384,8 +1384,7 @@ cdef class KmerTable:
|
|
|
1384
1384
|
|
|
1385
1385
|
|
|
1386
1386
|
def __getstate__(self):
|
|
1387
|
-
|
|
1388
|
-
return _pickle_c_arrays(self._ptr_array, relevant_kmers)
|
|
1387
|
+
return _pickle_c_arrays(self._ptr_array)
|
|
1389
1388
|
|
|
1390
1389
|
|
|
1391
1390
|
def __setstate__(self, state):
|
|
@@ -2836,12 +2835,7 @@ cdef class BucketKmerTable:
|
|
|
2836
2835
|
|
|
2837
2836
|
|
|
2838
2837
|
def __getstate__(self):
|
|
2839
|
-
|
|
2840
|
-
np.asarray(self._ptr_array) != 0
|
|
2841
|
-
)[0]
|
|
2842
|
-
return _pickle_c_arrays(self._ptr_array, relevant_buckets)
|
|
2843
|
-
|
|
2844
|
-
|
|
2838
|
+
return _pickle_c_arrays(self._ptr_array)
|
|
2845
2839
|
|
|
2846
2840
|
def __setstate__(self, state):
|
|
2847
2841
|
_unpickle_c_arrays(self._ptr_array, state)
|
|
@@ -3097,27 +3091,44 @@ def _append_entries(ptr[:] trg_ptr_array, ptr[:] src_ptr_array):
|
|
|
3097
3091
|
|
|
3098
3092
|
@cython.boundscheck(False)
|
|
3099
3093
|
@cython.wraparound(False)
|
|
3100
|
-
def _pickle_c_arrays(ptr[:] ptr_array
|
|
3094
|
+
def _pickle_c_arrays(ptr[:] ptr_array):
|
|
3101
3095
|
"""
|
|
3102
|
-
Pickle the
|
|
3103
|
-
|
|
3096
|
+
Pickle the C arrays into a single concatenated :class:`ndarray`.
|
|
3097
|
+
The length of each C-array in this concatenated array is saved as well.
|
|
3104
3098
|
"""
|
|
3105
|
-
cdef int64
|
|
3106
|
-
cdef int64 bucket
|
|
3099
|
+
cdef int64 pointer_i, bucket_i, concat_i
|
|
3107
3100
|
cdef int64 length
|
|
3108
3101
|
cdef uint32* bucket_ptr
|
|
3109
3102
|
|
|
3110
|
-
|
|
3111
|
-
|
|
3112
|
-
for
|
|
3113
|
-
|
|
3114
|
-
bucket_ptr
|
|
3115
|
-
|
|
3116
|
-
|
|
3117
|
-
|
|
3118
|
-
|
|
3103
|
+
# First pass: Count the total concatenated size
|
|
3104
|
+
cdef int64 total_length = 0
|
|
3105
|
+
for pointer_i in range(ptr_array.shape[0]):
|
|
3106
|
+
bucket_ptr = <uint32*>ptr_array[pointer_i]
|
|
3107
|
+
if bucket_ptr != NULL:
|
|
3108
|
+
# The first element of the C-array is the length
|
|
3109
|
+
# of the array
|
|
3110
|
+
total_length += (<int64*>bucket_ptr)[0]
|
|
3111
|
+
|
|
3112
|
+
# Second pass: Copy the C-arrays into a single concatenated array
|
|
3113
|
+
# and track the start position of each C-array
|
|
3114
|
+
cdef uint32[:] concatenated_array = np.empty(total_length, dtype=np.uint32)
|
|
3115
|
+
cdef int64[:] lengths = np.empty(ptr_array.shape[0], dtype=np.int64)
|
|
3116
|
+
concat_i = 0
|
|
3117
|
+
for pointer_i in range(ptr_array.shape[0]):
|
|
3118
|
+
bucket_ptr = <uint32*>ptr_array[pointer_i]
|
|
3119
|
+
if bucket_ptr != NULL:
|
|
3120
|
+
length = (<int64*>bucket_ptr)[0]
|
|
3121
|
+
lengths[pointer_i] = length
|
|
3122
|
+
memcpy(
|
|
3123
|
+
&concatenated_array[concat_i],
|
|
3124
|
+
bucket_ptr,
|
|
3125
|
+
length * sizeof(uint32),
|
|
3126
|
+
)
|
|
3127
|
+
concat_i += length
|
|
3128
|
+
else:
|
|
3129
|
+
lengths[pointer_i] = 0
|
|
3119
3130
|
|
|
3120
|
-
return np.asarray(
|
|
3131
|
+
return np.asarray(concatenated_array), np.asarray(lengths)
|
|
3121
3132
|
|
|
3122
3133
|
|
|
3123
3134
|
@cython.boundscheck(False)
|
|
@@ -3126,28 +3137,27 @@ def _unpickle_c_arrays(ptr[:] ptr_array, state):
|
|
|
3126
3137
|
"""
|
|
3127
3138
|
Unpickle the pickled `state` into the given `ptr_array`.
|
|
3128
3139
|
"""
|
|
3129
|
-
cdef int64
|
|
3130
|
-
cdef int64
|
|
3131
|
-
cdef int64 byte_length
|
|
3140
|
+
cdef int64 pointer_i, concat_i
|
|
3141
|
+
cdef int64 length
|
|
3132
3142
|
cdef uint32* bucket_ptr
|
|
3133
|
-
|
|
3134
|
-
|
|
3135
|
-
cdef int64[:]
|
|
3136
|
-
|
|
3137
|
-
|
|
3138
|
-
for
|
|
3139
|
-
|
|
3140
|
-
if
|
|
3141
|
-
|
|
3142
|
-
pickled_bytes = pickled_pointers[i]
|
|
3143
|
-
byte_length = len(pickled_bytes)
|
|
3144
|
-
if byte_length != 0:
|
|
3145
|
-
bucket_ptr = <uint32*>malloc(byte_length)
|
|
3143
|
+
|
|
3144
|
+
cdef uint32[:] concatenated_array = state[0]
|
|
3145
|
+
cdef int64[:] lengths = state[1]
|
|
3146
|
+
|
|
3147
|
+
concat_i = 0
|
|
3148
|
+
for pointer_i in range(ptr_array.shape[0]):
|
|
3149
|
+
length = lengths[pointer_i]
|
|
3150
|
+
if length != 0:
|
|
3151
|
+
bucket_ptr = <uint32*>malloc(length * sizeof(uint32))
|
|
3146
3152
|
if not bucket_ptr:
|
|
3147
3153
|
raise MemoryError
|
|
3148
|
-
|
|
3149
|
-
|
|
3150
|
-
|
|
3154
|
+
memcpy(
|
|
3155
|
+
bucket_ptr,
|
|
3156
|
+
&concatenated_array[concat_i],
|
|
3157
|
+
length * sizeof(uint32),
|
|
3158
|
+
)
|
|
3159
|
+
concat_i += length
|
|
3160
|
+
ptr_array[pointer_i] = <ptr>bucket_ptr
|
|
3151
3161
|
|
|
3152
3162
|
|
|
3153
3163
|
cdef inline void _deallocate_ptrs(ptr[:] ptrs):
|
|
Binary file
|
|
Binary file
|