biotite 0.39.0__cp311-cp311-macosx_11_0_arm64.whl → 0.41.0__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +3 -3
- biotite/application/dssp/app.py +18 -18
- biotite/database/pubchem/download.py +23 -23
- biotite/database/pubchem/query.py +7 -7
- biotite/database/rcsb/download.py +19 -14
- biotite/file.py +17 -9
- biotite/sequence/align/banded.c +258 -237
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/cigar.py +60 -15
- biotite/sequence/align/kmeralphabet.c +243 -222
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.c +215 -196
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpp +233 -205
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localgapped.c +258 -237
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.c +235 -214
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.c +255 -234
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/pairwise.c +274 -253
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.c +215 -196
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.c +217 -197
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/tracetable.c +215 -195
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/annotation.py +2 -2
- biotite/sequence/codec.c +235 -214
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/io/fasta/convert.py +27 -24
- biotite/sequence/phylo/nj.c +215 -196
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.c +227 -202
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.c +215 -196
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/structure/__init__.py +2 -0
- biotite/structure/basepairs.py +7 -12
- biotite/structure/bonds.c +1437 -1279
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/celllist.c +217 -197
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/charges.c +1052 -1101
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/dotbracket.py +2 -0
- biotite/structure/filter.py +30 -37
- biotite/structure/info/__init__.py +5 -8
- biotite/structure/info/atoms.py +31 -68
- biotite/structure/info/bonds.py +47 -101
- biotite/structure/info/ccd/README.rst +8 -0
- biotite/structure/info/ccd/amino_acids.txt +1663 -0
- biotite/structure/info/ccd/carbohydrates.txt +1135 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +798 -0
- biotite/structure/info/ccd.py +95 -0
- biotite/structure/info/groups.py +90 -0
- biotite/structure/info/masses.py +21 -20
- biotite/structure/info/misc.py +78 -25
- biotite/structure/info/standardize.py +17 -12
- biotite/structure/integrity.py +19 -70
- biotite/structure/io/__init__.py +2 -4
- biotite/structure/io/ctab.py +12 -106
- biotite/structure/io/general.py +167 -181
- biotite/structure/io/gro/file.py +16 -16
- biotite/structure/io/mmtf/__init__.py +3 -0
- biotite/structure/io/mmtf/convertarray.c +219 -198
- biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.c +217 -197
- biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.c +225 -204
- biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.c +215 -196
- biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/file.py +34 -26
- biotite/structure/io/mol/__init__.py +4 -2
- biotite/structure/io/mol/convert.py +71 -7
- biotite/structure/io/mol/ctab.py +414 -0
- biotite/structure/io/mol/header.py +116 -0
- biotite/structure/io/mol/{file.py → mol.py} +69 -82
- biotite/structure/io/mol/sdf.py +909 -0
- biotite/structure/io/npz/__init__.py +3 -0
- biotite/structure/io/npz/file.py +21 -18
- biotite/structure/io/pdb/__init__.py +3 -3
- biotite/structure/io/pdb/file.py +89 -34
- biotite/structure/io/pdb/hybrid36.c +63 -43
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +12 -6
- biotite/structure/io/pdbx/bcif.py +648 -0
- biotite/structure/io/pdbx/cif.py +1032 -0
- biotite/structure/io/pdbx/component.py +246 -0
- biotite/structure/io/pdbx/convert.py +858 -386
- biotite/structure/io/pdbx/encoding.c +112813 -0
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbx/legacy.py +267 -0
- biotite/structure/molecules.py +151 -151
- biotite/structure/repair.py +253 -0
- biotite/structure/sasa.c +215 -196
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/sequence.py +112 -0
- biotite/structure/superimpose.py +618 -116
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
- biotite/structure/info/amino_acids.json +0 -1556
- biotite/structure/info/amino_acids.py +0 -42
- biotite/structure/info/carbohydrates.json +0 -1122
- biotite/structure/info/carbohydrates.py +0 -39
- biotite/structure/info/intra_bonds.msgpack +0 -0
- biotite/structure/info/link_types.msgpack +0 -1
- biotite/structure/info/nucleotides.json +0 -772
- biotite/structure/info/nucleotides.py +0 -39
- biotite/structure/info/residue_masses.msgpack +0 -0
- biotite/structure/info/residue_names.msgpack +0 -3
- biotite/structure/info/residues.msgpack +0 -0
- biotite/structure/io/pdbx/file.py +0 -652
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
|
Binary file
|
biotite/sequence/align/cigar.py
CHANGED
|
@@ -106,6 +106,14 @@ def read_alignment_from_cigar(cigar, position,
|
|
|
106
106
|
AAAAGGTTTCCGACCGTAGGTAG
|
|
107
107
|
CCCCGGTTT--GACCGTATGTAG
|
|
108
108
|
|
|
109
|
+
Explicit terminal deletions are also possible.
|
|
110
|
+
Note that in this case the deleted positions count as aligned bases
|
|
111
|
+
with respect to the `position` parameter.
|
|
112
|
+
|
|
113
|
+
>>> print(read_alignment_from_cigar("3D9M2D12M4D", 0, ref, seg))
|
|
114
|
+
TATAAAAGGTTTCCGACCGTAGGTAGCTGA
|
|
115
|
+
---CCCCGGTTT--GACCGTATGTAG----
|
|
116
|
+
|
|
109
117
|
If bases in the segment sequence are soft-clipped, they do not
|
|
110
118
|
appear in the alignment.
|
|
111
119
|
Furthermore, the start of the reference sequence must be adapted.
|
|
@@ -122,7 +130,7 @@ def read_alignment_from_cigar(cigar, position,
|
|
|
122
130
|
GGTTTCCGACCGTAGGTAG
|
|
123
131
|
GGTTT--GACCGTATGTAG
|
|
124
132
|
|
|
125
|
-
Reading from BAM codes is also possible
|
|
133
|
+
Reading from BAM codes is also possible.
|
|
126
134
|
|
|
127
135
|
>>> seg = NucleotideSequence("CCCCGGTTTGACCGTATGTAG")
|
|
128
136
|
>>> op_tuples = [
|
|
@@ -190,7 +198,8 @@ def read_alignment_from_cigar(cigar, position,
|
|
|
190
198
|
|
|
191
199
|
def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
|
|
192
200
|
introns=(), distinguish_matches=False,
|
|
193
|
-
hard_clip=False,
|
|
201
|
+
hard_clip=False, include_terminal_gaps=False,
|
|
202
|
+
as_string=True):
|
|
194
203
|
"""
|
|
195
204
|
Convert an :class:`Alignment` into a CIGAR string.
|
|
196
205
|
|
|
@@ -220,6 +229,13 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
|
|
|
220
229
|
hard_clip : bool, optional
|
|
221
230
|
If true, clipped bases are hard-clipped.
|
|
222
231
|
Otherwise, clipped bases are soft-clipped.
|
|
232
|
+
include_terminal_gaps : bool, optional
|
|
233
|
+
If true, terminal gaps in the segment sequence are included in
|
|
234
|
+
the CIGAR string.
|
|
235
|
+
These are represented by ``D`` operations at the start and/or
|
|
236
|
+
end of the string.
|
|
237
|
+
By default, those terminal gaps are omitted in the CIGAR, which
|
|
238
|
+
is the way SAM/BAM expects a CIGAR to be.
|
|
223
239
|
as_string : bool, optional
|
|
224
240
|
If true, the CIGAR string is returned.
|
|
225
241
|
Otherwise, a list of tuples is returned, where the first element
|
|
@@ -238,6 +254,12 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
|
|
|
238
254
|
--------
|
|
239
255
|
read_alignment_from_cigar
|
|
240
256
|
|
|
257
|
+
Notes
|
|
258
|
+
-----
|
|
259
|
+
If `include_terminal_gaps` is set to true, you usually want to set
|
|
260
|
+
``position=0`` in :func:`read_alignment_from_cigar` to get the
|
|
261
|
+
correct alignment.
|
|
262
|
+
|
|
241
263
|
Examples
|
|
242
264
|
--------
|
|
243
265
|
|
|
@@ -256,6 +278,8 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
|
|
|
256
278
|
9M2N12M
|
|
257
279
|
>>> print(write_alignment_to_cigar(semiglobal_alignment, distinguish_matches=True))
|
|
258
280
|
4X5=2D7=1X4=
|
|
281
|
+
>>> print(write_alignment_to_cigar(semiglobal_alignment, include_terminal_gaps=True))
|
|
282
|
+
3D9M2D12M4D
|
|
259
283
|
>>> local_alignment = align_optimal(ref, seg, matrix, local=True)[0]
|
|
260
284
|
>>> print(local_alignment)
|
|
261
285
|
GGTTTCCGACCGTAGGTAG
|
|
@@ -274,9 +298,8 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
|
|
|
274
298
|
CigarOp.DELETION 2
|
|
275
299
|
CigarOp.MATCH 12
|
|
276
300
|
"""
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
alignment = alignment[no_gap_pos[0] : no_gap_pos[-1] + 1]
|
|
301
|
+
if not include_terminal_gaps:
|
|
302
|
+
alignment = _remove_terminal_segment_gaps(alignment, segment_index)
|
|
280
303
|
|
|
281
304
|
ref_trace = alignment.trace[:, reference_index]
|
|
282
305
|
seg_trace = alignment.trace[:, segment_index]
|
|
@@ -321,19 +344,13 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
|
|
|
321
344
|
op_tuples = _aggregate_consecutive(operations)
|
|
322
345
|
|
|
323
346
|
clip_op = CigarOp.HARD_CLIP if hard_clip else CigarOp.SOFT_CLIP
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
# all previous bases are clipped...
|
|
328
|
-
start_clip_length = seg_trace[0]
|
|
347
|
+
start_clip_length, end_clip_length = _find_clipped_bases(
|
|
348
|
+
alignment, segment_index
|
|
349
|
+
)
|
|
329
350
|
if start_clip_length != 0:
|
|
330
|
-
start_clip = [(clip_op,
|
|
351
|
+
start_clip = [(clip_op, start_clip_length)]
|
|
331
352
|
else:
|
|
332
353
|
start_clip = np.zeros((0, 2), dtype=int)
|
|
333
|
-
# ...and the same applies for the last base
|
|
334
|
-
end_clip_length = (
|
|
335
|
-
len(alignment.sequences[segment_index]) - seg_trace[-1] - 1
|
|
336
|
-
)
|
|
337
354
|
if end_clip_length != 0:
|
|
338
355
|
end_clip = [(clip_op, end_clip_length)]
|
|
339
356
|
else:
|
|
@@ -347,6 +364,34 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
|
|
|
347
364
|
return op_tuples
|
|
348
365
|
|
|
349
366
|
|
|
367
|
+
def _remove_terminal_segment_gaps(alignment, segment_index):
|
|
368
|
+
"""
|
|
369
|
+
Remove terminal gaps in the segment sequence.
|
|
370
|
+
"""
|
|
371
|
+
no_gap_pos = np.where(alignment.trace[:, segment_index] != -1)[0]
|
|
372
|
+
return alignment[no_gap_pos[0] : no_gap_pos[-1] + 1]
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _find_clipped_bases(alignment, segment_index):
|
|
376
|
+
"""
|
|
377
|
+
Find the number of clipped bases at the start and end of the segment.
|
|
378
|
+
"""
|
|
379
|
+
# Finding the clipped part is easier, when the terminal segment gaps
|
|
380
|
+
# are removed (if not already done)
|
|
381
|
+
alignment = _remove_terminal_segment_gaps(alignment, segment_index)
|
|
382
|
+
seg_trace = alignment.trace[:, segment_index]
|
|
383
|
+
# Missing bases at the beginning and end of the segment are
|
|
384
|
+
# interpreted as clipped
|
|
385
|
+
# As first element in the segment trace is the first aligned base,
|
|
386
|
+
# all previous bases are clipped...
|
|
387
|
+
start_clip_length = seg_trace[0]
|
|
388
|
+
# ...and the same applies for the last base
|
|
389
|
+
end_clip_length = (
|
|
390
|
+
len(alignment.sequences[segment_index]) - seg_trace[-1] - 1
|
|
391
|
+
)
|
|
392
|
+
return start_clip_length, end_clip_length
|
|
393
|
+
|
|
394
|
+
|
|
350
395
|
def _aggregate_consecutive(operations):
|
|
351
396
|
"""
|
|
352
397
|
Aggregate consecutive operations of the same type.
|