biotite 0.41.1__cp311-cp311-win_amd64.whl → 0.41.2__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/__init__.py +35 -9
- biotite/application/application.py +2 -1
- biotite/sequence/__init__.py +13 -2
- biotite/sequence/align/__init__.py +158 -4
- biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +34 -34
- biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +63 -63
- biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
- biotite/sequence/sequence.py +52 -50
- biotite/structure/atoms.py +8 -8
- biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +59 -68
- biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
- biotite/structure/charges.cp311-win_amd64.pyd +0 -0
- biotite/structure/info/ccd.py +17 -2
- biotite/structure/info/groups.py +9 -12
- biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/bcif.py +0 -8
- biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
- biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
- biotite/version.py +2 -2
- {biotite-0.41.1.dist-info → biotite-0.41.2.dist-info}/METADATA +2 -2
- {biotite-0.41.1.dist-info → biotite-0.41.2.dist-info}/RECORD +42 -42
- {biotite-0.41.1.dist-info → biotite-0.41.2.dist-info}/WHEEL +1 -1
- {biotite-0.41.1.dist-info → biotite-0.41.2.dist-info}/licenses/LICENSE.rst +0 -0
biotite/sequence/alphabet.py
CHANGED
|
@@ -18,44 +18,44 @@ class Alphabet(object):
|
|
|
18
18
|
"""
|
|
19
19
|
This class defines the allowed symbols for a :class:`Sequence` and
|
|
20
20
|
handles the encoding/decoding between symbols and symbol codes.
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
An :class:`Alphabet` is created with the list of symbols, that can
|
|
23
23
|
be used in this context.
|
|
24
24
|
In most cases a symbol will be simply a letter, hence a string of
|
|
25
25
|
length 1. But in principle every hashable Python object can serve
|
|
26
26
|
as symbol.
|
|
27
|
-
|
|
27
|
+
|
|
28
28
|
The encoding of a symbol into a symbol code is
|
|
29
29
|
done in the following way: Find the first index in the symbol list,
|
|
30
30
|
where the list element equals the symbol. This index is the
|
|
31
31
|
symbol code. If the symbol is not found in the list, an
|
|
32
32
|
:class:`AlphabetError` is raised.
|
|
33
|
-
|
|
33
|
+
|
|
34
34
|
Internally, a dictionary is used for encoding, with symbols as keys
|
|
35
35
|
and symbol codes as values. Therefore, every symbol must be
|
|
36
36
|
hashable. For decoding the symbol list is indexed with the symbol
|
|
37
37
|
code.
|
|
38
|
-
|
|
38
|
+
|
|
39
39
|
If an alphabet *1* contains the same symbols and the same
|
|
40
40
|
symbol-code-mappings like another alphabet *2*, but alphabet *1*
|
|
41
41
|
introduces also new symbols, then alphabet *1* *extends* alphabet
|
|
42
42
|
*2*.
|
|
43
43
|
Per definition, every alphabet also extends itself.
|
|
44
|
-
|
|
44
|
+
|
|
45
45
|
Objects of this class are immutable.
|
|
46
|
-
|
|
46
|
+
|
|
47
47
|
Parameters
|
|
48
48
|
----------
|
|
49
49
|
symbols : iterable object
|
|
50
50
|
The symbols, that are allowed in this alphabet. The
|
|
51
51
|
corresponding code for a symbol, is the index of that symbol
|
|
52
52
|
in this list.
|
|
53
|
-
|
|
53
|
+
|
|
54
54
|
Examples
|
|
55
55
|
--------
|
|
56
56
|
Create an Alphabet containing DNA letters and encode/decode a
|
|
57
57
|
letter/code:
|
|
58
|
-
|
|
58
|
+
|
|
59
59
|
>>> alph = Alphabet(["A","C","G","T"])
|
|
60
60
|
>>> print(alph.encode("G"))
|
|
61
61
|
2
|
|
@@ -66,9 +66,9 @@ class Alphabet(object):
|
|
|
66
66
|
... except Exception as e:
|
|
67
67
|
... print(e)
|
|
68
68
|
Symbol 'foo' is not in the alphabet
|
|
69
|
-
|
|
69
|
+
|
|
70
70
|
Create an Alphabet of arbitrary objects:
|
|
71
|
-
|
|
71
|
+
|
|
72
72
|
>>> alph = Alphabet(["foo", 42, (1,2,3), 5, 3.141])
|
|
73
73
|
>>> print(alph.encode((1,2,3)))
|
|
74
74
|
2
|
|
@@ -77,26 +77,26 @@ class Alphabet(object):
|
|
|
77
77
|
|
|
78
78
|
On the subject of alphabet extension:
|
|
79
79
|
An alphabet always extends itself.
|
|
80
|
-
|
|
80
|
+
|
|
81
81
|
>>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","G","T"]))
|
|
82
82
|
True
|
|
83
83
|
|
|
84
84
|
An alphabet extends an alphabet when it contains additional symbols...
|
|
85
|
-
|
|
85
|
+
|
|
86
86
|
>>> Alphabet(["A","C","G","T","U"]).extends(Alphabet(["A","C","G","T"]))
|
|
87
87
|
True
|
|
88
|
-
|
|
88
|
+
|
|
89
89
|
...but not vice versa
|
|
90
|
-
|
|
90
|
+
|
|
91
91
|
>>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","G","T","U"]))
|
|
92
92
|
False
|
|
93
|
-
|
|
93
|
+
|
|
94
94
|
Two alphabets with same symbols but different symbol-code-mappings
|
|
95
|
-
|
|
96
|
-
>>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","T","G"]))
|
|
95
|
+
|
|
96
|
+
>>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","T","G"]))
|
|
97
97
|
False
|
|
98
98
|
"""
|
|
99
|
-
|
|
99
|
+
|
|
100
100
|
def __init__(self, symbols):
|
|
101
101
|
if len(symbols) == 0:
|
|
102
102
|
raise ValueError("Symbol list is empty")
|
|
@@ -112,18 +112,18 @@ class Alphabet(object):
|
|
|
112
112
|
def get_symbols(self):
|
|
113
113
|
"""
|
|
114
114
|
Get the symbols in the alphabet.
|
|
115
|
-
|
|
115
|
+
|
|
116
116
|
Returns
|
|
117
117
|
-------
|
|
118
118
|
symbols : list
|
|
119
119
|
Copy of the internal list of symbols.
|
|
120
120
|
"""
|
|
121
121
|
return copy.deepcopy(self._symbols)
|
|
122
|
-
|
|
122
|
+
|
|
123
123
|
def extends(self, alphabet):
|
|
124
124
|
"""
|
|
125
125
|
Check, if this alphabet extends another alphabet.
|
|
126
|
-
|
|
126
|
+
|
|
127
127
|
Parameters
|
|
128
128
|
----------
|
|
129
129
|
alphabet : Alphabet
|
|
@@ -141,21 +141,21 @@ class Alphabet(object):
|
|
|
141
141
|
else:
|
|
142
142
|
return alphabet.get_symbols() \
|
|
143
143
|
== self.get_symbols()[:len(alphabet)]
|
|
144
|
-
|
|
144
|
+
|
|
145
145
|
def encode(self, symbol):
|
|
146
146
|
"""
|
|
147
147
|
Use the alphabet to encode a symbol.
|
|
148
|
-
|
|
148
|
+
|
|
149
149
|
Parameters
|
|
150
150
|
----------
|
|
151
151
|
symbol : object
|
|
152
152
|
The object to encode into a symbol code.
|
|
153
|
-
|
|
153
|
+
|
|
154
154
|
Returns
|
|
155
155
|
-------
|
|
156
156
|
code : int
|
|
157
157
|
The symbol code of `symbol`.
|
|
158
|
-
|
|
158
|
+
|
|
159
159
|
Raises
|
|
160
160
|
------
|
|
161
161
|
AlphabetError
|
|
@@ -167,21 +167,21 @@ class Alphabet(object):
|
|
|
167
167
|
raise AlphabetError(
|
|
168
168
|
f"Symbol {repr(symbol)} is not in the alphabet"
|
|
169
169
|
)
|
|
170
|
-
|
|
170
|
+
|
|
171
171
|
def decode(self, code):
|
|
172
172
|
"""
|
|
173
173
|
Use the alphabet to decode a symbol code.
|
|
174
|
-
|
|
174
|
+
|
|
175
175
|
Parameters
|
|
176
176
|
----------
|
|
177
177
|
code : int
|
|
178
178
|
The symbol code to be decoded.
|
|
179
|
-
|
|
179
|
+
|
|
180
180
|
Returns
|
|
181
181
|
-------
|
|
182
182
|
symbol : object
|
|
183
183
|
The symbol corresponding to `code`.
|
|
184
|
-
|
|
184
|
+
|
|
185
185
|
Raises
|
|
186
186
|
------
|
|
187
187
|
AlphabetError
|
|
@@ -190,41 +190,41 @@ class Alphabet(object):
|
|
|
190
190
|
if code < 0 or code >= len(self._symbols):
|
|
191
191
|
raise AlphabetError(f"'{code:d}' is not a valid code")
|
|
192
192
|
return self._symbols[code]
|
|
193
|
-
|
|
193
|
+
|
|
194
194
|
def encode_multiple(self, symbols, dtype=np.int64):
|
|
195
195
|
"""
|
|
196
196
|
Encode a list of symbols.
|
|
197
|
-
|
|
197
|
+
|
|
198
198
|
Parameters
|
|
199
199
|
----------
|
|
200
200
|
symbols : array-like
|
|
201
201
|
The symbols to encode.
|
|
202
202
|
dtype : dtype, optional
|
|
203
203
|
The dtype of the output ndarray. (Default: `int64`)
|
|
204
|
-
|
|
204
|
+
|
|
205
205
|
Returns
|
|
206
206
|
-------
|
|
207
207
|
code : ndarray
|
|
208
208
|
The sequence code.
|
|
209
209
|
"""
|
|
210
210
|
return np.array([self.encode(e) for e in symbols], dtype=dtype)
|
|
211
|
-
|
|
211
|
+
|
|
212
212
|
def decode_multiple(self, code):
|
|
213
213
|
"""
|
|
214
214
|
Decode a sequence code into a list of symbols.
|
|
215
|
-
|
|
215
|
+
|
|
216
216
|
Parameters
|
|
217
217
|
----------
|
|
218
218
|
code : ndarray
|
|
219
219
|
The sequence code to decode.
|
|
220
|
-
|
|
220
|
+
|
|
221
221
|
Returns
|
|
222
222
|
-------
|
|
223
223
|
symbols : list
|
|
224
224
|
The decoded list of symbols.
|
|
225
225
|
"""
|
|
226
226
|
return [self.decode(c) for c in code]
|
|
227
|
-
|
|
227
|
+
|
|
228
228
|
def is_letter_alphabet(self):
|
|
229
229
|
"""
|
|
230
230
|
Check whether the symbols in this alphabet are single printable
|
|
@@ -246,22 +246,22 @@ class Alphabet(object):
|
|
|
246
246
|
if symbol not in LetterAlphabet.PRINATBLES:
|
|
247
247
|
return False
|
|
248
248
|
return True
|
|
249
|
-
|
|
249
|
+
|
|
250
250
|
def __str__(self):
|
|
251
251
|
return str(self.get_symbols())
|
|
252
|
-
|
|
252
|
+
|
|
253
253
|
def __len__(self):
|
|
254
254
|
return len(self.get_symbols())
|
|
255
|
-
|
|
255
|
+
|
|
256
256
|
def __iter__(self):
|
|
257
257
|
return self.get_symbols().__iter__()
|
|
258
|
-
|
|
258
|
+
|
|
259
259
|
def __contains__(self, symbol):
|
|
260
260
|
return symbol in self.get_symbols()
|
|
261
|
-
|
|
261
|
+
|
|
262
262
|
def __hash__(self):
|
|
263
263
|
return hash(tuple(self._symbols))
|
|
264
|
-
|
|
264
|
+
|
|
265
265
|
def __eq__(self, item):
|
|
266
266
|
if item is self:
|
|
267
267
|
return True
|
|
@@ -291,7 +291,7 @@ class LetterAlphabet(Alphabet):
|
|
|
291
291
|
corresponding code for a symbol, is the index of that symbol
|
|
292
292
|
in this list.
|
|
293
293
|
"""
|
|
294
|
-
|
|
294
|
+
|
|
295
295
|
PRINATBLES = (string.digits + string.ascii_letters + string.punctuation) \
|
|
296
296
|
.encode("ASCII")
|
|
297
297
|
|
|
@@ -319,7 +319,7 @@ class LetterAlphabet(Alphabet):
|
|
|
319
319
|
def __repr__(self):
|
|
320
320
|
"""Represent LetterAlphabet as a string for debugging."""
|
|
321
321
|
return f'LetterAlphabet({self.get_symbols()})'
|
|
322
|
-
|
|
322
|
+
|
|
323
323
|
def extends(self, alphabet):
|
|
324
324
|
if alphabet is self:
|
|
325
325
|
return True
|
|
@@ -335,7 +335,7 @@ class LetterAlphabet(Alphabet):
|
|
|
335
335
|
def get_symbols(self):
|
|
336
336
|
"""
|
|
337
337
|
Get the symbols in the alphabet.
|
|
338
|
-
|
|
338
|
+
|
|
339
339
|
Returns
|
|
340
340
|
-------
|
|
341
341
|
symbols : list
|
|
@@ -343,7 +343,7 @@ class LetterAlphabet(Alphabet):
|
|
|
343
343
|
"""
|
|
344
344
|
return [symbol.decode("ASCII") for symbol
|
|
345
345
|
in self._symbols_as_bytes()]
|
|
346
|
-
|
|
346
|
+
|
|
347
347
|
def encode(self, symbol):
|
|
348
348
|
if not isinstance(symbol, (str, bytes)) or len(symbol) > 1:
|
|
349
349
|
raise AlphabetError(f"Symbol '{symbol}' is not a single letter")
|
|
@@ -353,16 +353,16 @@ class LetterAlphabet(Alphabet):
|
|
|
353
353
|
f"Symbol {repr(symbol)} is not in the alphabet"
|
|
354
354
|
)
|
|
355
355
|
return indices[0]
|
|
356
|
-
|
|
356
|
+
|
|
357
357
|
def decode(self, code, as_bytes=False):
|
|
358
358
|
if code < 0 or code >= len(self._symbols):
|
|
359
359
|
raise AlphabetError(f"'{code:d}' is not a valid code")
|
|
360
360
|
return chr(self._symbols[code])
|
|
361
|
-
|
|
361
|
+
|
|
362
362
|
def encode_multiple(self, symbols, dtype=None):
|
|
363
363
|
"""
|
|
364
364
|
Encode multiple symbols.
|
|
365
|
-
|
|
365
|
+
|
|
366
366
|
Parameters
|
|
367
367
|
----------
|
|
368
368
|
symbols : iterable object or str or bytes
|
|
@@ -371,7 +371,7 @@ class LetterAlphabet(Alphabet):
|
|
|
371
371
|
containing the symbols is provided, instead of e.g. a list.
|
|
372
372
|
dtype : dtype, optional
|
|
373
373
|
For compatibility with superclass. The value is ignored
|
|
374
|
-
|
|
374
|
+
|
|
375
375
|
Returns
|
|
376
376
|
-------
|
|
377
377
|
code : ndarray
|
|
@@ -391,11 +391,11 @@ class LetterAlphabet(Alphabet):
|
|
|
391
391
|
dtype=np.ubyte
|
|
392
392
|
)
|
|
393
393
|
return encode_chars(alphabet=self._symbols, symbols=symbols)
|
|
394
|
-
|
|
394
|
+
|
|
395
395
|
def decode_multiple(self, code, as_bytes=False):
|
|
396
396
|
"""
|
|
397
397
|
Decode a sequence code into a list of symbols.
|
|
398
|
-
|
|
398
|
+
|
|
399
399
|
Parameters
|
|
400
400
|
----------
|
|
401
401
|
code : ndarray, dtype=uint8
|
|
@@ -421,20 +421,20 @@ class LetterAlphabet(Alphabet):
|
|
|
421
421
|
if not as_bytes:
|
|
422
422
|
symbols = symbols.astype("U1")
|
|
423
423
|
return symbols
|
|
424
|
-
|
|
424
|
+
|
|
425
425
|
def __contains__(self, symbol):
|
|
426
426
|
if not isinstance(symbol, (str, bytes)):
|
|
427
427
|
return False
|
|
428
428
|
return ord(symbol) in self._symbols
|
|
429
|
-
|
|
429
|
+
|
|
430
430
|
def __len__(self):
|
|
431
431
|
return len(self._symbols)
|
|
432
|
-
|
|
432
|
+
|
|
433
433
|
def _symbols_as_bytes(self):
|
|
434
434
|
"Properly convert from dtype 'np.ubyte' to '|S1'"
|
|
435
435
|
return np.frombuffer(self._symbols, dtype="|S1")
|
|
436
436
|
|
|
437
|
-
|
|
437
|
+
|
|
438
438
|
|
|
439
439
|
class AlphabetMapper(object):
|
|
440
440
|
"""
|
|
@@ -445,7 +445,7 @@ class AlphabetMapper(object):
|
|
|
445
445
|
alphabet so that the symbol itself is preserved.
|
|
446
446
|
This class works for single symbol codes or an entire sequence code
|
|
447
447
|
likewise.
|
|
448
|
-
|
|
448
|
+
|
|
449
449
|
Parameters
|
|
450
450
|
----------
|
|
451
451
|
source_alphabet, target_alphabet : Alphabet
|
|
@@ -454,7 +454,7 @@ class AlphabetMapper(object):
|
|
|
454
454
|
The target alphabet must contain at least all symbols of the
|
|
455
455
|
source alphabet, but it is not required that the shared symbols
|
|
456
456
|
are in the same order.
|
|
457
|
-
|
|
457
|
+
|
|
458
458
|
Examples
|
|
459
459
|
--------
|
|
460
460
|
|
|
@@ -470,16 +470,16 @@ class AlphabetMapper(object):
|
|
|
470
470
|
>>> in_sequence = GeneralSequence(source_alph, "GCCTAT")
|
|
471
471
|
>>> print(in_sequence.code)
|
|
472
472
|
[2 1 1 3 0 3]
|
|
473
|
-
>>> print(in_sequence)
|
|
473
|
+
>>> print("".join(in_sequence.symbols))
|
|
474
474
|
GCCTAT
|
|
475
475
|
>>> out_sequence = GeneralSequence(target_alph)
|
|
476
476
|
>>> out_sequence.code = mapper[in_sequence.code]
|
|
477
477
|
>>> print(out_sequence.code)
|
|
478
478
|
[3 4 4 0 2 0]
|
|
479
|
-
>>> print(out_sequence)
|
|
479
|
+
>>> print("".join(out_sequence.symbols))
|
|
480
480
|
GCCTAT
|
|
481
481
|
"""
|
|
482
|
-
|
|
482
|
+
|
|
483
483
|
def __init__(self, source_alphabet, target_alphabet):
|
|
484
484
|
if target_alphabet.extends(source_alphabet):
|
|
485
485
|
self._necessary_mapping = False
|
|
@@ -493,7 +493,7 @@ class AlphabetMapper(object):
|
|
|
493
493
|
symbol = source_alphabet.decode(old_code)
|
|
494
494
|
new_code = target_alphabet.encode(symbol)
|
|
495
495
|
self._mapper[old_code] = new_code
|
|
496
|
-
|
|
496
|
+
|
|
497
497
|
def __getitem__(self, code):
|
|
498
498
|
if isinstance(code, Integral):
|
|
499
499
|
if self._necessary_mapping:
|
|
@@ -552,7 +552,7 @@ def common_alphabet(alphabets):
|
|
|
552
552
|
-------
|
|
553
553
|
common_alphabet : Alphabet or None
|
|
554
554
|
The alphabet from `alphabets` that extends all alphabets.
|
|
555
|
-
``None`` if no such common alphabet exists.
|
|
555
|
+
``None`` if no such common alphabet exists.
|
|
556
556
|
"""
|
|
557
557
|
common_alphabet = None
|
|
558
558
|
for alphabet in alphabets:
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|