biotite 0.41.2__cp311-cp311-macosx_11_0_arm64.whl → 1.0.1__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +246 -236
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +83 -78
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +140 -110
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +260 -258
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/trajfile.py +90 -107
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
- biotite-1.0.1.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/io/pdbx/cif.py
CHANGED
|
@@ -7,13 +7,17 @@ __author__ = "Patrick Kunzmann"
|
|
|
7
7
|
__all__ = ["CIFFile", "CIFBlock", "CIFCategory", "CIFColumn", "CIFData"]
|
|
8
8
|
|
|
9
9
|
import itertools
|
|
10
|
-
import
|
|
10
|
+
import re
|
|
11
11
|
from collections.abc import MutableMapping, Sequence
|
|
12
12
|
import numpy as np
|
|
13
|
-
from .
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
from biotite.file import (
|
|
14
|
+
DeserializationError,
|
|
15
|
+
File,
|
|
16
|
+
SerializationError,
|
|
17
|
+
is_open_compatible,
|
|
18
|
+
is_text,
|
|
19
|
+
)
|
|
20
|
+
from biotite.structure.io.pdbx.component import MaskValue, _Component
|
|
17
21
|
|
|
18
22
|
UNICODE_CHAR_SIZE = 4
|
|
19
23
|
|
|
@@ -133,9 +137,7 @@ class CIFColumn:
|
|
|
133
137
|
if not isinstance(data, CIFData):
|
|
134
138
|
data = CIFData(data, str)
|
|
135
139
|
if mask is None:
|
|
136
|
-
mask = np.full(
|
|
137
|
-
len(data), MaskValue.PRESENT, dtype=np.uint8
|
|
138
|
-
)
|
|
140
|
+
mask = np.full(len(data), MaskValue.PRESENT, dtype=np.uint8)
|
|
139
141
|
mask[data.array == "."] = MaskValue.INAPPLICABLE
|
|
140
142
|
mask[data.array == "?"] = MaskValue.MISSING
|
|
141
143
|
if np.all(mask == MaskValue.PRESENT):
|
|
@@ -148,8 +150,7 @@ class CIFColumn:
|
|
|
148
150
|
mask = CIFData(mask, np.uint8)
|
|
149
151
|
if len(mask) != len(data):
|
|
150
152
|
raise IndexError(
|
|
151
|
-
f"Data has length {len(data)}, "
|
|
152
|
-
f"but mask has length {len(mask)}"
|
|
153
|
+
f"Data has length {len(data)}, " f"but mask has length {len(mask)}"
|
|
153
154
|
)
|
|
154
155
|
self._data = data
|
|
155
156
|
self._mask = mask
|
|
@@ -222,9 +223,7 @@ class CIFColumn:
|
|
|
222
223
|
elif np.issubdtype(dtype, np.str_):
|
|
223
224
|
# Limit float precision to 3 decimals
|
|
224
225
|
if np.issubdtype(self._data.array.dtype, np.floating):
|
|
225
|
-
array = np.array(
|
|
226
|
-
[f"{e:.3f}" for e in self._data.array], type=dtype
|
|
227
|
-
)
|
|
226
|
+
array = np.array([f"{e:.3f}" for e in self._data.array], type=dtype)
|
|
228
227
|
else:
|
|
229
228
|
# Copy, as otherwise original data would be overwritten
|
|
230
229
|
# with mask values
|
|
@@ -247,9 +246,7 @@ class CIFColumn:
|
|
|
247
246
|
array = np.full(len(self._data), masked_value, dtype=dtype)
|
|
248
247
|
|
|
249
248
|
present_mask = self._mask.array == MaskValue.PRESENT
|
|
250
|
-
array[present_mask] = (
|
|
251
|
-
self._data.array[present_mask].astype(dtype)
|
|
252
|
-
)
|
|
249
|
+
array[present_mask] = self._data.array[present_mask].astype(dtype)
|
|
253
250
|
return array
|
|
254
251
|
|
|
255
252
|
def __len__(self):
|
|
@@ -361,9 +358,7 @@ class CIFCategory(_Component, MutableMapping):
|
|
|
361
358
|
|
|
362
359
|
@staticmethod
|
|
363
360
|
def deserialize(text, expect_whitespace=True):
|
|
364
|
-
lines = [
|
|
365
|
-
line.strip() for line in text.splitlines() if not _is_empty(line)
|
|
366
|
-
]
|
|
361
|
+
lines = [line.strip() for line in text.splitlines() if not _is_empty(line)]
|
|
367
362
|
|
|
368
363
|
if _is_loop_start(lines[0]):
|
|
369
364
|
is_looped = True
|
|
@@ -373,15 +368,11 @@ class CIFCategory(_Component, MutableMapping):
|
|
|
373
368
|
|
|
374
369
|
category_name = _parse_category_name(lines[0])
|
|
375
370
|
if category_name is None:
|
|
376
|
-
raise DeserializationError(
|
|
377
|
-
"Failed to parse category name"
|
|
378
|
-
)
|
|
371
|
+
raise DeserializationError("Failed to parse category name")
|
|
379
372
|
|
|
380
|
-
lines = _to_single(lines
|
|
373
|
+
lines = _to_single(lines)
|
|
381
374
|
if is_looped:
|
|
382
|
-
category_dict = CIFCategory._deserialize_looped(
|
|
383
|
-
lines, expect_whitespace
|
|
384
|
-
)
|
|
375
|
+
category_dict = CIFCategory._deserialize_looped(lines, expect_whitespace)
|
|
385
376
|
else:
|
|
386
377
|
category_dict = CIFCategory._deserialize_single(lines)
|
|
387
378
|
return CIFCategory(category_dict, category_name)
|
|
@@ -448,11 +439,28 @@ class CIFCategory(_Component, MutableMapping):
|
|
|
448
439
|
Process a category where each field has a single value.
|
|
449
440
|
"""
|
|
450
441
|
category_dict = {}
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
442
|
+
line_i = 0
|
|
443
|
+
while line_i < len(lines):
|
|
444
|
+
line = lines[line_i]
|
|
445
|
+
parts = _split_one_line(line)
|
|
446
|
+
if len(parts) == 2:
|
|
447
|
+
# Standard case -> name and value in one line
|
|
448
|
+
name_part, value_part = parts
|
|
449
|
+
line_i += 1
|
|
450
|
+
elif len(parts) == 1:
|
|
451
|
+
# Value is a multiline value on the next line
|
|
452
|
+
name_part = parts[0]
|
|
453
|
+
parts = _split_one_line(lines[line_i + 1])
|
|
454
|
+
if len(parts) == 1:
|
|
455
|
+
value_part = parts[0]
|
|
456
|
+
else:
|
|
457
|
+
raise DeserializationError(f"Failed to parse line '{line}'")
|
|
458
|
+
line_i += 2
|
|
459
|
+
elif len(parts) == 0:
|
|
460
|
+
raise DeserializationError("Empty line within category")
|
|
461
|
+
else:
|
|
462
|
+
raise DeserializationError(f"Failed to parse line '{line}'")
|
|
463
|
+
category_dict[name_part.split(".")[1]] = CIFColumn(value_part)
|
|
456
464
|
return category_dict
|
|
457
465
|
|
|
458
466
|
@staticmethod
|
|
@@ -477,15 +485,14 @@ class CIFCategory(_Component, MutableMapping):
|
|
|
477
485
|
data_lines = lines[i:]
|
|
478
486
|
# Rows may be split over multiple lines -> do not rely on
|
|
479
487
|
# row-line-alignment at all and simply cycle through columns
|
|
480
|
-
|
|
488
|
+
column_indices = itertools.cycle(range(len(column_names)))
|
|
481
489
|
for data_line in data_lines:
|
|
482
490
|
# If whitespace is expected in quote protected values,
|
|
483
|
-
# use
|
|
491
|
+
# use regex-based _split_one_line() to split
|
|
484
492
|
# Otherwise use much more faster whitespace split
|
|
485
|
-
# and quote removal if applicable
|
|
486
|
-
# bypassing the slow shlex module
|
|
493
|
+
# and quote removal if applicable.
|
|
487
494
|
if expect_whitespace:
|
|
488
|
-
values =
|
|
495
|
+
values = _split_one_line(data_line)
|
|
489
496
|
else:
|
|
490
497
|
values = data_line.split()
|
|
491
498
|
for k in range(len(values)):
|
|
@@ -495,9 +502,18 @@ class CIFCategory(_Component, MutableMapping):
|
|
|
495
502
|
):
|
|
496
503
|
values[k] = values[k][1:-1]
|
|
497
504
|
for val in values:
|
|
498
|
-
|
|
505
|
+
column_index = next(column_indices)
|
|
506
|
+
column_name = column_names[column_index]
|
|
499
507
|
category_dict[column_name].append(val)
|
|
500
508
|
|
|
509
|
+
# Check if all columns have the same length
|
|
510
|
+
# Otherwise, this would indicate a parsing error or an invalid CIF file
|
|
511
|
+
column_index = next(column_indices)
|
|
512
|
+
if column_index != 0:
|
|
513
|
+
raise DeserializationError(
|
|
514
|
+
"Category contains columns with different lengths"
|
|
515
|
+
)
|
|
516
|
+
|
|
501
517
|
return category_dict
|
|
502
518
|
|
|
503
519
|
def _serialize_single(self):
|
|
@@ -506,39 +522,35 @@ class CIFCategory(_Component, MutableMapping):
|
|
|
506
522
|
# "+3" Because of three whitespace chars after longest key
|
|
507
523
|
req_len = max_len + 3
|
|
508
524
|
return [
|
|
509
|
-
|
|
525
|
+
# Remove potential terminal newlines from multiline values
|
|
526
|
+
(key.ljust(req_len) + _escape(column.as_item())).strip()
|
|
510
527
|
for key, column in zip(keys, self.values())
|
|
511
528
|
]
|
|
512
529
|
|
|
513
530
|
def _serialize_looped(self):
|
|
514
|
-
key_lines = [
|
|
515
|
-
"_" + self._name + "." + key + " "
|
|
516
|
-
for key in self.keys()
|
|
517
|
-
]
|
|
531
|
+
key_lines = ["_" + self._name + "." + key + " " for key in self.keys()]
|
|
518
532
|
|
|
519
533
|
column_arrays = []
|
|
520
534
|
for column in self.values():
|
|
521
535
|
array = column.as_array(str)
|
|
522
536
|
# Quote before measuring the number of chars,
|
|
523
537
|
# as the quote characters modify the length
|
|
524
|
-
array = np.array(
|
|
525
|
-
[_multiline(_quote(element)) for element in array]
|
|
526
|
-
)
|
|
538
|
+
array = np.array([_escape(element) for element in array])
|
|
527
539
|
column_arrays.append(array)
|
|
528
540
|
|
|
529
541
|
# Number of characters the longest string in the column needs
|
|
530
542
|
# This can be deduced from the dtype
|
|
531
543
|
# The "+1" is for the small whitespace column
|
|
532
544
|
column_n_chars = [
|
|
533
|
-
array.dtype.itemsize // UNICODE_CHAR_SIZE + 1
|
|
534
|
-
for array in column_arrays
|
|
545
|
+
array.dtype.itemsize // UNICODE_CHAR_SIZE + 1 for array in column_arrays
|
|
535
546
|
]
|
|
536
547
|
value_lines = [""] * self._row_count
|
|
537
548
|
for i in range(self._row_count):
|
|
538
549
|
for j, array in enumerate(column_arrays):
|
|
539
550
|
value_lines[i] += array[i].ljust(column_n_chars[j])
|
|
540
551
|
# Remove trailing justification of last column
|
|
541
|
-
|
|
552
|
+
# and potential terminal newlines from multiline values
|
|
553
|
+
value_lines[i] = value_lines[i].strip()
|
|
542
554
|
|
|
543
555
|
return ["loop_"] + key_lines + value_lines
|
|
544
556
|
|
|
@@ -615,15 +627,11 @@ class CIFBlock(_Component, MutableMapping):
|
|
|
615
627
|
if is_loop_in_line:
|
|
616
628
|
# In case of lines with "loop_" the category is
|
|
617
629
|
# in the next line
|
|
618
|
-
category_name_in_line = _parse_category_name(
|
|
619
|
-
lines[i + 1]
|
|
620
|
-
)
|
|
630
|
+
category_name_in_line = _parse_category_name(lines[i + 1])
|
|
621
631
|
current_category_name = category_name_in_line
|
|
622
632
|
category_starts.append(i)
|
|
623
633
|
category_names.append(current_category_name)
|
|
624
|
-
return CIFBlock(_create_element_dict(
|
|
625
|
-
lines, category_names, category_starts
|
|
626
|
-
))
|
|
634
|
+
return CIFBlock(_create_element_dict(lines, category_names, category_starts))
|
|
627
635
|
|
|
628
636
|
def serialize(self):
|
|
629
637
|
text_blocks = []
|
|
@@ -635,7 +643,7 @@ class CIFBlock(_Component, MutableMapping):
|
|
|
635
643
|
try:
|
|
636
644
|
category.name = category_name
|
|
637
645
|
text_blocks.append(category.serialize())
|
|
638
|
-
except:
|
|
646
|
+
except Exception:
|
|
639
647
|
raise SerializationError(
|
|
640
648
|
f"Failed to serialize category '{category_name}'"
|
|
641
649
|
)
|
|
@@ -652,16 +660,14 @@ class CIFBlock(_Component, MutableMapping):
|
|
|
652
660
|
# Special optimization for "atom_site":
|
|
653
661
|
# Even if the values are quote protected,
|
|
654
662
|
# no whitespace is expected in escaped values
|
|
655
|
-
# Therefore slow
|
|
663
|
+
# Therefore slow regex-based _split_one_line() call is not necessary
|
|
656
664
|
if key == "atom_site":
|
|
657
665
|
expect_whitespace = False
|
|
658
666
|
else:
|
|
659
667
|
expect_whitespace = True
|
|
660
668
|
category = CIFCategory.deserialize(category, expect_whitespace)
|
|
661
|
-
except:
|
|
662
|
-
raise DeserializationError(
|
|
663
|
-
f"Failed to deserialize category '{key}'"
|
|
664
|
-
)
|
|
669
|
+
except Exception:
|
|
670
|
+
raise DeserializationError(f"Failed to deserialize category '{key}'")
|
|
665
671
|
# Update with deserialized object
|
|
666
672
|
self._categories[key] = category
|
|
667
673
|
return category
|
|
@@ -809,7 +815,7 @@ class CIFFile(_Component, File, MutableMapping):
|
|
|
809
815
|
else:
|
|
810
816
|
try:
|
|
811
817
|
text_blocks.append(block.serialize())
|
|
812
|
-
except:
|
|
818
|
+
except Exception:
|
|
813
819
|
raise SerializationError(
|
|
814
820
|
f"Failed to serialize block '{block_name}'"
|
|
815
821
|
)
|
|
@@ -869,19 +875,15 @@ class CIFFile(_Component, File, MutableMapping):
|
|
|
869
875
|
# -> must be deserialized first
|
|
870
876
|
try:
|
|
871
877
|
block = CIFBlock.deserialize(block)
|
|
872
|
-
except:
|
|
873
|
-
raise DeserializationError(
|
|
874
|
-
f"Failed to deserialize block '{key}'"
|
|
875
|
-
)
|
|
878
|
+
except Exception:
|
|
879
|
+
raise DeserializationError(f"Failed to deserialize block '{key}'")
|
|
876
880
|
# Update with deserialized object
|
|
877
881
|
self._blocks[key] = block
|
|
878
882
|
return block
|
|
879
883
|
|
|
880
884
|
def __setitem__(self, key, block):
|
|
881
885
|
if not isinstance(block, CIFBlock):
|
|
882
|
-
raise TypeError(
|
|
883
|
-
f"Expected 'CIFBlock', but got '{type(block).__name__}'"
|
|
884
|
-
)
|
|
886
|
+
raise TypeError(f"Expected 'CIFBlock', but got '{type(block).__name__}'")
|
|
885
887
|
self._blocks[key] = block
|
|
886
888
|
|
|
887
889
|
def __delitem__(self, key):
|
|
@@ -919,7 +921,7 @@ def _create_element_dict(lines, element_names, element_starts):
|
|
|
919
921
|
# Lazy deserialization
|
|
920
922
|
# -> keep as text for now and deserialize later if needed
|
|
921
923
|
return {
|
|
922
|
-
element_name: "\n".join(lines[element_starts[i] : element_starts[i+1]])
|
|
924
|
+
element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]])
|
|
923
925
|
for i, element_name in enumerate(element_names)
|
|
924
926
|
}
|
|
925
927
|
|
|
@@ -953,52 +955,50 @@ def _is_loop_start(line):
|
|
|
953
955
|
return line.startswith("loop_")
|
|
954
956
|
|
|
955
957
|
|
|
956
|
-
def _to_single(lines
|
|
957
|
-
"""
|
|
958
|
+
def _to_single(lines):
|
|
959
|
+
r"""
|
|
958
960
|
Convert multiline values into singleline values
|
|
959
961
|
(in terms of 'lines' list elements).
|
|
960
|
-
Linebreaks are preserved.
|
|
962
|
+
Linebreaks are preserved as ``'\n'`` characters within a list element.
|
|
963
|
+
The initial ``';'`` character is also preserved, while the final ``';'`` character
|
|
964
|
+
is removed.
|
|
961
965
|
"""
|
|
962
|
-
processed_lines = [
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
multi_line_str += "\n" + lines[j]
|
|
973
|
-
j += 1
|
|
974
|
-
if is_looped:
|
|
975
|
-
# Create a line for the multiline string only
|
|
976
|
-
processed_lines[out_i] = shlex.quote(multi_line_str)
|
|
977
|
-
out_i += 1
|
|
966
|
+
processed_lines = []
|
|
967
|
+
in_multi_line = False
|
|
968
|
+
mutli_line_value = []
|
|
969
|
+
for line in lines:
|
|
970
|
+
# Multiline value are enclosed by ';' at the start of the beginning and end line
|
|
971
|
+
if line[0] == ";":
|
|
972
|
+
if not in_multi_line:
|
|
973
|
+
# Start of multiline value
|
|
974
|
+
in_multi_line = True
|
|
975
|
+
mutli_line_value.append(line)
|
|
978
976
|
else:
|
|
979
|
-
#
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
processed_lines[out_i - 1] += " " + lines[in_i]
|
|
986
|
-
in_i += 1
|
|
987
|
-
|
|
977
|
+
# End of multiline value
|
|
978
|
+
in_multi_line = False
|
|
979
|
+
# The current line contains only the end character ';'
|
|
980
|
+
# Hence this line is not added to the processed lines
|
|
981
|
+
processed_lines.append("\n".join(mutli_line_value))
|
|
982
|
+
mutli_line_value = []
|
|
988
983
|
else:
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
return [line for line in processed_lines if line is not None]
|
|
984
|
+
if in_multi_line:
|
|
985
|
+
mutli_line_value.append(line)
|
|
986
|
+
else:
|
|
987
|
+
processed_lines.append(line)
|
|
988
|
+
return processed_lines
|
|
995
989
|
|
|
996
990
|
|
|
997
|
-
def
|
|
991
|
+
def _escape(value):
|
|
998
992
|
"""
|
|
999
|
-
|
|
993
|
+
Escape special characters in a value to make it compatible with CIF.
|
|
1000
994
|
"""
|
|
1001
|
-
if
|
|
995
|
+
if "\n" in value:
|
|
996
|
+
# A value with linebreaks must be represented as multiline value
|
|
997
|
+
return _multiline(value)
|
|
998
|
+
elif "'" in value and '"' in value:
|
|
999
|
+
# If both quote types are present, you cannot use them for escaping
|
|
1000
|
+
return _multiline(value)
|
|
1001
|
+
elif len(value) == 0:
|
|
1002
1002
|
return "''"
|
|
1003
1003
|
elif value[0] == "_":
|
|
1004
1004
|
return "'" + value + "'"
|
|
@@ -1016,12 +1016,42 @@ def _quote(value):
|
|
|
1016
1016
|
|
|
1017
1017
|
def _multiline(value):
|
|
1018
1018
|
"""
|
|
1019
|
-
Convert a string
|
|
1019
|
+
Convert a string that may contain linebreaks into CIF-compatible
|
|
1020
1020
|
multiline string.
|
|
1021
1021
|
"""
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1022
|
+
return "\n;" + value + "\n;\n"
|
|
1023
|
+
|
|
1024
|
+
|
|
1025
|
+
def _split_one_line(line):
|
|
1026
|
+
"""
|
|
1027
|
+
Split a line into its fields.
|
|
1028
|
+
Supporting embedded quotes (' or "), like `'a dog's life'` to `a dog's life`
|
|
1029
|
+
"""
|
|
1030
|
+
# Special case of multiline value, where the line starts with ';'
|
|
1031
|
+
if line[0] == ";":
|
|
1032
|
+
return [line[1:]]
|
|
1033
|
+
|
|
1034
|
+
# Define the patterns for different types of fields
|
|
1035
|
+
single_quote_pattern = r"('(?:'(?! )|[^'])*')(?:\s|$)"
|
|
1036
|
+
double_quote_pattern = r'("(?:"(?! )|[^"])*")(?:\s|$)'
|
|
1037
|
+
unquoted_pattern = r"([^\s]+)"
|
|
1038
|
+
|
|
1039
|
+
# Combine the patterns using alternation
|
|
1040
|
+
combined_pattern = (
|
|
1041
|
+
f"{single_quote_pattern}|{double_quote_pattern}|{unquoted_pattern}"
|
|
1042
|
+
)
|
|
1043
|
+
|
|
1044
|
+
# Find all matches
|
|
1045
|
+
matches = re.findall(combined_pattern, line)
|
|
1046
|
+
|
|
1047
|
+
# Extract non-empty groups from the matches
|
|
1048
|
+
fields = []
|
|
1049
|
+
for match in matches:
|
|
1050
|
+
field = next(group for group in match if group)
|
|
1051
|
+
if field[0] == field[-1] == "'" or field[0] == field[-1] == '"':
|
|
1052
|
+
field = field[1:-1]
|
|
1053
|
+
fields.append(field)
|
|
1054
|
+
return fields
|
|
1025
1055
|
|
|
1026
1056
|
|
|
1027
1057
|
def _arrayfy(data):
|
|
@@ -11,10 +11,10 @@ __name__ = "biotite.structure.io.pdbx"
|
|
|
11
11
|
__author__ = "Patrick Kunzmann"
|
|
12
12
|
__all__ = ["MaskValue"]
|
|
13
13
|
|
|
14
|
-
from enum import IntEnum
|
|
15
14
|
from abc import ABCMeta, abstractmethod
|
|
16
15
|
from collections.abc import MutableMapping
|
|
17
|
-
from
|
|
16
|
+
from enum import IntEnum
|
|
17
|
+
from biotite.file import DeserializationError, SerializationError
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class MaskValue(IntEnum):
|
|
@@ -29,6 +29,7 @@ class MaskValue(IntEnum):
|
|
|
29
29
|
- `MISSING` : For this row the value is missing or unknown
|
|
30
30
|
(``?`` in *CIF*).
|
|
31
31
|
"""
|
|
32
|
+
|
|
32
33
|
PRESENT = 0
|
|
33
34
|
INAPPLICABLE = 1
|
|
34
35
|
MISSING = 2
|
|
@@ -109,8 +110,7 @@ class _Component(metaclass=ABCMeta):
|
|
|
109
110
|
return str(self.serialize())
|
|
110
111
|
|
|
111
112
|
|
|
112
|
-
class _HierarchicalContainer(_Component, MutableMapping,
|
|
113
|
-
metaclass=ABCMeta):
|
|
113
|
+
class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
|
|
114
114
|
"""
|
|
115
115
|
A container for hierarchical data in BinaryCIF files.
|
|
116
116
|
For example, the file contains multiple blocks, each block contains
|
|
@@ -181,10 +181,8 @@ class _HierarchicalContainer(_Component, MutableMapping,
|
|
|
181
181
|
if isinstance(element, self.subcomponent_class()):
|
|
182
182
|
try:
|
|
183
183
|
serialized_element = element.serialize()
|
|
184
|
-
except:
|
|
185
|
-
raise SerializationError(
|
|
186
|
-
f"Failed to serialize element '{key}'"
|
|
187
|
-
)
|
|
184
|
+
except Exception:
|
|
185
|
+
raise SerializationError(f"Failed to serialize element '{key}'")
|
|
188
186
|
else:
|
|
189
187
|
# Element is already stored in serialized form
|
|
190
188
|
serialized_element = element
|
|
@@ -200,10 +198,8 @@ class _HierarchicalContainer(_Component, MutableMapping,
|
|
|
200
198
|
# -> must be deserialized first
|
|
201
199
|
try:
|
|
202
200
|
element = self.subcomponent_class().deserialize(element)
|
|
203
|
-
except:
|
|
204
|
-
raise DeserializationError(
|
|
205
|
-
f"Failed to deserialize element '{key}'"
|
|
206
|
-
)
|
|
201
|
+
except Exception:
|
|
202
|
+
raise DeserializationError(f"Failed to deserialize element '{key}'")
|
|
207
203
|
# Update container with deserialized object
|
|
208
204
|
self._elements[key] = element
|
|
209
205
|
return element
|
|
@@ -220,10 +216,8 @@ class _HierarchicalContainer(_Component, MutableMapping,
|
|
|
220
216
|
else:
|
|
221
217
|
try:
|
|
222
218
|
element = self.subcomponent_class().deserialize(element)
|
|
223
|
-
except:
|
|
224
|
-
raise DeserializationError(
|
|
225
|
-
f"Failed to deserialize given value"
|
|
226
|
-
)
|
|
219
|
+
except Exception:
|
|
220
|
+
raise DeserializationError("Failed to deserialize given value")
|
|
227
221
|
self._elements[key] = element
|
|
228
222
|
|
|
229
223
|
def __delitem__(self, key):
|