biotite 0.41.2__cp312-cp312-win_amd64.whl → 1.0.0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +221 -235
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cp312-win_amd64.pyd +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
- biotite-1.0.0.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/io/pdbx/cif.py
CHANGED
|
@@ -7,13 +7,17 @@ __author__ = "Patrick Kunzmann"
|
|
|
7
7
|
__all__ = ["CIFFile", "CIFBlock", "CIFCategory", "CIFColumn", "CIFData"]
|
|
8
8
|
|
|
9
9
|
import itertools
|
|
10
|
-
import shlex
|
|
10
|
+
import re
|
|
11
11
|
from collections.abc import MutableMapping, Sequence
|
|
12
12
|
import numpy as np
|
|
13
|
-
from .
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
from biotite.file import (
|
|
14
|
+
DeserializationError,
|
|
15
|
+
File,
|
|
16
|
+
SerializationError,
|
|
17
|
+
is_open_compatible,
|
|
18
|
+
is_text,
|
|
19
|
+
)
|
|
20
|
+
from biotite.structure.io.pdbx.component import MaskValue, _Component
|
|
17
21
|
|
|
18
22
|
UNICODE_CHAR_SIZE = 4
|
|
19
23
|
|
|
@@ -133,9 +137,7 @@ class CIFColumn:
|
|
|
133
137
|
if not isinstance(data, CIFData):
|
|
134
138
|
data = CIFData(data, str)
|
|
135
139
|
if mask is None:
|
|
136
|
-
mask = np.full(
|
|
137
|
-
len(data), MaskValue.PRESENT, dtype=np.uint8
|
|
138
|
-
)
|
|
140
|
+
mask = np.full(len(data), MaskValue.PRESENT, dtype=np.uint8)
|
|
139
141
|
mask[data.array == "."] = MaskValue.INAPPLICABLE
|
|
140
142
|
mask[data.array == "?"] = MaskValue.MISSING
|
|
141
143
|
if np.all(mask == MaskValue.PRESENT):
|
|
@@ -148,8 +150,7 @@ class CIFColumn:
|
|
|
148
150
|
mask = CIFData(mask, np.uint8)
|
|
149
151
|
if len(mask) != len(data):
|
|
150
152
|
raise IndexError(
|
|
151
|
-
f"Data has length {len(data)}, "
|
|
152
|
-
f"but mask has length {len(mask)}"
|
|
153
|
+
f"Data has length {len(data)}, " f"but mask has length {len(mask)}"
|
|
153
154
|
)
|
|
154
155
|
self._data = data
|
|
155
156
|
self._mask = mask
|
|
@@ -222,9 +223,7 @@ class CIFColumn:
|
|
|
222
223
|
elif np.issubdtype(dtype, np.str_):
|
|
223
224
|
# Limit float precision to 3 decimals
|
|
224
225
|
if np.issubdtype(self._data.array.dtype, np.floating):
|
|
225
|
-
array = np.array(
|
|
226
|
-
[f"{e:.3f}" for e in self._data.array], type=dtype
|
|
227
|
-
)
|
|
226
|
+
array = np.array([f"{e:.3f}" for e in self._data.array], type=dtype)
|
|
228
227
|
else:
|
|
229
228
|
# Copy, as otherwise original data would be overwritten
|
|
230
229
|
# with mask values
|
|
@@ -247,9 +246,7 @@ class CIFColumn:
|
|
|
247
246
|
array = np.full(len(self._data), masked_value, dtype=dtype)
|
|
248
247
|
|
|
249
248
|
present_mask = self._mask.array == MaskValue.PRESENT
|
|
250
|
-
array[present_mask] = (
|
|
251
|
-
self._data.array[present_mask].astype(dtype)
|
|
252
|
-
)
|
|
249
|
+
array[present_mask] = self._data.array[present_mask].astype(dtype)
|
|
253
250
|
return array
|
|
254
251
|
|
|
255
252
|
def __len__(self):
|
|
@@ -361,9 +358,7 @@ class CIFCategory(_Component, MutableMapping):
|
|
|
361
358
|
|
|
362
359
|
@staticmethod
|
|
363
360
|
def deserialize(text, expect_whitespace=True):
|
|
364
|
-
lines = [
|
|
365
|
-
line.strip() for line in text.splitlines() if not _is_empty(line)
|
|
366
|
-
]
|
|
361
|
+
lines = [line.strip() for line in text.splitlines() if not _is_empty(line)]
|
|
367
362
|
|
|
368
363
|
if _is_loop_start(lines[0]):
|
|
369
364
|
is_looped = True
|
|
@@ -373,15 +368,11 @@ class CIFCategory(_Component, MutableMapping):
|
|
|
373
368
|
|
|
374
369
|
category_name = _parse_category_name(lines[0])
|
|
375
370
|
if category_name is None:
|
|
376
|
-
raise DeserializationError(
|
|
377
|
-
"Failed to parse category name"
|
|
378
|
-
)
|
|
371
|
+
raise DeserializationError("Failed to parse category name")
|
|
379
372
|
|
|
380
373
|
lines = _to_single(lines, is_looped)
|
|
381
374
|
if is_looped:
|
|
382
|
-
category_dict = CIFCategory._deserialize_looped(
|
|
383
|
-
lines, expect_whitespace
|
|
384
|
-
)
|
|
375
|
+
category_dict = CIFCategory._deserialize_looped(lines, expect_whitespace)
|
|
385
376
|
else:
|
|
386
377
|
category_dict = CIFCategory._deserialize_single(lines)
|
|
387
378
|
return CIFCategory(category_dict, category_name)
|
|
@@ -449,7 +440,7 @@ class CIFCategory(_Component, MutableMapping):
|
|
|
449
440
|
"""
|
|
450
441
|
category_dict = {}
|
|
451
442
|
for line in lines:
|
|
452
|
-
parts = shlex.split(line)
|
|
443
|
+
parts = _split_one_line(line)
|
|
453
444
|
column_name = parts[0].split(".")[1]
|
|
454
445
|
column = parts[1]
|
|
455
446
|
category_dict[column_name] = CIFColumn(column)
|
|
@@ -480,12 +471,11 @@ class CIFCategory(_Component, MutableMapping):
|
|
|
480
471
|
column_names = itertools.cycle(column_names)
|
|
481
472
|
for data_line in data_lines:
|
|
482
473
|
# If whitespace is expected in quote protected values,
|
|
483
|
-
# use
|
|
474
|
+
# use regex-based _split_one_line() to split
|
|
484
475
|
# Otherwise use much more faster whitespace split
|
|
485
|
-
# and quote removal if applicable
|
|
486
|
-
# bypassing the slow shlex module
|
|
476
|
+
# and quote removal if applicable.
|
|
487
477
|
if expect_whitespace:
|
|
488
|
-
values = shlex.split(data_line)
|
|
478
|
+
values = _split_one_line(data_line)
|
|
489
479
|
else:
|
|
490
480
|
values = data_line.split()
|
|
491
481
|
for k in range(len(values)):
|
|
@@ -511,27 +501,21 @@ class CIFCategory(_Component, MutableMapping):
|
|
|
511
501
|
]
|
|
512
502
|
|
|
513
503
|
def _serialize_looped(self):
|
|
514
|
-
key_lines = [
|
|
515
|
-
"_" + self._name + "." + key + " "
|
|
516
|
-
for key in self.keys()
|
|
517
|
-
]
|
|
504
|
+
key_lines = ["_" + self._name + "." + key + " " for key in self.keys()]
|
|
518
505
|
|
|
519
506
|
column_arrays = []
|
|
520
507
|
for column in self.values():
|
|
521
508
|
array = column.as_array(str)
|
|
522
509
|
# Quote before measuring the number of chars,
|
|
523
510
|
# as the quote characters modify the length
|
|
524
|
-
array = np.array(
|
|
525
|
-
[_multiline(_quote(element)) for element in array]
|
|
526
|
-
)
|
|
511
|
+
array = np.array([_multiline(_quote(element)) for element in array])
|
|
527
512
|
column_arrays.append(array)
|
|
528
513
|
|
|
529
514
|
# Number of characters the longest string in the column needs
|
|
530
515
|
# This can be deduced from the dtype
|
|
531
516
|
# The "+1" is for the small whitespace column
|
|
532
517
|
column_n_chars = [
|
|
533
|
-
array.dtype.itemsize // UNICODE_CHAR_SIZE + 1
|
|
534
|
-
for array in column_arrays
|
|
518
|
+
array.dtype.itemsize // UNICODE_CHAR_SIZE + 1 for array in column_arrays
|
|
535
519
|
]
|
|
536
520
|
value_lines = [""] * self._row_count
|
|
537
521
|
for i in range(self._row_count):
|
|
@@ -615,15 +599,11 @@ class CIFBlock(_Component, MutableMapping):
|
|
|
615
599
|
if is_loop_in_line:
|
|
616
600
|
# In case of lines with "loop_" the category is
|
|
617
601
|
# in the next line
|
|
618
|
-
category_name_in_line = _parse_category_name(
|
|
619
|
-
lines[i + 1]
|
|
620
|
-
)
|
|
602
|
+
category_name_in_line = _parse_category_name(lines[i + 1])
|
|
621
603
|
current_category_name = category_name_in_line
|
|
622
604
|
category_starts.append(i)
|
|
623
605
|
category_names.append(current_category_name)
|
|
624
|
-
return CIFBlock(_create_element_dict(
|
|
625
|
-
lines, category_names, category_starts
|
|
626
|
-
))
|
|
606
|
+
return CIFBlock(_create_element_dict(lines, category_names, category_starts))
|
|
627
607
|
|
|
628
608
|
def serialize(self):
|
|
629
609
|
text_blocks = []
|
|
@@ -635,7 +615,7 @@ class CIFBlock(_Component, MutableMapping):
|
|
|
635
615
|
try:
|
|
636
616
|
category.name = category_name
|
|
637
617
|
text_blocks.append(category.serialize())
|
|
638
|
-
except:
|
|
618
|
+
except Exception:
|
|
639
619
|
raise SerializationError(
|
|
640
620
|
f"Failed to serialize category '{category_name}'"
|
|
641
621
|
)
|
|
@@ -652,16 +632,14 @@ class CIFBlock(_Component, MutableMapping):
|
|
|
652
632
|
# Special optimization for "atom_site":
|
|
653
633
|
# Even if the values are quote protected,
|
|
654
634
|
# no whitespace is expected in escaped values
|
|
655
|
-
# Therefore slow shlex.split() call is not necessary
|
|
635
|
+
# Therefore slow regex-based _split_one_line() call is not necessary
|
|
656
636
|
if key == "atom_site":
|
|
657
637
|
expect_whitespace = False
|
|
658
638
|
else:
|
|
659
639
|
expect_whitespace = True
|
|
660
640
|
category = CIFCategory.deserialize(category, expect_whitespace)
|
|
661
|
-
except:
|
|
662
|
-
raise DeserializationError(
|
|
663
|
-
f"Failed to deserialize category '{key}'"
|
|
664
|
-
)
|
|
641
|
+
except Exception:
|
|
642
|
+
raise DeserializationError(f"Failed to deserialize category '{key}'")
|
|
665
643
|
# Update with deserialized object
|
|
666
644
|
self._categories[key] = category
|
|
667
645
|
return category
|
|
@@ -809,7 +787,7 @@ class CIFFile(_Component, File, MutableMapping):
|
|
|
809
787
|
else:
|
|
810
788
|
try:
|
|
811
789
|
text_blocks.append(block.serialize())
|
|
812
|
-
except:
|
|
790
|
+
except Exception:
|
|
813
791
|
raise SerializationError(
|
|
814
792
|
f"Failed to serialize block '{block_name}'"
|
|
815
793
|
)
|
|
@@ -869,19 +847,15 @@ class CIFFile(_Component, File, MutableMapping):
|
|
|
869
847
|
# -> must be deserialized first
|
|
870
848
|
try:
|
|
871
849
|
block = CIFBlock.deserialize(block)
|
|
872
|
-
except:
|
|
873
|
-
raise DeserializationError(
|
|
874
|
-
f"Failed to deserialize block '{key}'"
|
|
875
|
-
)
|
|
850
|
+
except Exception:
|
|
851
|
+
raise DeserializationError(f"Failed to deserialize block '{key}'")
|
|
876
852
|
# Update with deserialized object
|
|
877
853
|
self._blocks[key] = block
|
|
878
854
|
return block
|
|
879
855
|
|
|
880
856
|
def __setitem__(self, key, block):
|
|
881
857
|
if not isinstance(block, CIFBlock):
|
|
882
|
-
raise TypeError(
|
|
883
|
-
f"Expected 'CIFBlock', but got '{type(block).__name__}'"
|
|
884
|
-
)
|
|
858
|
+
raise TypeError(f"Expected 'CIFBlock', but got '{type(block).__name__}'")
|
|
885
859
|
self._blocks[key] = block
|
|
886
860
|
|
|
887
861
|
def __delitem__(self, key):
|
|
@@ -919,7 +893,7 @@ def _create_element_dict(lines, element_names, element_starts):
|
|
|
919
893
|
# Lazy deserialization
|
|
920
894
|
# -> keep as text for now and deserialize later if needed
|
|
921
895
|
return {
|
|
922
|
-
element_name: "\n".join(lines[element_starts[i] : element_starts[i+1]])
|
|
896
|
+
element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]])
|
|
923
897
|
for i, element_name in enumerate(element_names)
|
|
924
898
|
}
|
|
925
899
|
|
|
@@ -973,11 +947,11 @@ def _to_single(lines, is_looped):
|
|
|
973
947
|
j += 1
|
|
974
948
|
if is_looped:
|
|
975
949
|
# Create a line for the multiline string only
|
|
976
|
-
processed_lines[out_i] =
|
|
950
|
+
processed_lines[out_i] = f"'{multi_line_str}'"
|
|
977
951
|
out_i += 1
|
|
978
952
|
else:
|
|
979
953
|
# Append multiline string to previous line
|
|
980
|
-
processed_lines[out_i - 1] += " " +
|
|
954
|
+
processed_lines[out_i - 1] += " " + f"'{multi_line_str}'"
|
|
981
955
|
in_i = j + 1
|
|
982
956
|
|
|
983
957
|
elif not is_looped and lines[in_i][0] != "_":
|
|
@@ -1024,6 +998,34 @@ def _multiline(value):
|
|
|
1024
998
|
return value
|
|
1025
999
|
|
|
1026
1000
|
|
|
1001
|
+
def _split_one_line(line):
|
|
1002
|
+
"""
|
|
1003
|
+
Split a line into its fields.
|
|
1004
|
+
Supporting embedded quotes (' or "), like `'a dog's life'` to `a dog's life`
|
|
1005
|
+
"""
|
|
1006
|
+
# Define the patterns for different types of fields
|
|
1007
|
+
single_quote_pattern = r"('(?:'(?! )|[^'])*')(?:\s|$)"
|
|
1008
|
+
double_quote_pattern = r'("(?:"(?! )|[^"])*")(?:\s|$)'
|
|
1009
|
+
unquoted_pattern = r"([^\s]+)"
|
|
1010
|
+
|
|
1011
|
+
# Combine the patterns using alternation
|
|
1012
|
+
combined_pattern = (
|
|
1013
|
+
f"{single_quote_pattern}|{double_quote_pattern}|{unquoted_pattern}"
|
|
1014
|
+
)
|
|
1015
|
+
|
|
1016
|
+
# Find all matches
|
|
1017
|
+
matches = re.findall(combined_pattern, line)
|
|
1018
|
+
|
|
1019
|
+
# Extract non-empty groups from the matches
|
|
1020
|
+
fields = []
|
|
1021
|
+
for match in matches:
|
|
1022
|
+
field = next(group for group in match if group)
|
|
1023
|
+
if field[0] == field[-1] == "'" or field[0] == field[-1] == '"':
|
|
1024
|
+
field = field[1:-1]
|
|
1025
|
+
fields.append(field)
|
|
1026
|
+
return fields
|
|
1027
|
+
|
|
1028
|
+
|
|
1027
1029
|
def _arrayfy(data):
|
|
1028
1030
|
if not isinstance(data, (Sequence, np.ndarray)) or isinstance(data, str):
|
|
1029
1031
|
data = [data]
|
|
@@ -11,10 +11,10 @@ __name__ = "biotite.structure.io.pdbx"
|
|
|
11
11
|
__author__ = "Patrick Kunzmann"
|
|
12
12
|
__all__ = ["MaskValue"]
|
|
13
13
|
|
|
14
|
-
from enum import IntEnum
|
|
15
14
|
from abc import ABCMeta, abstractmethod
|
|
16
15
|
from collections.abc import MutableMapping
|
|
17
|
-
from
|
|
16
|
+
from enum import IntEnum
|
|
17
|
+
from biotite.file import DeserializationError, SerializationError
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class MaskValue(IntEnum):
|
|
@@ -29,6 +29,7 @@ class MaskValue(IntEnum):
|
|
|
29
29
|
- `MISSING` : For this row the value is missing or unknown
|
|
30
30
|
(``?`` in *CIF*).
|
|
31
31
|
"""
|
|
32
|
+
|
|
32
33
|
PRESENT = 0
|
|
33
34
|
INAPPLICABLE = 1
|
|
34
35
|
MISSING = 2
|
|
@@ -109,8 +110,7 @@ class _Component(metaclass=ABCMeta):
|
|
|
109
110
|
return str(self.serialize())
|
|
110
111
|
|
|
111
112
|
|
|
112
|
-
class _HierarchicalContainer(_Component, MutableMapping,
|
|
113
|
-
metaclass=ABCMeta):
|
|
113
|
+
class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
|
|
114
114
|
"""
|
|
115
115
|
A container for hierarchical data in BinaryCIF files.
|
|
116
116
|
For example, the file contains multiple blocks, each block contains
|
|
@@ -181,10 +181,8 @@ class _HierarchicalContainer(_Component, MutableMapping,
|
|
|
181
181
|
if isinstance(element, self.subcomponent_class()):
|
|
182
182
|
try:
|
|
183
183
|
serialized_element = element.serialize()
|
|
184
|
-
except:
|
|
185
|
-
raise SerializationError(
|
|
186
|
-
f"Failed to serialize element '{key}'"
|
|
187
|
-
)
|
|
184
|
+
except Exception:
|
|
185
|
+
raise SerializationError(f"Failed to serialize element '{key}'")
|
|
188
186
|
else:
|
|
189
187
|
# Element is already stored in serialized form
|
|
190
188
|
serialized_element = element
|
|
@@ -200,10 +198,8 @@ class _HierarchicalContainer(_Component, MutableMapping,
|
|
|
200
198
|
# -> must be deserialized first
|
|
201
199
|
try:
|
|
202
200
|
element = self.subcomponent_class().deserialize(element)
|
|
203
|
-
except:
|
|
204
|
-
raise DeserializationError(
|
|
205
|
-
f"Failed to deserialize element '{key}'"
|
|
206
|
-
)
|
|
201
|
+
except Exception:
|
|
202
|
+
raise DeserializationError(f"Failed to deserialize element '{key}'")
|
|
207
203
|
# Update container with deserialized object
|
|
208
204
|
self._elements[key] = element
|
|
209
205
|
return element
|
|
@@ -220,10 +216,8 @@ class _HierarchicalContainer(_Component, MutableMapping,
|
|
|
220
216
|
else:
|
|
221
217
|
try:
|
|
222
218
|
element = self.subcomponent_class().deserialize(element)
|
|
223
|
-
except:
|
|
224
|
-
raise DeserializationError(
|
|
225
|
-
f"Failed to deserialize given value"
|
|
226
|
-
)
|
|
219
|
+
except Exception:
|
|
220
|
+
raise DeserializationError("Failed to deserialize given value")
|
|
227
221
|
self._elements[key] = element
|
|
228
222
|
|
|
229
223
|
def __delitem__(self, key):
|