biotite 0.41.2__cp311-cp311-win_amd64.whl → 1.0.1__cp311-cp311-win_amd64.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205)
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +1 -1
  3. biotite/application/application.py +20 -10
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +3 -3
  52. biotite/sequence/align/__init__.py +2 -2
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  60. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  63. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  66. biotite/sequence/align/multiple.pyx +1 -1
  67. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  68. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  74. biotite/sequence/alphabet.py +51 -65
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  102. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  103. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +15 -17
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +246 -236
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  112. biotite/structure/bonds.pyx +29 -32
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +83 -78
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +4 -5
  130. biotite/structure/info/groups.py +1 -3
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -44
  159. biotite/structure/io/pdbx/cif.py +140 -110
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +260 -258
  162. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  163. biotite/structure/io/trajfile.py +90 -107
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
  184. biotite-1.0.1.dist-info/RECORD +322 -0
  185. biotite/structure/io/ctab.py +0 -72
  186. biotite/structure/io/mmtf/__init__.py +0 -21
  187. biotite/structure/io/mmtf/assembly.py +0 -214
  188. biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
  189. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  190. biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
  191. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  192. biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
  193. biotite/structure/io/mmtf/decode.pyx +0 -152
  194. biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
  195. biotite/structure/io/mmtf/encode.pyx +0 -183
  196. biotite/structure/io/mmtf/file.py +0 -233
  197. biotite/structure/io/npz/__init__.py +0 -20
  198. biotite/structure/io/npz/file.py +0 -152
  199. biotite/structure/io/pdbx/legacy.py +0 -267
  200. biotite/structure/io/tng/__init__.py +0 -13
  201. biotite/structure/io/tng/file.py +0 -46
  202. biotite/temp.py +0 -86
  203. biotite-0.41.2.dist-info/RECORD +0 -340
  204. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
  205. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
@@ -7,13 +7,17 @@ __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["CIFFile", "CIFBlock", "CIFCategory", "CIFColumn", "CIFData"]
8
8
 
9
9
  import itertools
10
- import shlex
10
+ import re
11
11
  from collections.abc import MutableMapping, Sequence
12
12
  import numpy as np
13
- from .component import _Component, MaskValue
14
- from ....file import File, is_open_compatible, is_text, DeserializationError, \
15
- SerializationError
16
-
13
+ from biotite.file import (
14
+ DeserializationError,
15
+ File,
16
+ SerializationError,
17
+ is_open_compatible,
18
+ is_text,
19
+ )
20
+ from biotite.structure.io.pdbx.component import MaskValue, _Component
17
21
 
18
22
  UNICODE_CHAR_SIZE = 4
19
23
 
@@ -133,9 +137,7 @@ class CIFColumn:
133
137
  if not isinstance(data, CIFData):
134
138
  data = CIFData(data, str)
135
139
  if mask is None:
136
- mask = np.full(
137
- len(data), MaskValue.PRESENT, dtype=np.uint8
138
- )
140
+ mask = np.full(len(data), MaskValue.PRESENT, dtype=np.uint8)
139
141
  mask[data.array == "."] = MaskValue.INAPPLICABLE
140
142
  mask[data.array == "?"] = MaskValue.MISSING
141
143
  if np.all(mask == MaskValue.PRESENT):
@@ -148,8 +150,7 @@ class CIFColumn:
148
150
  mask = CIFData(mask, np.uint8)
149
151
  if len(mask) != len(data):
150
152
  raise IndexError(
151
- f"Data has length {len(data)}, "
152
- f"but mask has length {len(mask)}"
153
+ f"Data has length {len(data)}, " f"but mask has length {len(mask)}"
153
154
  )
154
155
  self._data = data
155
156
  self._mask = mask
@@ -222,9 +223,7 @@ class CIFColumn:
222
223
  elif np.issubdtype(dtype, np.str_):
223
224
  # Limit float precision to 3 decimals
224
225
  if np.issubdtype(self._data.array.dtype, np.floating):
225
- array = np.array(
226
- [f"{e:.3f}" for e in self._data.array], type=dtype
227
- )
226
+ array = np.array([f"{e:.3f}" for e in self._data.array], type=dtype)
228
227
  else:
229
228
  # Copy, as otherwise original data would be overwritten
230
229
  # with mask values
@@ -247,9 +246,7 @@ class CIFColumn:
247
246
  array = np.full(len(self._data), masked_value, dtype=dtype)
248
247
 
249
248
  present_mask = self._mask.array == MaskValue.PRESENT
250
- array[present_mask] = (
251
- self._data.array[present_mask].astype(dtype)
252
- )
249
+ array[present_mask] = self._data.array[present_mask].astype(dtype)
253
250
  return array
254
251
 
255
252
  def __len__(self):
@@ -361,9 +358,7 @@ class CIFCategory(_Component, MutableMapping):
361
358
 
362
359
  @staticmethod
363
360
  def deserialize(text, expect_whitespace=True):
364
- lines = [
365
- line.strip() for line in text.splitlines() if not _is_empty(line)
366
- ]
361
+ lines = [line.strip() for line in text.splitlines() if not _is_empty(line)]
367
362
 
368
363
  if _is_loop_start(lines[0]):
369
364
  is_looped = True
@@ -373,15 +368,11 @@ class CIFCategory(_Component, MutableMapping):
373
368
 
374
369
  category_name = _parse_category_name(lines[0])
375
370
  if category_name is None:
376
- raise DeserializationError(
377
- "Failed to parse category name"
378
- )
371
+ raise DeserializationError("Failed to parse category name")
379
372
 
380
- lines = _to_single(lines, is_looped)
373
+ lines = _to_single(lines)
381
374
  if is_looped:
382
- category_dict = CIFCategory._deserialize_looped(
383
- lines, expect_whitespace
384
- )
375
+ category_dict = CIFCategory._deserialize_looped(lines, expect_whitespace)
385
376
  else:
386
377
  category_dict = CIFCategory._deserialize_single(lines)
387
378
  return CIFCategory(category_dict, category_name)
@@ -448,11 +439,28 @@ class CIFCategory(_Component, MutableMapping):
448
439
  Process a category where each field has a single value.
449
440
  """
450
441
  category_dict = {}
451
- for line in lines:
452
- parts = shlex.split(line)
453
- column_name = parts[0].split(".")[1]
454
- column = parts[1]
455
- category_dict[column_name] = CIFColumn(column)
442
+ line_i = 0
443
+ while line_i < len(lines):
444
+ line = lines[line_i]
445
+ parts = _split_one_line(line)
446
+ if len(parts) == 2:
447
+ # Standard case -> name and value in one line
448
+ name_part, value_part = parts
449
+ line_i += 1
450
+ elif len(parts) == 1:
451
+ # Value is a multiline value on the next line
452
+ name_part = parts[0]
453
+ parts = _split_one_line(lines[line_i + 1])
454
+ if len(parts) == 1:
455
+ value_part = parts[0]
456
+ else:
457
+ raise DeserializationError(f"Failed to parse line '{line}'")
458
+ line_i += 2
459
+ elif len(parts) == 0:
460
+ raise DeserializationError("Empty line within category")
461
+ else:
462
+ raise DeserializationError(f"Failed to parse line '{line}'")
463
+ category_dict[name_part.split(".")[1]] = CIFColumn(value_part)
456
464
  return category_dict
457
465
 
458
466
  @staticmethod
@@ -477,15 +485,14 @@ class CIFCategory(_Component, MutableMapping):
477
485
  data_lines = lines[i:]
478
486
  # Rows may be split over multiple lines -> do not rely on
479
487
  # row-line-alignment at all and simply cycle through columns
480
- column_names = itertools.cycle(column_names)
488
+ column_indices = itertools.cycle(range(len(column_names)))
481
489
  for data_line in data_lines:
482
490
  # If whitespace is expected in quote protected values,
483
- # use standard shlex split
491
+ # use regex-based _split_one_line() to split
484
492
  # Otherwise use much more faster whitespace split
485
- # and quote removal if applicable,
486
- # bypassing the slow shlex module
493
+ # and quote removal if applicable.
487
494
  if expect_whitespace:
488
- values = shlex.split(data_line)
495
+ values = _split_one_line(data_line)
489
496
  else:
490
497
  values = data_line.split()
491
498
  for k in range(len(values)):
@@ -495,9 +502,18 @@ class CIFCategory(_Component, MutableMapping):
495
502
  ):
496
503
  values[k] = values[k][1:-1]
497
504
  for val in values:
498
- column_name = next(column_names)
505
+ column_index = next(column_indices)
506
+ column_name = column_names[column_index]
499
507
  category_dict[column_name].append(val)
500
508
 
509
+ # Check if all columns have the same length
510
+ # Otherwise, this would indicate a parsing error or an invalid CIF file
511
+ column_index = next(column_indices)
512
+ if column_index != 0:
513
+ raise DeserializationError(
514
+ "Category contains columns with different lengths"
515
+ )
516
+
501
517
  return category_dict
502
518
 
503
519
  def _serialize_single(self):
@@ -506,39 +522,35 @@ class CIFCategory(_Component, MutableMapping):
506
522
  # "+3" Because of three whitespace chars after longest key
507
523
  req_len = max_len + 3
508
524
  return [
509
- key.ljust(req_len) + _multiline(_quote(column.as_item()))
525
+ # Remove potential terminal newlines from multiline values
526
+ (key.ljust(req_len) + _escape(column.as_item())).strip()
510
527
  for key, column in zip(keys, self.values())
511
528
  ]
512
529
 
513
530
  def _serialize_looped(self):
514
- key_lines = [
515
- "_" + self._name + "." + key + " "
516
- for key in self.keys()
517
- ]
531
+ key_lines = ["_" + self._name + "." + key + " " for key in self.keys()]
518
532
 
519
533
  column_arrays = []
520
534
  for column in self.values():
521
535
  array = column.as_array(str)
522
536
  # Quote before measuring the number of chars,
523
537
  # as the quote characters modify the length
524
- array = np.array(
525
- [_multiline(_quote(element)) for element in array]
526
- )
538
+ array = np.array([_escape(element) for element in array])
527
539
  column_arrays.append(array)
528
540
 
529
541
  # Number of characters the longest string in the column needs
530
542
  # This can be deduced from the dtype
531
543
  # The "+1" is for the small whitespace column
532
544
  column_n_chars = [
533
- array.dtype.itemsize // UNICODE_CHAR_SIZE + 1
534
- for array in column_arrays
545
+ array.dtype.itemsize // UNICODE_CHAR_SIZE + 1 for array in column_arrays
535
546
  ]
536
547
  value_lines = [""] * self._row_count
537
548
  for i in range(self._row_count):
538
549
  for j, array in enumerate(column_arrays):
539
550
  value_lines[i] += array[i].ljust(column_n_chars[j])
540
551
  # Remove trailing justification of last column
541
- value_lines[i].rstrip()
552
+ # and potential terminal newlines from multiline values
553
+ value_lines[i] = value_lines[i].strip()
542
554
 
543
555
  return ["loop_"] + key_lines + value_lines
544
556
 
@@ -615,15 +627,11 @@ class CIFBlock(_Component, MutableMapping):
615
627
  if is_loop_in_line:
616
628
  # In case of lines with "loop_" the category is
617
629
  # in the next line
618
- category_name_in_line = _parse_category_name(
619
- lines[i + 1]
620
- )
630
+ category_name_in_line = _parse_category_name(lines[i + 1])
621
631
  current_category_name = category_name_in_line
622
632
  category_starts.append(i)
623
633
  category_names.append(current_category_name)
624
- return CIFBlock(_create_element_dict(
625
- lines, category_names, category_starts
626
- ))
634
+ return CIFBlock(_create_element_dict(lines, category_names, category_starts))
627
635
 
628
636
  def serialize(self):
629
637
  text_blocks = []
@@ -635,7 +643,7 @@ class CIFBlock(_Component, MutableMapping):
635
643
  try:
636
644
  category.name = category_name
637
645
  text_blocks.append(category.serialize())
638
- except:
646
+ except Exception:
639
647
  raise SerializationError(
640
648
  f"Failed to serialize category '{category_name}'"
641
649
  )
@@ -652,16 +660,14 @@ class CIFBlock(_Component, MutableMapping):
652
660
  # Special optimization for "atom_site":
653
661
  # Even if the values are quote protected,
654
662
  # no whitespace is expected in escaped values
655
- # Therefore slow shlex.split() call is not necessary
663
+ # Therefore slow regex-based _split_one_line() call is not necessary
656
664
  if key == "atom_site":
657
665
  expect_whitespace = False
658
666
  else:
659
667
  expect_whitespace = True
660
668
  category = CIFCategory.deserialize(category, expect_whitespace)
661
- except:
662
- raise DeserializationError(
663
- f"Failed to deserialize category '{key}'"
664
- )
669
+ except Exception:
670
+ raise DeserializationError(f"Failed to deserialize category '{key}'")
665
671
  # Update with deserialized object
666
672
  self._categories[key] = category
667
673
  return category
@@ -809,7 +815,7 @@ class CIFFile(_Component, File, MutableMapping):
809
815
  else:
810
816
  try:
811
817
  text_blocks.append(block.serialize())
812
- except:
818
+ except Exception:
813
819
  raise SerializationError(
814
820
  f"Failed to serialize block '{block_name}'"
815
821
  )
@@ -869,19 +875,15 @@ class CIFFile(_Component, File, MutableMapping):
869
875
  # -> must be deserialized first
870
876
  try:
871
877
  block = CIFBlock.deserialize(block)
872
- except:
873
- raise DeserializationError(
874
- f"Failed to deserialize block '{key}'"
875
- )
878
+ except Exception:
879
+ raise DeserializationError(f"Failed to deserialize block '{key}'")
876
880
  # Update with deserialized object
877
881
  self._blocks[key] = block
878
882
  return block
879
883
 
880
884
  def __setitem__(self, key, block):
881
885
  if not isinstance(block, CIFBlock):
882
- raise TypeError(
883
- f"Expected 'CIFBlock', but got '{type(block).__name__}'"
884
- )
886
+ raise TypeError(f"Expected 'CIFBlock', but got '{type(block).__name__}'")
885
887
  self._blocks[key] = block
886
888
 
887
889
  def __delitem__(self, key):
@@ -919,7 +921,7 @@ def _create_element_dict(lines, element_names, element_starts):
919
921
  # Lazy deserialization
920
922
  # -> keep as text for now and deserialize later if needed
921
923
  return {
922
- element_name: "\n".join(lines[element_starts[i] : element_starts[i+1]])
924
+ element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]])
923
925
  for i, element_name in enumerate(element_names)
924
926
  }
925
927
 
@@ -953,52 +955,50 @@ def _is_loop_start(line):
953
955
  return line.startswith("loop_")
954
956
 
955
957
 
956
- def _to_single(lines, is_looped):
957
- """
958
+ def _to_single(lines):
959
+ r"""
958
960
  Convert multiline values into singleline values
959
961
  (in terms of 'lines' list elements).
960
- Linebreaks are preserved.
962
+ Linebreaks are preserved as ``'\n'`` characters within a list element.
963
+ The initial ``';'`` character is also preserved, while the final ``';'`` character
964
+ is removed.
961
965
  """
962
- processed_lines = [None] * len(lines)
963
- in_i = 0
964
- out_i = 0
965
- while in_i < len(lines):
966
- if lines[in_i][0] == ";":
967
- # Multiline value
968
- multi_line_str = lines[in_i][1:]
969
- j = in_i + 1
970
- while lines[j] != ";":
971
- # Preserve linebreaks
972
- multi_line_str += "\n" + lines[j]
973
- j += 1
974
- if is_looped:
975
- # Create a line for the multiline string only
976
- processed_lines[out_i] = shlex.quote(multi_line_str)
977
- out_i += 1
966
+ processed_lines = []
967
+ in_multi_line = False
968
+ mutli_line_value = []
969
+ for line in lines:
970
+ # Multiline value are enclosed by ';' at the start of the beginning and end line
971
+ if line[0] == ";":
972
+ if not in_multi_line:
973
+ # Start of multiline value
974
+ in_multi_line = True
975
+ mutli_line_value.append(line)
978
976
  else:
979
- # Append multiline string to previous line
980
- processed_lines[out_i - 1] += " " + shlex.quote(multi_line_str)
981
- in_i = j + 1
982
-
983
- elif not is_looped and lines[in_i][0] != "_":
984
- # Singleline value in the line after the corresponding key
985
- processed_lines[out_i - 1] += " " + lines[in_i]
986
- in_i += 1
987
-
977
+ # End of multiline value
978
+ in_multi_line = False
979
+ # The current line contains only the end character ';'
980
+ # Hence this line is not added to the processed lines
981
+ processed_lines.append("\n".join(mutli_line_value))
982
+ mutli_line_value = []
988
983
  else:
989
- # Normal singleline value in the same row as the key
990
- processed_lines[out_i] = lines[in_i]
991
- in_i += 1
992
- out_i += 1
993
-
994
- return [line for line in processed_lines if line is not None]
984
+ if in_multi_line:
985
+ mutli_line_value.append(line)
986
+ else:
987
+ processed_lines.append(line)
988
+ return processed_lines
995
989
 
996
990
 
997
- def _quote(value):
991
+ def _escape(value):
998
992
  """
999
- A less secure but much quicker version of ``shlex.quote()``.
993
+ Escape special characters in a value to make it compatible with CIF.
1000
994
  """
1001
- if len(value) == 0:
995
+ if "\n" in value:
996
+ # A value with linebreaks must be represented as multiline value
997
+ return _multiline(value)
998
+ elif "'" in value and '"' in value:
999
+ # If both quote types are present, you cannot use them for escaping
1000
+ return _multiline(value)
1001
+ elif len(value) == 0:
1002
1002
  return "''"
1003
1003
  elif value[0] == "_":
1004
1004
  return "'" + value + "'"
@@ -1016,12 +1016,42 @@ def _quote(value):
1016
1016
 
1017
1017
  def _multiline(value):
1018
1018
  """
1019
- Convert a string containing linebreaks into CIF-compatible
1019
+ Convert a string that may contain linebreaks into CIF-compatible
1020
1020
  multiline string.
1021
1021
  """
1022
- if "\n" in value:
1023
- return "\n;" + value + "\n;\n"
1024
- return value
1022
+ return "\n;" + value + "\n;\n"
1023
+
1024
+
1025
+ def _split_one_line(line):
1026
+ """
1027
+ Split a line into its fields.
1028
+ Supporting embedded quotes (' or "), like `'a dog's life'` to `a dog's life`
1029
+ """
1030
+ # Special case of multiline value, where the line starts with ';'
1031
+ if line[0] == ";":
1032
+ return [line[1:]]
1033
+
1034
+ # Define the patterns for different types of fields
1035
+ single_quote_pattern = r"('(?:'(?! )|[^'])*')(?:\s|$)"
1036
+ double_quote_pattern = r'("(?:"(?! )|[^"])*")(?:\s|$)'
1037
+ unquoted_pattern = r"([^\s]+)"
1038
+
1039
+ # Combine the patterns using alternation
1040
+ combined_pattern = (
1041
+ f"{single_quote_pattern}|{double_quote_pattern}|{unquoted_pattern}"
1042
+ )
1043
+
1044
+ # Find all matches
1045
+ matches = re.findall(combined_pattern, line)
1046
+
1047
+ # Extract non-empty groups from the matches
1048
+ fields = []
1049
+ for match in matches:
1050
+ field = next(group for group in match if group)
1051
+ if field[0] == field[-1] == "'" or field[0] == field[-1] == '"':
1052
+ field = field[1:-1]
1053
+ fields.append(field)
1054
+ return fields
1025
1055
 
1026
1056
 
1027
1057
  def _arrayfy(data):
@@ -11,10 +11,10 @@ __name__ = "biotite.structure.io.pdbx"
11
11
  __author__ = "Patrick Kunzmann"
12
12
  __all__ = ["MaskValue"]
13
13
 
14
- from enum import IntEnum
15
14
  from abc import ABCMeta, abstractmethod
16
15
  from collections.abc import MutableMapping
17
- from ....file import SerializationError, DeserializationError
16
+ from enum import IntEnum
17
+ from biotite.file import DeserializationError, SerializationError
18
18
 
19
19
 
20
20
  class MaskValue(IntEnum):
@@ -29,6 +29,7 @@ class MaskValue(IntEnum):
29
29
  - `MISSING` : For this row the value is missing or unknown
30
30
  (``?`` in *CIF*).
31
31
  """
32
+
32
33
  PRESENT = 0
33
34
  INAPPLICABLE = 1
34
35
  MISSING = 2
@@ -109,8 +110,7 @@ class _Component(metaclass=ABCMeta):
109
110
  return str(self.serialize())
110
111
 
111
112
 
112
- class _HierarchicalContainer(_Component, MutableMapping,
113
- metaclass=ABCMeta):
113
+ class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
114
114
  """
115
115
  A container for hierarchical data in BinaryCIF files.
116
116
  For example, the file contains multiple blocks, each block contains
@@ -181,10 +181,8 @@ class _HierarchicalContainer(_Component, MutableMapping,
181
181
  if isinstance(element, self.subcomponent_class()):
182
182
  try:
183
183
  serialized_element = element.serialize()
184
- except:
185
- raise SerializationError(
186
- f"Failed to serialize element '{key}'"
187
- )
184
+ except Exception:
185
+ raise SerializationError(f"Failed to serialize element '{key}'")
188
186
  else:
189
187
  # Element is already stored in serialized form
190
188
  serialized_element = element
@@ -200,10 +198,8 @@ class _HierarchicalContainer(_Component, MutableMapping,
200
198
  # -> must be deserialized first
201
199
  try:
202
200
  element = self.subcomponent_class().deserialize(element)
203
- except:
204
- raise DeserializationError(
205
- f"Failed to deserialize element '{key}'"
206
- )
201
+ except Exception:
202
+ raise DeserializationError(f"Failed to deserialize element '{key}'")
207
203
  # Update container with deserialized object
208
204
  self._elements[key] = element
209
205
  return element
@@ -220,10 +216,8 @@ class _HierarchicalContainer(_Component, MutableMapping,
220
216
  else:
221
217
  try:
222
218
  element = self.subcomponent_class().deserialize(element)
223
- except:
224
- raise DeserializationError(
225
- f"Failed to deserialize given value"
226
- )
219
+ except Exception:
220
+ raise DeserializationError("Failed to deserialize given value")
227
221
  self._elements[key] = element
228
222
 
229
223
  def __delitem__(self, key):