biotite 0.39.0__cp311-cp311-macosx_11_0_arm64.whl → 0.41.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (121)
  1. biotite/__init__.py +3 -3
  2. biotite/application/dssp/app.py +18 -18
  3. biotite/database/pubchem/download.py +23 -23
  4. biotite/database/pubchem/query.py +7 -7
  5. biotite/database/rcsb/download.py +19 -14
  6. biotite/file.py +17 -9
  7. biotite/sequence/align/banded.c +258 -237
  8. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  9. biotite/sequence/align/cigar.py +60 -15
  10. biotite/sequence/align/kmeralphabet.c +243 -222
  11. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  12. biotite/sequence/align/kmersimilarity.c +215 -196
  13. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  14. biotite/sequence/align/kmertable.cpp +233 -205
  15. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  16. biotite/sequence/align/localgapped.c +258 -237
  17. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  18. biotite/sequence/align/localungapped.c +235 -214
  19. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  20. biotite/sequence/align/multiple.c +255 -234
  21. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  22. biotite/sequence/align/pairwise.c +274 -253
  23. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  24. biotite/sequence/align/permutation.c +215 -196
  25. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  26. biotite/sequence/align/selector.c +217 -197
  27. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  28. biotite/sequence/align/tracetable.c +215 -195
  29. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  30. biotite/sequence/annotation.py +2 -2
  31. biotite/sequence/codec.c +235 -214
  32. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  33. biotite/sequence/io/fasta/convert.py +27 -24
  34. biotite/sequence/phylo/nj.c +215 -196
  35. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  36. biotite/sequence/phylo/tree.c +227 -202
  37. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  38. biotite/sequence/phylo/upgma.c +215 -196
  39. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  40. biotite/structure/__init__.py +2 -0
  41. biotite/structure/basepairs.py +7 -12
  42. biotite/structure/bonds.c +1437 -1279
  43. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  44. biotite/structure/celllist.c +217 -197
  45. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  46. biotite/structure/charges.c +1052 -1101
  47. biotite/structure/charges.cpython-311-darwin.so +0 -0
  48. biotite/structure/dotbracket.py +2 -0
  49. biotite/structure/filter.py +30 -37
  50. biotite/structure/info/__init__.py +5 -8
  51. biotite/structure/info/atoms.py +31 -68
  52. biotite/structure/info/bonds.py +47 -101
  53. biotite/structure/info/ccd/README.rst +8 -0
  54. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  55. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  56. biotite/structure/info/ccd/components.bcif +0 -0
  57. biotite/structure/info/ccd/nucleotides.txt +798 -0
  58. biotite/structure/info/ccd.py +95 -0
  59. biotite/structure/info/groups.py +90 -0
  60. biotite/structure/info/masses.py +21 -20
  61. biotite/structure/info/misc.py +78 -25
  62. biotite/structure/info/standardize.py +17 -12
  63. biotite/structure/integrity.py +19 -70
  64. biotite/structure/io/__init__.py +2 -4
  65. biotite/structure/io/ctab.py +12 -106
  66. biotite/structure/io/general.py +167 -181
  67. biotite/structure/io/gro/file.py +16 -16
  68. biotite/structure/io/mmtf/__init__.py +3 -0
  69. biotite/structure/io/mmtf/convertarray.c +219 -198
  70. biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
  71. biotite/structure/io/mmtf/convertfile.c +217 -197
  72. biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
  73. biotite/structure/io/mmtf/decode.c +225 -204
  74. biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
  75. biotite/structure/io/mmtf/encode.c +215 -196
  76. biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
  77. biotite/structure/io/mmtf/file.py +34 -26
  78. biotite/structure/io/mol/__init__.py +4 -2
  79. biotite/structure/io/mol/convert.py +71 -7
  80. biotite/structure/io/mol/ctab.py +414 -0
  81. biotite/structure/io/mol/header.py +116 -0
  82. biotite/structure/io/mol/{file.py → mol.py} +69 -82
  83. biotite/structure/io/mol/sdf.py +909 -0
  84. biotite/structure/io/npz/__init__.py +3 -0
  85. biotite/structure/io/npz/file.py +21 -18
  86. biotite/structure/io/pdb/__init__.py +3 -3
  87. biotite/structure/io/pdb/file.py +89 -34
  88. biotite/structure/io/pdb/hybrid36.c +63 -43
  89. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  90. biotite/structure/io/pdbqt/file.py +32 -32
  91. biotite/structure/io/pdbx/__init__.py +12 -6
  92. biotite/structure/io/pdbx/bcif.py +648 -0
  93. biotite/structure/io/pdbx/cif.py +1032 -0
  94. biotite/structure/io/pdbx/component.py +246 -0
  95. biotite/structure/io/pdbx/convert.py +858 -386
  96. biotite/structure/io/pdbx/encoding.c +112813 -0
  97. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  98. biotite/structure/io/pdbx/legacy.py +267 -0
  99. biotite/structure/molecules.py +151 -151
  100. biotite/structure/repair.py +253 -0
  101. biotite/structure/sasa.c +215 -196
  102. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  103. biotite/structure/sequence.py +112 -0
  104. biotite/structure/superimpose.py +618 -116
  105. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
  106. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
  107. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
  108. biotite/structure/info/amino_acids.json +0 -1556
  109. biotite/structure/info/amino_acids.py +0 -42
  110. biotite/structure/info/carbohydrates.json +0 -1122
  111. biotite/structure/info/carbohydrates.py +0 -39
  112. biotite/structure/info/intra_bonds.msgpack +0 -0
  113. biotite/structure/info/link_types.msgpack +0 -1
  114. biotite/structure/info/nucleotides.json +0 -772
  115. biotite/structure/info/nucleotides.py +0 -39
  116. biotite/structure/info/residue_masses.msgpack +0 -0
  117. biotite/structure/info/residue_names.msgpack +0 -3
  118. biotite/structure/info/residues.msgpack +0 -0
  119. biotite/structure/io/pdbx/file.py +0 -652
  120. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
  121. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,648 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.pdbx"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["BinaryCIFFile", "BinaryCIFBlock", "BinaryCIFCategory",
8
+ "BinaryCIFColumn", "BinaryCIFData"]
9
+
10
+ from collections.abc import Sequence
11
+ import numpy as np
12
+ import msgpack
13
+ from .component import _Component, _HierarchicalContainer, MaskValue
14
+ from .encoding import decode_stepwise, encode_stepwise, deserialize_encoding, \
15
+ create_uncompressed_encoding
16
+ from ....file import File, is_binary, is_open_compatible, SerializationError
17
+
18
+
19
class BinaryCIFData(_Component):
    r"""
    Container for the values of a :class:`BinaryCIFColumn` together
    with the encoding used to serialize them.

    Parameters
    ----------
    array : array_like or int or float or str
        The values to store.
        A single item (including a single string) is wrapped into a
        one-element array.
    encoding : list of Encoding
        The encoding steps, applied one after another upon
        serialization.
        If omitted, an encoding is obtained from
        :func:`create_uncompressed_encoding()`.

    Attributes
    ----------
    array : ndarray
        The stored values.
    encoding : list of Encoding
        The encoding steps.

    Examples
    --------

    >>> data = BinaryCIFData([1, 2, 3])
    >>> print(data.array)
    [1 2 3]
    >>> print(len(data))
    3
    >>> # A single item is converted into an array
    >>> data = BinaryCIFData("apple")
    >>> print(data.array)
    ['apple']

    A suitable choice of encoding steps can shrink the serialized data
    considerably:

    >>> # Default uncompressed encoding
    >>> array = np.arange(100)
    >>> uncompressed_bytes = BinaryCIFData(array).serialize()["data"]
    >>> print(len(uncompressed_bytes))
    400
    >>> # Delta encoding followed by run-length encoding
    >>> # [0, 1, 2, ...] -> [0, 1, 1, ...] -> [0, 1, 1, 99]
    >>> compressed_bytes = BinaryCIFData(
    ...     array,
    ...     encoding = [
    ...         # [0, 1, 2, ...] -> [0, 1, 1, ...]
    ...         DeltaEncoding(),
    ...         # [0, 1, 1, ...] -> [0, 1, 1, 99]
    ...         RunLengthEncoding(),
    ...         # [0, 1, 1, 99] -> b"\x00\x00..."
    ...         ByteArrayEncoding()
    ...     ]
    ... ).serialize()["data"]
    >>> print(len(compressed_bytes))
    16
    """

    def __init__(self, array, encoding=None):
        # Strings are sequences, too, but are treated as single items
        is_collection = (
            isinstance(array, (Sequence, np.ndarray))
            and not isinstance(array, str)
        )
        values = np.asarray(array if is_collection else [array])
        if np.issubdtype(values.dtype, np.object_):
            raise ValueError("Object arrays are not supported")

        self._array = values
        self._encoding = (
            create_uncompressed_encoding(values)
            if encoding is None
            else list(encoding)
        )

    @property
    def array(self):
        return self._array

    @property
    def encoding(self):
        return self._encoding

    @staticmethod
    def subcomponent_class():
        # Data is the lowest level of the hierarchy
        return None

    @staticmethod
    def supercomponent_class():
        return BinaryCIFColumn

    @staticmethod
    def deserialize(content):
        # The encoding steps must be known before the data can be decoded
        steps = [deserialize_encoding(enc) for enc in content["encoding"]]
        decoded = decode_stepwise(content["data"], steps)
        return BinaryCIFData(decoded, steps)

    def serialize(self):
        payload = encode_stepwise(self._array, self._encoding)
        if not isinstance(payload, bytes):
            raise SerializationError(
                "Final encoding must return 'bytes'"
            )
        return {
            "data": payload,
            "encoding": [step.serialize() for step in self._encoding],
        }

    def __len__(self):
        return len(self._array)

    def __eq__(self, other):
        # Equal only if type, values and encoding steps all agree
        return (
            isinstance(other, type(self))
            and np.array_equal(self._array, other._array)
            and self._encoding == other._encoding
        )
137
+
138
+
139
class BinaryCIFColumn(_Component):
    """
    This class represents a single column in a
    :class:`BinaryCIFCategory`.

    Parameters
    ----------
    data : BinaryCIFData or array_like or int or float or str
        The data to be stored.
        If no :class:`BinaryCIFData` is given, the passed argument is
        coerced into such an object.
    mask : BinaryCIFData or array_like, dtype=int or int
        The mask to be stored.
        If given, the mask indicates whether the `data` is
        inapplicable (``.``) or missing (``?``) in some rows.
        The data presence is indicated by values from the
        :class:`MaskValue` enum.
        If no :class:`BinaryCIFData` is given, the passed argument is
        coerced into such an object.
        By default, no mask is created.

    Attributes
    ----------
    data : BinaryCIFData
        The stored data.
    mask : BinaryCIFData
        The mask that indicates whether certain data elements are
        inapplicable or missing.
        If no mask is present, this attribute is ``None``.

    Raises
    ------
    IndexError
        If `data` and `mask` have different lengths.

    Examples
    --------

    >>> print(BinaryCIFColumn([1, 2, 3]).as_array())
    [1 2 3]
    >>> mask = [MaskValue.PRESENT, MaskValue.INAPPLICABLE, MaskValue.MISSING]
    >>> # Mask values are only inserted into string arrays
    >>> print(BinaryCIFColumn([1, 2, 3], mask).as_array(int))
    [1 2 3]
    >>> print(BinaryCIFColumn([1, 2, 3], mask).as_array(str))
    ['1' '.' '?']
    >>> print(BinaryCIFColumn([1]).as_item())
    1
    >>> print(BinaryCIFColumn([1], mask=[MaskValue.MISSING]).as_item())
    ?
    """

    def __init__(self, data, mask=None):
        if not isinstance(data, BinaryCIFData):
            data = BinaryCIFData(data)
        if mask is not None:
            if not isinstance(mask, BinaryCIFData):
                mask = BinaryCIFData(mask)
            # Each data row needs exactly one mask value
            if len(data) != len(mask):
                raise IndexError(
                    f"Data has length {len(data)}, "
                    f"but mask has length {len(mask)}"
                )
        self._data = data
        self._mask = mask

    @property
    def data(self):
        return self._data

    @property
    def mask(self):
        return self._mask

    @staticmethod
    def subcomponent_class():
        return BinaryCIFData

    @staticmethod
    def supercomponent_class():
        return BinaryCIFCategory

    def as_item(self):
        """
        Get the only item in the data of this column.

        If the data is masked as inapplicable or missing, ``'.'`` or
        ``'?'`` is returned, respectively.
        If the data contains more than one item, an exception is raised.

        Returns
        -------
        item : str or int or float
            The item in the data.
        """
        if self._mask is None:
            return self._data.array.item()
        # 'ndarray.item()' raises for arrays with more than one element
        mask = self._mask.array.item()
        if mask == MaskValue.PRESENT:
            return self._data.array.item()
        elif mask == MaskValue.INAPPLICABLE:
            return "."
        elif mask == MaskValue.MISSING:
            return "?"
        # NOTE(review): a mask value outside of 'MaskValue' falls
        # through and yields 'None' -- confirm whether an exception
        # would be more appropriate
        return None

    def as_array(self, dtype=None, masked_value=None):
        """
        Get the data of this column as an :class:`ndarray`.

        This is a shortcut to get ``BinaryCIFColumn.data.array``.
        Furthermore, the mask is applied to the data.

        Parameters
        ----------
        dtype : dtype-like, optional
            The data type the array should be converted to.
            By default, the original type is used.
        masked_value : str or int or float, optional
            The value that should be used for masked elements, i.e.
            ``MaskValue.INAPPLICABLE`` or ``MaskValue.MISSING``.
            By default, masked elements are converted to ``'.'`` or
            ``'?'`` depending on the :class:`MaskValue`, if `dtype` is
            a string type.
            For other types the stored data is returned unaltered, if
            it already has the same kind as `dtype`, otherwise masked
            elements are set to zero.

        Returns
        -------
        array : ndarray
            The column data with the mask applied.
            If no values need to be replaced, this may be the stored
            array itself instead of a copy.
        """
        if dtype is None:
            dtype = self._data.array.dtype

        if self._mask is None:
            return self._data.array.astype(dtype, copy=False)

        elif np.issubdtype(dtype, np.str_):
            # Copy, as otherwise original data would be overwritten
            # with mask values
            array = self._data.array.astype(dtype, copy=True)
            if masked_value is None:
                array[self._mask.array == MaskValue.INAPPLICABLE] = "."
                array[self._mask.array == MaskValue.MISSING] = "?"
            else:
                array[self._mask.array == MaskValue.INAPPLICABLE] = masked_value
                array[self._mask.array == MaskValue.MISSING] = masked_value
            return array

        elif np.dtype(dtype).kind == self._data.array.dtype.kind:
            if masked_value is None:
                # Nothing to insert -> masked rows keep the stored value
                return self._data.array.astype(dtype, copy=False)
            else:
                array = self._data.array.astype(dtype, copy=True)
                array[self._mask.array == MaskValue.INAPPLICABLE] = masked_value
                array[self._mask.array == MaskValue.MISSING] = masked_value
                return array

        else:
            # Array needs to be converted, but masked values are
            # not necessarily convertible
            # (e.g. '' cannot be converted to int)
            if masked_value is None:
                array = np.zeros(len(self._data), dtype=dtype)
            else:
                array = np.full(len(self._data), masked_value, dtype=dtype)

            # Only convert the rows that actually carry a value
            present_mask = self._mask.array == MaskValue.PRESENT
            array[present_mask] = (
                self._data.array[present_mask].astype(dtype)
            )
            return array

    @staticmethod
    def deserialize(content):
        # Tolerate serialized columns that omit the 'mask' entry
        # instead of setting it explicitly to 'None'
        mask = content.get("mask")
        return BinaryCIFColumn(
            BinaryCIFData.deserialize(content["data"]),
            BinaryCIFData.deserialize(mask) if mask is not None else None
        )

    def serialize(self):
        return {
            "data": self._data.serialize(),
            "mask": self._mask.serialize() if self._mask is not None else None
        }

    def __len__(self):
        return len(self._data)

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        if self._data != other._data:
            return False
        if self._mask != other._mask:
            return False
        return True
323
+
324
+
325
class BinaryCIFCategory(_HierarchicalContainer):
    """
    A category of a :class:`BinaryCIFBlock`.

    The category behaves like a dictionary that maps column names to
    :class:`BinaryCIFColumn` objects.

    Parameters
    ----------
    columns : dict, optional
        Maps each column name to a :class:`BinaryCIFColumn` (or to an
        object that can be coerced into a :class:`BinaryCIFColumn`).
        All columns must have the same length.
        If omitted, the category starts without any column.
    row_count : int, optional
        The number of rows.
        If omitted, it is taken from the length of the first column
        as soon as it is required.

    Attributes
    ----------
    row_count : int
        The number of rows in the category, i.e. the length of each
        column.

    Examples
    --------

    >>> # Add column on creation
    >>> category = BinaryCIFCategory({"fruit": ["apple", "banana"]})
    >>> # Add column later on
    >>> category["taste"] = ["delicious", "tasty"]
    >>> # Add column the formal way
    >>> category["color"] = BinaryCIFColumn(BinaryCIFData(["red", "yellow"]))
    >>> # Access a column
    >>> print(category["fruit"].as_array())
    ['apple' 'banana']
    """

    def __init__(self, columns=None, row_count=None):
        coerced = {}
        if columns is not None:
            for name, column in columns.items():
                # Columns and 'dict' values are stored untouched,
                # everything else is wrapped into a column
                if isinstance(column, (BinaryCIFColumn, dict)):
                    coerced[name] = column
                else:
                    coerced[name] = BinaryCIFColumn(column)

        self._row_count = row_count
        super().__init__(coerced)

    @property
    def row_count(self):
        if self._row_count is not None:
            return self._row_count
        # Not determined yet -> use the length of the first column
        first_column = next(iter(self.values()))
        self._row_count = len(first_column)
        return self._row_count

    @staticmethod
    def subcomponent_class():
        return BinaryCIFColumn

    @staticmethod
    def supercomponent_class():
        return BinaryCIFBlock

    def filter(self, index):
        # NOTE(review): relies on a 'filter()' method of the columns,
        # which is not defined in this file -- confirm it exists
        filtered_columns = {
            name: column.filter(index) for name, column in self.items()
        }
        # A placeholder array reveals how many rows survive the
        # filtering, irrespective of the kind of index
        placeholder = np.empty(self.row_count, dtype=bool)
        return BinaryCIFCategory(filtered_columns, len(placeholder[index]))

    @staticmethod
    def deserialize(content):
        columns = BinaryCIFCategory._deserialize_elements(
            content["columns"], "name"
        )
        return BinaryCIFCategory(columns, content["rowCount"])

    def serialize(self):
        if not len(self):
            raise SerializationError("At least one column is required")

        for name, column in self.items():
            length = len(column)
            if self._row_count is None:
                # The first column defines the expected row count
                self._row_count = length
                continue
            if length != self._row_count:
                raise SerializationError(
                    f"All columns must have the same length, "
                    f"but '{name}' has length {length}, "
                    f"while the first column has row_count {self._row_count}"
                )

        return {
            "rowCount": self.row_count,
            "columns": self._serialize_elements("name"),
        }

    def __setitem__(self, key, element):
        column = (
            element
            if isinstance(element, (BinaryCIFColumn, dict))
            else BinaryCIFColumn(element)
        )
        super().__setitem__(key, column)
432
+
433
+
434
class BinaryCIFBlock(_HierarchicalContainer):
    """
    This class represents a block in a :class:`BinaryCIFFile`.

    Categories can be accessed and modified like a dictionary.
    The values are :class:`BinaryCIFCategory` objects.

    Parameters
    ----------
    categories : dict, optional
        The categories of the block.
        The keys are the category names and the values are the
        :class:`BinaryCIFCategory` objects.
        By default, an empty block is created.

    Notes
    -----
    The category names used for item access do not include the leading
    underscore character.
    This character is prepended automatically when a category is
    accessed, set or deleted and a single leading underscore is
    removed again when iterating over the category names.

    Examples
    --------

    >>> # Add category on creation
    >>> block = BinaryCIFBlock({"foo": BinaryCIFCategory({"some_column": 1})})
    >>> # Add category later on
    >>> block["bar"] = BinaryCIFCategory({"another_column": [2, 3]})
    >>> # Access a column
    >>> print(block["bar"]["another_column"].as_array())
    [2 3]
    """

    def __init__(self, categories=None):
        # NOTE(review): the keys are handed to the base class as given,
        # i.e. no underscore is prepended here, in contrast to
        # '__setitem__()' -- confirm this is intended
        super().__init__(categories)

    @staticmethod
    def subcomponent_class():
        return BinaryCIFCategory

    @staticmethod
    def supercomponent_class():
        return BinaryCIFFile

    @staticmethod
    def deserialize(content):
        return BinaryCIFBlock(
            BinaryCIFBlock._deserialize_elements(
                content["categories"], "name"
            )
        )

    def serialize(self):
        return {"categories": self._serialize_elements("name")}

    def __getitem__(self, key):
        # Actual bcif files use leading '_' as categories
        return super().__getitem__("_" + key)

    def __setitem__(self, key, element):
        return super().__setitem__("_" + key, element)

    def __delitem__(self, key):
        # Must delegate to the deletion (not the setter) of the base class
        return super().__delitem__("_" + key)

    def __iter__(self):
        stored_keys = super().__iter__()
        # Remove exactly the one underscore the accessors prepend:
        # stripping all leading underscores would yield keys that
        # '__getitem__()' cannot resolve, if a name itself starts with '_'
        return (
            key[1:] if key.startswith("_") else key
            for key in stored_keys
        )
501
+
502
+
503
class BinaryCIFFile(File, _HierarchicalContainer):
    """
    This class represents a *BinaryCIF* file.

    The blocks of the file can be accessed and modified like a
    dictionary.
    The values are :class:`BinaryCIFBlock` objects.

    To parse or write a structure from/to a :class:`BinaryCIFFile`
    object, use the high-level :func:`get_structure()` or
    :func:`set_structure()` function respectively.

    Parameters
    ----------
    blocks : dict, optional
        The blocks of the file.
        The keys are the block names and the values are the
        :class:`BinaryCIFBlock` objects.
        By default, an empty file is created.

    Notes
    -----
    The content of *BinaryCIF* files is lazily deserialized:
    Only when a column is accessed, the time consuming data decoding
    is performed.
    The decoded :class:`BinaryCIFBlock`/:class:`BinaryCIFCategory`
    objects are cached for subsequent accesses.

    Attributes
    ----------
    block : BinaryCIFBlock
        The sole block of the file.
        If the file contains no block or multiple blocks, an exception
        is raised.

    Examples
    --------
    Read a *BinaryCIF* file and access its content:

    >>> import os.path
    >>> file = BinaryCIFFile.read(os.path.join(path_to_structures, "1l2y.bcif"))
    >>> print(file["1L2Y"]["citation_author"]["name"].as_array())
    ['Neidigh, J.W.' 'Fesinmeyer, R.M.' 'Andersen, N.H.']
    >>> # Access the only block in the file
    >>> print(file.block["entity"]["pdbx_description"].as_item())
    TC5b

    Create a *BinaryCIF* file and write it to disk:

    >>> category = BinaryCIFCategory({"some_column": "some_value"})
    >>> block = BinaryCIFBlock({"some_category": category})
    >>> file = BinaryCIFFile({"some_block": block})
    >>> file.write(os.path.join(path_to_directory, "some_file.bcif"))
    """

    def __init__(self, blocks=None):
        # Both base classes are initialized explicitly, as they take
        # different arguments
        File.__init__(self)
        _HierarchicalContainer.__init__(self, blocks)

    @property
    def block(self):
        block_count = len(self)
        # An empty file is reported separately from a file with
        # multiple blocks, so the message matches the actual problem
        if block_count == 0:
            raise ValueError("There are no blocks in the file")
        if block_count > 1:
            raise ValueError("There are multiple blocks in the file")
        return self[next(iter(self))]

    @staticmethod
    def subcomponent_class():
        return BinaryCIFBlock

    @staticmethod
    def supercomponent_class():
        # The file is the top level of the hierarchy
        return None

    @staticmethod
    def deserialize(content):
        return BinaryCIFFile(
            BinaryCIFFile._deserialize_elements(
                content["dataBlocks"], "header"
            )
        )

    def serialize(self):
        return {"dataBlocks": self._serialize_elements("header")}

    @classmethod
    def read(cls, file):
        """
        Read a *BinaryCIF* file.

        Parameters
        ----------
        file : file-like object or str
            The file to be read.
            Alternatively a file path can be supplied.

        Returns
        -------
        file_object : BinaryCIFFile
            The parsed file.

        Raises
        ------
        TypeError
            If a file object is given that is not opened in binary
            mode.
        """
        # File name
        if is_open_compatible(file):
            with open(file, "rb") as f:
                packed_bytes = f.read()
        # File object
        else:
            if not is_binary(file):
                raise TypeError("A file opened in 'binary' mode is required")
            packed_bytes = file.read()
        return cls.deserialize(
            msgpack.unpackb(packed_bytes, use_list=True, raw=False)
        )

    def write(self, file):
        """
        Write contents into a *BinaryCIF* file.

        Parameters
        ----------
        file : file-like object or str
            The file to be written to.
            Alternatively, a file path can be supplied.

        Raises
        ------
        TypeError
            If a file object is given that is not opened in binary
            mode.
        """
        serialized_content = self.serialize()
        # Metadata entries stored next to the data blocks
        serialized_content["encoder"] = "biotite"
        serialized_content["version"] = "0.3.0"
        packed_bytes = msgpack.packb(
            serialized_content, use_bin_type=True, default=_encode_numpy
        )
        if is_open_compatible(file):
            with open(file, "wb") as f:
                f.write(packed_bytes)
        else:
            if not is_binary(file):
                raise TypeError("A file opened in 'binary' mode is required")
            file.write(packed_bytes)
635
+
636
+
637
def _encode_numpy(item):
    """
    Turn a NumPy scalar into the corresponding native Python object.

    *Msgpack* is not able to pack NumPy types (e.g. float32) itself.
    Hence, this function is passed to the Msgpack packer via its
    `default` parameter.
    Any other object is rejected with a :class:`TypeError`.
    """
    if not isinstance(item, np.generic):
        raise TypeError(f"can not serialize '{type(item).__name__}' object")
    return item.item()