biotite 0.39.0__cp311-cp311-win_amd64.whl → 0.40.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (104) hide show
  1. biotite/__init__.py +3 -3
  2. biotite/application/dssp/app.py +18 -18
  3. biotite/database/rcsb/download.py +19 -14
  4. biotite/sequence/align/banded.c +258 -237
  5. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  6. biotite/sequence/align/kmeralphabet.c +243 -222
  7. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  8. biotite/sequence/align/kmersimilarity.c +215 -196
  9. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  10. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  11. biotite/sequence/align/kmertable.cpp +233 -205
  12. biotite/sequence/align/localgapped.c +258 -237
  13. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  14. biotite/sequence/align/localungapped.c +235 -214
  15. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  16. biotite/sequence/align/multiple.c +255 -234
  17. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  18. biotite/sequence/align/pairwise.c +274 -253
  19. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  20. biotite/sequence/align/permutation.c +215 -196
  21. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  22. biotite/sequence/align/selector.c +217 -197
  23. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  24. biotite/sequence/align/tracetable.c +215 -195
  25. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  26. biotite/sequence/codec.c +235 -214
  27. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  28. biotite/sequence/phylo/nj.c +215 -196
  29. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  30. biotite/sequence/phylo/tree.c +227 -202
  31. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  32. biotite/sequence/phylo/upgma.c +215 -196
  33. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  34. biotite/structure/basepairs.py +7 -12
  35. biotite/structure/bonds.c +1175 -1226
  36. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  37. biotite/structure/celllist.c +217 -197
  38. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  39. biotite/structure/charges.c +1052 -1101
  40. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  41. biotite/structure/filter.py +30 -37
  42. biotite/structure/info/__init__.py +5 -8
  43. biotite/structure/info/atoms.py +25 -67
  44. biotite/structure/info/bonds.py +46 -100
  45. biotite/structure/info/ccd/README.rst +8 -0
  46. biotite/structure/info/ccd/amino_acids.txt +1646 -0
  47. biotite/structure/info/ccd/carbohydrates.txt +1133 -0
  48. biotite/structure/info/ccd/components.bcif +0 -0
  49. biotite/structure/info/ccd/nucleotides.txt +797 -0
  50. biotite/structure/info/ccd.py +95 -0
  51. biotite/structure/info/groups.py +90 -0
  52. biotite/structure/info/masses.py +21 -20
  53. biotite/structure/info/misc.py +11 -22
  54. biotite/structure/info/standardize.py +17 -12
  55. biotite/structure/io/__init__.py +2 -4
  56. biotite/structure/io/ctab.py +1 -1
  57. biotite/structure/io/general.py +37 -43
  58. biotite/structure/io/mmtf/__init__.py +3 -0
  59. biotite/structure/io/mmtf/convertarray.c +219 -198
  60. biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
  61. biotite/structure/io/mmtf/convertfile.c +217 -197
  62. biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
  63. biotite/structure/io/mmtf/decode.c +225 -204
  64. biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
  65. biotite/structure/io/mmtf/encode.c +215 -196
  66. biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
  67. biotite/structure/io/mmtf/file.py +34 -26
  68. biotite/structure/io/npz/__init__.py +3 -0
  69. biotite/structure/io/npz/file.py +21 -18
  70. biotite/structure/io/pdb/__init__.py +3 -3
  71. biotite/structure/io/pdb/file.py +5 -3
  72. biotite/structure/io/pdb/hybrid36.c +63 -43
  73. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  74. biotite/structure/io/pdbqt/file.py +32 -32
  75. biotite/structure/io/pdbx/__init__.py +13 -6
  76. biotite/structure/io/pdbx/bcif.py +649 -0
  77. biotite/structure/io/pdbx/cif.py +1028 -0
  78. biotite/structure/io/pdbx/component.py +243 -0
  79. biotite/structure/io/pdbx/convert.py +707 -359
  80. biotite/structure/io/pdbx/encoding.c +112813 -0
  81. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  82. biotite/structure/io/pdbx/error.py +14 -0
  83. biotite/structure/io/pdbx/legacy.py +267 -0
  84. biotite/structure/molecules.py +151 -151
  85. biotite/structure/sasa.c +215 -196
  86. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  87. biotite/structure/superimpose.py +158 -115
  88. {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/METADATA +2 -2
  89. {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/RECORD +92 -90
  90. {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/WHEEL +1 -1
  91. biotite/structure/info/amino_acids.json +0 -1556
  92. biotite/structure/info/amino_acids.py +0 -42
  93. biotite/structure/info/carbohydrates.json +0 -1122
  94. biotite/structure/info/carbohydrates.py +0 -39
  95. biotite/structure/info/intra_bonds.msgpack +0 -0
  96. biotite/structure/info/link_types.msgpack +0 -1
  97. biotite/structure/info/nucleotides.json +0 -772
  98. biotite/structure/info/nucleotides.py +0 -39
  99. biotite/structure/info/residue_masses.msgpack +0 -0
  100. biotite/structure/info/residue_names.msgpack +0 -3
  101. biotite/structure/info/residues.msgpack +0 -0
  102. biotite/structure/io/pdbx/file.py +0 -652
  103. {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/LICENSE.rst +0 -0
  104. {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,649 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.pdbx"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["BinaryCIFFile", "BinaryCIFBlock", "BinaryCIFCategory",
8
+ "BinaryCIFColumn", "BinaryCIFData"]
9
+
10
+ from collections.abc import Sequence
11
+ import numpy as np
12
+ import msgpack
13
+ from .component import _Component, _HierarchicalContainer, MaskValue
14
+ from .encoding import decode_stepwise, encode_stepwise, deserialize_encoding, \
15
+ create_uncompressed_encoding, ByteArrayEncoding
16
+ from .error import SerializationError
17
+ from ....file import File, is_binary, is_open_compatible
18
+
19
+
20
class BinaryCIFData(_Component):
    r"""
    The data array of a :class:`BinaryCIFColumn` together with the
    encoding steps used to serialize it.

    Parameters
    ----------
    array : array_like or int or float or str
        The data array to be stored.
        A single item (including a single string) is wrapped into a
        one-element array.
    encoding : list of Encoding
        The encoding steps that are successively applied to the data.
        By default, the encoding is obtained from
        ``create_uncompressed_encoding()``.

    Attributes
    ----------
    array : ndarray
        The stored data array.
    encoding : list of Encoding
        The encoding steps.

    Examples
    --------

    >>> data = BinaryCIFData([1, 2, 3])
    >>> print(data.array)
    [1 2 3]
    >>> print(len(data))
    3
    >>> # A single item is converted into an array
    >>> data = BinaryCIFData("apple")
    >>> print(data.array)
    ['apple']

    Well-chosen encoding can significantly reduce the serialized data
    size:

    >>> # Default uncompressed encoding
    >>> array = np.arange(100)
    >>> uncompressed_bytes = BinaryCIFData(array).serialize()["data"]
    >>> print(len(uncompressed_bytes))
    400
    >>> # Delta encoding followed by run-length encoding
    >>> # [0, 1, 2, ...] -> [0, 1, 1, ...] -> [0, 1, 1, 99]
    >>> compressed_bytes = BinaryCIFData(
    ...     array,
    ...     encoding = [
    ...         # [0, 1, 2, ...] -> [0, 1, 1, ...]
    ...         DeltaEncoding(),
    ...         # [0, 1, 1, ...] -> [0, 1, 1, 99]
    ...         RunLengthEncoding(),
    ...         # [0, 1, 1, 99] -> b"\x00\x00..."
    ...         ByteArrayEncoding()
    ...     ]
    ... ).serialize()["data"]
    >>> print(len(compressed_bytes))
    16
    """

    def __init__(self, array, encoding=None):
        # Strings are sequences as well, but they are treated as a
        # single item, just like any non-sequence scalar
        is_collection = isinstance(array, (Sequence, np.ndarray))
        if isinstance(array, str) or not is_collection:
            array = [array]
        array = np.asarray(array)
        if np.issubdtype(array.dtype, np.object_):
            raise ValueError("Object arrays are not supported")

        self._array = array
        self._encoding = (
            create_uncompressed_encoding(array)
            if encoding is None
            else list(encoding)
        )

    @property
    def array(self):
        return self._array

    @property
    def encoding(self):
        return self._encoding

    @staticmethod
    def subcomponent_class():
        # The data is the lowest level of the hierarchy
        return None

    @staticmethod
    def supercomponent_class():
        return BinaryCIFColumn

    @staticmethod
    def deserialize(content):
        """
        Create a :class:`BinaryCIFData` object from its serialized
        form, i.e. a dictionary with the keys ``"data"`` and
        ``"encoding"``.
        """
        steps = [deserialize_encoding(step) for step in content["encoding"]]
        decoded = decode_stepwise(content["data"], steps)
        return BinaryCIFData(decoded, steps)

    def serialize(self):
        """
        Encode the array with the stored encoding steps.

        Returns
        -------
        content : dict
            The encoded bytes (``"data"``) and the serialized encoding
            steps (``"encoding"``).

        Raises
        ------
        SerializationError
            If the result of the encoding steps is not a ``bytes``
            object.
        """
        payload = encode_stepwise(self._array, self._encoding)
        if not isinstance(payload, bytes):
            raise SerializationError(
                "Final encoding must return 'bytes'"
            )
        return {
            "data": payload,
            "encoding": [step.serialize() for step in self._encoding],
        }

    def __len__(self):
        return len(self._array)

    def __eq__(self, other):
        # Equal only if type, array content and encoding steps all agree
        if not isinstance(other, type(self)):
            return False
        same_values = np.array_equal(self._array, other._array)
        if same_values and not (self._encoding != other._encoding):
            return True
        return False
138
+
139
+
140
class BinaryCIFColumn(_Component):
    """
    This class represents a single column in a
    :class:`BinaryCIFCategory`.

    Parameters
    ----------
    data : BinaryCIFData or array_like or int or float or str
        The data to be stored.
        Anything that is not a :class:`BinaryCIFData` is coerced into
        such an object.
    mask : BinaryCIFData or array_like, dtype=int or int
        The mask to be stored.
        If given, the mask indicates whether the `data` is
        inapplicable (``.``) or missing (``?``) in some rows.
        The data presence is indicated by values from the
        :class:`MaskValue` enum.
        Anything that is not a :class:`BinaryCIFData` is coerced into
        such an object.
        By default, no mask is created.

    Attributes
    ----------
    data : BinaryCIFData
        The stored data.
    mask : BinaryCIFData
        The mask that indicates whether certain data elements are
        inapplicable or missing.
        If no mask is present, this attribute is ``None``.

    Examples
    --------

    >>> print(BinaryCIFColumn([1, 2, 3]).as_array())
    [1 2 3]
    >>> mask = [MaskValue.PRESENT, MaskValue.INAPPLICABLE, MaskValue.MISSING]
    >>> # Mask values are only inserted into string arrays
    >>> print(BinaryCIFColumn([1, 2, 3], mask).as_array(int))
    [1 2 3]
    >>> print(BinaryCIFColumn([1, 2, 3], mask).as_array(str))
    ['1' '.' '?']
    >>> print(BinaryCIFColumn([1]).as_item())
    1
    >>> print(BinaryCIFColumn([1], mask=[MaskValue.MISSING]).as_item())
    ?
    """

    def __init__(self, data, mask=None):
        if not isinstance(data, BinaryCIFData):
            data = BinaryCIFData(data)

        if mask is not None:
            if not isinstance(mask, BinaryCIFData):
                mask = BinaryCIFData(mask)
            # Every data element needs exactly one mask element
            data_length = len(data)
            mask_length = len(mask)
            if data_length != mask_length:
                raise IndexError(
                    f"Data has length {data_length}, "
                    f"but mask has length {mask_length}"
                )

        self._data = data
        self._mask = mask

    @property
    def data(self):
        return self._data

    @property
    def mask(self):
        return self._mask

    @staticmethod
    def subcomponent_class():
        return BinaryCIFData

    @staticmethod
    def supercomponent_class():
        return BinaryCIFCategory

    def as_item(self):
        """
        Get the only item in the data of this column.

        If the data is masked as inapplicable or missing, ``'.'`` or
        ``'?'`` is returned, respectively.
        If the data contains more than one item, an exception is raised.

        Returns
        -------
        item : str or int or float
            The item in the data.
        """
        if self._mask is None:
            return self._data.array.item()

        presence = self._mask.array.item()
        if presence is None or presence == MaskValue.PRESENT:
            return self._data.array.item()
        if presence == MaskValue.INAPPLICABLE:
            return "."
        if presence == MaskValue.MISSING:
            return "?"
        # A mask value outside of the three cases above gives 'None'
        return None

    def as_array(self, dtype=None, masked_value=None):
        """
        Get the data of this column as an :class:`ndarray`.

        This is a shortcut to get ``BinaryCIFColumn.data.array``.
        Furthermore, the mask is applied to the data.

        Parameters
        ----------
        dtype : dtype-like, optional
            The data type the array should be converted to.
            By default, the original type is used.
        masked_value : str or int or float, optional
            The value that should be used for masked elements, i.e.
            ``MaskValue.INAPPLICABLE`` or ``MaskValue.MISSING``.
            By default, masked elements are converted to ``'.'`` or
            ``'?'`` depending on the :class:`MaskValue`, if a string
            `dtype` is requested.
            For other types the masked elements are left as they are,
            or set to zero if the data needs to be converted to a
            different kind of type.

        Returns
        -------
        array : ndarray
            The column data with the mask applied.
        """
        values = self._data.array
        if dtype is None:
            dtype = values.dtype

        # Without a mask there is nothing to apply
        if self._mask is None:
            return values.astype(dtype, copy=False)

        if np.issubdtype(dtype, np.str_):
            if masked_value is None:
                inapplicable_fill, missing_fill = ".", "?"
            else:
                inapplicable_fill = missing_fill = masked_value
            # Copy, as otherwise original data would be overwritten
            # with mask values
            result = values.astype(dtype, copy=True)
            result[self._mask.array == MaskValue.INAPPLICABLE] = (
                inapplicable_fill
            )
            result[self._mask.array == MaskValue.MISSING] = missing_fill
            return result

        if np.dtype(dtype).kind == values.dtype.kind:
            if masked_value is None:
                return values.astype(dtype, copy=False)
            result = values.astype(dtype, copy=True)
            result[self._mask.array == MaskValue.INAPPLICABLE] = masked_value
            result[self._mask.array == MaskValue.MISSING] = masked_value
            return result

        # Array needs to be converted, but masked values are
        # not necessarily convertible
        # (e.g. '' cannot be converted to int)
        # -> only the present elements are converted
        element_count = len(self._data)
        if masked_value is None:
            result = np.zeros(element_count, dtype=dtype)
        else:
            result = np.full(element_count, masked_value, dtype=dtype)
        is_present = self._mask.array == MaskValue.PRESENT
        result[is_present] = values[is_present].astype(dtype)
        return result

    @staticmethod
    def deserialize(content):
        """
        Create a :class:`BinaryCIFColumn` from its serialized form,
        i.e. a dictionary with the keys ``"data"`` and ``"mask"``,
        where the mask may be ``None``.
        """
        data = BinaryCIFData.deserialize(content["data"])
        if content["mask"] is not None:
            mask = BinaryCIFData.deserialize(content["mask"])
        else:
            mask = None
        return BinaryCIFColumn(data, mask)

    def serialize(self):
        """
        Serialize the data and, if present, the mask of this column.

        Returns
        -------
        content : dict
            The serialized data (``"data"``) and mask (``"mask"``).
            The mask entry is ``None``, if the column has no mask.
        """
        serialized_mask = None
        serialized_data = self._data.serialize()
        if self._mask is not None:
            serialized_mask = self._mask.serialize()
        return {"data": serialized_data, "mask": serialized_mask}

    def __len__(self):
        return len(self._data)

    def __eq__(self, other):
        # Equal only if type, data and mask all agree
        if not isinstance(other, type(self)):
            return False
        if self._data != other._data or self._mask != other._mask:
            return False
        return True
324
+
325
+
326
class BinaryCIFCategory(_HierarchicalContainer):
    """
    This class represents a category in a :class:`BinaryCIFBlock`.

    Columns can be accessed and modified like a dictionary.
    The values are :class:`BinaryCIFColumn` objects.

    Parameters
    ----------
    columns : dict, optional
        The columns of the category.
        The keys are the column names and the values are the
        :class:`BinaryCIFColumn` objects (or objects that can be coerced
        into a :class:`BinaryCIFColumn`).
        By default, an empty category is created.
        Each column must have the same length.
    row_count : int, optional
        The number of rows in the category.
        By default, it is taken from the length of the first column
        as soon as it is required.

    Attributes
    ----------
    row_count : int
        The number of rows in the category, i.e. the length of each
        column.

    Examples
    --------

    >>> # Add column on creation
    >>> category = BinaryCIFCategory({"fruit": ["apple", "banana"]})
    >>> # Add column later on
    >>> category["taste"] = ["delicious", "tasty"]
    >>> # Add column the formal way
    >>> category["color"] = BinaryCIFColumn(BinaryCIFData(["red", "yellow"]))
    >>> # Access a column
    >>> print(category["fruit"].as_array())
    ['apple' 'banana']
    """

    def __init__(self, columns=None, row_count=None):
        coerced = {}
        if columns is not None:
            for name, column in columns.items():
                # Columns and dictionaries are handed over as they are,
                # everything else is wrapped into a column
                if not isinstance(column, (BinaryCIFColumn, dict)):
                    column = BinaryCIFColumn(column)
                coerced[name] = column

        self._row_count = row_count
        super().__init__(coerced)

    @property
    def row_count(self):
        # NOTE(review): for a category without any column and without
        # an explicit row count, 'next()' raises 'StopIteration' here
        if self._row_count is None:
            # Row count is not determined yet
            # -> check the length of the first column
            first_column = next(iter(self.values()))
            self._row_count = len(first_column)
        return self._row_count

    @staticmethod
    def subcomponent_class():
        return BinaryCIFColumn

    @staticmethod
    def supercomponent_class():
        return BinaryCIFBlock

    def filter(self, index):
        """
        Create a new category from the result of ``filter(index)``
        called on each column.

        NOTE(review): this relies on the columns providing a
        ``filter()`` method, which is not defined in the visible part
        of this module -- confirm that it exists.
        """
        filtered_columns = {
            name: column.filter(index) for name, column in self.items()
        }
        # Create placeholder array just to check how many elements
        # remain after filtering
        placeholder = np.empty(self.row_count, dtype=bool)
        return BinaryCIFCategory(filtered_columns, len(placeholder[index]))

    @staticmethod
    def deserialize(content):
        """
        Create a :class:`BinaryCIFCategory` from its serialized form,
        i.e. a dictionary with the keys ``"columns"`` and
        ``"rowCount"``.
        """
        columns = BinaryCIFCategory._deserialize_elements(
            content["columns"], "name"
        )
        return BinaryCIFCategory(columns, content["rowCount"])

    def serialize(self):
        """
        Serialize this category.

        Returns
        -------
        content : dict
            The row count (``"rowCount"``) and the serialized columns
            (``"columns"``).

        Raises
        ------
        SerializationError
            If the category has no column or if the length of a column
            differs from the row count.
        """
        if len(self) == 0:
            raise SerializationError("At least one column is required")

        for column_name, column in self.items():
            column_length = len(column)
            if self._row_count is None:
                # The first column defines the row count,
                # if it is not known yet
                self._row_count = column_length
            elif column_length != self._row_count:
                raise SerializationError(
                    f"All columns must have the same length, "
                    f"but '{column_name}' has length {len(column)}, "
                    f"while the first column has row_count {self._row_count}"
                )

        return {
            "rowCount": self.row_count,
            "columns": self._serialize_elements("name"),
        }

    def __setitem__(self, key, element):
        # Same coercion as in the constructor
        if isinstance(element, (BinaryCIFColumn, dict)):
            column = element
        else:
            column = BinaryCIFColumn(element)
        super().__setitem__(key, column)
433
+
434
+
435
class BinaryCIFBlock(_HierarchicalContainer):
    """
    This class represents a block in a :class:`BinaryCIFFile`.

    Categories can be accessed and modified like a dictionary.
    The values are :class:`BinaryCIFCategory` objects.

    Parameters
    ----------
    categories : dict, optional
        The categories of the block.
        The keys are the category names and the values are the
        :class:`BinaryCIFCategory` objects.
        By default, an empty block is created.

    Notes
    -----
    The category names do not include the leading underscore character.
    This character is automatically prepended to the name when a
    category is accessed, set or deleted and it is removed again when
    iterating over the category names.

    Examples
    --------

    >>> # Add category on creation
    >>> block = BinaryCIFBlock({"foo": BinaryCIFCategory({"some_column": 1})})
    >>> # Add category later on
    >>> block["bar"] = BinaryCIFCategory({"another_column": [2, 3]})
    >>> # Access a column
    >>> print(block["bar"]["another_column"].as_array())
    [2 3]
    """

    def __init__(self, categories=None):
        # NOTE(review): the keys given here are passed on unchanged,
        # i.e. without the '_' prefix that '__setitem__()' adds
        # -- confirm that the parent class or the callers account for
        # this
        super().__init__(categories)

    @staticmethod
    def subcomponent_class():
        return BinaryCIFCategory

    @staticmethod
    def supercomponent_class():
        return BinaryCIFFile

    @staticmethod
    def deserialize(content):
        """
        Create a :class:`BinaryCIFBlock` from its serialized form,
        i.e. a dictionary with the key ``"categories"``.
        """
        return BinaryCIFBlock(
            BinaryCIFBlock._deserialize_elements(
                content["categories"], "name"
            )
        )

    def serialize(self):
        """
        Serialize this block.

        Returns
        -------
        content : dict
            The serialized categories (``"categories"``).
        """
        return {"categories": self._serialize_elements("name")}

    def __getitem__(self, key):
        # Actual bcif files use leading '_' as categories
        return super().__getitem__("_" + key)

    def __setitem__(self, key, element):
        return super().__setitem__("_" + key, element)

    def __delitem__(self, key):
        # Delegate to the deletion of the parent class:
        # '__setitem__()' would require an element as second argument
        # and hence would fail with a 'TypeError'
        return super().__delitem__("_" + key)

    def __iter__(self):
        # Remove only the single '_' that is prepended by the item
        # methods above:
        # Stripping all leading underscores would yield names, that
        # cannot be used to look up the category again, if the category
        # name itself starts with an underscore
        return (
            key[1:] if key.startswith("_") else key
            for key in super().__iter__()
        )
502
+
503
+
504
class BinaryCIFFile(File, _HierarchicalContainer):
    """
    This class represents a *BinaryCIF* file.

    The blocks of the file can be accessed and modified like a
    dictionary.
    The values are :class:`BinaryCIFBlock` objects.

    To parse or write a structure from/to a :class:`BinaryCIFFile`
    object, use the high-level :func:`get_structure()` or
    :func:`set_structure()` function respectively.

    Parameters
    ----------
    blocks : dict, optional
        The blocks of the file.
        The keys are the block names and the values are the
        :class:`BinaryCIFBlock` objects.
        By default, no blocks are passed to the container.

    Notes
    -----
    The content of *BinaryCIF* files is lazily deserialized:
    Only when a column is accessed, the time consuming data decoding
    is performed.
    The decoded :class:`BinaryCIFBlock`/:class:`BinaryCIFCategory`
    objects are cached for subsequent accesses.

    Attributes
    ----------
    block : BinaryCIFBlock
        The sole block of the file.
        If the file contains no block or multiple blocks, an exception
        is raised.

    Examples
    --------
    Read a *BinaryCIF* file and access its content:

    >>> import os.path
    >>> file = BinaryCIFFile.read(os.path.join(path_to_structures, "1l2y.bcif"))
    >>> print(file["1L2Y"]["citation_author"]["name"].as_array())
    ['Neidigh, J.W.' 'Fesinmeyer, R.M.' 'Andersen, N.H.']
    >>> # Access the only block in the file
    >>> print(file.block["entity"]["pdbx_description"].as_item())
    TC5b

    Create a *BinaryCIF* file and write it to disk:

    >>> category = BinaryCIFCategory({"some_column": "some_value"})
    >>> block = BinaryCIFBlock({"some_category": category})
    >>> file = BinaryCIFFile({"some_block": block})
    >>> file.write(os.path.join(path_to_directory, "some_file.bcif"))
    """

    def __init__(self, blocks=None):
        File.__init__(self)
        _HierarchicalContainer.__init__(self, blocks)

    @property
    def block(self):
        block_count = len(self)
        # An empty file is reported separately, as the message about
        # multiple blocks would be misleading in this case
        if block_count == 0:
            raise ValueError("There are no blocks in the file")
        if block_count != 1:
            raise ValueError("There are multiple blocks in the file")
        return self[next(iter(self))]

    @staticmethod
    def subcomponent_class():
        return BinaryCIFBlock

    @staticmethod
    def supercomponent_class():
        # The file is the top level of the hierarchy
        return None

    @staticmethod
    def deserialize(content):
        """
        Create a :class:`BinaryCIFFile` from its serialized form,
        i.e. a dictionary with the key ``"dataBlocks"``.
        """
        return BinaryCIFFile(
            BinaryCIFFile._deserialize_elements(
                content["dataBlocks"], "header"
            )
        )

    def serialize(self):
        """
        Serialize this file.

        Returns
        -------
        content : dict
            The serialized blocks (``"dataBlocks"``).
        """
        return {"dataBlocks": self._serialize_elements("header")}

    @classmethod
    def read(cls, file):
        """
        Read a *BinaryCIF* file.

        Parameters
        ----------
        file : file-like object or str
            The file to be read.
            Alternatively a file path can be supplied.

        Returns
        -------
        file_object : BinaryCIFFile
            The parsed file.

        Raises
        ------
        TypeError
            If a file-like object is given, that is not opened in
            binary mode.
        """
        # File name
        if is_open_compatible(file):
            with open(file, "rb") as f:
                packed_bytes = f.read()
        # File object
        else:
            if not is_binary(file):
                raise TypeError("A file opened in 'binary' mode is required")
            packed_bytes = file.read()

        # Both input variants share the same unpacking step
        return BinaryCIFFile.deserialize(
            msgpack.unpackb(packed_bytes, use_list=True, raw=False)
        )

    def write(self, file):
        """
        Write contents into a *BinaryCIF* file.

        Parameters
        ----------
        file : file-like object or str
            The file to be written to.
            Alternatively, a file path can be supplied.

        Raises
        ------
        TypeError
            If a file-like object is given, that is not opened in
            binary mode.
        """
        serialized_content = self.serialize()
        # Metadata entries written next to the data blocks
        serialized_content["encoder"] = "biotite"
        serialized_content["version"] = "0.3.0"
        packed_bytes = msgpack.packb(
            serialized_content, use_bin_type=True, default=_encode_numpy
        )
        if is_open_compatible(file):
            with open(file, "wb") as f:
                f.write(packed_bytes)
        else:
            if not is_binary(file):
                raise TypeError("A file opened in 'binary' mode is required")
            file.write(packed_bytes)
636
+
637
+
638
def _encode_numpy(item):
    """
    Turn a NumPy scalar into the corresponding native Python object,
    since *Msgpack* is not able to pack NumPy types (e.g. float32).

    This function is passed to the Msgpack packer via its `default`
    parameter.
    Objects that are not NumPy scalars are rejected with a
    :class:`TypeError`.
    """
    if not isinstance(item, np.generic):
        raise TypeError(f"can not serialize '{type(item).__name__}' object")
    return item.item()