biotite 0.41.2__cp312-cp312-win_amd64.whl → 1.0.1__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205) hide show
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +1 -1
  3. biotite/application/application.py +20 -10
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +3 -3
  52. biotite/sequence/align/__init__.py +2 -2
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
  60. biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
  63. biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
  66. biotite/sequence/align/multiple.pyx +1 -1
  67. biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
  68. biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
  74. biotite/sequence/alphabet.py +51 -65
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
  102. biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
  103. biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +15 -17
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +246 -236
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
  112. biotite/structure/bonds.pyx +29 -32
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cp312-win_amd64.pyd +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +83 -78
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +4 -5
  130. biotite/structure/info/groups.py +1 -3
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -44
  159. biotite/structure/io/pdbx/cif.py +140 -110
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +260 -258
  162. biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
  163. biotite/structure/io/trajfile.py +90 -107
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
  184. biotite-1.0.1.dist-info/RECORD +322 -0
  185. biotite/structure/io/ctab.py +0 -72
  186. biotite/structure/io/mmtf/__init__.py +0 -21
  187. biotite/structure/io/mmtf/assembly.py +0 -214
  188. biotite/structure/io/mmtf/convertarray.cp312-win_amd64.pyd +0 -0
  189. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  190. biotite/structure/io/mmtf/convertfile.cp312-win_amd64.pyd +0 -0
  191. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  192. biotite/structure/io/mmtf/decode.cp312-win_amd64.pyd +0 -0
  193. biotite/structure/io/mmtf/decode.pyx +0 -152
  194. biotite/structure/io/mmtf/encode.cp312-win_amd64.pyd +0 -0
  195. biotite/structure/io/mmtf/encode.pyx +0 -183
  196. biotite/structure/io/mmtf/file.py +0 -233
  197. biotite/structure/io/npz/__init__.py +0 -20
  198. biotite/structure/io/npz/file.py +0 -152
  199. biotite/structure/io/pdbx/legacy.py +0 -267
  200. biotite/structure/io/tng/__init__.py +0 -13
  201. biotite/structure/io/tng/file.py +0 -46
  202. biotite/temp.py +0 -86
  203. biotite-0.41.2.dist-info/RECORD +0 -340
  204. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
  205. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
@@ -18,30 +18,41 @@ __all__ = [
18
18
  import itertools
19
19
  import warnings
20
20
  import numpy as np
21
- from ....file import InvalidFileError
22
- from ....sequence.seqtypes import NucleotideSequence, ProteinSequence
23
- from ...atoms import AtomArray, AtomArrayStack, repeat
24
- from ...bonds import BondList, BondType, connect_via_residue_names
25
- from ...box import unitcell_from_vectors, vectors_from_unitcell
26
- from ...filter import filter_first_altloc, filter_highest_occupancy_altloc
27
- from ...residues import get_residue_count, get_residue_starts_for
28
- from ...error import BadStructureError
29
- from ...util import matrix_rotate
30
- from .legacy import PDBxFile
31
- from .component import MaskValue
32
- from .cif import CIFFile, CIFBlock
33
- from .bcif import BinaryCIFFile, BinaryCIFBlock, BinaryCIFColumn
34
- from .encoding import StringArrayEncoding
35
-
21
+ from biotite.file import InvalidFileError
22
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
23
+ from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
24
+ from biotite.structure.bonds import BondList, BondType, connect_via_residue_names
25
+ from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
26
+ from biotite.structure.error import BadStructureError
27
+ from biotite.structure.filter import (
28
+ filter_first_altloc,
29
+ filter_highest_occupancy_altloc,
30
+ )
31
+ from biotite.structure.io.pdbx.bcif import (
32
+ BinaryCIFBlock,
33
+ BinaryCIFColumn,
34
+ BinaryCIFFile,
35
+ )
36
+ from biotite.structure.io.pdbx.cif import CIFBlock, CIFFile
37
+ from biotite.structure.io.pdbx.component import MaskValue
38
+ from biotite.structure.io.pdbx.encoding import StringArrayEncoding
39
+ from biotite.structure.residues import get_residue_count, get_residue_starts_for
40
+ from biotite.structure.util import matrix_rotate
36
41
 
37
42
  # Connection types in `struct_conn` category that refer to covalent bonds
38
43
  PDBX_COVALENT_TYPES = [
39
- "covale", "covale_base", "covale_phosphate", "covale_sugar",
40
- "disulf", "modres", "modres_link", "metalc"
44
+ "covale",
45
+ "covale_base",
46
+ "covale_phosphate",
47
+ "covale_sugar",
48
+ "disulf",
49
+ "modres",
50
+ "modres_link",
51
+ "metalc",
41
52
  ]
42
53
  # Map 'struct_conn' bond orders to 'BondType'...
43
54
  PDBX_BOND_ORDER_TO_TYPE = {
44
- "": BondType.ANY,
55
+ "": BondType.ANY,
45
56
  "sing": BondType.SINGLE,
46
57
  "doub": BondType.DOUBLE,
47
58
  "trip": BondType.TRIPLE,
@@ -61,13 +72,13 @@ PDBX_BOND_TYPE_TO_ORDER = {
61
72
  }
62
73
  # Map 'chem_comp_bond' bond orders and aromaticity to 'BondType'...
63
74
  COMP_BOND_ORDER_TO_TYPE = {
64
- ("SING", "N") : BondType.SINGLE,
65
- ("DOUB", "N") : BondType.DOUBLE,
66
- ("TRIP", "N") : BondType.TRIPLE,
67
- ("QUAD", "N") : BondType.QUADRUPLE,
68
- ("SING", "Y") : BondType.AROMATIC_SINGLE,
69
- ("DOUB", "Y") : BondType.AROMATIC_DOUBLE,
70
- ("TRIP", "Y") : BondType.AROMATIC_TRIPLE,
75
+ ("SING", "N"): BondType.SINGLE,
76
+ ("DOUB", "N"): BondType.DOUBLE,
77
+ ("TRIP", "N"): BondType.TRIPLE,
78
+ ("QUAD", "N"): BondType.QUADRUPLE,
79
+ ("SING", "Y"): BondType.AROMATIC_SINGLE,
80
+ ("DOUB", "Y"): BondType.AROMATIC_DOUBLE,
81
+ ("TRIP", "Y"): BondType.AROMATIC_TRIPLE,
71
82
  }
72
83
  # ...and vice versa
73
84
  COMP_BOND_TYPE_TO_ORDER = {
@@ -98,16 +109,15 @@ def _filter(category, index):
98
109
  Column = Category.subcomponent_class()
99
110
  Data = Column.subcomponent_class()
100
111
 
101
- return Category({
102
- key: Column(
103
- Data(column.data.array[index]),
104
- (
105
- Data(column.mask.array[index])
106
- if column.mask is not None else None
112
+ return Category(
113
+ {
114
+ key: Column(
115
+ Data(column.data.array[index]),
116
+ (Data(column.mask.array[index]) if column.mask is not None else None),
107
117
  )
108
- )
109
- for key, column in category.items()
110
- })
118
+ for key, column in category.items()
119
+ }
120
+ )
111
121
 
112
122
 
113
123
  def get_sequence(pdbx_file, data_block=None):
@@ -134,26 +144,47 @@ def get_sequence(pdbx_file, data_block=None):
134
144
 
135
145
  Returns
136
146
  -------
137
- sequences : list of Sequence
138
- The protein and nucleotide sequences for each entity
139
- (equivalent to chains in most cases).
147
+ sequence_dict : Dictionary of Sequences
148
+ Dictionary keys are derived from ``entity_poly.pdbx_strand_id``
149
+ (equivalent to chain_id and atom_site.auth_asym_id
150
+ in most cases). Dictionary values are sequences.
151
+
152
+ Notes
153
+ -----
154
+ The ``entity_poly.pdbx_seq_one_letter_code_can`` field contains the initial
155
+ complete sequence. If the structure represents a truncated or spliced
156
+ version of this initial sequence, it will include only a subset of the
157
+ initial sequence. Use biotite.structure.get_residues to retrieve only
158
+ the residues that are represented in the structure.
140
159
  """
160
+
141
161
  block = _get_block(pdbx_file, data_block)
162
+ poly_category = block["entity_poly"]
142
163
 
143
- poly_category= block["entity_poly"]
144
164
  seq_string = poly_category["pdbx_seq_one_letter_code_can"].as_array(str)
145
165
  seq_type = poly_category["type"].as_array(str)
146
- sequences = []
147
- for string, stype in zip(seq_string, seq_type):
148
- sequence = _convert_string_to_sequence(string, stype)
149
- if sequence is not None:
150
- sequences.append(sequence)
151
- return sequences
166
+
167
+ sequences = [
168
+ _convert_string_to_sequence(string, stype)
169
+ for string, stype in zip(seq_string, seq_type)
170
+ ]
171
+
172
+ strand_ids = poly_category["pdbx_strand_id"].as_array(str)
173
+ strand_ids = [strand_id.split(",") for strand_id in strand_ids]
174
+
175
+ sequence_dict = {
176
+ strand_id: sequence
177
+ for sequence, strand_ids in zip(sequences, strand_ids)
178
+ for strand_id in strand_ids
179
+ if sequence is not None
180
+ }
181
+
182
+ return sequence_dict
152
183
 
153
184
 
154
185
  def get_model_count(pdbx_file, data_block=None):
155
186
  """
156
- Get the number of models contained in a :class:`PDBxFile`.
187
+ Get the number of models contained in a file.
157
188
 
158
189
  Parameters
159
190
  ----------
@@ -172,17 +203,23 @@ def get_model_count(pdbx_file, data_block=None):
172
203
  The number of models.
173
204
  """
174
205
  block = _get_block(pdbx_file, data_block)
175
- return len(_get_model_starts(
176
- block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32)
177
- ))
206
+ return len(
207
+ _get_model_starts(block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32))
208
+ )
178
209
 
179
210
 
180
- def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
181
- extra_fields=None, use_author_fields=True,
182
- include_bonds=False):
211
+ def get_structure(
212
+ pdbx_file,
213
+ model=None,
214
+ data_block=None,
215
+ altloc="first",
216
+ extra_fields=None,
217
+ use_author_fields=True,
218
+ include_bonds=False,
219
+ ):
183
220
  """
184
221
  Create an :class:`AtomArray` or :class:`AtomArrayStack` from the
185
- ``atom_site`` category in a :class:`PDBxFile`.
222
+ ``atom_site`` category in a file.
186
223
 
187
224
  Parameters
188
225
  ----------
@@ -228,7 +265,7 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
228
265
  for example both, ``label_seq_id`` and ``auth_seq_id`` describe
229
266
  the ID of the residue.
230
267
  While the ``label_xxx`` fields can be used as official pointers
231
- to other categories in the :class:`PDBxFile`, the ``auth_xxx``
268
+ to other categories in the file, the ``auth_xxx``
232
269
  fields are set by the author(s) of the structure and are
233
270
  consistent with the corresponding values in PDB files.
234
271
  If `use_author_fields` is true, the annotation arrays will be
@@ -290,12 +327,21 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
290
327
  "instead"
291
328
  )
292
329
 
293
- atoms.coord[:, :, 0] = atom_site["Cartn_x"].as_array(np.float32) \
294
- .reshape((model_count, model_length))
295
- atoms.coord[:, :, 1] = atom_site["Cartn_y"].as_array(np.float32) \
296
- .reshape((model_count, model_length))
297
- atoms.coord[:, :, 2] = atom_site["Cartn_z"].as_array(np.float32) \
298
- .reshape((model_count, model_length))
330
+ atoms.coord[:, :, 0] = (
331
+ atom_site["Cartn_x"]
332
+ .as_array(np.float32)
333
+ .reshape((model_count, model_length))
334
+ )
335
+ atoms.coord[:, :, 1] = (
336
+ atom_site["Cartn_y"]
337
+ .as_array(np.float32)
338
+ .reshape((model_count, model_length))
339
+ )
340
+ atoms.coord[:, :, 2] = (
341
+ atom_site["Cartn_z"]
342
+ .as_array(np.float32)
343
+ .reshape((model_count, model_length))
344
+ )
299
345
 
300
346
  box = _get_box(block)
301
347
  if box is not None:
@@ -325,31 +371,25 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
325
371
  atoms.box = _get_box(block)
326
372
 
327
373
  # The below part is the same for both, AtomArray and AtomArrayStack
328
- _fill_annotations(
329
- atoms, model_atom_site, extra_fields, use_author_fields
330
- )
374
+ _fill_annotations(atoms, model_atom_site, extra_fields, use_author_fields)
331
375
  if include_bonds:
332
376
  if "chem_comp_bond" in block:
333
377
  try:
334
- custom_bond_dict = _parse_intra_residue_bonds(
335
- block["chem_comp_bond"]
336
- )
378
+ custom_bond_dict = _parse_intra_residue_bonds(block["chem_comp_bond"])
337
379
  except KeyError:
338
380
  warnings.warn(
339
381
  "The 'chem_comp_bond' category has missing columns, "
340
382
  "falling back to using Chemical Component Dictionary",
341
- UserWarning
383
+ UserWarning,
342
384
  )
343
385
  custom_bond_dict = None
344
- bonds = connect_via_residue_names(
345
- atoms, custom_bond_dict=custom_bond_dict
346
- )
386
+ bonds = connect_via_residue_names(atoms, custom_bond_dict=custom_bond_dict)
347
387
  else:
348
388
  bonds = connect_via_residue_names(atoms)
349
389
  if "struct_conn" in block:
350
- bonds = bonds.merge(_parse_inter_residue_bonds(
351
- model_atom_site, block["struct_conn"]
352
- ))
390
+ bonds = bonds.merge(
391
+ _parse_inter_residue_bonds(model_atom_site, block["struct_conn"])
392
+ )
353
393
  atoms.bonds = bonds
354
394
  atoms = _filter_altloc(atoms, model_atom_site, altloc)
355
395
 
@@ -357,10 +397,6 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
357
397
 
358
398
 
359
399
  def _get_block(pdbx_component, block_name):
360
- if isinstance(pdbx_component, PDBxFile):
361
- # The deprecated 'PDBxFile' is a thin wrapper around 'CIFFile'
362
- pdbx_component = pdbx_component.cif_file
363
-
364
400
  if not isinstance(pdbx_component, (CIFBlock, BinaryCIFBlock)):
365
401
  # Determine block
366
402
  if block_name is None:
@@ -372,24 +408,24 @@ def _get_block(pdbx_component, block_name):
372
408
 
373
409
 
374
410
  def _get_or_fallback(category, key, fallback_key):
375
- """
376
- Return column related to key in category if it exists,
377
- otherwise try to get the column related to fallback key.
378
- """
379
- if key not in category:
380
- warnings.warn(
381
- f"Attribute '{key}' not found within 'atom_site' category. "
382
- f"The fallback attribute '{fallback_key}' will be used instead",
383
- UserWarning
384
- )
385
- try:
386
- return category[fallback_key]
387
- except KeyError as key_exc:
388
- raise InvalidFileError(
389
- f"Fallback attribute '{fallback_key}' not found within "
390
- "'atom_site' category"
391
- ) from key_exc
392
- return category[key]
411
+ """
412
+ Return column related to key in category if it exists,
413
+ otherwise try to get the column related to fallback key.
414
+ """
415
+ if key not in category:
416
+ warnings.warn(
417
+ f"Attribute '{key}' not found within 'atom_site' category. "
418
+ f"The fallback attribute '{fallback_key}' will be used instead",
419
+ UserWarning,
420
+ )
421
+ try:
422
+ return category[fallback_key]
423
+ except KeyError as key_exc:
424
+ raise InvalidFileError(
425
+ f"Fallback attribute '{fallback_key}' not found within "
426
+ "'atom_site' category"
427
+ ) from key_exc
428
+ return category[key]
393
429
 
394
430
 
395
431
  def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
@@ -408,78 +444,52 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
408
444
  instead of ``label_``.
409
445
  """
410
446
 
411
- prefix, alt_prefix = (
412
- ("auth", "label") if use_author_fields else ("label", "auth")
413
- )
447
+ prefix, alt_prefix = ("auth", "label") if use_author_fields else ("label", "auth")
414
448
 
415
449
  array.set_annotation(
416
450
  "chain_id",
417
451
  _get_or_fallback(
418
452
  atom_site, f"{prefix}_asym_id", f"{alt_prefix}_asym_id"
419
- ).as_array("U4")
453
+ ).as_array(str),
420
454
  )
421
455
  array.set_annotation(
422
456
  "res_id",
423
457
  _get_or_fallback(
424
458
  atom_site, f"{prefix}_seq_id", f"{alt_prefix}_seq_id"
425
- ).as_array(int, -1)
426
- )
427
- array.set_annotation(
428
- "ins_code",
429
- atom_site["pdbx_PDB_ins_code"].as_array("U1", "")
459
+ ).as_array(int, -1),
430
460
  )
461
+ array.set_annotation("ins_code", atom_site["pdbx_PDB_ins_code"].as_array(str, ""))
431
462
  array.set_annotation(
432
463
  "res_name",
433
464
  _get_or_fallback(
434
465
  atom_site, f"{prefix}_comp_id", f"{alt_prefix}_comp_id"
435
- ).as_array("U5")
436
- )
437
- array.set_annotation(
438
- "hetero",
439
- atom_site["group_PDB"].as_array(str) == "HETATM"
466
+ ).as_array(str),
440
467
  )
468
+ array.set_annotation("hetero", atom_site["group_PDB"].as_array(str) == "HETATM")
441
469
  array.set_annotation(
442
470
  "atom_name",
443
471
  _get_or_fallback(
444
472
  atom_site, f"{prefix}_atom_id", f"{alt_prefix}_atom_id"
445
- ).as_array("U6")
446
- )
447
- array.set_annotation(
448
- "element",
449
- atom_site["type_symbol"].as_array("U2")
473
+ ).as_array(str),
450
474
  )
475
+ array.set_annotation("element", atom_site["type_symbol"].as_array(str))
451
476
 
452
477
  if "atom_id" in extra_fields:
453
- array.set_annotation(
454
- "atom_id",
455
- atom_site["id"].as_array(int)
456
- )
478
+ array.set_annotation("atom_id", atom_site["id"].as_array(int))
457
479
  extra_fields.remove("atom_id")
458
480
  if "b_factor" in extra_fields:
459
- array.set_annotation(
460
- "b_factor",
461
- atom_site["B_iso_or_equiv"].as_array(float)
462
- )
481
+ array.set_annotation("b_factor", atom_site["B_iso_or_equiv"].as_array(float))
463
482
  extra_fields.remove("b_factor")
464
483
  if "occupancy" in extra_fields:
465
- array.set_annotation(
466
- "occupancy",
467
- atom_site["occupancy"].as_array(float)
468
- )
484
+ array.set_annotation("occupancy", atom_site["occupancy"].as_array(float))
469
485
  extra_fields.remove("occupancy")
470
486
  if "charge" in extra_fields:
471
- array.set_annotation(
472
- "charge",
473
- atom_site["pdbx_formal_charge"].as_array(int, 0)
474
- )
487
+ array.set_annotation("charge", atom_site["pdbx_formal_charge"].as_array(int, 0))
475
488
  extra_fields.remove("charge")
476
489
 
477
490
  # Handle all remaining custom fields
478
491
  for field in extra_fields:
479
- array.set_annotation(
480
- field,
481
- atom_site[field].as_array(str)
482
- )
492
+ array.set_annotation(field, atom_site[field].as_array(str))
483
493
 
484
494
 
485
495
  def _parse_intra_residue_bonds(chem_comp_bond):
@@ -493,7 +503,7 @@ def _parse_intra_residue_bonds(chem_comp_bond):
493
503
  chem_comp_bond["atom_id_1"].as_array(str),
494
504
  chem_comp_bond["atom_id_2"].as_array(str),
495
505
  chem_comp_bond["value_order"].as_array(str),
496
- chem_comp_bond["pdbx_aromatic_flag"].as_array(str)
506
+ chem_comp_bond["pdbx_aromatic_flag"].as_array(str),
497
507
  ):
498
508
  if res_name not in custom_bond_dict:
499
509
  custom_bond_dict[res_name] = {}
@@ -514,33 +524,32 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
514
524
  IDENTITY = "1_555"
515
525
  # Columns in 'atom_site' that should be matched by 'struct_conn'
516
526
  COLUMNS = [
517
- "label_asym_id", "label_comp_id", "label_seq_id", "label_atom_id",
518
- "label_alt_id", "auth_asym_id", "auth_comp_id", "auth_seq_id",
519
- "pdbx_PDB_ins_code"
527
+ "label_asym_id",
528
+ "label_comp_id",
529
+ "label_seq_id",
530
+ "label_atom_id",
531
+ "label_alt_id",
532
+ "auth_asym_id",
533
+ "auth_comp_id",
534
+ "auth_seq_id",
535
+ "pdbx_PDB_ins_code",
520
536
  ]
521
537
 
522
538
  covale_mask = np.isin(
523
539
  struct_conn["conn_type_id"].as_array(str), PDBX_COVALENT_TYPES
524
540
  )
525
541
  if "ptnr1_symmetry" in struct_conn:
526
- covale_mask &= (
527
- struct_conn["ptnr1_symmetry"].as_array(str, IDENTITY) == IDENTITY
528
- )
542
+ covale_mask &= struct_conn["ptnr1_symmetry"].as_array(str, IDENTITY) == IDENTITY
529
543
  if "ptnr2_symmetry" in struct_conn:
530
- covale_mask &= (
531
- struct_conn["ptnr2_symmetry"].as_array(str, IDENTITY) == IDENTITY
532
- )
544
+ covale_mask &= struct_conn["ptnr2_symmetry"].as_array(str, IDENTITY) == IDENTITY
533
545
 
534
546
  atom_indices = [None] * 2
535
547
  for i in range(2):
536
548
  reference_arrays = []
537
549
  query_arrays = []
538
550
  for col_name in COLUMNS:
539
- struct_conn_col_name = _get_struct_conn_col_name(col_name, i+1)
540
- if (
541
- col_name not in atom_site
542
- or struct_conn_col_name not in struct_conn
543
- ):
551
+ struct_conn_col_name = _get_struct_conn_col_name(col_name, i + 1)
552
+ if col_name not in atom_site or struct_conn_col_name not in struct_conn:
544
553
  continue
545
554
  # Ensure both arrays have the same dtype to allow comparison
546
555
  reference = atom_site[col_name].as_array()
@@ -568,7 +577,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
568
577
  atoms_indices_2 = atoms_indices_2[mapping_exists_mask]
569
578
 
570
579
  # Interpret missing values as ANY bonds
571
- bond_order = struct_conn["pdbx_value_order"].as_array("U4", "")
580
+ bond_order = struct_conn["pdbx_value_order"].as_array(str, "")
572
581
  # Consecutively apply the same masks as applied to the atom indices
573
582
  # Logical combination does not work here,
574
583
  # as the second mask was created based on already filtered data
@@ -577,7 +586,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
577
586
 
578
587
  return BondList(
579
588
  atom_site.row_count,
580
- np.stack([atoms_indices_1, atoms_indices_2, bond_types], axis=-1)
589
+ np.stack([atoms_indices_1, atoms_indices_2, bond_types], axis=-1),
581
590
  )
582
591
 
583
592
 
@@ -587,10 +596,13 @@ def _find_matches(query_arrays, reference_arrays):
587
596
  `reference_arrays` where all query values match the reference counterpart.
588
597
  If no match is found for a query, the corresponding index is -1.
589
598
  """
590
- match_masks_for_all_columns = np.stack([
591
- query[:, np.newaxis] == reference[np.newaxis, :]
592
- for query, reference in zip(query_arrays, reference_arrays)
593
- ], axis=-1)
599
+ match_masks_for_all_columns = np.stack(
600
+ [
601
+ query[:, np.newaxis] == reference[np.newaxis, :]
602
+ for query, reference in zip(query_arrays, reference_arrays)
603
+ ],
604
+ axis=-1,
605
+ )
594
606
  match_masks = np.all(match_masks_for_all_columns, axis=-1)
595
607
  query_matches, reference_matches = np.where(match_masks)
596
608
 
@@ -664,14 +676,8 @@ def _filter_model(atom_site, model_starts, model):
664
676
  Reduce the ``atom_site`` category to the values for the given
665
677
  model.
666
678
  """
667
- Category = type(atom_site)
668
- Column = Category.subcomponent_class()
669
- Data = Column.subcomponent_class()
670
-
671
679
  # Append exclusive stop
672
- model_starts = np.append(
673
- model_starts, [atom_site.row_count]
674
- )
680
+ model_starts = np.append(model_starts, [atom_site.row_count])
675
681
  # Indexing starts at 0, but model number starts at 1
676
682
  model_index = model - 1
677
683
  index = slice(model_starts[model_index], model_starts[model_index + 1])
@@ -757,9 +763,7 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
757
763
  # Fill PDBx columns from information
758
764
  # in structures' attribute arrays as good as possible
759
765
  atom_site = Category()
760
- atom_site["group_PDB"] = np.where(
761
- array.hetero, "HETATM", "ATOM"
762
- )
766
+ atom_site["group_PDB"] = np.where(array.hetero, "HETATM", "ATOM")
763
767
  atom_site["type_symbol"] = np.copy(array.element)
764
768
  atom_site["label_atom_id"] = np.copy(array.atom_name)
765
769
  atom_site["label_alt_id"] = Column(
@@ -773,7 +777,7 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
773
777
  atom_site["label_seq_id"] = np.copy(array.res_id)
774
778
  atom_site["pdbx_PDB_ins_code"] = Column(
775
779
  np.copy(array.ins_code),
776
- np.where(array.ins_code == "", MaskValue.INAPPLICABLE, MaskValue.PRESENT)
780
+ np.where(array.ins_code == "", MaskValue.INAPPLICABLE, MaskValue.PRESENT),
777
781
  )
778
782
  atom_site["auth_seq_id"] = atom_site["label_seq_id"]
779
783
  atom_site["auth_comp_id"] = atom_site["label_comp_id"]
@@ -790,11 +794,11 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
790
794
  if "charge" in annot_categories:
791
795
  atom_site["pdbx_formal_charge"] = Column(
792
796
  np.array([f"{c:+d}" if c != 0 else "?" for c in array.charge]),
793
- np.where(array.charge == 0, MaskValue.MISSING, MaskValue.PRESENT)
797
+ np.where(array.charge == 0, MaskValue.MISSING, MaskValue.PRESENT),
794
798
  )
795
799
 
796
800
  if array.bonds is not None:
797
- struct_conn = _set_inter_residue_bonds(array, atom_site)
801
+ struct_conn = _set_inter_residue_bonds(array, atom_site)
798
802
  if struct_conn is not None:
799
803
  block["struct_conn"] = struct_conn
800
804
  if include_bonds:
@@ -804,24 +808,20 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
804
808
 
805
809
  # In case of a single model handle each coordinate
806
810
  # simply like a flattened array
807
- if type(array) == AtomArray or (
808
- type(array) == AtomArrayStack and array.stack_depth() == 1
811
+ if isinstance(array, AtomArray) or (
812
+ isinstance(array, AtomArrayStack) and array.stack_depth() == 1
809
813
  ):
810
814
  # 'ravel' flattens coord without copy
811
815
  # in case of stack with stack_depth = 1
812
816
  atom_site["Cartn_x"] = np.copy(np.ravel(array.coord[..., 0]))
813
817
  atom_site["Cartn_y"] = np.copy(np.ravel(array.coord[..., 1]))
814
818
  atom_site["Cartn_z"] = np.copy(np.ravel(array.coord[..., 2]))
815
- atom_site["pdbx_PDB_model_num"] = np.ones(
816
- array.array_length(), dtype=np.int32
817
- )
819
+ atom_site["pdbx_PDB_model_num"] = np.ones(array.array_length(), dtype=np.int32)
818
820
  # In case of multiple models repeat annotations
819
821
  # and use model specific coordinates
820
822
  else:
821
823
  atom_site = _repeat(atom_site, array.stack_depth())
822
- coord = np.reshape(
823
- array.coord, (array.stack_depth() * array.array_length(), 3)
824
- )
824
+ coord = np.reshape(array.coord, (array.stack_depth() * array.array_length(), 3))
825
825
  atom_site["Cartn_x"] = np.copy(coord[:, 0])
826
826
  atom_site["Cartn_y"] = np.copy(coord[:, 1])
827
827
  atom_site["Cartn_z"] = np.copy(coord[:, 2])
@@ -829,11 +829,9 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
829
829
  np.arange(1, array.stack_depth() + 1, dtype=np.int32),
830
830
  repeats=array.array_length(),
831
831
  )
832
- if not "atom_id" in annot_categories:
832
+ if "atom_id" not in annot_categories:
833
833
  # Count from 1
834
- atom_site["id"] = np.arange(
835
- 1, len(atom_site["group_PDB"]) + 1
836
- )
834
+ atom_site["id"] = np.arange(1, len(atom_site["group_PDB"]) + 1)
837
835
  block["atom_site"] = atom_site
838
836
 
839
837
  # Write box into file
@@ -870,10 +868,6 @@ def _check_non_empty(array):
870
868
 
871
869
 
872
870
  def _get_or_create_block(pdbx_component, block_name):
873
- if isinstance(pdbx_component, PDBxFile):
874
- # The deprecated 'PDBxFile' is a thin wrapper around 'CIFFile'
875
- pdbx_component = pdbx_component.cif_file
876
-
877
871
  Block = pdbx_component.subcomponent_class()
878
872
 
879
873
  if isinstance(pdbx_component, (CIFFile, BinaryCIFFile)):
@@ -901,7 +895,7 @@ def _determine_entity_id(chain_id):
901
895
  for i in range(len(chain_id)):
902
896
  try:
903
897
  entity_id[i] = id_translation[chain_id[i]]
904
- except:
898
+ except KeyError:
905
899
  # chain_id is not in dictionary -> new entry
906
900
  id_translation[chain_id[i]] = id
907
901
  entity_id[i] = id_translation[chain_id[i]]
@@ -926,8 +920,11 @@ def _repeat(category, repetitions):
926
920
  data = Data(np.tile(column.data.array, repetitions), data_encoding)
927
921
  else:
928
922
  data = Data(np.tile(column.data.array, repetitions))
929
- mask = Data(np.tile(column.mask.array, repetitions)) \
930
- if column.mask is not None else None
923
+ mask = (
924
+ Data(np.tile(column.mask.array, repetitions))
925
+ if column.mask is not None
926
+ else None
927
+ )
931
928
  category_dict[key] = Column(data, mask)
932
929
  return Category(category_dict)
933
930
 
@@ -967,28 +964,37 @@ def _set_intra_residue_bonds(array, atom_site):
967
964
  aromatic_flag[i] = aromatic
968
965
  any_mask = bond_array[:, 2] == BondType.ANY
969
966
 
970
- chem_comp_bond = Category()
967
+ # Remove already existing residue and atom name combinations
968
+ # These appear when the structure contains a residue multiple times
969
+ atom_id_1 = array.atom_name[bond_array[:, 0]]
970
+ atom_id_2 = array.atom_name[bond_array[:, 1]]
971
971
  # Take the residue name from the first atom index, as the residue
972
972
  # name is the same for both atoms, since we have only intra bonds
973
- chem_comp_bond["comp_id"] = array.res_name[bond_array[:, 0]]
974
- chem_comp_bond["atom_id_1"] = array.atom_name[bond_array[:, 0]]
975
- chem_comp_bond["atom_id_2"] = array.atom_name[bond_array[:, 1]]
973
+ comp_id = array.res_name[bond_array[:, 0]]
974
+ _, unique_indices = np.unique(
975
+ np.stack([comp_id, atom_id_1, atom_id_2], axis=-1), axis=0, return_index=True
976
+ )
977
+ unique_indices.sort()
978
+
979
+ chem_comp_bond = Category()
980
+ n_bonds = len(unique_indices)
981
+ chem_comp_bond["pdbx_ordinal"] = np.arange(1, n_bonds + 1, dtype=np.int32)
982
+ chem_comp_bond["comp_id"] = comp_id[unique_indices]
983
+ chem_comp_bond["atom_id_1"] = atom_id_1[unique_indices]
984
+ chem_comp_bond["atom_id_2"] = atom_id_2[unique_indices]
976
985
  chem_comp_bond["value_order"] = Column(
977
- value_order,
978
- np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
986
+ value_order[unique_indices],
987
+ np.where(any_mask[unique_indices], MaskValue.MISSING, MaskValue.PRESENT),
979
988
  )
980
989
  chem_comp_bond["pdbx_aromatic_flag"] = Column(
981
- aromatic_flag,
982
- np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
990
+ aromatic_flag[unique_indices],
991
+ np.where(any_mask[unique_indices], MaskValue.MISSING, MaskValue.PRESENT),
983
992
  )
984
993
  # BondList does not contain stereo information
985
994
  # -> all values are missing
986
995
  chem_comp_bond["pdbx_stereo_config"] = Column(
987
- np.zeros(len(bond_array), dtype="U1"),
988
- np.full(len(bond_array), MaskValue.MISSING)
989
- )
990
- chem_comp_bond["pdbx_ordinal"] = np.arange(
991
- 1, len(bond_array) + 1, dtype=np.int32
996
+ np.zeros(n_bonds, dtype="U1"),
997
+ np.full(n_bonds, MaskValue.MISSING),
992
998
  )
993
999
  return chem_comp_bond
994
1000
 
@@ -1001,8 +1007,11 @@ def _set_inter_residue_bonds(array, atom_site):
1001
1007
  ``atom_site`` category.
1002
1008
  """
1003
1009
  COLUMNS = [
1004
- "label_asym_id", "label_comp_id", "label_seq_id", "label_atom_id",
1005
- "pdbx_PDB_ins_code"
1010
+ "label_asym_id",
1011
+ "label_comp_id",
1012
+ "label_seq_id",
1013
+ "label_atom_id",
1014
+ "pdbx_PDB_ins_code",
1006
1015
  ]
1007
1016
 
1008
1017
  Category = type(atom_site)
@@ -1011,17 +1020,17 @@ def _set_inter_residue_bonds(array, atom_site):
1011
1020
  bond_array = _filter_bonds(array, "inter")
1012
1021
  if len(bond_array) == 0:
1013
1022
  return None
1023
+
1014
1024
  struct_conn = Category()
1015
1025
  struct_conn["id"] = np.arange(1, len(bond_array) + 1)
1016
1026
  struct_conn["conn_type_id"] = np.full(len(bond_array), "covale")
1017
1027
  struct_conn["pdbx_value_order"] = Column(
1018
- np.array(
1019
- [PDBX_BOND_TYPE_TO_ORDER[btype] for btype in bond_array[:, 2]]
1020
- ),
1028
+ np.array([PDBX_BOND_TYPE_TO_ORDER[btype] for btype in bond_array[:, 2]]),
1021
1029
  np.where(
1022
1030
  bond_array[:, 2] == BondType.ANY,
1023
- MaskValue.MISSING, MaskValue.PRESENT,
1024
- )
1031
+ MaskValue.MISSING,
1032
+ MaskValue.PRESENT,
1033
+ ),
1025
1034
  )
1026
1035
  # Write the identifying annotation...
1027
1036
  for col_name in COLUMNS:
@@ -1029,8 +1038,9 @@ def _set_inter_residue_bonds(array, atom_site):
1029
1038
  # ...for each bond partner
1030
1039
  for i in range(2):
1031
1040
  atom_indices = bond_array[:, i]
1032
- struct_conn[_get_struct_conn_col_name(col_name, i+1)] \
1033
- = annot[atom_indices]
1041
+ struct_conn[_get_struct_conn_col_name(col_name, i + 1)] = annot[
1042
+ atom_indices
1043
+ ]
1034
1044
  return struct_conn
1035
1045
 
1036
1046
 
@@ -1042,9 +1052,9 @@ def _filter_bonds(array, connection):
1042
1052
  bond_array = array.bonds.as_array()
1043
1053
  # To save computation time call 'get_residue_starts_for()' only once
1044
1054
  # with indices of the first and second atom of each bond
1045
- residue_starts_1, residue_starts_2 = get_residue_starts_for(
1046
- array, bond_array[:, :2].flatten()
1047
- ).reshape(-1, 2).T
1055
+ residue_starts_1, residue_starts_2 = (
1056
+ get_residue_starts_for(array, bond_array[:, :2].flatten()).reshape(-1, 2).T
1057
+ )
1048
1058
  if connection == "intra":
1049
1059
  return bond_array[residue_starts_1 == residue_starts_2]
1050
1060
  elif connection == "inter":
@@ -1053,12 +1063,11 @@ def _filter_bonds(array, connection):
1053
1063
  raise ValueError("Invalid 'connection' option")
1054
1064
 
1055
1065
 
1056
- def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
1057
- res_name=None):
1066
+ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
1058
1067
  """
1059
1068
  Create an :class:`AtomArray` for a chemical component from the
1060
1069
  ``chem_comp_atom`` and, if available, the ``chem_comp_bond``
1061
- category in a :class:`PDBxFile`.
1070
+ category in a file.
1062
1071
 
1063
1072
  Parameters
1064
1073
  ----------
@@ -1140,12 +1149,11 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
1140
1149
 
1141
1150
  array = AtomArray(atom_category.row_count)
1142
1151
 
1143
- array.hetero[:] = True
1144
- array.res_name = atom_category["comp_id"].as_array("U5")
1145
- array.atom_name = atom_category["atom_id"].as_array("U6")
1146
- array.element = atom_category["type_symbol"].as_array("U2")
1147
- array.add_annotation("charge", int)
1148
- array.charge = atom_category["charge"].as_array(int, 0)
1152
+ array.set_annotation("hetero", np.full(len(atom_category["comp_id"]), True))
1153
+ array.set_annotation("res_name", atom_category["comp_id"].as_array(str))
1154
+ array.set_annotation("atom_name", atom_category["atom_id"].as_array(str))
1155
+ array.set_annotation("element", atom_category["type_symbol"].as_array(str))
1156
+ array.set_annotation("charge", atom_category["charge"].as_array(int, 0))
1149
1157
 
1150
1158
  coord_fields = [f"pdbx_model_Cartn_{dim}_ideal" for dim in ("x", "y", "z")]
1151
1159
  alt_coord_fields = [f"model_Cartn_{dim}" for dim in ("x", "y", "z")]
@@ -1154,16 +1162,16 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
1154
1162
  coord_fields, alt_coord_fields = alt_coord_fields, coord_fields
1155
1163
  try:
1156
1164
  for i, field in enumerate(coord_fields):
1157
- array.coord[:,i] = atom_category[field].as_array(np.float32)
1165
+ array.coord[:, i] = atom_category[field].as_array(np.float32)
1158
1166
  except KeyError as err:
1159
1167
  key = err.args[0]
1160
1168
  warnings.warn(
1161
1169
  f"Attribute '{key}' not found within 'chem_comp_atom' category. "
1162
1170
  f"The fallback coordinates will be used instead",
1163
- UserWarning
1171
+ UserWarning,
1164
1172
  )
1165
1173
  for i, field in enumerate(alt_coord_fields):
1166
- array.coord[:,i] = atom_category[field].as_array(np.float32)
1174
+ array.coord[:, i] = atom_category[field].as_array(np.float32)
1167
1175
 
1168
1176
  try:
1169
1177
  bond_category = block["chem_comp_bond"]
@@ -1173,9 +1181,8 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
1173
1181
  )
1174
1182
  except KeyError:
1175
1183
  warnings.warn(
1176
- f"Category 'chem_comp_bond' not found. "
1177
- f"No bonds will be parsed",
1178
- UserWarning
1184
+ "Category 'chem_comp_bond' not found. " "No bonds will be parsed",
1185
+ UserWarning,
1179
1186
  )
1180
1187
  else:
1181
1188
  bonds = BondList(array.array_length())
@@ -1183,7 +1190,7 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
1183
1190
  bond_category["atom_id_1"].as_array(str),
1184
1191
  bond_category["atom_id_2"].as_array(str),
1185
1192
  bond_category["value_order"].as_array(str),
1186
- bond_category["pdbx_aromatic_flag"].as_array(str)
1193
+ bond_category["pdbx_aromatic_flag"].as_array(str),
1187
1194
  ):
1188
1195
  atom_i = np.where(array.atom_name == atom1)[0][0]
1189
1196
  atom_j = np.where(array.atom_name == atom2)[0][0]
@@ -1225,9 +1232,7 @@ def set_component(pdbx_file, array, data_block=None):
1225
1232
  Category = block.subcomponent_class()
1226
1233
 
1227
1234
  if get_residue_count(array) > 1:
1228
- raise BadStructureError(
1229
- "The input atom array must comprise only one residue"
1230
- )
1235
+ raise BadStructureError("The input atom array must comprise only one residue")
1231
1236
  res_name = array.res_name[0]
1232
1237
 
1233
1238
  annot_categories = array.get_annotation_categories()
@@ -1250,31 +1255,28 @@ def set_component(pdbx_file, array, data_block=None):
1250
1255
  atom_cat["pdbx_model_Cartn_z_ideal"] = atom_cat["model_Cartn_z"]
1251
1256
  atom_cat["pdbx_component_atom_id"] = atom_cat["atom_id"]
1252
1257
  atom_cat["pdbx_component_comp_id"] = atom_cat["comp_id"]
1253
- atom_cat["pdbx_ordinal"] = np.arange(
1254
- 1, array.array_length() + 1
1255
- ).astype(str)
1258
+ atom_cat["pdbx_ordinal"] = np.arange(1, array.array_length() + 1).astype(str)
1256
1259
  block["chem_comp_atom"] = atom_cat
1257
1260
 
1258
1261
  if array.bonds is not None and array.bonds.get_bond_count() > 0:
1259
1262
  bond_array = array.bonds.as_array()
1260
1263
  order_flags = []
1261
1264
  aromatic_flags = []
1262
- for bond_type in bond_array[:,2]:
1265
+ for bond_type in bond_array[:, 2]:
1263
1266
  order_flag, aromatic_flag = COMP_BOND_TYPE_TO_ORDER[bond_type]
1264
1267
  order_flags.append(order_flag)
1265
1268
  aromatic_flags.append(aromatic_flag)
1266
1269
 
1267
1270
  bond_cat = Category()
1268
1271
  bond_cat["comp_id"] = np.full(len(bond_array), res_name)
1269
- bond_cat["atom_id_1"] = array.atom_name[bond_array[:,0]]
1270
- bond_cat["atom_id_2"] = array.atom_name[bond_array[:,1]]
1272
+ bond_cat["atom_id_1"] = array.atom_name[bond_array[:, 0]]
1273
+ bond_cat["atom_id_2"] = array.atom_name[bond_array[:, 1]]
1271
1274
  bond_cat["value_order"] = np.array(order_flags)
1272
1275
  bond_cat["pdbx_aromatic_flag"] = np.array(aromatic_flags)
1273
- bond_cat["pdbx_ordinal"] = np.arange(
1274
- 1, len(bond_array) + 1
1275
- ).astype(str)
1276
+ bond_cat["pdbx_ordinal"] = np.arange(1, len(bond_array) + 1).astype(str)
1276
1277
  block["chem_comp_bond"] = bond_cat
1277
1278
 
1279
+
1278
1280
  def list_assemblies(pdbx_file, data_block=None):
1279
1281
  """
1280
1282
  List the biological assemblies that are available for the structure
@@ -1325,14 +1327,21 @@ def list_assemblies(pdbx_file, data_block=None):
1325
1327
  id: details
1326
1328
  for id, details in zip(
1327
1329
  assembly_category["id"].as_array(str),
1328
- assembly_category["details"].as_array(str)
1330
+ assembly_category["details"].as_array(str),
1329
1331
  )
1330
1332
  }
1331
1333
 
1332
1334
 
1333
- def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
1334
- altloc="first", extra_fields=None, use_author_fields=True,
1335
- include_bonds=False):
1335
+ def get_assembly(
1336
+ pdbx_file,
1337
+ assembly_id=None,
1338
+ model=None,
1339
+ data_block=None,
1340
+ altloc="first",
1341
+ extra_fields=None,
1342
+ use_author_fields=True,
1343
+ include_bonds=False,
1344
+ ):
1336
1345
  """
1337
1346
  Build the given biological assembly.
1338
1347
 
@@ -1389,7 +1398,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
1389
1398
  for example both, ``label_seq_id`` and ``auth_seq_id`` describe
1390
1399
  the ID of the residue.
1391
1400
  While, the ``label_xxx`` fields can be used as official pointers
1392
- to other categories in the :class:`PDBxFile`, the ``auth_xxx``
1401
+ to other categories in the file, the ``auth_xxx``
1393
1402
  fields are set by the author(s) of the structure and are
1394
1403
  consistent with the corresponding values in PDB files.
1395
1404
  If `use_author_fields` is true, the annotation arrays will be
@@ -1422,9 +1431,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
1422
1431
  try:
1423
1432
  assembly_gen_category = block["pdbx_struct_assembly_gen"]
1424
1433
  except KeyError:
1425
- raise InvalidFileError(
1426
- "File has no 'pdbx_struct_assembly_gen' category"
1427
- )
1434
+ raise InvalidFileError("File has no 'pdbx_struct_assembly_gen' category")
1428
1435
 
1429
1436
  try:
1430
1437
  struct_oper_category = block["pdbx_struct_oper_list"]
@@ -1457,7 +1464,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
1457
1464
  altloc,
1458
1465
  extra_fields_and_asym,
1459
1466
  use_author_fields,
1460
- include_bonds
1467
+ include_bonds,
1461
1468
  )
1462
1469
 
1463
1470
  ### Get transformations and apply them to the affected asym IDs
@@ -1473,9 +1480,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
1473
1480
  operations = _parse_operation_expression(op_expr)
1474
1481
  asym_ids = asym_id_expr.split(",")
1475
1482
  # Filter affected asym IDs
1476
- sub_structure = structure[
1477
- ..., np.isin(structure.label_asym_id, asym_ids)
1478
- ]
1483
+ sub_structure = structure[..., np.isin(structure.label_asym_id, asym_ids)]
1479
1484
  sub_assembly = _apply_transformations(
1480
1485
  sub_structure, transformations, operations
1481
1486
  )
@@ -1534,10 +1539,9 @@ def _get_transformations(struct_oper):
1534
1539
  for i in (1, 2, 3)
1535
1540
  ]
1536
1541
  )
1537
- translation_vector = np.array([
1538
- struct_oper[f"vector[{i}]"].as_array(float)[index]
1539
- for i in (1, 2, 3)
1540
- ])
1542
+ translation_vector = np.array(
1543
+ [struct_oper[f"vector[{i}]"].as_array(float)[index] for i in (1, 2, 3)]
1544
+ )
1541
1545
  transformation_dict[id] = (rotation_matrix, translation_vector)
1542
1546
  return transformation_dict
1543
1547
 
@@ -1592,6 +1596,4 @@ def _convert_string_to_sequence(string, stype):
1592
1596
  elif stype in _other_type_list:
1593
1597
  return None
1594
1598
  else:
1595
- raise InvalidFileError(
1596
- "mmCIF _entity_poly.type unsupported" " type: " + stype
1597
- )
1599
+ raise InvalidFileError("mmCIF _entity_poly.type unsupported" " type: " + stype)