biotite 1.1.0__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cpython-313-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cpython-313-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cpython-313-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-313-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cpython-313-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-313-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cpython-313-darwin.so +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cpython-313-darwin.so +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cpython-313-darwin.so +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cpython-313-darwin.so +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cpython-313-darwin.so +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cpython-313-darwin.so +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cpython-313-darwin.so +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cpython-313-darwin.so +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cpython-313-darwin.so +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cpython-313-darwin.so +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cpython-313-darwin.so +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cpython-313-darwin.so +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cpython-313-darwin.so +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cpython-313-darwin.so +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cpython-313-darwin.so +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +4 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,1290 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
# Module metadata.
# The module name is overridden with the name of the enclosing
# subpackage instead of the name of this file
# NOTE(review): presumably done so that objects defined here present
# themselves under the public subpackage path - confirm
__name__ = "biotite.structure.io.pdb"
__author__ = "Patrick Kunzmann, Daniel Bauer, Claude J. Rogers"
# Only the PDBFile class is exported by a star-import of this module
__all__ = ["PDBFile"]
8
+
9
+ import warnings
10
+ from collections import namedtuple
11
+ import numpy as np
12
+ from biotite.file import InvalidFileError, TextFile
13
+ from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
14
+ from biotite.structure.bonds import BondList, connect_via_residue_names
15
+ from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
16
+ from biotite.structure.error import BadStructureError
17
+ from biotite.structure.filter import (
18
+ filter_first_altloc,
19
+ filter_highest_occupancy_altloc,
20
+ filter_solvent,
21
+ )
22
+ from biotite.structure.io.pdb.hybrid36 import (
23
+ decode_hybrid36,
24
+ encode_hybrid36,
25
+ max_hybrid36_number,
26
+ )
27
+ from biotite.structure.repair import infer_elements
28
+ from biotite.structure.util import matrix_rotate
29
+
30
# Largest numbers that can be written as plain decimals into the
# 5-character atom ID field and the 4-character residue ID field
# defined below
# NOTE(review): presumably the thresholds above which hybrid-36
# encoding is required - the usage is outside this excerpt, confirm
_PDB_MAX_ATOMS = 99999
_PDB_MAX_RESIDUES = 9999

# slice objects for readability
# Each slice selects one fixed-width field from a record line.
# The bounds are 0-based and end-exclusive, e.g. slice(6, 11) covers
# the 1-based columns 7-11 of the line.
# ATOM/HETATM
_record = slice(0, 6)  # columns 1-6
_atom_id = slice(6, 11)  # columns 7-11
_atom_name = slice(12, 16)  # columns 13-16
_alt_loc = slice(16, 17)  # column 17
_res_name = slice(17, 20)  # columns 18-20
_chain_id = slice(21, 22)  # column 22
_res_id = slice(22, 26)  # columns 23-26
_ins_code = slice(26, 27)  # column 27
_coord_x = slice(30, 38)  # columns 31-38
_coord_y = slice(38, 46)  # columns 39-46
_coord_z = slice(46, 54)  # columns 47-54
_occupancy = slice(54, 60)  # columns 55-60
_temp_f = slice(60, 66)  # columns 61-66
_element = slice(76, 78)  # columns 77-78
_charge = slice(78, 80)  # columns 79-80
# CRYST1
_a = slice(6, 15)  # columns 7-15
_b = slice(15, 24)  # columns 16-24
_c = slice(24, 33)  # columns 25-33
_alpha = slice(33, 40)  # columns 34-40
_beta = slice(40, 47)  # columns 41-47
_gamma = slice(47, 54)  # columns 48-54
_space = slice(55, 66)  # columns 56-66
_z = slice(66, 70)  # columns 67-70
59
+
60
+
61
+ class PDBFile(TextFile):
62
+ r"""
63
+ This class represents a PDB file.
64
+
65
+ The usage of :mod:`biotite.structure.io.pdbx` is encouraged in favor
66
+ of this class.
67
+
68
+ This class only provides support for reading/writing the pure atom
69
+ information (*ATOM*, *HETATM*, *MODEL* and *ENDMDL* records). *TER*
70
+ records cannot be written.
71
+ Additionally, *REMARK* records can be read.
72
+
73
+ See also
74
+ --------
75
+ CIFFile
76
+ BinaryCIFFile
77
+
78
+ Examples
79
+ --------
80
+ Load a `\\*.pdb` file, modify the structure and save the new
81
+ structure into a new file:
82
+
83
+ >>> import os.path
84
+ >>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb"))
85
+ >>> array_stack = file.get_structure()
86
+ >>> array_stack_mod = rotate(array_stack, [1,2,3])
87
+ >>> file = PDBFile()
88
+ >>> file.set_structure(array_stack_mod)
89
+ >>> file.write(os.path.join(path_to_directory, "1l2y_mod.pdb"))
90
+ """
91
+
92
@classmethod
def read(cls, file):
    """
    Read a PDB file and prepare it for column-based record access.

    Parameters
    ----------
    file
        Passed unchanged to the ``read()`` method of the parent class.

    Returns
    -------
    file_object
        The object created by the ``read()`` method of the parent
        class, with all lines padded to 80 characters and the model
        and atom records indexed.
    """
    record_width = 80
    pdb_file = super().read(file)
    # Records are addressed by fixed columns:
    # Right-pad every shorter line with whitespace, so that each
    # column slice yields a field of its full width
    pdb_file.lines = [text.ljust(record_width) for text in pdb_file.lines]
    pdb_file._index_models_and_atoms()
    return pdb_file
100
+
101
def get_remark(self, number):
    r"""
    Collect the content of all *REMARK* records that carry the given
    `number`.

    Parameters
    ----------
    number : int
        The *REMARK* number, i.e. the `XXX` in ``REMARK XXX``.
        The value is converted with ``int()`` and must be in the
        range 0-999.

    Returns
    -------
    remark_lines : None or list of str
        The content of the selected *REMARK* lines, one list element
        per line.
        The leading ``REMARK XXX `` part of each line is cut off.
        The first matching line, which always must be empty, is
        left out.
        ``None`` is returned, if the file contains no *REMARK*
        record with the given number.

    Raises
    ------
    ValueError
        If `number` is not in the range 0-999.

    Examples
    --------

    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb"))
    >>> remarks = file.get_remark(900)
    >>> print("\n".join(remarks))
    RELATED ENTRIES
    RELATED ID: 5292 RELATED DB: BMRB
    BMRB 5292 IS CHEMICAL SHIFTS FOR TC5B IN BUFFER AND BUFFER
    CONTAINING 30 VOL-% TFE.
    RELATED ID: 1JRJ RELATED DB: PDB
    1JRJ IS AN ANALAGOUS C-TERMINAL STRUCTURE.
    >>> nonexistent_remark = file.get_remark(999)
    >>> print(nonexistent_remark)
    None
    """
    # The content begins after the 11 characters of ``REMARK XXX ``
    content_start = 11

    # in case a non-integer is accidentally given
    number = int(number)
    if not 0 <= number <= 999:
        raise ValueError("The number must be in range 0-999")

    # The number is right-aligned in a field of three characters
    prefix = f"REMARK {number:>3d}"
    records = [line for line in self.lines if line.startswith(prefix)]
    if not records:
        return None
    # Skip the first (empty) line and omit the ``REMARK XXX `` part
    return [record[content_start:] for record in records[1:]]
158
+
159
def get_model_count(self):
    """
    Report how many models the PDB file contains.

    Returns
    -------
    model_count : int
        The number of models, i.e. the number of indexed model
        start positions.
    """
    # One start position is stored per model
    model_starts = self._model_start_i
    return len(model_starts)
169
+
170
def get_coord(self, model=None):
    """
    Read the atom coordinates, and nothing else, from the PDB file.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, a 2D coordinate array is
        returned, that contains the atoms of the model with the
        given number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, a 3D coordinate array
        containing all models is returned, even if the structure
        contains only one model.

    Returns
    -------
    coord : ndarray, shape=(m,n,3) or shape=(n,3), dtype=float
        The coordinates read from the ATOM and HETATM records of the
        file.

    Notes
    -----
    Note that :func:`get_coord()` may output more coordinates than
    the atom array (stack) from the corresponding
    :func:`get_structure()` call has.
    The reason for this is, that :func:`get_structure()` filters
    *altloc* IDs, while `get_coord()` does not.

    Examples
    --------
    Read an :class:`AtomArrayStack` from multiple PDB files, where
    each PDB file contains the same atoms but different positions.
    This is an efficient approach when a trajectory is spread into
    multiple PDB files, as done e.g. by the *Rosetta* modeling
    software.

    For the purpose of this example, the PDB files are created from
    an existing :class:`AtomArrayStack`.

    >>> import os.path
    >>> from tempfile import gettempdir
    >>> file_names = []
    >>> for i in range(atom_array_stack.stack_depth()):
    ...     pdb_file = PDBFile()
    ...     pdb_file.set_structure(atom_array_stack[i])
    ...     file_name = os.path.join(gettempdir(), f"model_{i+1}.pdb")
    ...     pdb_file.write(file_name)
    ...     file_names.append(file_name)
    >>> print(file_names)
    ['...model_1.pdb', '...model_2.pdb', ..., '...model_38.pdb']

    Now the PDB files are used to create an :class:`AtomArrayStack`,
    where each file contributes one model.

    Construct a new :class:`AtomArrayStack` with annotations taken
    from one of the created files used as template and coordinates
    from all of the PDB files.

    >>> template_file = PDBFile.read(file_names[0])
    >>> template = template_file.get_structure()
    >>> coord = []
    >>> for i, file_name in enumerate(file_names):
    ...     pdb_file = PDBFile.read(file_name)
    ...     coord.append(pdb_file.get_coord(model=1))
    >>> new_stack = from_template(template, np.array(coord))

    The newly created :class:`AtomArrayStack` should now be equal to
    the :class:`AtomArrayStack` the PDB files were created from.

    >>> print(np.allclose(new_stack.coord, atom_array_stack.coord))
    True
    """
    # Column ranges of the x, y and z field, in the order of the
    # last array dimension
    axis_fields = (_coord_x, _coord_y, _coord_z)

    if model is not None:
        # Single model -> one row per atom record of that model
        record_indices = self._get_atom_record_indices_for_model(model)
        coord = np.zeros((len(record_indices), 3), dtype=np.float32)
        for atom_i, line_i in enumerate(record_indices):
            record = self.lines[line_i]
            for axis, field in enumerate(axis_fields):
                coord[atom_i, axis] = float(record[field])
        return coord

    # All models -> walk through all atom records in file order and
    # keep track of the model the current record belongs to
    model_starts = self._model_start_i
    model_count = len(model_starts)
    coord = np.zeros(
        (model_count, self._get_model_length(), 3),
        dtype=np.float32,
    )
    last_model_i = model_count - 1
    model_i = 0
    atom_i = 0
    for line_i in self._atom_line_i:
        # The record lies behind the start line of the next model
        # -> continue with the first atom of the next model
        if model_i < last_model_i and line_i > model_starts[model_i + 1]:
            model_i += 1
            atom_i = 0
        record = self.lines[line_i]
        for axis, field in enumerate(axis_fields):
            coord[model_i, atom_i, axis] = float(record[field])
        atom_i += 1
    return coord
274
+
275
def get_b_factor(self, model=None):
    """
    Read the B-factors, and nothing else, from the PDB file.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, a 1D B-factor array is
        returned, that contains the atoms of the model with the
        given number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, a 2D B-factor array
        containing all models is returned, even if the structure
        contains only one model.

    Returns
    -------
    b_factor : ndarray, shape=(m,n) or shape=(n,), dtype=float
        The B-factors read from the ATOM and HETATM records of the
        file.

    Notes
    -----
    Note that :func:`get_b_factor()` may output more B-factors
    than the atom array (stack) from the corresponding
    :func:`get_structure()` call has atoms.
    The reason for this is, that :func:`get_structure()` filters
    *altloc* IDs, while `get_b_factor()` does not.
    """
    if model is not None:
        # Single model -> one value per atom record of that model
        record_indices = self._get_atom_record_indices_for_model(model)
        b_factor = np.zeros(len(record_indices), dtype=np.float32)
        for atom_i, line_i in enumerate(record_indices):
            record = self.lines[line_i]
            b_factor[atom_i] = float(record[_temp_f])
        return b_factor

    # All models -> walk through all atom records in file order and
    # keep track of the model the current record belongs to
    model_starts = self._model_start_i
    model_count = len(model_starts)
    b_factor = np.zeros(
        (model_count, self._get_model_length()), dtype=np.float32
    )
    last_model_i = model_count - 1
    model_i = 0
    atom_i = 0
    for line_i in self._atom_line_i:
        # The record lies behind the start line of the next model
        # -> continue with the first atom of the next model
        if model_i < last_model_i and line_i > model_starts[model_i + 1]:
            model_i += 1
            atom_i = 0
        record = self.lines[line_i]
        b_factor[model_i, atom_i] = float(record[_temp_f])
        atom_i += 1
    return b_factor
330
+
331
+ def get_structure(
332
+ self, model=None, altloc="first", extra_fields=[], include_bonds=False
333
+ ):
334
+ """
335
+ Get an :class:`AtomArray` or :class:`AtomArrayStack` from the PDB file.
336
+
337
+ This function parses standard base-10 PDB files as well as
338
+ hybrid-36 PDB.
339
+
340
+ Parameters
341
+ ----------
342
+ model : int, optional
343
+ If this parameter is given, the function will return an
344
+ :class:`AtomArray` from the atoms corresponding to the given
345
+ model number (starting at 1).
346
+ Negative values are used to index models starting from the
347
+ last model instead of the first model.
348
+ If this parameter is omitted, an :class:`AtomArrayStack`
349
+ containing all models will be returned, even if the
350
+ structure contains only one model.
351
+ altloc : {'first', 'occupancy', 'all'}
352
+ This parameter defines how *altloc* IDs are handled:
353
+ - ``'first'`` - Use atoms that have the first
354
+ *altloc* ID appearing in a residue.
355
+ - ``'occupancy'`` - Use atoms that have the *altloc* ID
356
+ with the highest occupancy for a residue.
357
+ - ``'all'`` - Use all atoms.
358
+ Note that this leads to duplicate atoms.
359
+ When this option is chosen, the ``altloc_id``
360
+ annotation array is added to the returned structure.
361
+ extra_fields : list of str, optional
362
+ The strings in the list are optional annotation categories
363
+ that should be stored in the output array or stack.
364
+ These are valid values:
365
+ ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
366
+ ``'charge'``.
367
+ include_bonds : bool, optional
368
+ If set to true, a :class:`BondList` will be created for the
369
+ resulting :class:`AtomArray` containing the bond information
370
+ from the file.
371
+ Bonds, whose order could not be determined from the
372
+ *Chemical Component Dictionary*
373
+ (e.g. especially inter-residue bonds),
374
+ have :attr:`BondType.ANY`, since the PDB format itself does
375
+ not support bond orders.
376
+
377
+ Returns
378
+ -------
379
+ array : AtomArray or AtomArrayStack
380
+ The return type depends on the `model` parameter.
381
+ """
382
+ if model is None:
383
+ depth = len(self._model_start_i)
384
+ length = self._get_model_length()
385
+ array = AtomArrayStack(depth, length)
386
+ # Record indices for annotation determination
387
+ # Annotation is determined from model 1
388
+ annot_i = self._get_atom_record_indices_for_model(1)
389
+ # Record indices for coordinate determination
390
+ coord_i = self._atom_line_i
391
+
392
+ else:
393
+ annot_i = coord_i = self._get_atom_record_indices_for_model(model)
394
+ array = AtomArray(len(coord_i))
395
+
396
+ # Create mandatory and optional annotation arrays
397
+ chain_id = np.zeros(array.array_length(), array.chain_id.dtype)
398
+ res_id = np.zeros(array.array_length(), array.res_id.dtype)
399
+ ins_code = np.zeros(array.array_length(), array.ins_code.dtype)
400
+ res_name = np.zeros(array.array_length(), array.res_name.dtype)
401
+ hetero = np.zeros(array.array_length(), array.hetero.dtype)
402
+ atom_name = np.zeros(array.array_length(), array.atom_name.dtype)
403
+ element = np.zeros(array.array_length(), array.element.dtype)
404
+ atom_id_raw = np.zeros(array.array_length(), "U5")
405
+ charge_raw = np.zeros(array.array_length(), "U2")
406
+ occupancy = np.zeros(array.array_length(), float)
407
+ b_factor = np.zeros(array.array_length(), float)
408
+ altloc_id = np.zeros(array.array_length(), dtype="U1")
409
+
410
+ # Fill annotation array
411
+ # i is index in array, line_i is line index
412
+ for i, line_i in enumerate(annot_i):
413
+ line = self.lines[line_i]
414
+ chain_id[i] = line[_chain_id].strip()
415
+ res_id[i] = decode_hybrid36(line[_res_id])
416
+ ins_code[i] = line[_ins_code].strip()
417
+ res_name[i] = line[_res_name].strip()
418
+ hetero[i] = line[_record] == "HETATM"
419
+ atom_name[i] = line[_atom_name].strip()
420
+ element[i] = line[_element].strip()
421
+ altloc_id[i] = line[_alt_loc]
422
+ atom_id_raw[i] = line[_atom_id]
423
+ # turn "1-" into "-1", if necessary
424
+ if line[_charge][0] in "+-":
425
+ charge_raw[i] = line[_charge]
426
+ else:
427
+ charge_raw[i] = line[_charge][::-1]
428
+ occupancy[i] = float(line[_occupancy].strip())
429
+ b_factor[i] = float(line[_temp_f].strip())
430
+
431
+ if include_bonds or (extra_fields is not None and "atom_id" in extra_fields):
432
+ # The atom IDs are only required in these two cases
433
+ atom_id = np.array(
434
+ [decode_hybrid36(raw_id.item()) for raw_id in atom_id_raw], dtype=int
435
+ )
436
+ else:
437
+ atom_id = None
438
+
439
+ # Add annotation arrays to atom array (stack)
440
+ array.chain_id = chain_id
441
+ array.res_id = res_id
442
+ array.ins_code = ins_code
443
+ array.res_name = res_name
444
+ array.hetero = hetero
445
+ array.atom_name = atom_name
446
+ array.element = element
447
+
448
+ for field in extra_fields if extra_fields is not None else []:
449
+ if field == "atom_id":
450
+ # Copy is necessary to avoid double masking in
451
+ # later altloc ID filtering
452
+ array.set_annotation("atom_id", atom_id.copy())
453
+ elif field == "charge":
454
+ charge = np.array(charge_raw)
455
+ array.set_annotation(
456
+ "charge", np.where(charge == " ", "0", charge).astype(int)
457
+ )
458
+ elif field == "occupancy":
459
+ array.set_annotation("occupancy", occupancy)
460
+ elif field == "b_factor":
461
+ array.set_annotation("b_factor", b_factor)
462
+ else:
463
+ raise ValueError(f"Unknown extra field: {field}")
464
+
465
+ # Replace empty strings for elements with guessed types
466
+ # This is used e.g. for PDB files created by Gromacs
467
+ empty_element_mask = array.element == ""
468
+ if empty_element_mask.any():
469
+ warnings.warn(
470
+ f"{np.count_nonzero(empty_element_mask)} elements "
471
+ "were guessed from atom name"
472
+ )
473
+ array.element[empty_element_mask] = infer_elements(
474
+ array.atom_name[empty_element_mask]
475
+ )
476
+
477
+ # Fill in coordinates
478
+ if isinstance(array, AtomArray):
479
+ for i, line_i in enumerate(coord_i):
480
+ line = self.lines[line_i]
481
+ array.coord[i, 0] = float(line[_coord_x])
482
+ array.coord[i, 1] = float(line[_coord_y])
483
+ array.coord[i, 2] = float(line[_coord_z])
484
+
485
+ elif isinstance(array, AtomArrayStack):
486
+ m = 0
487
+ i = 0
488
+ for line_i in self._atom_line_i:
489
+ if (
490
+ m < len(self._model_start_i) - 1
491
+ and line_i > self._model_start_i[m + 1]
492
+ ):
493
+ m += 1
494
+ i = 0
495
+ line = self.lines[line_i]
496
+ array.coord[m, i, 0] = float(line[_coord_x])
497
+ array.coord[m, i, 1] = float(line[_coord_y])
498
+ array.coord[m, i, 2] = float(line[_coord_z])
499
+ i += 1
500
+
501
+ # Fill in box vectors
502
+ # PDB does not support changing box dimensions. CRYST1 is a one-time
503
+ # record so we can extract it directly
504
+ for line in self.lines:
505
+ if line.startswith("CRYST1"):
506
+ try:
507
+ len_a = float(line[_a])
508
+ len_b = float(line[_b])
509
+ len_c = float(line[_c])
510
+ alpha = np.deg2rad(float(line[_alpha]))
511
+ beta = np.deg2rad(float(line[_beta]))
512
+ gamma = np.deg2rad(float(line[_gamma]))
513
+ box = vectors_from_unitcell(len_a, len_b, len_c, alpha, beta, gamma)
514
+ except ValueError:
515
+ # File contains invalid 'CRYST1' record
516
+ warnings.warn(
517
+ "File contains invalid 'CRYST1' record, box is ignored"
518
+ )
519
+ break
520
+
521
+ if isinstance(array, AtomArray):
522
+ array.box = box
523
+ else:
524
+ array.box = np.repeat(
525
+ box[np.newaxis, ...], array.stack_depth(), axis=0
526
+ )
527
+ break
528
+
529
+ # Filter altloc IDs
530
+ if altloc == "occupancy":
531
+ filter = filter_highest_occupancy_altloc(array, altloc_id, occupancy)
532
+ array = array[..., filter]
533
+ atom_id = atom_id[filter] if atom_id is not None else None
534
+ elif altloc == "first":
535
+ filter = filter_first_altloc(array, altloc_id)
536
+ array = array[..., filter]
537
+ atom_id = atom_id[filter] if atom_id is not None else None
538
+ elif altloc == "all":
539
+ array.set_annotation("altloc_id", altloc_id)
540
+ else:
541
+ raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
542
+
543
+ # Read bonds
544
+ if include_bonds:
545
+ bond_list = self._get_bonds(atom_id)
546
+ bond_list = bond_list.merge(connect_via_residue_names(array))
547
+ array.bonds = bond_list
548
+
549
+ return array
550
+
551
def get_space_group(self):
    """
    Extract the space group and Z value from the CRYST1 record.

    Returns
    -------
    info : SpaceGroupInfo
        A named tuple with the fields ``space_group`` (str, the raw
        column content of the record) and ``z_val`` (int).

    Raises
    ------
    InvalidFileError
        If the file has no ``CRYST1`` record or the Z value of the
        record cannot be parsed as integer.
    """
    SpaceGroupInfo = namedtuple("SpaceGroupInfo", ["space_group", "z_val"])

    # CRYST1 is a one-time record, so only its first occurrence matters
    for line in self.lines:
        if not line.startswith("CRYST1"):
            continue
        try:
            return SpaceGroupInfo(
                space_group=str(line[_space]), z_val=int(line[_z])
            )
        except ValueError as err:
            # File contains invalid 'CRYST1' record
            raise InvalidFileError(
                "File does not contain valid space group and/or Z values"
            ) from err

    # Without this explicit error the function would fail with an
    # unbound local variable when no CRYST1 record exists
    raise InvalidFileError("File does not contain a 'CRYST1' record")
582
+
583
def set_structure(self, array, hybrid36=False):
    """
    Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
    file.

    This makes also use of the optional annotation arrays
    ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
    If the atom array (stack) contains the annotation ``'atom_id'``,
    these values will be used for atom numbering instead of
    continuous numbering.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array or stack to be saved into this file. If a stack
        is given, each array in the stack is saved as separate
        model.
    hybrid36: bool, optional
        Defines whether the file should be written in hybrid-36
        format.

    Notes
    -----
    If `array` has an associated :class:`BondList`, ``CONECT``
    records are also written for all non-water hetero residues
    and all inter-residue connections.

    All fixed-width fields are padded via explicit width specifiers
    instead of literal runs of blanks, so the column layout does not
    depend on the exact number of spaces typed into a string literal.
    """
    _check_pdb_compatibility(array, hybrid36)

    natoms = array.array_length()
    annot_categories = array.get_annotation_categories()
    record = np.char.array(np.where(array.hetero, "HETATM", "ATOM"))
    # Check for optional annotation categories
    if "atom_id" in annot_categories:
        atom_id = array.atom_id
    else:
        atom_id = np.arange(1, natoms + 1)
    # Occupancy and B-factor are both 6 columns wide
    if "b_factor" in annot_categories:
        b_factor = np.char.array([f"{b:>6.2f}" for b in array.b_factor])
    else:
        b_factor = np.char.array(np.full(natoms, f"{0.0:>6.2f}", dtype="U6"))
    if "occupancy" in annot_categories:
        occupancy = np.char.array([f"{o:>6.2f}" for o in array.occupancy])
    else:
        occupancy = np.char.array(np.full(natoms, f"{1.0:>6.2f}", dtype="U6"))
    if "charge" in annot_categories:
        # PDB writes the magnitude first and the sign second, e.g. '1-'
        charge = np.char.array(
            [
                str(np.abs(value)) + "+"
                if value > 0
                else (str(np.abs(value)) + "-" if value < 0 else "")
                for value in array.get_annotation("charge")
            ]
        )
    else:
        charge = np.char.array(np.full(natoms, " ", dtype="U2"))

    if hybrid36:
        pdb_atom_id = np.char.array([encode_hybrid36(i, 5) for i in atom_id])
        pdb_res_id = np.char.array([encode_hybrid36(i, 4) for i in array.res_id])
    else:
        # Atom IDs are supported up to 99999,
        # but negative IDs are also possible
        pdb_atom_id = np.char.array(
            np.where(
                atom_id > 0, ((atom_id - 1) % _PDB_MAX_ATOMS) + 1, atom_id
            ).astype(str)
        )
        # Residue IDs are supported up to 9999,
        # but negative IDs are also possible
        pdb_res_id = np.char.array(
            np.where(
                array.res_id > 0,
                ((array.res_id - 1) % _PDB_MAX_RESIDUES) + 1,
                array.res_id,
            ).astype(str)
        )

    # Atom names of one-letter elements start in the second column
    # of the name field, unless the name fills all four columns
    names = np.char.array(
        [
            f" {atm}" if len(elem) == 1 and len(atm) < 4 else atm
            for atm, elem in zip(array.atom_name, array.element)
        ]
    )
    res_names = np.char.array(array.res_name)
    chain_ids = np.char.array(array.chain_id)
    ins_codes = np.char.array(array.ins_code)
    spaces = np.char.array(np.full(natoms, " ", dtype="U1"))
    elements = np.char.array(array.element)

    # Columns 1-27: everything before the coordinates
    first_half = (
        record.ljust(6)
        + pdb_atom_id.rjust(5)
        + spaces
        + names.ljust(4)
        + spaces
        + res_names.rjust(3)
        + spaces
        + chain_ids
        + pdb_res_id.rjust(4)
        + ins_codes.rjust(1)
    )

    # Columns 55-80: everything after the coordinates
    second_half = (
        occupancy + b_factor + 10 * spaces + elements.rjust(2) + charge.rjust(2)
    )

    coords = array.coord
    if coords.ndim == 2:
        coords = coords[np.newaxis, ...]

    self.lines = []
    # Prepend a single CRYST1 record if we have box information
    if array.box is not None:
        box = array.box
        if len(box.shape) == 3:
            box = box[0]
        a, b, c, alpha, beta, gamma = unitcell_from_vectors(box)
        # Column 55 is blank, the space group occupies columns 56-66
        # and the Z value columns 67-70; the record is padded to 80
        default_space_group = "P 1"
        default_z_val = 1
        cryst1 = (
            f"CRYST1{a:>9.3f}{b:>9.3f}{c:>9.3f}"
            f"{np.rad2deg(alpha):>7.2f}{np.rad2deg(beta):>7.2f}"
            f"{np.rad2deg(gamma):>7.2f}"
            f"{'':1}{default_space_group:<11}{default_z_val:>4}"
        )
        self.lines.append(cryst1.ljust(80))
    is_stack = coords.shape[0] > 1
    # Columns 28-30 between insertion code and x-coordinate are blank
    coord_gap = " " * 3
    # The record name 'MODEL' is followed by blanks up to column 10,
    # the model serial number occupies columns 11-14
    model_prefix = "MODEL".ljust(10)
    for model_num, coord_i in enumerate(coords, start=1):
        # For a single model this loop runs exactly once;
        # MODEL/ENDMDL lines are only added for multiple models
        if is_stack:
            self.lines.append(f"{model_prefix}{model_num:4}")
        # Bundle non-coordinate data to simplify iteration
        self.lines.extend(
            [
                f"{start:27}{coord_gap}{x:>8.3f}{y:>8.3f}{z:>8.3f}{end:26}"
                for start, (x, y, z), end in zip(first_half, coord_i, second_half)
            ]
        )
        if is_stack:
            self.lines.append("ENDMDL")

    # Add CONECT records if bonds are present
    if array.bonds is not None:
        # Only non-water hetero records and connections between
        # residues are added to the records
        hetero_indices = np.where(array.hetero & ~filter_solvent(array))[0]
        bond_array = array.bonds.as_array()
        bond_array = bond_array[
            np.isin(bond_array[:, 0], hetero_indices)
            | np.isin(bond_array[:, 1], hetero_indices)
            | (array.res_id[bond_array[:, 0]] != array.res_id[bond_array[:, 1]])
            | (array.chain_id[bond_array[:, 0]] != array.chain_id[bond_array[:, 1]])
        ]
        self._set_bonds(BondList(array.array_length(), bond_array), pdb_atom_id)

    # Refresh the cached model/atom line indices for the new content
    self._index_models_and_atoms()
737
+
738
def set_space_group(self, info):
    """
    Update the CRYST1 record with the provided space group and Z value.

    Only the first ``CRYST1`` record is changed.
    If the file has no such record, nothing happens.

    Parameters
    ----------
    info : tuple(str, int) or SpaceGroupInfo
        Contains the space group and Z-value.
        Either an object with the attributes ``space_group`` and
        ``z_val`` or a plain 2-element sequence is accepted.

    Raises
    ------
    AttributeError
        If `info` cannot be interpreted as space group and Z value.
    """
    for i, line in enumerate(self.lines):
        if not line.startswith("CRYST1"):
            continue
        try:
            try:
                space_group, z_val = info.space_group, info.z_val
            except AttributeError:
                # Plain tuples have no named fields -> unpack instead
                space_group, z_val = info
            # Format the replacement string
            space_group_str = space_group.ljust(11)
            z_val_str = str(z_val).rjust(4)
        except (TypeError, ValueError, AttributeError) as e:
            # Raise an exception with context
            raise AttributeError(
                f"Failed to update CRYST1 record. "
                f"Line: {line.strip()} | Error: {e}"
            ) from e
        # The space group occupies columns 56-66 and the Z value
        # columns 67-70; pad short records so both land in place
        self.lines[i] = (
            line[:55].ljust(55) + space_group_str + z_val_str + line[70:]
        )
        break
763
+
764
def list_assemblies(self):
    """
    List the biological assemblies that are available for the
    structure in the given file.

    The IDs are read from the first line of the ``REMARK 300``
    records, hence this remark must be present in the file.

    Returns
    -------
    assemblies : list of str
        A list that contains the available assembly IDs.

    Raises
    ------
    InvalidFileError
        If the file contains no ``REMARK 300``.

    Examples
    --------
    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1f2n.pdb"))
    >>> print(file.list_assemblies())
    ['1']
    """
    remark_300 = self.get_remark(300)
    if remark_300 is None:
        raise InvalidFileError(
            "File does not contain assembly information (REMARK 300)"
        )
    # The comma-separated IDs follow the 12 leading characters
    # of the first remark line
    id_field = remark_300[0][12:]
    assemblies = []
    for token in id_field.split(","):
        assemblies.append(token.strip())
    return assemblies
792
+
793
def get_assembly(
    self,
    assembly_id=None,
    model=None,
    altloc="first",
    extra_fields=None,
    include_bonds=False,
):
    """
    Build the given biological assembly.

    This function receives the data from ``REMARK 350`` records in
    the file.
    Consequently, this remark must be present in the file.

    Parameters
    ----------
    assembly_id : str, optional
        The assembly to build.
        Available assembly IDs can be obtained via
        :func:`list_assemblies()`.
        If omitted, the first assembly in the file is built.
    model : int, optional
        If this parameter is given, the function will return an
        :class:`AtomArray` from the atoms corresponding to the given
        model number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, an :class:`AtomArrayStack`
        containing all models will be returned, even if the
        structure contains only one model.
    altloc : {'first', 'occupancy', 'all'}
        This parameter defines how *altloc* IDs are handled:
            - ``'first'`` - Use atoms that have the first
              *altloc* ID appearing in a residue.
            - ``'occupancy'`` - Use atoms that have the *altloc* ID
              with the highest occupancy for a residue.
            - ``'all'`` - Use all atoms.
              Note that this leads to duplicate atoms.
              When this option is chosen, the ``altloc_id``
              annotation array is added to the returned structure.
    extra_fields : list of str, optional
        The strings in the list are optional annotation categories
        that should be stored in the output array or stack.
        These are valid values:
        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
        ``'charge'``.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` will be created for the
        resulting :class:`AtomArray` containing the bond information
        from the file.
        Bonds, whose order could not be determined from the
        *Chemical Component Dictionary*
        (e.g. especially inter-residue bonds),
        have :attr:`BondType.ANY`, since the PDB format itself does
        not support bond orders.

    Returns
    -------
    assembly : AtomArray or AtomArrayStack
        The assembly.
        The return type depends on the `model` parameter.
        Contains the `sym_id` annotation, which enumerates the copies of the
        asymmetric unit in the assembly.

    Raises
    ------
    InvalidFileError
        If ``REMARK 350`` is missing, contains no assembly or the
        chosen assembly has no ``BIOMT`` records.
    KeyError
        If the given `assembly_id` is not found.

    Examples
    --------

    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1f2n.pdb"))
    >>> assembly = file.get_assembly(model=1)
    """
    # Get base structure
    # ('None' instead of a shared mutable list as default;
    # 'get_structure()' treats 'None' as 'no extra fields')
    structure = self.get_structure(
        model,
        altloc,
        extra_fields,
        include_bonds,
    )

    # Get lines containing transformations for chosen assembly
    remark_lines = self.get_remark(350)
    if remark_lines is None:
        raise InvalidFileError(
            "File does not contain assembly information (REMARK 350)"
        )
    # Get lines corresponding to selected assembly ID
    assembly_start_i = None
    assembly_stop_i = None
    for i, line in enumerate(remark_lines):
        if line.startswith("BIOMOLECULE"):
            current_assembly_id = line[12:].strip()
            if assembly_start_i is not None:
                # Start was already found -> this is the next entry
                # -> this is the stop
                assembly_stop_i = i
                break
            if current_assembly_id == assembly_id or assembly_id is None:
                assembly_start_i = i
    # In case of the final assembly of the file,
    # the 'stop' is the end of REMARK 350 lines
    if assembly_stop_i is None:
        assembly_stop_i = len(remark_lines)
    if assembly_start_i is None:
        if assembly_id is None:
            raise InvalidFileError(
                "File does not contain transformation expressions for assemblies"
            )
        else:
            raise KeyError(f"The assembly ID '{assembly_id}' is not found")
    assembly_lines = remark_lines[assembly_start_i:assembly_stop_i]

    # Get transformations for a set of chains
    chain_set_start_indices = [
        i
        for i, line in enumerate(assembly_lines)
        if line.startswith("APPLY THE FOLLOWING TO CHAINS")
    ]
    # Add exclusive stop at end of records
    chain_set_start_indices.append(len(assembly_lines))
    # Continuation lines ('AND CHAINS:') are indented, so the prefixes
    # are compared against the whitespace-stripped line
    chain_signal_strings = ("APPLY THE FOLLOWING TO CHAINS:", "AND CHAINS:")
    assembly = None
    for i in range(len(chain_set_start_indices) - 1):
        start = chain_set_start_indices[i]
        stop = chain_set_start_indices[i + 1]
        # Read affected chain IDs from the following line(s)
        affected_chain_ids = []
        transform_start = None
        for j, line in enumerate(assembly_lines[start:stop]):
            stripped_line = line.strip()
            if stripped_line.startswith(chain_signal_strings):
                # A wrapped chain list ends with a trailing comma,
                # which would otherwise produce an empty chain ID
                chain_field = stripped_line.split(":", 1)[1]
                affected_chain_ids += [
                    chain_id.strip()
                    for chain_id in chain_field.split(",")
                    if chain_id.strip()
                ]
            else:
                # Chain specification has finished
                # BIOMT lines start directly after chain specification
                transform_start = start + j
                break
        # Parse transformations from BIOMT lines
        if transform_start is None:
            raise InvalidFileError("No 'BIOMT' records found for chosen assembly")
        rotations, translations = _parse_transformations(
            assembly_lines[transform_start:stop]
        )
        # Filter affected chains
        sub_structure = structure[
            ..., np.isin(structure.chain_id, affected_chain_ids)
        ]
        sub_assembly = _apply_transformations(
            sub_structure, rotations, translations
        )
        # Merge the chains with IDs for this transformation
        # with chains from other transformations
        if assembly is None:
            assembly = sub_assembly
        else:
            assembly += sub_assembly

    return assembly
955
+
956
def get_symmetry_mates(
    self, model=None, altloc="first", extra_fields=None, include_bonds=False
):
    """
    Build a structure model containing all symmetric copies
    of the structure within a single unit cell, given by the space
    group.

    This function receives the data from ``REMARK 290`` records in
    the file.
    Consequently, this remark must be present in the file, which is
    usually only true for crystal structures.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, the function will return an
        :class:`AtomArray` from the atoms corresponding to the given
        model number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, an :class:`AtomArrayStack`
        containing all models will be returned, even if the
        structure contains only one model.
    altloc : {'first', 'occupancy', 'all'}
        This parameter defines how *altloc* IDs are handled:
            - ``'first'`` - Use atoms that have the first
              *altloc* ID appearing in a residue.
            - ``'occupancy'`` - Use atoms that have the *altloc* ID
              with the highest occupancy for a residue.
            - ``'all'`` - Use all atoms.
              Note that this leads to duplicate atoms.
              When this option is chosen, the ``altloc_id``
              annotation array is added to the returned structure.
    extra_fields : list of str, optional
        The strings in the list are optional annotation categories
        that should be stored in the output array or stack.
        These are valid values:
        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
        ``'charge'``.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` will be created for the
        resulting :class:`AtomArray` containing the bond information
        from the file.
        Bonds, whose order could not be determined from the
        *Chemical Component Dictionary*
        (e.g. especially inter-residue bonds),
        have :attr:`BondType.ANY`, since the PDB format itself does
        not support bond orders.

    Returns
    -------
    symmetry_mates : AtomArray or AtomArrayStack
        All atoms within a single unit cell.
        The return type depends on the `model` parameter.

    Raises
    ------
    InvalidFileError
        If the file contains no ``REMARK 290``.

    Notes
    -----
    To expand the structure beyond a single unit cell, use
    :func:`repeat_box()` with the return value as its
    input.

    Examples
    --------

    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
    >>> atoms_in_unit_cell = file.get_symmetry_mates(model=1)
    """
    # Get base structure
    # ('None' instead of a shared mutable list as default;
    # 'get_structure()' treats 'None' as 'no extra fields')
    structure = self.get_structure(
        model,
        altloc,
        extra_fields,
        include_bonds,
    )
    # Get lines containing transformations for crystallographic symmetry
    remark_lines = self.get_remark(290)
    if remark_lines is None:
        raise InvalidFileError(
            "File does not contain crystallographic symmetry "
            "information (REMARK 290)"
        )
    # The 'SMTRY' records are indented within the remark,
    # so match them independent of the amount of leading whitespace
    transform_lines = [
        line for line in remark_lines if line.lstrip().startswith("SMTRY")
    ]
    rotations, translations = _parse_transformations(transform_lines)
    return _apply_transformations(structure, rotations, translations)
1042
+
1043
+ def _index_models_and_atoms(self):
1044
+ # Line indices where a new model starts
1045
+ self._model_start_i = np.array(
1046
+ [i for i in range(len(self.lines)) if self.lines[i].startswith(("MODEL"))],
1047
+ dtype=int,
1048
+ )
1049
+ if len(self._model_start_i) == 0:
1050
+ # It could be an empty file or a file with a single model,
1051
+ # where the 'MODEL' line is missing
1052
+ for line in self.lines:
1053
+ if line.startswith(("ATOM", "HETATM")):
1054
+ # Single model
1055
+ self._model_start_i = np.array([0])
1056
+ break
1057
+
1058
+ # Line indices with ATOM or HETATM records
1059
+ self._atom_line_i = np.array(
1060
+ [
1061
+ i
1062
+ for i in range(len(self.lines))
1063
+ if self.lines[i].startswith(("ATOM", "HETATM"))
1064
+ ],
1065
+ dtype=int,
1066
+ )
1067
+
1068
+ def _get_atom_record_indices_for_model(self, model):
1069
+ last_model = len(self._model_start_i)
1070
+ if model == 0:
1071
+ raise ValueError("The model index must not be 0")
1072
+ # Negative models mean index starting from last model
1073
+ model = last_model + model + 1 if model < 0 else model
1074
+
1075
+ if model < last_model:
1076
+ line_filter = (self._atom_line_i >= self._model_start_i[model - 1]) & (
1077
+ self._atom_line_i < self._model_start_i[model]
1078
+ )
1079
+ elif model == last_model:
1080
+ line_filter = self._atom_line_i >= self._model_start_i[model - 1]
1081
+ else:
1082
+ raise ValueError(
1083
+ f"The file has {last_model} models, "
1084
+ f"the given model {model} does not exist"
1085
+ )
1086
+ return self._atom_line_i[line_filter]
1087
+
1088
+ def _get_model_length(self):
1089
+ """
1090
+ Determine length of models and check that all models
1091
+ have equal length.
1092
+ """
1093
+ n_models = len(self._model_start_i)
1094
+ length = None
1095
+ for model_i in range(len(self._model_start_i)):
1096
+ model_start = self._model_start_i[model_i]
1097
+ model_stop = (
1098
+ self._model_start_i[model_i + 1]
1099
+ if model_i + 1 < n_models
1100
+ else len(self.lines)
1101
+ )
1102
+ model_length = np.count_nonzero(
1103
+ (self._atom_line_i >= model_start) & (self._atom_line_i < model_stop)
1104
+ )
1105
+ if length is None:
1106
+ length = model_length
1107
+ if model_length != length:
1108
+ raise InvalidFileError(
1109
+ f"Model {model_i+1} has {model_length} atoms, "
1110
+ f"but model 1 has {length} atoms, must be equal"
1111
+ )
1112
+ return length
1113
+
1114
def _get_bonds(self, atom_ids):
    """
    Create a :class:`BondList` from the ``CONECT`` records.

    Parameters
    ----------
    atom_ids : ndarray, dtype=int
        The atom ID of each atom in the structure the bonds are
        created for.

    Returns
    -------
    bonds : BondList
        The bonds between the atoms given by `atom_ids`.
        ``CONECT`` entries that refer to atom IDs missing in
        `atom_ids` are ignored.

    Raises
    ------
    InvalidFileError
        If an atom ID is larger than the last atom ID.
    """
    n_atoms = len(atom_ids)
    if n_atoms == 0:
        # No atoms -> no bonds; also avoids indexing an empty array
        return BondList(0)

    conect_lines = [line for line in self.lines if line.startswith("CONECT")]

    # Mapping from atom ids to indices in an AtomArray
    # -1 marks atom IDs that are not part of the structure: with a
    # zero-filled table such IDs would silently point to the first atom
    atom_id_to_index = np.full(atom_ids[-1] + 1, -1, dtype=int)
    try:
        for i, atom_id in enumerate(atom_ids):
            atom_id_to_index[atom_id] = i
    except IndexError as e:
        raise InvalidFileError("Atom IDs are not strictly increasing") from e

    def index_of(id_string):
        # 'decode_hybrid36()' errors propagate to the caller;
        # IDs beyond the mapping belong to no atom in the structure
        try:
            return atom_id_to_index[decode_hybrid36(id_string)]
        except IndexError:
            return -1

    bonds = []
    for line in conect_lines:
        center_index = index_of(line[6:11])
        if center_index == -1:
            # The central atom is not part of the structure
            continue
        for i in range(11, 31, 5):
            try:
                partner_index = index_of(line[i : i + 5])
            except ValueError:
                # String is empty -> no further IDs
                break
            if partner_index == -1:
                # The bond partner is not part of the structure
                continue
            bonds.append((center_index, partner_index))

    # The length of the 'atom_ids' array
    # is equal to the length of the AtomArray
    return BondList(n_atoms, np.array(bonds, dtype=np.uint32))
1140
+
1141
+ def _set_bonds(self, bond_list, atom_ids):
1142
+ # Bond type is unused since PDB does not support bond orders
1143
+ bonds, _ = bond_list.get_all_bonds()
1144
+
1145
+ for center_i, bonded_indices in enumerate(bonds):
1146
+ n_added = 0
1147
+ for bonded_i in bonded_indices:
1148
+ if bonded_i == -1:
1149
+ # Reached padding values
1150
+ break
1151
+ if n_added == 0:
1152
+ # Add new record
1153
+ line = f"CONECT{atom_ids[center_i]:>5}"
1154
+ line += f"{atom_ids[bonded_i]:>5}"
1155
+ n_added += 1
1156
+ if n_added == 4:
1157
+ # Only a maximum of 4 bond partners can be put
1158
+ # into a single line
1159
+ # If there are more, use an extra record
1160
+ n_added = 0
1161
+ self.lines.append(line)
1162
+ if n_added > 0:
1163
+ self.lines.append(line)
1164
+
1165
+
1166
+ def _parse_transformations(lines):
1167
+ """
1168
+ Parse the rotation and translation transformations from
1169
+ *REMARK* 290 or 350.
1170
+ Return as array of matrices and vectors respectively
1171
+ """
1172
+ # Each transformation requires 3 lines for the (x,y,z) components
1173
+ if len(lines) % 3 != 0:
1174
+ raise InvalidFileError("Invalid number of transformation vectors")
1175
+ n_transformations = len(lines) // 3
1176
+
1177
+ rotations = np.zeros((n_transformations, 3, 3), dtype=float)
1178
+ translations = np.zeros((n_transformations, 3), dtype=float)
1179
+
1180
+ transformation_i = 0
1181
+ component_i = 0
1182
+ for line in lines:
1183
+ # The first two elements (component and
1184
+ # transformation index) are not used
1185
+ transformations = [float(e) for e in line.split()[2:]]
1186
+ if len(transformations) != 4:
1187
+ raise InvalidFileError("Invalid number of transformation vector elements")
1188
+ rotations[transformation_i, component_i, :] = transformations[:3]
1189
+ translations[transformation_i, component_i] = transformations[3]
1190
+
1191
+ component_i += 1
1192
+ if component_i == 3:
1193
+ # All (x,y,z) components were parsed
1194
+ # -> head to the next transformation
1195
+ transformation_i += 1
1196
+ component_i = 0
1197
+
1198
+ return rotations, translations
1199
+
1200
+
1201
def _apply_transformations(structure, rotations, translations):
    """
    Create one transformed copy of `structure` for each pair of
    rotation matrix and translation vector and combine the copies
    into a single structure with a ``sym_id`` annotation that
    numbers the copies.
    """
    n_copies = len(rotations)
    # One coordinate set per copy, as expected by 'repeat()'
    copy_coord = np.zeros((n_copies,) + structure.coord.shape)

    for copy_i, (rotation, translation) in enumerate(zip(rotations, translations)):
        # Rotation comes first, the translation is added afterwards
        transformed = matrix_rotate(structure.coord, rotation)
        transformed += translation
        copy_coord[copy_i] = transformed

    assembly = repeat(structure, copy_coord)
    # All atoms of the same copy share the same 'sym_id'
    sym_id = np.repeat(np.arange(n_copies), structure.array_length())
    assembly.set_annotation("sym_id", sym_id)
    return assembly
1223
+
1224
+
1225
def _check_pdb_compatibility(array, hybrid36):
    """
    Check whether the given atom array (stack) fits into the
    fixed-width columns of a PDB file.

    Atom and residue IDs above the supported maximum only cause a
    warning; every other violation raises a
    :class:`BadStructureError`.
    """
    annot_categories = array.get_annotation_categories()

    if hybrid36:
        max_atoms = max_hybrid36_number(5)
        max_residues = max_hybrid36_number(4)
    else:
        max_atoms = _PDB_MAX_ATOMS
        max_residues = _PDB_MAX_RESIDUES

    # Without explicit atom IDs the atoms are numbered continuously
    max_atom_id = (
        np.max(array.atom_id)
        if "atom_id" in annot_categories
        else array.array_length()
    )

    if max_atom_id > max_atoms:
        warnings.warn(f"Atom IDs exceed {max_atoms:,}, will be wrapped")
    if (array.res_id > max_residues).any():
        warnings.warn(f"Residue IDs exceed {max_residues:,}, will be wrapped")
    if np.isnan(array.coord).any():
        raise BadStructureError("Coordinates contain 'NaN' values")

    # Text annotations must not be longer than their columns
    if any(len(chain_id) > 1 for chain_id in array.chain_id):
        raise BadStructureError("Some chain IDs exceed 1 character")
    if any(len(res_name) > 3 for res_name in array.res_name):
        raise BadStructureError("Some residue names exceed 3 characters")
    if any(len(atom_name) > 4 for atom_name in array.atom_name):
        raise BadStructureError("Some atom names exceed 4 characters")

    # Numeric values must not need more pre-decimal columns
    # than the format offers
    for dim, coord_name in enumerate(["x", "y", "z"]):
        n_coord_digits = _number_of_integer_digits(array.coord[..., dim])
        if n_coord_digits > 4:
            raise BadStructureError(
                f"4 pre-decimal columns for {coord_name}-coordinates are "
                f"available, but array would require {n_coord_digits}"
            )
    for category, label in (("b_factor", "B-factor"), ("occupancy", "occupancy")):
        if category not in annot_categories:
            continue
        n_digits = _number_of_integer_digits(getattr(array, category))
        if n_digits > 3:
            raise BadStructureError(
                f"3 pre-decimal columns for {label} are available, "
                f"but array would require {n_digits}"
            )
    if "charge" in annot_categories:
        # The sign can be omitted as it is put into the adjacent column
        n_charge_digits = _number_of_integer_digits(np.abs(array.charge))
        if n_charge_digits > 1:
            raise BadStructureError(
                "1 column for charge is available, "
                f"but array would require {n_charge_digits}"
            )
1279
+
1280
+
1281
+ def _number_of_integer_digits(values):
1282
+ """
1283
+ Get the maximum number of characters needed to represent the
1284
+ pre-decimal positions of the given numeric values.
1285
+ """
1286
+ values = values.astype(int, copy=False)
1287
+ n_digits = 0
1288
+ n_digits = max(n_digits, len(str(np.min(values))))
1289
+ n_digits = max(n_digits, len(str(np.max(values))))
1290
+ return n_digits