biotite 1.3.0__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +191 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +160 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1226 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  266. biotite/structure/bonds.pyx +1975 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +276 -0
  271. biotite/structure/charges.cpython-312-darwin.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +681 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +590 -0
  278. biotite/structure/geometry.py +655 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +90 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +388 -0
  311. biotite/structure/io/pdb/file.py +1356 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +671 -0
  319. biotite/structure/io/pdbx/cif.py +1088 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +358 -0
  322. biotite/structure/io/pdbx/convert.py +2097 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1047 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +544 -0
  337. biotite/structure/rings.py +335 -0
  338. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +292 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +168 -0
  349. biotite/version.py +21 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.3.0.dist-info/METADATA +162 -0
  352. biotite-1.3.0.dist-info/RECORD +354 -0
  353. biotite-1.3.0.dist-info/WHEEL +6 -0
  354. biotite-1.3.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,1356 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.pdb"
6
+ __author__ = "Patrick Kunzmann, Daniel Bauer, Claude J. Rogers"
7
+ __all__ = ["PDBFile"]
8
+
9
+ import warnings
10
+ from collections import namedtuple
11
+ import numpy as np
12
+ from biotite.file import InvalidFileError, TextFile
13
+ from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
14
+ from biotite.structure.bonds import BondList, connect_via_residue_names
15
+ from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
16
+ from biotite.structure.error import BadStructureError
17
+ from biotite.structure.filter import (
18
+ filter_first_altloc,
19
+ filter_highest_occupancy_altloc,
20
+ filter_solvent,
21
+ )
22
+ from biotite.structure.io.pdb.hybrid36 import (
23
+ decode_hybrid36,
24
+ encode_hybrid36,
25
+ max_hybrid36_number,
26
+ )
27
+ from biotite.structure.io.util import number_of_integer_digits
28
+ from biotite.structure.repair import infer_elements
29
+ from biotite.structure.util import matrix_rotate
30
+
31
+ # Largest decimal numbers that fit into the fixed-width ID fields defined
+ # below: the atom ID field spans 5 columns, the residue ID field 4 columns.
+ # NOTE(review): presumably the thresholds above which hybrid-36 encoding
+ # is required - confirm at the usage sites.
+ _PDB_MAX_ATOMS = 99999
32
+ _PDB_MAX_RESIDUES = 9999
33
+
34
+ # Slice objects for readability:
+ # each one selects the columns (0-based, end-exclusive) of one field
+ # within a record line
35
+ # Fields of ATOM/HETATM records
36
+ _record = slice(0, 6)
37
+ _atom_id = slice(6, 11)
38
+ _atom_name = slice(12, 16)
39
+ _alt_loc = slice(16, 17)
40
+ _res_name = slice(17, 20)
41
+ _chain_id = slice(21, 22)
42
+ _res_id = slice(22, 26)
43
+ _ins_code = slice(26, 27)
44
+ _coord_x = slice(30, 38)
45
+ _coord_y = slice(38, 46)
46
+ _coord_z = slice(46, 54)
47
+ _occupancy = slice(54, 60)
48
+ _temp_f = slice(60, 66)
49
+ _element = slice(76, 78)
50
+ _charge = slice(78, 80)
51
+ # Fields of CRYST1 records
52
+ _a = slice(6, 15)
53
+ _b = slice(15, 24)
54
+ _c = slice(24, 33)
55
+ _alpha = slice(33, 40)
56
+ _beta = slice(40, 47)
57
+ _gamma = slice(47, 54)
58
+ _space = slice(55, 66)
59
+ _z = slice(66, 70)
60
+
61
+
62
+ class PDBFile(TextFile):
63
+ r"""
64
+ This class represents a PDB file.
65
+
66
+ The usage of :mod:`biotite.structure.io.pdbx` is encouraged in favor
67
+ of this class.
68
+
69
+ This class only provides support for reading/writing the pure atom
70
+ information (*ATOM*, *HETATM*, *MODEL* and *ENDMDL* records). *TER*
71
+ records cannot be written.
72
+ Additionally, *REMARK* records can be read
73
+
74
+ See Also
75
+ --------
76
+ CIFFile : Interface to CIF files, a modern replacement for PDB files.
77
+ BinaryCIFFile : Interface to BinaryCIF files, a binary variant of CIF files.
78
+
79
+ Examples
80
+ --------
81
+ Load a `\\*.pdb` file, modify the structure and save the new
82
+ structure into a new file:
83
+
84
+ >>> import os.path
85
+ >>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb"))
86
+ >>> array_stack = file.get_structure()
87
+ >>> array_stack_mod = rotate(array_stack, [1,2,3])
88
+ >>> file = PDBFile()
89
+ >>> file.set_structure(array_stack_mod)
90
+ >>> file.write(os.path.join(path_to_directory, "1l2y_mod.pdb"))
91
+ """
92
+
93
@classmethod
def read(cls, file):
    """
    Read a PDB file and prepare it for fixed-column record access.

    Parameters
    ----------
    file
        Forwarded unchanged to the ``read()`` method of the parent
        class.

    Returns
    -------
    pdb_file
        The object created by the parent class, with every line
        right-padded to 80 characters and
        ``_index_models_and_atoms()`` already called on it.
    """
    line_width = 80
    pdb_file = super().read(file)
    # Lines may be shorter than the full record width:
    # pad them with whitespace, so that the column slices always
    # address existing characters
    pdb_file.lines = [
        raw_line.ljust(line_width) for raw_line in pdb_file.lines
    ]
    pdb_file._index_models_and_atoms()
    return pdb_file
101
+
102
def get_remark(self, number):
    r"""
    Collect the content of all *REMARK* records that carry the given
    `number`.

    Parameters
    ----------
    number : int
        The *REMARK* number, i.e. the `XXX` in ``REMARK XXX``.
        The value is converted with ``int()`` before it is used.

    Returns
    -------
    remark_lines : None or list of str
        One element per selected *REMARK* line, in file order.
        The leading ``REMARK XXX `` part of each line is cut off.
        The first selected line, which always must be empty, is
        dropped.
        ``None`` is returned, if the file contains no *REMARK* record
        with the given number.

    Raises
    ------
    ValueError
        If `number` is not in the range 0-999.

    Examples
    --------

    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb"))
    >>> remarks = file.get_remark(900)
    >>> print("\n".join(remarks))
    RELATED ENTRIES
    RELATED ID: 5292 RELATED DB: BMRB
    BMRB 5292 IS CHEMICAL SHIFTS FOR TC5B IN BUFFER AND BUFFER
    CONTAINING 30 VOL-% TFE.
    RELATED ID: 1JRJ RELATED DB: PDB
    1JRJ IS AN ANALAGOUS C-TERMINAL STRUCTURE.
    >>> nonexistent_remark = file.get_remark(999)
    >>> print(nonexistent_remark)
    None
    """
    # Tolerate a non-integer that was accidentally given
    number = int(number)
    if not 0 <= number <= 999:
        raise ValueError("The number must be in range 0-999")

    # The record name followed by the right-aligned number;
    # the actual content begins at column index 11
    prefix = f"REMARK {number:>3d}"
    content_start = 11
    matching_content = [
        line[content_start:] for line in self.lines if line.startswith(prefix)
    ]
    # The first matching line is the mandatory empty one -> skip it
    return matching_content[1:] if matching_content else None
159
+
160
def get_model_count(self):
    """
    Count the models of this PDB file.

    Returns
    -------
    model_count : int
        The number of models, i.e. the number of entries in the
        model start index of this file.
    """
    model_starts = self._model_start_i
    return len(model_starts)
170
+
171
def get_coord(self, model=None):
    """
    Read solely the atom coordinates from the PDB file.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, a 2D coordinate array is
        returned, that contains the atoms of the model with the given
        number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, a 3D coordinate array
        containing all models is returned, even if the structure
        contains only one model.

    Returns
    -------
    coord : ndarray, shape=(m,n,3) or shape=(n,3), dtype=float32
        The coordinates read from the ATOM and HETATM records of the
        file.

    Notes
    -----
    Note that :func:`get_coord()` may output more coordinates than
    the atom array (stack) from the corresponding
    :func:`get_structure()` call has.
    The reason for this is, that :func:`get_structure()` filters
    *altloc* IDs, while `get_coord()` does not.

    Examples
    --------
    Read an :class:`AtomArrayStack` from multiple PDB files, where
    each PDB file contains the same atoms but different positions.
    This is an efficient approach when a trajectory is spread into
    multiple PDB files, as done e.g. by the *Rosetta* modeling
    software.

    For the purpose of this example, the PDB files are created from
    an existing :class:`AtomArrayStack`.

    >>> import os.path
    >>> from tempfile import gettempdir
    >>> file_names = []
    >>> for i in range(atom_array_stack.stack_depth()):
    ...     pdb_file = PDBFile()
    ...     pdb_file.set_structure(atom_array_stack[i])
    ...     file_name = os.path.join(gettempdir(), f"model_{i+1}.pdb")
    ...     pdb_file.write(file_name)
    ...     file_names.append(file_name)
    >>> print(file_names)
    ['...model_1.pdb', '...model_2.pdb', ..., '...model_38.pdb']

    Now the PDB files are used to create an :class:`AtomArrayStack`,
    where each model stems from a different file.

    Construct a new :class:`AtomArrayStack` with annotations taken
    from one of the created files used as template and coordinates
    from all of the PDB files.

    >>> template_file = PDBFile.read(file_names[0])
    >>> template = template_file.get_structure()
    >>> coord = []
    >>> for i, file_name in enumerate(file_names):
    ...     pdb_file = PDBFile.read(file_name)
    ...     coord.append(pdb_file.get_coord(model=1))
    >>> new_stack = from_template(template, np.array(coord))

    The newly created :class:`AtomArrayStack` should now be equal to
    the :class:`AtomArrayStack` the PDB files were created from.

    >>> print(np.allclose(new_stack.coord, atom_array_stack.coord))
    True
    """
    # Columns of the x, y and z coordinate, in the order of the last
    # dimension of the returned array
    columns = (_coord_x, _coord_y, _coord_z)

    if model is not None:
        # Single model -> 2D array
        line_indices = self._get_atom_record_indices_for_model(model)
        coord = np.zeros((len(line_indices), 3), dtype=np.float32)
        for atom_i, line_i in enumerate(line_indices):
            line = self.lines[line_i]
            for axis, column in enumerate(columns):
                coord[atom_i, axis] = float(line[column])
        return coord

    # All models -> 3D array
    model_starts = self._model_start_i
    coord = np.zeros(
        (len(model_starts), self._get_model_length(), 3),
        dtype=np.float32,
    )
    last_model_i = len(model_starts) - 1
    model_i = 0
    atom_i = 0
    for line_i in self._atom_line_i:
        # An atom record behind the start line of the next model
        # belongs to that next model -> restart the atom counter
        if model_i < last_model_i and line_i > model_starts[model_i + 1]:
            model_i += 1
            atom_i = 0
        line = self.lines[line_i]
        for axis, column in enumerate(columns):
            coord[model_i, atom_i, axis] = float(line[column])
        atom_i += 1
    return coord
275
+
276
def get_b_factor(self, model=None):
    """
    Read solely the B-factors from the PDB file.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, a 1D B-factor array is returned,
        that contains the atoms of the model with the given number
        (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, a 2D B-factor array
        containing all models is returned, even if the structure
        contains only one model.

    Returns
    -------
    b_factor : ndarray, shape=(m,n) or shape=(n,), dtype=float32
        The B-factors read from the ATOM and HETATM records of the
        file.

    Notes
    -----
    Note that :func:`get_b_factor()` may output more B-factors
    than the atom array (stack) from the corresponding
    :func:`get_structure()` call has atoms.
    The reason for this is, that :func:`get_structure()` filters
    *altloc* IDs, while `get_b_factor()` does not.
    """
    if model is not None:
        # Single model -> 1D array
        line_indices = self._get_atom_record_indices_for_model(model)
        b_factor = np.zeros(len(line_indices), dtype=np.float32)
        for atom_i, line_i in enumerate(line_indices):
            line = self.lines[line_i]
            b_factor[atom_i] = float(line[_temp_f])
        return b_factor

    # All models -> 2D array
    model_starts = self._model_start_i
    b_factor = np.zeros(
        (len(model_starts), self._get_model_length()), dtype=np.float32
    )
    last_model_i = len(model_starts) - 1
    model_i = 0
    atom_i = 0
    for line_i in self._atom_line_i:
        # An atom record behind the start line of the next model
        # belongs to that next model -> restart the atom counter
        if model_i < last_model_i and line_i > model_starts[model_i + 1]:
            model_i += 1
            atom_i = 0
        line = self.lines[line_i]
        b_factor[model_i, atom_i] = float(line[_temp_f])
        atom_i += 1
    return b_factor
331
+
332
def get_structure(
    self, model=None, altloc="first", extra_fields=[], include_bonds=False
):
    """
    Get an :class:`AtomArray` or :class:`AtomArrayStack` from the PDB file.

    This function parses standard base-10 PDB files as well as
    hybrid-36 PDB.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, the function will return an
        :class:`AtomArray` from the atoms corresponding to the given
        model number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, an :class:`AtomArrayStack`
        containing all models will be returned, even if the
        structure contains only one model.
    altloc : {'first', 'occupancy', 'all'}
        This parameter defines how *altloc* IDs are handled:
            - ``'first'`` - Use atoms that have the first
              *altloc* ID appearing in a residue.
            - ``'occupancy'`` - Use atoms that have the *altloc* ID
              with the highest occupancy for a residue.
            - ``'all'`` - Use all atoms.
              Note that this leads to duplicate atoms.
              When this option is chosen, the ``altloc_id``
              annotation array is added to the returned structure.
    extra_fields : list of str, optional
        The strings in the list are optional annotation categories
        that should be stored in the output array or stack.
        These are valid values:
        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
        ``'charge'``.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` will be created for the
        resulting :class:`AtomArray` containing the bond information
        from the file.
        Bonds, whose order could not be determined from the
        *Chemical Component Dictionary*
        (e.g. especially inter-residue bonds),
        have :attr:`BondType.ANY`, since the PDB format itself does
        not support bond orders.

    Returns
    -------
    array : AtomArray or AtomArrayStack
        The return type depends on the `model` parameter.

    Raises
    ------
    ValueError
        If `extra_fields` contains an unknown field name or if
        `altloc` is not one of the supported options.
    """
    if model is None:
        depth = len(self._model_start_i)
        length = self._get_model_length()
        array = AtomArrayStack(depth, length)
        # Annotations are read from the records of model 1 only,
        # coordinates are read from the atom records of all models
        annot_i = self._get_atom_record_indices_for_model(1)
        coord_i = self._atom_line_i
    else:
        annot_i = coord_i = self._get_atom_record_indices_for_model(model)
        array = AtomArray(len(coord_i))

    # Create mandatory and optional annotation arrays
    n_atoms = array.array_length()
    chain_id = np.zeros(n_atoms, array.chain_id.dtype)
    res_id = np.zeros(n_atoms, array.res_id.dtype)
    ins_code = np.zeros(n_atoms, array.ins_code.dtype)
    res_name = np.zeros(n_atoms, array.res_name.dtype)
    hetero = np.zeros(n_atoms, array.hetero.dtype)
    atom_name = np.zeros(n_atoms, array.atom_name.dtype)
    element = np.zeros(n_atoms, array.element.dtype)
    atom_id_raw = np.zeros(n_atoms, "U5")
    charge_raw = np.zeros(n_atoms, "U2")
    occupancy = np.zeros(n_atoms, float)
    b_factor = np.zeros(n_atoms, float)
    altloc_id = np.zeros(n_atoms, dtype="U1")

    # Fill annotation arrays
    # i is index in array, line_i is line index
    for i, line_i in enumerate(annot_i):
        line = self.lines[line_i]
        chain_id[i] = line[_chain_id].strip()
        res_id[i] = decode_hybrid36(line[_res_id])
        ins_code[i] = line[_ins_code].strip()
        res_name[i] = line[_res_name].strip()
        hetero[i] = line[_record] == "HETATM"
        atom_name[i] = line[_atom_name].strip()
        element[i] = line[_element].strip()
        altloc_id[i] = line[_alt_loc]
        atom_id_raw[i] = line[_atom_id]
        # turn "1-" into "-1", if necessary
        if line[_charge][0] in "+-":
            charge_raw[i] = line[_charge]
        else:
            charge_raw[i] = line[_charge][::-1]
        occupancy[i] = float(line[_occupancy].strip())
        b_factor[i] = float(line[_temp_f].strip())

    if include_bonds or (extra_fields is not None and "atom_id" in extra_fields):
        # The atom IDs are only required in these two cases
        atom_id = np.array(
            [decode_hybrid36(raw_id.item()) for raw_id in atom_id_raw], dtype=int
        )
    else:
        atom_id = None

    # Add annotation arrays to atom array (stack)
    array.chain_id = chain_id
    array.res_id = res_id
    array.ins_code = ins_code
    array.res_name = res_name
    array.hetero = hetero
    array.atom_name = atom_name
    array.element = element

    for field in extra_fields if extra_fields is not None else []:
        if field == "atom_id":
            # Copy is necessary to avoid double masking in
            # later altloc ID filtering
            array.set_annotation("atom_id", atom_id.copy())
        elif field == "charge":
            charge = np.array(charge_raw)
            # A blank charge column means 'uncharged'.
            # Strip before comparing, so that the detection does not
            # depend on how many blanks the column contains
            is_blank = np.char.strip(charge) == ""
            array.set_annotation(
                "charge", np.where(is_blank, "0", charge).astype(int)
            )
        elif field == "occupancy":
            array.set_annotation("occupancy", occupancy)
        elif field == "b_factor":
            array.set_annotation("b_factor", b_factor)
        else:
            raise ValueError(f"Unknown extra field: {field}")

    # Replace empty strings for elements with guessed types
    # This is used e.g. for PDB files created by Gromacs
    empty_element_mask = array.element == ""
    if empty_element_mask.any():
        warnings.warn(
            f"{np.count_nonzero(empty_element_mask)} elements "
            "were guessed from atom name"
        )
        array.element[empty_element_mask] = infer_elements(
            array.atom_name[empty_element_mask]
        )

    # Fill in coordinates
    if isinstance(array, AtomArray):
        for i, line_i in enumerate(coord_i):
            line = self.lines[line_i]
            array.coord[i, 0] = float(line[_coord_x])
            array.coord[i, 1] = float(line[_coord_y])
            array.coord[i, 2] = float(line[_coord_z])

    elif isinstance(array, AtomArrayStack):
        # m is the model index, i is the atom index within the model
        m = 0
        i = 0
        for line_i in self._atom_line_i:
            if (
                m < len(self._model_start_i) - 1
                and line_i > self._model_start_i[m + 1]
            ):
                # The record lies behind the start of the next model
                m += 1
                i = 0
            line = self.lines[line_i]
            array.coord[m, i, 0] = float(line[_coord_x])
            array.coord[m, i, 1] = float(line[_coord_y])
            array.coord[m, i, 2] = float(line[_coord_z])
            i += 1

    # Fill in box vectors
    # PDB does not support changing box dimensions. CRYST1 is a one-time
    # record so we can extract it directly
    for line in self.lines:
        if not line.startswith("CRYST1"):
            continue
        try:
            len_a = float(line[_a])
            len_b = float(line[_b])
            len_c = float(line[_c])
            alpha = np.deg2rad(float(line[_alpha]))
            beta = np.deg2rad(float(line[_beta]))
            gamma = np.deg2rad(float(line[_gamma]))
            box = vectors_from_unitcell(len_a, len_b, len_c, alpha, beta, gamma)
        except ValueError:
            # File contains invalid 'CRYST1' record
            warnings.warn("File contains invalid 'CRYST1' record, box is ignored")
            break

        if isinstance(array, AtomArray):
            array.box = box
        else:
            array.box = np.repeat(box[np.newaxis, ...], array.stack_depth(), axis=0)
        break

    # Filter altloc IDs
    if altloc == "occupancy":
        altloc_mask = filter_highest_occupancy_altloc(array, altloc_id, occupancy)
    elif altloc == "first":
        altloc_mask = filter_first_altloc(array, altloc_id)
    elif altloc == "all":
        altloc_mask = None
        array.set_annotation("altloc_id", altloc_id)
    else:
        raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
    if altloc_mask is not None:
        array = array[..., altloc_mask]
        if atom_id is not None:
            # Keep the atom IDs in sync with the filtered atoms,
            # as they are used below to resolve bonds
            atom_id = atom_id[altloc_mask]

    # Read bonds
    if include_bonds:
        bond_list = self._get_bonds(atom_id)
        bond_list = bond_list.merge(connect_via_residue_names(array))
        array.bonds = bond_list

    return array
551
+
552
def get_space_group(self):
    """
    Extract the space group and Z value from the CRYST1 record.

    Only the first ``CRYST1`` record of the file is considered.

    Returns
    -------
    space_group : str
        The extracted space group.
    z_val : int
        The extracted Z value.

    Raises
    ------
    InvalidFileError
        If the file contains no ``CRYST1`` record or if the Z value
        of the record cannot be parsed.
    """
    # Initialize the namedtuple
    SpaceGroupInfo = namedtuple("SpaceGroupInfo", ["space_group", "z_val"])

    # CRYST1 is a one-time record so we can extract it directly
    for line in self.lines:
        if not line.startswith("CRYST1"):
            continue
        try:
            return SpaceGroupInfo(
                space_group=str(line[_space]), z_val=int(line[_z])
            )
        except ValueError as e:
            # File contains invalid 'CRYST1' record
            raise InvalidFileError(
                "File does not contain valid space group and/or Z values"
            ) from e

    # Without this explicit error the function would fail on
    # unassigned local variables
    raise InvalidFileError("File does not contain a 'CRYST1' record")
583
+
584
def set_structure(self, array, hybrid36=False):
    """
    Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
    file.

    This makes also use of the optional annotation arrays
    ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
    If the atom array (stack) contains the annotation ``'atom_id'``,
    these values will be used for atom numbering instead of
    continuous numbering.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array or stack to be saved into this file. If a stack
        is given, each array in the stack is saved as separate
        model.
    hybrid36 : bool, optional
        Defines whether the file should be written in hybrid-36
        format.

    Notes
    -----
    If `array` has an associated :class:`BondList`, ``CONECT``
    records are also written for all non-water hetero residues
    and all inter-residue connections.

    All previously stored lines of the file are replaced.
    """
    _check_pdb_compatibility(array, hybrid36)

    natoms = array.array_length()
    annot_categories = array.get_annotation_categories()
    record = np.char.array(np.where(array.hetero, "HETATM", "ATOM"))
    # Check for optional annotation categories
    if "atom_id" in annot_categories:
        atom_id = array.atom_id
    else:
        atom_id = np.arange(1, natoms + 1)
    # The defaults are formatted with the same width as real values,
    # so the columns stay aligned regardless of the source of the value
    if "b_factor" in annot_categories:
        b_factor = np.char.array([f"{b:>6.2f}" for b in array.b_factor])
    else:
        b_factor = np.char.array(np.full(natoms, f"{0.0:>6.2f}", dtype="U6"))
    if "occupancy" in annot_categories:
        occupancy = np.char.array([f"{o:>6.2f}" for o in array.occupancy])
    else:
        occupancy = np.char.array(np.full(natoms, f"{1.0:>6.2f}", dtype="U6"))
    if "charge" in annot_categories:
        # PDB writes the magnitude first, followed by the sign
        charge = np.char.array(
            [
                str(np.abs(charge)) + "+"
                if charge > 0
                else (str(np.abs(charge)) + "-" if charge < 0 else "")
                for charge in array.get_annotation("charge")
            ]
        )
    else:
        charge = np.char.array(np.full(natoms, " ", dtype="U2"))

    if hybrid36:
        pdb_atom_id = np.char.array([encode_hybrid36(i, 5) for i in atom_id])
        pdb_res_id = np.char.array([encode_hybrid36(i, 4) for i in array.res_id])
    else:
        # Atom IDs are supported up to 99999,
        # but negative IDs are also possible
        pdb_atom_id = np.char.array(
            np.where(
                atom_id > 0, ((atom_id - 1) % _PDB_MAX_ATOMS) + 1, atom_id
            ).astype(str)
        )
        # Residue IDs are supported up to 9999,
        # but negative IDs are also possible
        pdb_res_id = np.char.array(
            np.where(
                array.res_id > 0,
                ((array.res_id - 1) % _PDB_MAX_RESIDUES) + 1,
                array.res_id,
            ).astype(str)
        )

    # Names of atoms with a one-letter element are shifted by one column
    names = np.char.array(
        [
            f" {atm}" if len(elem) == 1 and len(atm) < 4 else atm
            for atm, elem in zip(array.atom_name, array.element)
        ]
    )
    res_names = np.char.array(array.res_name)
    chain_ids = np.char.array(array.chain_id)
    ins_codes = np.char.array(array.ins_code)
    spaces = np.char.array(np.full(natoms, " ", dtype="U1"))
    elements = np.char.array(array.element)

    # Columns 1-27: everything in front of the coordinates
    first_half = (
        record.ljust(6)
        + pdb_atom_id.rjust(5)
        + spaces
        + names.ljust(4)
        + spaces
        + res_names.rjust(3)
        + spaces
        + chain_ids
        + pdb_res_id.rjust(4)
        + ins_codes.rjust(1)
    )

    # Columns 55-80: everything behind the coordinates
    second_half = (
        occupancy + b_factor + 10 * spaces + elements.rjust(2) + charge.rjust(2)
    )

    coords = array.coord
    if coords.ndim == 2:
        coords = coords[np.newaxis, ...]

    self.lines = []
    # Prepend a single CRYST1 record if we have box information
    if array.box is not None:
        box = array.box
        if len(box.shape) == 3:
            box = box[0]
        a, b, c, alpha, beta, gamma = unitcell_from_vectors(box)
        # Cell lengths and angles fill columns 1-54, column 55 is blank,
        # the space group uses 11 columns and the Z value 4 columns:
        # the layout 'set_space_group()' relies on when it replaces
        # the range [55:70]
        cryst1 = (
            f"CRYST1{a:>9.3f}{b:>9.3f}{c:>9.3f}"
            f"{np.rad2deg(alpha):>7.2f}{np.rad2deg(beta):>7.2f}"
            f"{np.rad2deg(gamma):>7.2f}"
            + " "
            + "P 1".ljust(11)
            + "1".rjust(4)
        )
        self.lines.append(cryst1.ljust(80))
    is_stack = coords.shape[0] > 1
    for model_num, coord_i in enumerate(coords, start=1):
        # for a single model (e.g. an AtomArray), this is run once
        # only add model lines if is_stack
        if is_stack:
            # The model serial number is right-justified in columns 11-14
            self.lines.append("MODEL".ljust(10) + f"{model_num:>4}")
        # Bundle non-coordinate data to simplify iteration
        # The coordinates start in column 31, hence the prefix is padded
        # to 30 characters; the complete record is 80 characters long
        self.lines.extend(
            [
                f"{start:30}{x:>8.3f}{y:>8.3f}{z:>8.3f}{end:26}"
                for start, (x, y, z), end in zip(first_half, coord_i, second_half)
            ]
        )
        if is_stack:
            self.lines.append("ENDMDL")

    # Add CONECT records if bonds are present
    if array.bonds is not None:
        # Only non-water hetero records and connections between
        # residues are added to the records
        hetero_indices = np.where(array.hetero & ~filter_solvent(array))[0]
        bond_array = array.bonds.as_array()
        bond_array = bond_array[
            np.isin(bond_array[:, 0], hetero_indices)
            | np.isin(bond_array[:, 1], hetero_indices)
            | (array.res_id[bond_array[:, 0]] != array.res_id[bond_array[:, 1]])
            | (array.chain_id[bond_array[:, 0]] != array.chain_id[bond_array[:, 1]])
        ]
        self._set_bonds(BondList(array.array_length(), bond_array), pdb_atom_id)

    # The line content has changed completely -> rebuild the indices
    self._index_models_and_atoms()
738
+
739
def set_space_group(self, info):
    """
    Update the CRYST1 record with the provided space group and Z value.

    Only the first ``CRYST1`` record is updated.
    If the file contains no such record, the file is left unchanged.

    Parameters
    ----------
    info : tuple(str, int) or SpaceGroupInfo
        Contains the space group and Z-value.
        Either an object with the attributes ``space_group`` and
        ``z_val`` or a plain ``(space_group, z_val)`` tuple.

    Raises
    ------
    AttributeError
        If the space group and Z value cannot be obtained from `info`.
    """
    for i, line in enumerate(self.lines):
        if not line.startswith("CRYST1"):
            continue
        try:
            try:
                space_group, z_val = info.space_group, info.z_val
            except AttributeError:
                # Plain tuple without named fields
                space_group, z_val = info
            # Format the replacement string
            space_group_str = space_group.ljust(11)
            z_val_str = str(z_val).rjust(4)
        except (ValueError, TypeError, AttributeError) as e:
            # Raise an exception with context
            raise AttributeError(
                f"Failed to update CRYST1 record. "
                f"Line: {line.strip()} | Error: {e}"
            ) from e

        # Replace the existing CRYST1 record:
        # the space group occupies [55:66], the Z value [66:70].
        # Padding keeps both fields in place for short input lines
        self.lines[i] = line[:55].ljust(55) + space_group_str + z_val_str + line[70:]
        break
764
+
765
def list_assemblies(self):
    """
    Report the IDs of the biological assemblies described in this file.

    The IDs are taken from the first line of the ``REMARK 300``
    records, so this remark needs to be part of the file.

    Returns
    -------
    assemblies : list of str
        A list that contains the available assembly IDs.

    Raises
    ------
    InvalidFileError
        If the file has no ``REMARK 300`` records.

    Examples
    --------
    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1f2n.pdb"))
    >>> print(file.list_assemblies())
    ['1']
    """
    remark_300 = self.get_remark(300)
    if remark_300 is None:
        raise InvalidFileError(
            "File does not contain assembly information (REMARK 300)"
        )
    # The comma-separated IDs follow the 12 character long label
    # at the start of the first remark line
    id_field = remark_300[0][12:]
    return list(map(str.strip, id_field.split(",")))
793
+
794
def get_assembly(
    self,
    assembly_id=None,
    model=None,
    altloc="first",
    extra_fields=[],
    include_bonds=False,
):
    """
    Build the given biological assembly.

    This function receives the data from ``REMARK 350`` records in
    the file.
    Consequently, this remark must be present in the file.

    Parameters
    ----------
    assembly_id : str
        The assembly to build.
        Available assembly IDs can be obtained via
        :func:`list_assemblies()`.
        If omitted, the first assembly listed in the file is built.
    model : int, optional
        If this parameter is given, the function will return an
        :class:`AtomArray` from the atoms corresponding to the given
        model number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, an :class:`AtomArrayStack`
        containing all models will be returned, even if the
        structure contains only one model.
    altloc : {'first', 'occupancy', 'all'}
        This parameter defines how *altloc* IDs are handled:
            - ``'first'`` - Use atoms that have the first
              *altloc* ID appearing in a residue.
            - ``'occupancy'`` - Use atoms that have the *altloc* ID
              with the highest occupancy for a residue.
            - ``'all'`` - Use all atoms.
              Note that this leads to duplicate atoms.
              When this option is chosen, the ``altloc_id``
              annotation array is added to the returned structure.
    extra_fields : list of str, optional
        The strings in the list are optional annotation categories
        that should be stored in the output array or stack.
        These are valid values:
        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
        ``'charge'``.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` will be created for the
        resulting :class:`AtomArray` containing the bond information
        from the file.
        Bonds, whose order could not be determined from the
        *Chemical Component Dictionary*
        (e.g. especially inter-residue bonds),
        have :attr:`BondType.ANY`, since the PDB format itself does
        not support bond orders.

    Returns
    -------
    assembly : AtomArray or AtomArrayStack
        The assembly.
        The return type depends on the `model` parameter.
        Contains the `sym_id` annotation, which enumerates the copies of the
        asymmetric unit in the assembly.

    Raises
    ------
    InvalidFileError
        If the file has no ``REMARK 350`` records, if these records
        describe no assembly or if a chain specification is not
        followed by transformation records.
    KeyError
        If the given `assembly_id` is not found in the file.

    Examples
    --------

    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1f2n.pdb"))
    >>> assembly = file.get_assembly(model=1)
    """
    # Get base structure
    structure = self.get_structure(
        model,
        altloc,
        extra_fields,
        include_bonds,
    )

    # Get lines containing transformations for chosen assembly
    remark_lines = self.get_remark(350)
    if remark_lines is None:
        raise InvalidFileError(
            "File does not contain assembly information (REMARK 350)"
        )
    # Get lines corresponding to selected assembly ID
    assembly_start_i = None
    assembly_stop_i = None
    for i, line in enumerate(remark_lines):
        if line.startswith("BIOMOLECULE"):
            current_assembly_id = line[12:].strip()
            if assembly_start_i is not None:
                # Start was already found -> this is the next entry
                # -> this is the stop
                assembly_stop_i = i
                break
            if current_assembly_id == assembly_id or assembly_id is None:
                assembly_start_i = i
    if assembly_stop_i is None:
        # In case of the final assembly of the file,
        # the 'stop' is the end of REMARK 350 lines
        assembly_stop_i = len(remark_lines)
    if assembly_start_i is None:
        if assembly_id is None:
            raise InvalidFileError(
                "File does not contain transformation expressions for assemblies"
            )
        else:
            raise KeyError(f"The assembly ID '{assembly_id}' is not found")
    assembly_lines = remark_lines[assembly_start_i:assembly_stop_i]

    # Get transformations for a set of chains
    chain_set_start_indices = [
        i
        for i, line in enumerate(assembly_lines)
        if line.startswith("APPLY THE FOLLOWING TO CHAINS")
    ]
    # Add exclusive stop at end of records
    chain_set_start_indices.append(len(assembly_lines))
    assembly = None
    for i in range(len(chain_set_start_indices) - 1):
        start = chain_set_start_indices[i]
        stop = chain_set_start_indices[i + 1]
        # Read affected chain IDs from the following line(s)
        affected_chain_ids = []
        transform_start = None
        for j, line in enumerate(assembly_lines[start:stop]):
            # Continuation lines are indented so that their chain IDs
            # line up with those of the first line.
            # Match them independent of the amount of indentation and
            # read the IDs behind the colon instead of a fixed column
            is_chain_line = line.startswith(
                "APPLY THE FOLLOWING TO CHAINS:"
            ) or line.lstrip().startswith("AND CHAINS:")
            if is_chain_line:
                _, _, id_list = line.partition(":")
                affected_chain_ids += [
                    chain_id.strip() for chain_id in id_list.split(",")
                ]
            else:
                # Chain specification has finished
                # BIOMT lines start directly after chain specification
                transform_start = start + j
                break
        # Parse transformations from BIOMT lines
        if transform_start is None:
            raise InvalidFileError("No 'BIOMT' records found for chosen assembly")
        rotations, translations = _parse_transformations(
            assembly_lines[transform_start:stop]
        )
        # Filter affected chains
        sub_structure = structure[
            ..., np.isin(structure.chain_id, affected_chain_ids)
        ]
        sub_assembly = _apply_transformations(
            sub_structure, rotations, translations
        )
        # Merge the chains with IDs for this transformation
        # with chains from other transformations
        if assembly is None:
            assembly = sub_assembly
        else:
            assembly += sub_assembly

    return assembly
956
+
957
def get_unit_cell(
    self, model=None, altloc="first", extra_fields=[], include_bonds=False
):
    """
    Build a structure model containing all symmetric copies
    of the structure within a single unit cell, given by the space
    group.

    This function receives the data from ``REMARK 290`` records in
    the file.
    Consequently, this remark must be present in the file, which is
    usually only true for crystal structures.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, the function will return an
        :class:`AtomArray` from the atoms corresponding to the given
        model number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, an :class:`AtomArrayStack`
        containing all models will be returned, even if the
        structure contains only one model.
    altloc : {'first', 'occupancy', 'all'}
        This parameter defines how *altloc* IDs are handled:
            - ``'first'`` - Use atoms that have the first
              *altloc* ID appearing in a residue.
            - ``'occupancy'`` - Use atoms that have the *altloc* ID
              with the highest occupancy for a residue.
            - ``'all'`` - Use all atoms.
              Note that this leads to duplicate atoms.
              When this option is chosen, the ``altloc_id``
              annotation array is added to the returned structure.
    extra_fields : list of str, optional
        The strings in the list are optional annotation categories
        that should be stored in the output array or stack.
        These are valid values:
        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
        ``'charge'``.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` will be created for the
        resulting :class:`AtomArray` containing the bond information
        from the file.
        Bonds, whose order could not be determined from the
        *Chemical Component Dictionary*
        (e.g. especially inter-residue bonds),
        have :attr:`BondType.ANY`, since the PDB format itself does
        not support bond orders.

    Returns
    -------
    symmetry_mates : AtomArray or AtomArrayStack
        All atoms within a single unit cell.
        The return type depends on the `model` parameter.

    Raises
    ------
    InvalidFileError
        If the file has no ``REMARK 290`` records.

    Notes
    -----
    To expand the structure beyond a single unit cell, use
    :func:`repeat_box()` with the return value as its
    input.

    Examples
    --------

    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
    >>> atoms_in_unit_cell = file.get_unit_cell(model=1)
    """
    # Get base structure
    structure = self.get_structure(
        model,
        altloc,
        extra_fields,
        include_bonds,
    )
    # Get lines containing transformations for crystallographic symmetry
    remark_lines = self.get_remark(290)
    if remark_lines is None:
        raise InvalidFileError(
            "File does not contain crystallographic symmetry "
            "information (REMARK 290)"
        )
    # The transformation lines are indented within the remark.
    # Their elements are split at whitespace later on, hence the
    # amount of indentation is irrelevant for selecting them
    transform_lines = [
        line for line in remark_lines if line.lstrip().startswith("SMTRY")
    ]
    rotations, translations = _parse_transformations(transform_lines)
    return _apply_transformations(structure, rotations, translations)
1043
+
1044
def get_symmetry_mates(
    self, model=None, altloc="first", extra_fields=[], include_bonds=False
):
    """
    Build a structure model containing all symmetric copies
    of the structure within a single unit cell, given by the space
    group.

    This function receives the data from ``REMARK 290`` records in
    the file.
    Consequently, this remark must be present in the file, which is
    usually only true for crystal structures.

    DEPRECATED: Use :meth:`get_unit_cell()` instead.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, the function will return an
        :class:`AtomArray` from the atoms corresponding to the given
        model number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, an :class:`AtomArrayStack`
        containing all models will be returned, even if the
        structure contains only one model.
    altloc : {'first', 'occupancy', 'all'}
        This parameter defines how *altloc* IDs are handled:
            - ``'first'`` - Use atoms that have the first
              *altloc* ID appearing in a residue.
            - ``'occupancy'`` - Use atoms that have the *altloc* ID
              with the highest occupancy for a residue.
            - ``'all'`` - Use all atoms.
              Note that this leads to duplicate atoms.
              When this option is chosen, the ``altloc_id``
              annotation array is added to the returned structure.
    extra_fields : list of str, optional
        The strings in the list are optional annotation categories
        that should be stored in the output array or stack.
        These are valid values:
        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
        ``'charge'``.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` will be created for the
        resulting :class:`AtomArray` containing the bond information
        from the file.
        Bonds, whose order could not be determined from the
        *Chemical Component Dictionary*
        (e.g. especially inter-residue bonds),
        have :attr:`BondType.ANY`, since the PDB format itself does
        not support bond orders.

    Returns
    -------
    symmetry_mates : AtomArray or AtomArrayStack
        All atoms within a single unit cell.
        The return type depends on the `model` parameter.

    Notes
    -----
    To expand the structure beyond a single unit cell, use
    :func:`repeat_box()` with the return value as its
    input.

    Examples
    --------

    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
    >>> atoms_in_unit_cell = file.get_symmetry_mates(model=1)
    """
    # 'stacklevel=2' attributes the warning to the calling code,
    # i.e. the place that needs to be migrated
    warnings.warn(
        "'get_symmetry_mates()' is deprecated, use 'get_unit_cell()' instead",
        DeprecationWarning,
        stacklevel=2,
    )
    return self.get_unit_cell(model, altloc, extra_fields, include_bonds)
1120
+
1121
+ def _index_models_and_atoms(self):
1122
+ # Line indices where a new model starts
1123
+ self._model_start_i = np.array(
1124
+ [i for i in range(len(self.lines)) if self.lines[i].startswith(("MODEL"))],
1125
+ dtype=int,
1126
+ )
1127
+ if len(self._model_start_i) == 0:
1128
+ # It could be an empty file or a file with a single model,
1129
+ # where the 'MODEL' line is missing
1130
+ for line in self.lines:
1131
+ if line.startswith(("ATOM", "HETATM")):
1132
+ # Single model
1133
+ self._model_start_i = np.array([0])
1134
+ break
1135
+
1136
+ # Line indices with ATOM or HETATM records
1137
+ self._atom_line_i = np.array(
1138
+ [
1139
+ i
1140
+ for i in range(len(self.lines))
1141
+ if self.lines[i].startswith(("ATOM", "HETATM"))
1142
+ ],
1143
+ dtype=int,
1144
+ )
1145
+
1146
+ def _get_atom_record_indices_for_model(self, model):
1147
+ last_model = len(self._model_start_i)
1148
+ if model == 0:
1149
+ raise ValueError("The model index must not be 0")
1150
+ # Negative models mean index starting from last model
1151
+ model = last_model + model + 1 if model < 0 else model
1152
+
1153
+ if model < last_model:
1154
+ line_filter = (self._atom_line_i >= self._model_start_i[model - 1]) & (
1155
+ self._atom_line_i < self._model_start_i[model]
1156
+ )
1157
+ elif model == last_model:
1158
+ line_filter = self._atom_line_i >= self._model_start_i[model - 1]
1159
+ else:
1160
+ raise ValueError(
1161
+ f"The file has {last_model} models, "
1162
+ f"the given model {model} does not exist"
1163
+ )
1164
+ return self._atom_line_i[line_filter]
1165
+
1166
+ def _get_model_length(self):
1167
+ """
1168
+ Determine length of models and check that all models
1169
+ have equal length.
1170
+ """
1171
+ n_models = len(self._model_start_i)
1172
+ length = None
1173
+ for model_i in range(len(self._model_start_i)):
1174
+ model_start = self._model_start_i[model_i]
1175
+ model_stop = (
1176
+ self._model_start_i[model_i + 1]
1177
+ if model_i + 1 < n_models
1178
+ else len(self.lines)
1179
+ )
1180
+ model_length = np.count_nonzero(
1181
+ (self._atom_line_i >= model_start) & (self._atom_line_i < model_stop)
1182
+ )
1183
+ if length is None:
1184
+ length = model_length
1185
+ if model_length != length:
1186
+ raise InvalidFileError(
1187
+ f"Model {model_i + 1} has {model_length} atoms, "
1188
+ f"but model 1 has {length} atoms, must be equal"
1189
+ )
1190
+ return length
1191
+
1192
def _get_bonds(self, atom_ids):
    """
    Create a :class:`BondList` from the ``CONECT`` records of the file.

    Parameters
    ----------
    atom_ids : ndarray
        The atom IDs of the atoms in the structure, in the order of
        the atoms.

    Returns
    -------
    bonds : BondList
        The bonds, referring to positions in `atom_ids`.

    Raises
    ------
    InvalidFileError
        If an atom ID exceeds the last atom ID in `atom_ids`.
    """
    # Lookup table from atom ID to position in the AtomArray,
    # sized by the last atom ID
    index_for_atom_id = np.zeros(atom_ids[-1] + 1, dtype=int)
    try:
        for array_index, atom_id in enumerate(atom_ids):
            index_for_atom_id[atom_id] = array_index
    except IndexError as e:
        raise InvalidFileError("Atom IDs are not strictly increasing") from e

    bonds = []
    for record in self.lines:
        if not record.startswith("CONECT"):
            continue
        center_index = index_for_atom_id[decode_hybrid36(record[6:11])]
        # Up to four bonded atom IDs follow in fields of 5 characters
        for column in range(11, 31, 5):
            partner_field = record[column : column + 5]
            try:
                partner_index = index_for_atom_id[decode_hybrid36(partner_field)]
            except ValueError:
                # String is empty -> no further IDs
                break
            bonds.append((center_index, partner_index))

    # The length of the 'atom_ids' array
    # is equal to the length of the AtomArray
    return BondList(len(atom_ids), np.array(bonds, dtype=np.uint32))
1218
+
1219
+ def _set_bonds(self, bond_list, atom_ids):
1220
+ # Bond type is unused since PDB does not support bond orders
1221
+ bonds, _ = bond_list.get_all_bonds()
1222
+
1223
+ for center_i, bonded_indices in enumerate(bonds):
1224
+ n_added = 0
1225
+ for bonded_i in bonded_indices:
1226
+ if bonded_i == -1:
1227
+ # Reached padding values
1228
+ break
1229
+ if n_added == 0:
1230
+ # Add new record
1231
+ line = f"CONECT{atom_ids[center_i]:>5}"
1232
+ line += f"{atom_ids[bonded_i]:>5}"
1233
+ n_added += 1
1234
+ if n_added == 4:
1235
+ # Only a maximum of 4 bond partners can be put
1236
+ # into a single line
1237
+ # If there are more, use an extra record
1238
+ n_added = 0
1239
+ self.lines.append(line)
1240
+ if n_added > 0:
1241
+ self.lines.append(line)
1242
+
1243
+
1244
def _parse_transformations(lines):
    """
    Parse the rotation and translation transformations from
    *REMARK* 290 or 350.

    Each transformation spans three consecutive lines, one for each
    of the (x,y,z) components.
    In every line the first two whitespace-separated fields are
    skipped, the following three values form a row of the rotation
    matrix and the last value is the translation component.

    Return the rotation matrices as ``(n, 3, 3)`` array and the
    translation vectors as ``(n, 3)`` array.
    An :class:`InvalidFileError` is raised, if the number of lines
    is not a multiple of 3 or a line does not contain exactly four
    values after the skipped fields.
    """
    n_lines = len(lines)
    if n_lines % 3:
        raise InvalidFileError("Invalid number of transformation vectors")
    n_transformations = n_lines // 3

    rotations = np.zeros((n_transformations, 3, 3), dtype=float)
    translations = np.zeros((n_transformations, 3), dtype=float)

    for line_i, line in enumerate(lines):
        # Three consecutive lines belong to the same transformation
        transformation_i, component_i = divmod(line_i, 3)
        # The component name and the transformation index
        # at the line start are not needed
        values = [float(field) for field in line.split()[2:]]
        if len(values) != 4:
            raise InvalidFileError("Invalid number of transformation vector elements")
        *matrix_row, shift = values
        rotations[transformation_i, component_i, :] = matrix_row
        translations[transformation_i, component_i] = shift

    return rotations, translations
1277
+
1278
+
1279
def _apply_transformations(structure, rotations, translations):
    """
    Get subassembly by applying the given transformations to the input
    structure containing affected chains.

    For each pair of rotation and translation one transformed copy of
    the coordinates is computed.
    The copies are handed to ``repeat()`` together with the structure
    and the result gets a ``sym_id`` annotation, that contains the
    index of the transformation each atom originates from.
    """
    n_copies = len(rotations)
    source_coord = structure.coord

    # Additional first dimension for 'structure.repeat()'
    assembly_coord = np.zeros((n_copies, *source_coord.shape))

    # Apply corresponding transformation for each copy in the assembly
    for copy_i, (rotation, translation) in enumerate(zip(rotations, translations)):
        # Rotate first...
        transformed = matrix_rotate(source_coord, rotation)
        # ...then translate the rotated coordinates in-place
        transformed += translation
        assembly_coord[copy_i] = transformed

    assembly = repeat(structure, assembly_coord)
    # One block of identical IDs for each copy of the structure
    sym_ids = np.repeat(np.arange(n_copies), structure.array_length())
    assembly.set_annotation("sym_id", sym_ids)
    return assembly
1301
+
1302
+
1303
def _check_pdb_compatibility(array, hybrid36):
    """
    Check whether the given structure fits into the fixed columns of
    the PDB format.

    Atom and residue IDs above the supported maximum only lead to a
    warning.
    ``NaN`` coordinates, too long chain IDs, residue names or atom
    names and values requiring more integer digits than columns are
    available lead to a :class:`BadStructureError`.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to be checked.
    hybrid36 : bool
        If true, the ID limits are taken from ``max_hybrid36_number()``
        instead of the module-level PDB limits.
    """
    categories = array.get_annotation_categories()

    if hybrid36:
        max_atoms = max_hybrid36_number(5)
        max_residues = max_hybrid36_number(4)
    else:
        max_atoms = _PDB_MAX_ATOMS
        max_residues = _PDB_MAX_RESIDUES

    # Without an 'atom_id' annotation the atom count serves as largest ID
    largest_atom_id = (
        np.max(array.atom_id) if "atom_id" in categories else array.array_length()
    )

    if largest_atom_id > max_atoms:
        warnings.warn(f"Atom IDs exceed {max_atoms:,}, will be wrapped")
    if (array.res_id > max_residues).any():
        warnings.warn(f"Residue IDs exceed {max_residues:,}, will be wrapped")
    if np.isnan(array.coord).any():
        raise BadStructureError("Coordinates contain 'NaN' values")

    # Annotations that are limited in their number of characters
    length_limits = (
        ("chain_id", 1, "Some chain IDs exceed 1 character"),
        ("res_name", 3, "Some residue names exceed 3 characters"),
        ("atom_name", 4, "Some atom names exceed 4 characters"),
    )
    for annotation, max_length, message in length_limits:
        if any(len(name) > max_length for name in getattr(array, annotation)):
            raise BadStructureError(message)

    for axis, axis_name in enumerate("xyz"):
        n_coord_digits = number_of_integer_digits(array.coord[..., axis])
        if n_coord_digits > 4:
            raise BadStructureError(
                f"4 pre-decimal columns for {axis_name}-coordinates are "
                f"available, but array would require {n_coord_digits}"
            )

    # Optional annotations with 3 columns in front of the decimal point
    for category, label in (("b_factor", "B-factor"), ("occupancy", "occupancy")):
        if category not in categories:
            continue
        n_digits = number_of_integer_digits(getattr(array, category))
        if n_digits > 3:
            raise BadStructureError(
                f"3 pre-decimal columns for {label} are available, "
                f"but array would require {n_digits}"
            )

    if "charge" in categories:
        # The sign can be omitted as it is put into the adjacent column
        n_charge_digits = number_of_integer_digits(np.abs(array.charge))
        if n_charge_digits > 1:
            raise BadStructureError(
                "1 column for charge is available, "
                f"but array would require {n_charge_digits}"
            )