biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,1380 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.pdb"
6
+ __author__ = "Patrick Kunzmann, Daniel Bauer, Claude J. Rogers"
7
+ __all__ = ["PDBFile"]
8
+
9
+ import itertools
10
+ import warnings
11
+ from collections import namedtuple
12
+ import numpy as np
13
+ from biotite.file import InvalidFileError, TextFile
14
+ from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
15
+ from biotite.structure.bonds import (
16
+ BondList,
17
+ connect_via_residue_names,
18
+ )
19
+ from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
20
+ from biotite.structure.error import BadStructureError
21
+ from biotite.structure.filter import (
22
+ filter_first_altloc,
23
+ filter_highest_occupancy_altloc,
24
+ filter_solvent,
25
+ )
26
+ from biotite.structure.info.bonds import bonds_in_residue
27
+ from biotite.structure.io.pdb.hybrid36 import (
28
+ decode_hybrid36,
29
+ encode_hybrid36,
30
+ max_hybrid36_number,
31
+ )
32
+ from biotite.structure.io.util import number_of_integer_digits
33
+ from biotite.structure.repair import infer_elements
34
+ from biotite.structure.util import matrix_rotate
35
+
36
# Largest numbers that can be written with 5 and 4 decimal digits,
# respectively; these widths equal the widths of the ``_atom_id`` and
# ``_res_id`` fields defined below.
# NOTE(review): presumably used as limits when writing atom/residue
# numbers - the usage is outside of this section, confirm there.
_PDB_MAX_ATOMS = 99999
_PDB_MAX_RESIDUES = 9999

# slice objects for readability
# Each slice selects one fixed-width field of a record line.
# The bounds are 0-based and end-exclusive, e.g. ``slice(6, 11)``
# selects the 7th up to and including the 11th character of a line.
# ATOM/HETATM
_record = slice(0, 6)
_atom_id = slice(6, 11)
_atom_name = slice(12, 16)
_alt_loc = slice(16, 17)
_res_name = slice(17, 20)
_chain_id = slice(21, 22)
_res_id = slice(22, 26)
_ins_code = slice(26, 27)
# Cartesian coordinates, parsed as float by ``PDBFile.get_coord()``
_coord_x = slice(30, 38)
_coord_y = slice(38, 46)
_coord_z = slice(46, 54)
_occupancy = slice(54, 60)
# Parsed as float by ``PDBFile.get_b_factor()``
_temp_f = slice(60, 66)
_element = slice(76, 78)
_charge = slice(78, 80)
# CRYST1
# NOTE(review): going by the names, these are the unit cell lengths
# (a, b, c), the unit cell angles (alpha, beta, gamma), the space group
# and the Z value of the record - confirm against the code using them.
_a = slice(6, 15)
_b = slice(15, 24)
_c = slice(24, 33)
_alpha = slice(33, 40)
_beta = slice(40, 47)
_gamma = slice(47, 54)
_space = slice(55, 66)
_z = slice(66, 70)
65
+
66
+
67
+ class PDBFile(TextFile):
68
+ r"""
69
+ This class represents a PDB file.
70
+
71
+ The usage of :mod:`biotite.structure.io.pdbx` is encouraged in favor
72
+ of this class.
73
+
74
+ This class only provides support for reading/writing the pure atom
75
+ information (*ATOM*, *HETATM*, *MODEL* and *ENDMDL* records). *TER*
76
+ records cannot be written.
77
+ Additionally, *REMARK* records can be read.
78
+
79
+ See Also
80
+ --------
81
+ CIFFile : Interface to CIF files, a modern replacement for PDB files.
82
+ BinaryCIFFile : Interface to BinaryCIF files, a binary variant of CIF files.
83
+
84
+ Examples
85
+ --------
86
+ Load a `\\*.pdb` file, modify the structure and save the new
87
+ structure into a new file:
88
+
89
+ >>> import os.path
90
+ >>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb"))
91
+ >>> array_stack = file.get_structure()
92
+ >>> array_stack_mod = rotate(array_stack, [1,2,3])
93
+ >>> file = PDBFile()
94
+ >>> file.set_structure(array_stack_mod)
95
+ >>> file.write(os.path.join(path_to_directory, "1l2y_mod.pdb"))
96
+ """
97
+
98
@classmethod
def read(cls, file):
    """
    Read a PDB file via the parent class and prepare it for parsing.

    Parameters
    ----------
    file
        Forwarded unchanged to the ``read()`` method of the parent
        class.

    Returns
    -------
    file_object
        The object returned by the ``read()`` method of the parent
        class, with each line padded to at least 80 characters and the
        model/atom index built.
    """
    pdb_file = super().read(file)
    # Column based parsing expects full-width records:
    # lines shorter than 80 characters are filled up with whitespace
    padded_lines = []
    for line in pdb_file.lines:
        padded_lines.append(line.ljust(80))
    pdb_file.lines = padded_lines
    pdb_file._index_models_and_atoms()
    return pdb_file
106
+
107
def get_remark(self, number):
    r"""
    Collect the content of all *REMARK* records that carry the given
    `number`.

    Parameters
    ----------
    number : int
        The *REMARK* number, i.e. the `XXX` in ``REMARK XXX``.
        The value is converted with ``int()`` before it is checked.

    Returns
    -------
    remark_lines : None or list of str
        One element for each line of the selected *REMARK*.
        The leading ``REMARK XXX `` part (the first 11 characters) is
        cut off from each line.
        The first matching line, which always must be empty, is
        left out.
        If the file does not contain the selected *REMARK* at all,
        ``None`` is returned instead.

    Raises
    ------
    ValueError
        If `number` is not in the range 0-999.

    Examples
    --------

    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb"))
    >>> remarks = file.get_remark(900)
    >>> print("\n".join(remarks))
    RELATED ENTRIES
    RELATED ID: 5292 RELATED DB: BMRB
    BMRB 5292 IS CHEMICAL SHIFTS FOR TC5B IN BUFFER AND BUFFER
    CONTAINING 30 VOL-% TFE.
    RELATED ID: 1JRJ RELATED DB: PDB
    1JRJ IS AN ANALAGOUS C-TERMINAL STRUCTURE.
    >>> nonexistent_remark = file.get_remark(999)
    >>> print(nonexistent_remark)
    None
    """
    # Tolerate values that are merely convertible to an integer
    number = int(number)
    if not 0 <= number <= 999:
        raise ValueError("The number must be in range 0-999")

    # The number is right-aligned in a field of three characters
    prefix = f"REMARK {number:>3d}"
    content_start = 11

    matching = []
    for line in self.lines:
        if line.startswith(prefix):
            # Keep only the part behind ``REMARK XXX ``
            matching.append(line[content_start:])
    if not matching:
        return None
    # The first line of a REMARK is the empty one -> discard it
    del matching[0]
    return matching
164
+
165
def get_model_count(self):
    """
    Return how many models the PDB file contains.

    Returns
    -------
    model_count : int
        The number of models, taken as the number of entries in the
        internal model start index.
    """
    model_starts = self._model_start_i
    return len(model_starts)
175
+
176
def get_coord(self, model=None):
    """
    Read the atom coordinates, and nothing else, from the PDB file.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, a 2D coordinate array is
        returned, that contains the atoms of the model with the
        given number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, a 3D coordinate array
        containing all models is returned, even if the structure
        contains only one model.

    Returns
    -------
    coord : ndarray, shape=(m,n,3) or shape=(n,3), dtype=float32
        The coordinates read from the ATOM and HETATM records of the
        file.

    Notes
    -----
    The output of :func:`get_coord()` may comprise more coordinates
    than the atom array (stack) obtained from the corresponding
    :func:`get_structure()` call has atoms:
    :func:`get_structure()` filters *altloc* IDs, whereas
    `get_coord()` does not.

    Examples
    --------
    Read an :class:`AtomArrayStack` from multiple PDB files, where
    each PDB file contains the same atoms but different positions.
    This is an efficient approach when a trajectory is spread into
    multiple PDB files, as done e.g. by the *Rosetta* modeling
    software.

    For the purpose of this example, the PDB files are created from
    an existing :class:`AtomArrayStack`.

    >>> import os.path
    >>> from tempfile import gettempdir
    >>> file_names = []
    >>> for i in range(atom_array_stack.stack_depth()):
    ...     pdb_file = PDBFile()
    ...     pdb_file.set_structure(atom_array_stack[i])
    ...     file_name = os.path.join(gettempdir(), f"model_{i+1}.pdb")
    ...     pdb_file.write(file_name)
    ...     file_names.append(file_name)
    >>> print(file_names)
    ['...model_1.pdb', '...model_2.pdb', ..., '...model_38.pdb']

    Now the PDB files are used to create an :class:`AtomArrayStack`,
    where each model stems from a different file.

    Construct a new :class:`AtomArrayStack` with annotations taken
    from one of the created files used as template and coordinates
    from all of the PDB files.

    >>> template_file = PDBFile.read(file_names[0])
    >>> template = template_file.get_structure()
    >>> coord = []
    >>> for i, file_name in enumerate(file_names):
    ...     pdb_file = PDBFile.read(file_name)
    ...     coord.append(pdb_file.get_coord(model=1))
    >>> new_stack = from_template(template, np.array(coord))

    The newly created :class:`AtomArrayStack` should now be equal to
    the :class:`AtomArrayStack` the PDB files were created from.

    >>> print(np.allclose(new_stack.coord, atom_array_stack.coord))
    True
    """
    if model is not None:
        # Single model requested -> one row per atom record of it
        record_indices = self._get_atom_record_indices_for_model(model)
        coord = np.zeros((len(record_indices), 3), dtype=np.float32)
        for atom_i, line_i in enumerate(record_indices):
            record = self.lines[line_i]
            for axis, columns in enumerate((_coord_x, _coord_y, _coord_z)):
                coord[atom_i, axis] = float(record[columns])
        return coord

    # All models requested -> fill the array model by model
    model_starts = self._model_start_i
    last_model_i = len(model_starts) - 1
    coord = np.zeros(
        (len(model_starts), self._get_model_length(), 3),
        dtype=np.float32,
    )
    model_i = 0
    atom_i = 0
    for line_i in self._atom_line_i:
        # An atom record located behind the start line of the
        # following model belongs to that model:
        # move on to it and restart the atom counter
        if model_i < last_model_i and line_i > model_starts[model_i + 1]:
            model_i += 1
            atom_i = 0
        record = self.lines[line_i]
        for axis, columns in enumerate((_coord_x, _coord_y, _coord_z)):
            coord[model_i, atom_i, axis] = float(record[columns])
        atom_i += 1
    return coord
280
+
281
def get_b_factor(self, model=None):
    """
    Read the B-factors, and nothing else, from the PDB file.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, a 1D B-factor array is
        returned, that contains the atoms of the model with the
        given number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, a 2D B-factor array
        containing all models is returned, even if the structure
        contains only one model.

    Returns
    -------
    b_factor : ndarray, shape=(m,n) or shape=(n,), dtype=float32
        The B-factors read from the ATOM and HETATM records of the
        file.

    Notes
    -----
    The output of :func:`get_b_factor()` may comprise more B-factors
    than the atom array (stack) obtained from the corresponding
    :func:`get_structure()` call has atoms:
    :func:`get_structure()` filters *altloc* IDs, whereas
    `get_b_factor()` does not.
    """
    if model is not None:
        # Single model requested -> one value per atom record of it
        record_indices = self._get_atom_record_indices_for_model(model)
        b_factor = np.zeros(len(record_indices), dtype=np.float32)
        for atom_i, line_i in enumerate(record_indices):
            record = self.lines[line_i]
            b_factor[atom_i] = float(record[_temp_f])
        return b_factor

    # All models requested -> fill the array model by model
    model_starts = self._model_start_i
    last_model_i = len(model_starts) - 1
    b_factor = np.zeros(
        (len(model_starts), self._get_model_length()), dtype=np.float32
    )
    model_i = 0
    atom_i = 0
    for line_i in self._atom_line_i:
        # An atom record located behind the start line of the
        # following model belongs to that model:
        # move on to it and restart the atom counter
        if model_i < last_model_i and line_i > model_starts[model_i + 1]:
            model_i += 1
            atom_i = 0
        record = self.lines[line_i]
        b_factor[model_i, atom_i] = float(record[_temp_f])
        atom_i += 1
    return b_factor
336
+
337
def get_structure(
    self, model=None, altloc="first", extra_fields=[], include_bonds=False
):
    """
    Get an :class:`AtomArray` or :class:`AtomArrayStack` from the PDB file.

    This function parses standard base-10 PDB files as well as
    hybrid-36 PDB.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, the function will return an
        :class:`AtomArray` from the atoms corresponding to the given
        model number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, an :class:`AtomArrayStack`
        containing all models will be returned, even if the
        structure contains only one model.
    altloc : {'first', 'occupancy', 'all'}
        This parameter defines how *altloc* IDs are handled:
            - ``'first'`` - Use atoms that have the first
              *altloc* ID appearing in a residue.
            - ``'occupancy'`` - Use atoms that have the *altloc* ID
              with the highest occupancy for a residue.
            - ``'all'`` - Use all atoms.
              Note that this leads to duplicate atoms.
              When this option is chosen, the ``altloc_id``
              annotation array is added to the returned structure.
    extra_fields : list of str, optional
        The strings in the list are optional annotation categories
        that should be stored in the output array or stack.
        These are valid values:
        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
        ``'charge'``.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` will be created for the
        resulting :class:`AtomArray` containing the bond information
        from the file.
        Bonds, whose order could not be determined from the
        *Chemical Component Dictionary*
        (e.g. especially inter-residue bonds),
        have :attr:`BondType.ANY`, since the PDB format itself does
        not support bond orders.

    Returns
    -------
    array : AtomArray or AtomArrayStack
        The return type depends on the `model` parameter.

    Raises
    ------
    ValueError
        If `extra_fields` contains an unknown category or `altloc` is
        not one of the supported options.
    """
    if model is None:
        depth = len(self._model_start_i)
        length = self._get_model_length()
        array = AtomArrayStack(depth, length)
        # Annotations are determined from model 1 only,
        # coordinates are read from the records of all models
        annot_i = self._get_atom_record_indices_for_model(1)
        coord_i = self._atom_line_i
    else:
        annot_i = coord_i = self._get_atom_record_indices_for_model(model)
        array = AtomArray(len(coord_i))

    # The default list is only read, never mutated
    requested_fields = extra_fields if extra_fields is not None else []

    # Create mandatory and optional annotation arrays
    n_atoms = array.array_length()
    chain_id = np.zeros(n_atoms, array.chain_id.dtype)
    res_id = np.zeros(n_atoms, array.res_id.dtype)
    ins_code = np.zeros(n_atoms, array.ins_code.dtype)
    res_name = np.zeros(n_atoms, array.res_name.dtype)
    hetero = np.zeros(n_atoms, array.hetero.dtype)
    atom_name = np.zeros(n_atoms, array.atom_name.dtype)
    element = np.zeros(n_atoms, array.element.dtype)
    atom_id_raw = np.zeros(n_atoms, "U5")
    charge_raw = np.zeros(n_atoms, "U2")
    occupancy = np.zeros(n_atoms, float)
    b_factor = np.zeros(n_atoms, float)
    altloc_id = np.zeros(n_atoms, dtype="U1")

    # Fill annotation arrays
    # i is index in array, line_i is line index
    for i, line_i in enumerate(annot_i):
        line = self.lines[line_i]
        chain_id[i] = line[_chain_id].strip()
        res_id[i] = decode_hybrid36(line[_res_id])
        ins_code[i] = line[_ins_code].strip()
        res_name[i] = line[_res_name].strip()
        hetero[i] = line[_record] == "HETATM"
        atom_name[i] = line[_atom_name].strip()
        element[i] = line[_element].strip()
        altloc_id[i] = line[_alt_loc]
        atom_id_raw[i] = line[_atom_id]
        # Turn "1-" into "-1", if necessary
        # Slicing instead of indexing tolerates an empty charge field
        raw_charge = line[_charge]
        if raw_charge[:1] in ("+", "-"):
            charge_raw[i] = raw_charge
        else:
            charge_raw[i] = raw_charge[::-1]
        occupancy[i] = float(line[_occupancy].strip())
        b_factor[i] = float(line[_temp_f].strip())

    if include_bonds or "atom_id" in requested_fields:
        # The atom IDs are only required in these two cases
        atom_id = np.array(
            [decode_hybrid36(raw_id.item()) for raw_id in atom_id_raw], dtype=int
        )
    else:
        atom_id = None

    # Add annotation arrays to atom array (stack)
    array.chain_id = chain_id
    array.res_id = res_id
    array.ins_code = ins_code
    array.res_name = res_name
    array.hetero = hetero
    array.atom_name = atom_name
    array.element = element

    for field in requested_fields:
        if field == "atom_id":
            # Copy is necessary to avoid double masking in
            # later altloc ID filtering
            array.set_annotation("atom_id", atom_id.copy())
        elif field == "charge":
            # A blank charge field (of any width) means 'no charge'
            charge = np.char.strip(charge_raw)
            array.set_annotation(
                "charge", np.where(charge == "", "0", charge).astype(int)
            )
        elif field == "occupancy":
            array.set_annotation("occupancy", occupancy)
        elif field == "b_factor":
            array.set_annotation("b_factor", b_factor)
        else:
            raise ValueError(f"Unknown extra field: {field}")

    # Replace empty strings for elements with guessed types
    # This is used e.g. for PDB files created by Gromacs
    empty_element_mask = array.element == ""
    if empty_element_mask.any():
        warnings.warn(
            f"{np.count_nonzero(empty_element_mask)} elements "
            "were guessed from atom name"
        )
        array.element[empty_element_mask] = infer_elements(
            array.atom_name[empty_element_mask]
        )

    # Fill in coordinates
    if isinstance(array, AtomArray):
        for i, line_i in enumerate(coord_i):
            line = self.lines[line_i]
            array.coord[i, 0] = float(line[_coord_x])
            array.coord[i, 1] = float(line[_coord_y])
            array.coord[i, 2] = float(line[_coord_z])

    elif isinstance(array, AtomArrayStack):
        # m is the model index, i is the atom index within the model
        m = 0
        i = 0
        for line_i in self._atom_line_i:
            if (
                m < len(self._model_start_i) - 1
                and line_i > self._model_start_i[m + 1]
            ):
                # The record belongs to the next model
                m += 1
                i = 0
            line = self.lines[line_i]
            array.coord[m, i, 0] = float(line[_coord_x])
            array.coord[m, i, 1] = float(line[_coord_y])
            array.coord[m, i, 2] = float(line[_coord_z])
            i += 1

    # Fill in box vectors
    # PDB does not support changing box dimensions. CRYST1 is a one-time
    # record so we can extract it directly
    for line in self.lines:
        if not line.startswith("CRYST1"):
            continue
        try:
            len_a = float(line[_a])
            len_b = float(line[_b])
            len_c = float(line[_c])
            alpha = np.deg2rad(float(line[_alpha]))
            beta = np.deg2rad(float(line[_beta]))
            gamma = np.deg2rad(float(line[_gamma]))
            box = vectors_from_unitcell(len_a, len_b, len_c, alpha, beta, gamma)
        except ValueError:
            # File contains invalid 'CRYST1' record
            warnings.warn("File contains invalid 'CRYST1' record, box is ignored")
        else:
            if isinstance(array, AtomArray):
                array.box = box
            else:
                array.box = np.repeat(
                    box[np.newaxis, ...], array.stack_depth(), axis=0
                )
        # Only the first 'CRYST1' record is considered
        break

    # Filter altloc IDs
    if altloc == "occupancy":
        altloc_mask = filter_highest_occupancy_altloc(array, altloc_id, occupancy)
    elif altloc == "first":
        altloc_mask = filter_first_altloc(array, altloc_id)
    elif altloc == "all":
        altloc_mask = None
        array.set_annotation("altloc_id", altloc_id)
    else:
        raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
    if altloc_mask is not None:
        array = array[..., altloc_mask]
        if atom_id is not None:
            atom_id = atom_id[altloc_mask]

    # Read bonds
    if include_bonds:
        bond_list = self._get_bonds(atom_id)
        # Create bond dict containing only non-hetero residues (+ water)
        custom_bond_dict = {
            res_name: bonds_in_residue(res_name)
            for res_name in itertools.chain(
                np.unique(array[..., ~array.hetero].res_name), ["HOH"]
            )
        }
        bond_list = bond_list.merge(
            connect_via_residue_names(array, custom_bond_dict=custom_bond_dict)
        )
        array.bonds = bond_list

    return array
565
+
566
def get_space_group(self):
    """
    Extract the space group and Z value from the CRYST1 record.

    Returns
    -------
    space_group : str
        The extracted space group.
    z_val : int
        The extracted Z value.

    Raises
    ------
    InvalidFileError
        If the file has no ``CRYST1`` record or the Z value in the
        record cannot be parsed as integer.
    """
    # Initialize the namedtuple
    SpaceGroupInfo = namedtuple("SpaceGroupInfo", ["space_group", "z_val"])

    # CRYST1 is a one-time record so we can extract it directly
    for line in self.lines:
        if not line.startswith("CRYST1"):
            continue
        try:
            # Extract space group and Z value
            space_group = str(line[_space])
            z_val = int(line[_z])
        except ValueError as e:
            # File contains invalid 'CRYST1' record
            raise InvalidFileError(
                "File does not contain valid space group and/or Z values"
            ) from e
        return SpaceGroupInfo(space_group=space_group, z_val=z_val)

    # Without this explicit error the function would fail with an
    # 'UnboundLocalError' when no 'CRYST1' record is present
    raise InvalidFileError("File does not contain a 'CRYST1' record")
597
+
598
def set_structure(self, array, hybrid36=False):
    """
    Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
    file.

    This makes also use of the optional annotation arrays
    ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
    If the atom array (stack) contains the annotation ``'atom_id'``,
    these values will be used for atom numbering instead of
    continuous numbering.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array or stack to be saved into this file. If a stack
        is given, each array in the stack is saved as separate
        model.
    hybrid36 : bool, optional
        Defines whether the file should be written in hybrid-36
        format.

    Notes
    -----
    If `array` has an associated :class:`BondList`, ``CONECT``
    records are also written for all non-water hetero residues
    and all inter-residue connections.
    """
    _check_pdb_compatibility(array, hybrid36)

    natoms = array.array_length()
    annot_categories = array.get_annotation_categories()
    record = np.char.array(np.where(array.hetero, "HETATM", "ATOM"))
    # Check for optional annotation categories
    if "atom_id" in annot_categories:
        atom_id = array.atom_id
    else:
        atom_id = np.arange(1, natoms + 1)
    # Occupancy and B-factor occupy exactly 6 columns each.
    # The defaults are formatted with the same format specifier as
    # actual values, so the column width can never diverge
    if "b_factor" in annot_categories:
        b_factor = np.char.array([f"{b:>6.2f}" for b in array.b_factor])
    else:
        b_factor = np.char.array(np.full(natoms, f"{0.0:>6.2f}", dtype="U6"))
    if "occupancy" in annot_categories:
        occupancy = np.char.array([f"{o:>6.2f}" for o in array.occupancy])
    else:
        occupancy = np.char.array(np.full(natoms, f"{1.0:>6.2f}", dtype="U6"))
    if "charge" in annot_categories:
        # PDB writes the magnitude first and the sign second, e.g. '1-'
        charge = np.char.array(
            [
                f"{abs(c)}+" if c > 0 else (f"{abs(c)}-" if c < 0 else "")
                for c in array.get_annotation("charge")
            ]
        )
    else:
        charge = np.char.array(np.full(natoms, " " * 2, dtype="U2"))

    if hybrid36:
        pdb_atom_id = np.char.array([encode_hybrid36(i, 5) for i in atom_id])
        pdb_res_id = np.char.array([encode_hybrid36(i, 4) for i in array.res_id])
    else:
        # Atom IDs are supported up to 99999,
        # but negative IDs are also possible
        pdb_atom_id = np.char.array(
            np.where(
                atom_id > 0, ((atom_id - 1) % _PDB_MAX_ATOMS) + 1, atom_id
            ).astype(str)
        )
        # Residue IDs are supported up to 9999,
        # but negative IDs are also possible
        pdb_res_id = np.char.array(
            np.where(
                array.res_id > 0,
                ((array.res_id - 1) % _PDB_MAX_RESIDUES) + 1,
                array.res_id,
            ).astype(str)
        )

    # Atom names of single-character elements start in the second
    # column of the 4 character wide atom name field
    names = np.char.array(
        [
            f" {atm}" if len(elem) == 1 and len(atm) < 4 else atm
            for atm, elem in zip(array.atom_name, array.element)
        ]
    )
    res_names = np.char.array(array.res_name)
    chain_ids = np.char.array(array.chain_id)
    ins_codes = np.char.array(array.ins_code)
    spaces = np.char.array(np.full(natoms, " ", dtype="U1"))
    elements = np.char.array(array.element)

    # Columns 1-27: everything in front of the coordinates
    first_half = (
        record.ljust(6)
        + pdb_atom_id.rjust(5)
        + spaces
        + names.ljust(4)
        + spaces
        + res_names.rjust(3)
        + spaces
        + chain_ids
        + pdb_res_id.rjust(4)
        + ins_codes.rjust(1)
    )

    # Columns 55-80: everything behind the coordinates
    second_half = (
        occupancy + b_factor + 10 * spaces + elements.rjust(2) + charge.rjust(2)
    )

    coords = array.coord
    if coords.ndim == 2:
        coords = coords[np.newaxis, ...]

    self.lines = []
    # Prepend a single CRYST1 record if we have box information
    if array.box is not None:
        box = array.box
        if len(box.shape) == 3:
            box = box[0]
        a, b, c, alpha, beta, gamma = unitcell_from_vectors(box)
        # Space group ('P 1') in columns 56-66, Z value in columns 67-70,
        # padded to a full 80 character record
        space_group = "P 1".ljust(11)
        z_val = "1".rjust(4)
        self.lines.append(
            f"CRYST1{a:>9.3f}{b:>9.3f}{c:>9.3f}"
            f"{np.rad2deg(alpha):>7.2f}{np.rad2deg(beta):>7.2f}"
            f"{np.rad2deg(gamma):>7.2f} {space_group}{z_val}" + " " * 10
        )
    is_stack = coords.shape[0] > 1
    # Columns 28-30 between residue information and coordinates are blank
    coord_gap = " " * 3
    # 'MODEL' record name is padded to column 10,
    # the model serial number fills columns 11-14
    model_label = "MODEL".ljust(10)
    for model_num, coord_i in enumerate(coords, start=1):
        # for an AtomArray, this is run once
        # only add model lines if is_stack
        if is_stack:
            self.lines.append(f"{model_label}{model_num:4}")
        # Bundle non-coordinate data to simplify iteration
        self.lines.extend(
            [
                f"{start:27}{coord_gap}{x:>8.3f}{y:>8.3f}{z:>8.3f}{end:26}"
                for start, (x, y, z), end in zip(first_half, coord_i, second_half)
            ]
        )
        if is_stack:
            self.lines.append("ENDMDL")

    # Add CONECT records if bonds are present
    if array.bonds is not None:
        # Only non-water hetero records and connections between
        # residues are added to the records
        hetero_indices = np.where(array.hetero & ~filter_solvent(array))[0]
        bond_array = array.bonds.as_array()
        bond_array = bond_array[
            np.isin(bond_array[:, 0], hetero_indices)
            | np.isin(bond_array[:, 1], hetero_indices)
            | (array.res_id[bond_array[:, 0]] != array.res_id[bond_array[:, 1]])
            | (array.chain_id[bond_array[:, 0]] != array.chain_id[bond_array[:, 1]])
        ]
        self._set_bonds(BondList(array.array_length(), bond_array), pdb_atom_id)

    self._index_models_and_atoms()
752
+
753
def set_space_group(self, info):
    """
    Update the CRYST1 record with the provided space group and Z value.

    Only the first ``CRYST1`` record is updated.
    If the file has no such record, the file is left unchanged.

    Parameters
    ----------
    info : tuple(str, int) or SpaceGroupInfo
        Contains the space group and Z-value.
        Either an object with the attributes ``space_group`` and
        ``z_val`` or a plain two-element sequence in this order.

    Raises
    ------
    AttributeError
        If the space group and Z value cannot be obtained from `info`.
    """
    for i, line in enumerate(self.lines):
        if not line.startswith("CRYST1"):
            continue
        try:
            if hasattr(info, "space_group") and hasattr(info, "z_val"):
                space_group, z_val = info.space_group, info.z_val
            else:
                # Plain tuple as promised by the documented signature
                space_group, z_val = info
            # Space group fills columns 56-66, Z value columns 67-70
            space_group_str = space_group.ljust(11)
            z_val_str = str(z_val).rjust(4)
        except (ValueError, TypeError, AttributeError) as e:
            # Raise an exception with context
            raise AttributeError(
                f"Failed to update CRYST1 record. "
                f"Line: {line.strip()} | Error: {e}"
            ) from e
        # Replace the existing fields, padding guards against a
        # truncated record shifting the fields to the left
        self.lines[i] = (
            line[:55].ljust(55) + space_group_str + z_val_str + line[70:]
        )
        break
778
+
779
def list_assemblies(self):
    """
    List the biological assemblies that are available for the
    structure in the given file.

    The assembly IDs are read from the first line of the
    ``REMARK 300`` records, so this remark must be present in the file.

    Returns
    -------
    assemblies : list of str
        A list that contains the available assembly IDs.

    Raises
    ------
    InvalidFileError
        If the file contains no ``REMARK 300`` records.

    Examples
    --------
    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1f2n.pdb"))
    >>> print(file.list_assemblies())
    ['1']
    """
    biomolecule_remarks = self.get_remark(300)
    if biomolecule_remarks is None:
        raise InvalidFileError(
            "File does not contain assembly information (REMARK 300)"
        )
    # The comma-separated IDs start after the first 12 characters
    # (presumably the 'BIOMOLECULE:' label) of the first remark line
    id_field = biomolecule_remarks[0][12:]
    return list(map(str.strip, id_field.split(",")))
807
+
808
def get_assembly(
    self,
    assembly_id=None,
    model=None,
    altloc="first",
    extra_fields=[],
    include_bonds=False,
):
    """
    Build the given biological assembly.

    This function receives the data from ``REMARK 350`` records in
    the file.
    Consequently, this remark must be present in the file.

    Parameters
    ----------
    assembly_id : str
        The assembly to build.
        Available assembly IDs can be obtained via
        :func:`list_assemblies()`.
        If omitted, the first assembly in the file is built.
    model : int, optional
        If this parameter is given, the function will return an
        :class:`AtomArray` from the atoms corresponding to the given
        model number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, an :class:`AtomArrayStack`
        containing all models will be returned, even if the
        structure contains only one model.
    altloc : {'first', 'occupancy', 'all'}
        This parameter defines how *altloc* IDs are handled:
            - ``'first'`` - Use atoms that have the first
              *altloc* ID appearing in a residue.
            - ``'occupancy'`` - Use atoms that have the *altloc* ID
              with the highest occupancy for a residue.
            - ``'all'`` - Use all atoms.
              Note that this leads to duplicate atoms.
              When this option is chosen, the ``altloc_id``
              annotation array is added to the returned structure.
    extra_fields : list of str, optional
        The strings in the list are optional annotation categories
        that should be stored in the output array or stack.
        These are valid values:
        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
        ``'charge'``.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` will be created for the
        resulting :class:`AtomArray` containing the bond information
        from the file.
        Bonds, whose order could not be determined from the
        *Chemical Component Dictionary*
        (e.g. especially inter-residue bonds),
        have :attr:`BondType.ANY`, since the PDB format itself does
        not support bond orders.

    Returns
    -------
    assembly : AtomArray or AtomArrayStack
        The assembly.
        The return type depends on the `model` parameter.
        Contains the `sym_id` annotation, which enumerates the copies of the
        asymmetric unit in the assembly.

    Raises
    ------
    InvalidFileError
        If the file contains no ``REMARK 350`` records, no assembly
        entry at all or no transformations for a set of chains.
    KeyError
        If the given `assembly_id` is not found in the file.

    Examples
    --------

    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1f2n.pdb"))
    >>> assembly = file.get_assembly(model=1)
    """
    # Get base structure
    structure = self.get_structure(
        model,
        altloc,
        extra_fields,
        include_bonds,
    )

    # Get lines containing transformations for chosen assembly
    remark_lines = self.get_remark(350)
    if remark_lines is None:
        raise InvalidFileError(
            "File does not contain assembly information (REMARK 350)"
        )
    # Get lines corresponding to selected assembly ID
    assembly_start_i = None
    assembly_stop_i = None
    for i, line in enumerate(remark_lines):
        if line.startswith("BIOMOLECULE"):
            current_assembly_id = line[12:].strip()
            if assembly_start_i is not None:
                # Start was already found -> this is the next entry
                # -> this is the stop
                assembly_stop_i = i
                break
            if current_assembly_id == assembly_id or assembly_id is None:
                assembly_start_i = i
    if assembly_stop_i is None:
        # In case of the final assembly of the file,
        # the 'stop' is the end of REMARK 350 lines
        assembly_stop_i = len(remark_lines)
    if assembly_start_i is None:
        if assembly_id is None:
            raise InvalidFileError(
                "File does not contain transformation expressions for assemblies"
            )
        else:
            raise KeyError(f"The assembly ID '{assembly_id}' is not found")
    assembly_lines = remark_lines[assembly_start_i:assembly_stop_i]

    # Get transformations for a set of chains
    chain_set_start_indices = [
        i
        for i, line in enumerate(assembly_lines)
        if line.startswith("APPLY THE FOLLOWING TO CHAINS")
    ]
    # Add exclusive stop at end of records
    chain_set_start_indices.append(len(assembly_lines))
    # Both the first line of a chain specification and its continuation
    # lines; leading whitespace is stripped before matching so the exact
    # indentation of the continuation lines does not matter
    chain_signal_strings = ("APPLY THE FOLLOWING TO CHAINS:", "AND CHAINS:")
    assembly = None
    for start, stop in zip(chain_set_start_indices, chain_set_start_indices[1:]):
        # Read affected chain IDs from the following line(s)
        affected_chain_ids = []
        transform_start = None
        for j, line in enumerate(assembly_lines[start:stop]):
            if line.lstrip().startswith(chain_signal_strings):
                # The chain IDs are the comma-separated values after the colon
                # A trailing comma on a continued line must not
                # produce an empty chain ID
                chain_list = line.partition(":")[2]
                affected_chain_ids += [
                    chain_id.strip()
                    for chain_id in chain_list.split(",")
                    if chain_id.strip()
                ]
            else:
                # Chain specification has finished
                # BIOMT lines start directly after chain specification
                transform_start = start + j
                break
        # Parse transformations from BIOMT lines
        if transform_start is None:
            raise InvalidFileError("No 'BIOMT' records found for chosen assembly")
        rotations, translations = _parse_transformations(
            [
                line
                for line in assembly_lines[transform_start:stop]
                if len(line.strip()) > 0
            ]
        )
        # Filter affected chains
        sub_structure = structure[
            ..., np.isin(structure.chain_id, affected_chain_ids)
        ]
        sub_assembly = _apply_transformations(
            sub_structure, rotations, translations
        )
        # Merge the chains with IDs for this transformation
        # with chains from other transformations
        if assembly is None:
            assembly = sub_assembly
        else:
            assembly += sub_assembly

    return assembly
974
+
975
def get_unit_cell(
    self, model=None, altloc="first", extra_fields=[], include_bonds=False
):
    """
    Build a structure model containing all symmetric copies
    of the structure within a single unit cell, given by the space
    group.

    This function receives the data from ``REMARK 290`` records in
    the file.
    Consequently, this remark must be present in the file, which is
    usually only true for crystal structures.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, the function will return an
        :class:`AtomArray` from the atoms corresponding to the given
        model number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, an :class:`AtomArrayStack`
        containing all models will be returned, even if the
        structure contains only one model.
    altloc : {'first', 'occupancy', 'all'}
        This parameter defines how *altloc* IDs are handled:
            - ``'first'`` - Use atoms that have the first
              *altloc* ID appearing in a residue.
            - ``'occupancy'`` - Use atoms that have the *altloc* ID
              with the highest occupancy for a residue.
            - ``'all'`` - Use all atoms.
              Note that this leads to duplicate atoms.
              When this option is chosen, the ``altloc_id``
              annotation array is added to the returned structure.
    extra_fields : list of str, optional
        The strings in the list are optional annotation categories
        that should be stored in the output array or stack.
        These are valid values:
        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
        ``'charge'``.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` will be created for the
        resulting :class:`AtomArray` containing the bond information
        from the file.
        Bonds, whose order could not be determined from the
        *Chemical Component Dictionary*
        (e.g. especially inter-residue bonds),
        have :attr:`BondType.ANY`, since the PDB format itself does
        not support bond orders.

    Returns
    -------
    symmetry_mates : AtomArray or AtomArrayStack
        All atoms within a single unit cell.
        The return type depends on the `model` parameter.

    Raises
    ------
    InvalidFileError
        If the file contains no ``REMARK 290`` records.

    Notes
    -----
    To expand the structure beyond a single unit cell, use
    :func:`repeat_box()` with the return value as its
    input.

    Examples
    --------

    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
    >>> atoms_in_unit_cell = file.get_unit_cell(model=1)
    """
    # Get base structure
    structure = self.get_structure(
        model,
        altloc,
        extra_fields,
        include_bonds,
    )
    # Get lines containing transformations for crystallographic symmetry
    remark_lines = self.get_remark(290)
    if remark_lines is None:
        raise InvalidFileError(
            "File does not contain crystallographic symmetry "
            "information (REMARK 290)"
        )
    # Leading whitespace is stripped before matching, so the exact
    # indentation of the 'SMTRY' lines within the remark does not matter
    transform_lines = [
        line for line in remark_lines if line.lstrip().startswith("SMTRY")
    ]
    rotations, translations = _parse_transformations(transform_lines)
    return _apply_transformations(structure, rotations, translations)
1061
+
1062
def get_symmetry_mates(
    self, model=None, altloc="first", extra_fields=[], include_bonds=False
):
    """
    Build a structure model containing all symmetric copies
    of the structure within a single unit cell, given by the space
    group.

    This function receives the data from ``REMARK 290`` records in
    the file.
    Consequently, this remark must be present in the file, which is
    usually only true for crystal structures.

    DEPRECATED: Use :meth:`get_unit_cell()` instead.

    Parameters
    ----------
    model : int, optional
        If this parameter is given, the function will return an
        :class:`AtomArray` from the atoms corresponding to the given
        model number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, an :class:`AtomArrayStack`
        containing all models will be returned, even if the
        structure contains only one model.
    altloc : {'first', 'occupancy', 'all'}
        This parameter defines how *altloc* IDs are handled:
            - ``'first'`` - Use atoms that have the first
              *altloc* ID appearing in a residue.
            - ``'occupancy'`` - Use atoms that have the *altloc* ID
              with the highest occupancy for a residue.
            - ``'all'`` - Use all atoms.
              Note that this leads to duplicate atoms.
              When this option is chosen, the ``altloc_id``
              annotation array is added to the returned structure.
    extra_fields : list of str, optional
        The strings in the list are optional annotation categories
        that should be stored in the output array or stack.
        These are valid values:
        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
        ``'charge'``.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` will be created for the
        resulting :class:`AtomArray` containing the bond information
        from the file.
        Bonds, whose order could not be determined from the
        *Chemical Component Dictionary*
        (e.g. especially inter-residue bonds),
        have :attr:`BondType.ANY`, since the PDB format itself does
        not support bond orders.

    Returns
    -------
    symmetry_mates : AtomArray or AtomArrayStack
        All atoms within a single unit cell.
        The return type depends on the `model` parameter.

    Notes
    -----
    To expand the structure beyond a single unit cell, use
    :func:`repeat_box()` with the return value as its
    input.

    Examples
    --------

    >>> import os.path
    >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
    >>> atoms_in_unit_cell = file.get_symmetry_mates(model=1)
    """
    warnings.warn(
        "'get_symmetry_mates()' is deprecated, use 'get_unit_cell()' instead",
        DeprecationWarning,
        # Attribute the warning to the calling code instead of this wrapper
        stacklevel=2,
    )
    return self.get_unit_cell(model, altloc, extra_fields, include_bonds)
1138
+
1139
def _index_models_and_atoms(self):
    """
    Index the lines of the file.

    Sets ``self._model_start_i`` to the line indices of the ``MODEL``
    records and ``self._atom_line_i`` to the line indices of the
    ``ATOM`` and ``HETATM`` records.
    """
    model_start_i = []
    atom_line_i = []
    # A single pass is sufficient, as a line is either a 'MODEL'
    # record or an atom record, but never both
    for i, line in enumerate(self.lines):
        if line.startswith("MODEL"):
            model_start_i.append(i)
        elif line.startswith(("ATOM", "HETATM")):
            atom_line_i.append(i)

    if not model_start_i and atom_line_i:
        # File with a single model, where the 'MODEL' line is missing
        # -> the model implicitly starts at the first line
        # (an empty file keeps an empty model index)
        model_start_i = [0]

    # Line indices where a new model starts
    self._model_start_i = np.array(model_start_i, dtype=int)
    # Line indices with ATOM or HETATM records
    self._atom_line_i = np.array(atom_line_i, dtype=int)
1163
+
1164
def _get_atom_record_indices_for_model(self, model):
    """
    Get the line indices of the atom records belonging to a model.

    Parameters
    ----------
    model : int
        The model number (starting at 1).
        Negative values index the models starting from the last model.

    Returns
    -------
    indices : ndarray, dtype=int
        The subset of ``self._atom_line_i`` that lies within the
        given model.

    Raises
    ------
    ValueError
        If `model` is 0 or refers to a model that does not exist.
    """
    last_model = len(self._model_start_i)
    if model == 0:
        raise ValueError("The model index must not be 0")
    # Negative models mean index starting from last model
    model_number = last_model + model + 1 if model < 0 else model

    # Without the lower bound check a too small negative number would
    # wrap around and silently select the atoms of a different model
    if not 1 <= model_number <= last_model:
        raise ValueError(
            f"The file has {last_model} models, "
            f"the given model {model} does not exist"
        )

    line_filter = self._atom_line_i >= self._model_start_i[model_number - 1]
    if model_number < last_model:
        # All models but the last one are terminated
        # by the start of the next model
        line_filter &= self._atom_line_i < self._model_start_i[model_number]
    return self._atom_line_i[line_filter]
1183
+
1184
def _get_model_length(self):
    """
    Count the atom records per model and make sure that every model
    contains the same number of them.

    Returns the common number of atom records, or ``None`` if the
    file contains no model.
    Raises an :class:`InvalidFileError` for the first model whose atom
    count deviates from the one of model 1.
    """
    starts = self._model_start_i
    # A model ends where the next one begins,
    # the final model extends to the end of the file
    stops = list(starts[1:]) + [len(self.lines)]

    reference_length = None
    for model_number, (first_line, end_line) in enumerate(
        zip(starts, stops), start=1
    ):
        in_model = (self._atom_line_i >= first_line) & (
            self._atom_line_i < end_line
        )
        atom_count = np.count_nonzero(in_model)
        if reference_length is None:
            reference_length = atom_count
        elif atom_count != reference_length:
            raise InvalidFileError(
                f"Model {model_number} has {atom_count} atoms, "
                f"but model 1 has {reference_length} atoms, must be equal"
            )
    return reference_length
1209
+
1210
def _get_bonds(self, atom_ids):
    """
    Create a :class:`BondList` from the ``CONECT`` records of the file.

    Parameters
    ----------
    atom_ids : ndarray, dtype=int
        The atom IDs of the atoms in the structure, in the order of
        the atoms.
        The last ID is expected to be the largest one.

    Returns
    -------
    bond_list : BondList
        The bonds between the given atoms.
        Records referring to atom IDs that are not part of `atom_ids`
        are ignored.

    Raises
    ------
    InvalidFileError
        If an atom ID is larger than the last atom ID.
    """
    if len(atom_ids) == 0:
        # No atoms -> no bonds; also avoids indexing the last atom ID
        return BondList(0, np.array([], dtype=np.uint32))

    conect_lines = [line for line in self.lines if line.startswith("CONECT")]

    # Mapping from atom ids to indices in an AtomArray
    atom_id_to_index = np.full(atom_ids[-1] + 1, -1, dtype=int)
    try:
        for i, atom_id in enumerate(atom_ids):
            atom_id_to_index[atom_id] = i
    except IndexError as e:
        raise InvalidFileError("Atom IDs are not strictly increasing") from e

    def index_for(id_string):
        # IDs beyond the lookup table belong to no atom in the AtomArray
        # and are treated like any other unknown ID
        try:
            return atom_id_to_index[decode_hybrid36(id_string)]
        except IndexError:
            return -1

    bonds = []
    for line in conect_lines:
        center_index = index_for(line[6:11])
        if center_index == -1:
            # Atom ID is not in the AtomArray (probably removed altloc)
            continue
        # Up to four bond partners, 5 columns each
        for i in range(11, 31, 5):
            id_string = line[i : i + 5]
            try:
                contact_index = index_for(id_string)
            except ValueError:
                # String is empty -> no further IDs
                break
            if contact_index == -1:
                # Atom ID is not in the AtomArray (probably removed altloc)
                continue
            bonds.append((center_index, contact_index))

    # The length of the 'atom_ids' array
    # is equal to the length of the AtomArray
    return BondList(len(atom_ids), np.array(bonds, dtype=np.uint32))
1242
+
1243
def _set_bonds(self, bond_list, atom_ids):
    """
    Append *CONECT* records for the bonds in `bond_list` to the lines
    of this file.

    Parameters
    ----------
    bond_list : BondList
        The bonds to be written.
        Only the bonded atom indices are used.
    atom_ids : sequence
        The atom ID to be written for each atom index.
        Each entry is formatted right-aligned into 5 columns.
    """
    # PDB does not support bond orders -> the bond types are discarded
    partner_table, _ = bond_list.get_all_bonds()

    for center_i, partner_row in enumerate(partner_table):
        # Collect the bond partners up to the first padding value
        partners = []
        for partner_i in partner_row:
            if partner_i == -1:
                break
            partners.append(partner_i)

        # A single record holds at most 4 bond partners,
        # further partners are put into additional records
        for start in range(0, len(partners), 4):
            record = f"CONECT{atom_ids[center_i]:>5}" + "".join(
                f"{atom_ids[partner_i]:>5}"
                for partner_i in partners[start : start + 4]
            )
            self.lines.append(record)
1266
+
1267
+
1268
def _parse_transformations(lines):
    """
    Read rotation matrices and translation vectors from the
    transformation lines of *REMARK* 290 or 350.

    Three consecutive lines, one for each of the (x,y,z) components,
    describe a single transformation.
    Returns the rotations as array of matrices and the translations as
    array of vectors.
    """
    n_transformations, n_surplus_lines = divmod(len(lines), 3)
    if n_surplus_lines != 0:
        raise InvalidFileError("Invalid number of transformation vectors")

    rotations = np.zeros((n_transformations, 3, 3), dtype=float)
    translations = np.zeros((n_transformations, 3), dtype=float)

    for line_i, line in enumerate(lines):
        # Skip the two leading fields
        # (component and transformation index)
        values = [float(field) for field in line.split()[2:]]
        if len(values) != 4:
            raise InvalidFileError("Invalid number of transformation vector elements")
        # Every block of three lines belongs to the same transformation
        transformation_i, component_i = divmod(line_i, 3)
        *rotation_row, shift = values
        rotations[transformation_i, component_i, :] = rotation_row
        translations[transformation_i, component_i] = shift

    return rotations, translations
1301
+
1302
+
1303
def _apply_transformations(structure, rotations, translations):
    """
    Build a subassembly that contains one transformed copy of the
    input structure (containing the affected chains) for each given
    transformation.

    Each atom of the result is annotated with ``sym_id``, the index of
    the transformation its copy was created with.
    """
    n_copies = len(rotations)
    # 'repeat()' expects the copies along an additional first dimension
    copy_coord = np.zeros((n_copies, *structure.coord.shape))

    for copy_i, (rot_matrix, shift) in enumerate(zip(rotations, translations)):
        # Rotate first, then translate the rotated coordinates in-place
        transformed = matrix_rotate(structure.coord, rot_matrix)
        transformed += shift
        copy_coord[copy_i] = transformed

    assembly = repeat(structure, copy_coord)
    sym_ids = np.repeat(np.arange(n_copies), structure.array_length())
    assembly.set_annotation("sym_id", sym_ids)
    return assembly
1325
+
1326
+
1327
def _check_pdb_compatibility(array, hybrid36):
    """
    Check whether the given structure fits into the PDB format.

    A warning is issued if atom or residue IDs exceed the maximum
    number (they will be wrapped).
    A :class:`BadStructureError` is raised if coordinates contain
    *NaN* values or if an annotation or coordinate requires more
    columns than available.

    Parameters
    ----------
    array
        The structure to be checked.
    hybrid36 : bool
        If true, the ID limits of the hybrid-36 encoding are applied
        instead of the regular PDB limits.
    """
    annot_categories = array.get_annotation_categories()

    # Largest representable atom and residue IDs
    if hybrid36:
        max_atoms = max_hybrid36_number(5)
        max_residues = max_hybrid36_number(4)
    else:
        max_atoms = _PDB_MAX_ATOMS
        max_residues = _PDB_MAX_RESIDUES

    # Without explicit atom IDs the number of atoms is the largest ID
    max_atom_id = (
        np.max(array.atom_id)
        if "atom_id" in annot_categories
        else array.array_length()
    )

    if max_atom_id > max_atoms:
        warnings.warn(f"Atom IDs exceed {max_atoms:,}, will be wrapped")
    if (array.res_id > max_residues).any():
        warnings.warn(f"Residue IDs exceed {max_residues:,}, will be wrapped")
    if np.isnan(array.coord).any():
        raise BadStructureError("Coordinates contain 'NaN' values")

    # Annotation name, maximum number of characters and error message
    name_limits = (
        ("chain_id", 1, "Some chain IDs exceed 1 character"),
        ("res_name", 3, "Some residue names exceed 3 characters"),
        ("atom_name", 4, "Some atom names exceed 4 characters"),
    )
    for category, max_length, message in name_limits:
        if any(len(name) > max_length for name in getattr(array, category)):
            raise BadStructureError(message)

    for dim, coord_name in enumerate(("x", "y", "z")):
        n_coord_digits = number_of_integer_digits(array.coord[..., dim])
        if n_coord_digits > 4:
            raise BadStructureError(
                f"4 pre-decimal columns for {coord_name}-coordinates are "
                f"available, but array would require {n_coord_digits}"
            )

    # Optional annotations with 3 pre-decimal columns each
    column_limits = (
        ("b_factor", "3 pre-decimal columns for B-factor are available, "),
        ("occupancy", "3 pre-decimal columns for occupancy are available, "),
    )
    for category, message_start in column_limits:
        if category not in annot_categories:
            continue
        n_digits = number_of_integer_digits(getattr(array, category))
        if n_digits > 3:
            raise BadStructureError(
                message_start + f"but array would require {n_digits}"
            )

    if "charge" in annot_categories:
        # The sign can be omitted as it is put into the adjacent column
        n_charge_digits = number_of_integer_digits(np.abs(array.charge))
        if n_charge_digits > 1:
            raise BadStructureError(
                "1 column for charge is available, "
                f"but array would require {n_charge_digits}"
            )