biotite 0.41.1__cp311-cp311-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-311-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,196 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["standardize_order"]
8
+
9
+ import warnings
10
+ import numpy as np
11
+ from .ccd import get_from_ccd
12
+ from ..residues import get_residue_starts
13
+ from ..error import BadStructureError
14
+
15
+
16
def standardize_order(atoms):
    """
    Compute an index array that brings the atoms of each residue of an
    :class:`AtomArray` or :class:`AtomArrayStack` into the standard
    *RCSB PDB* atom order.

    The reference order of a residue is looked up by its residue name
    in the official *Chemical Component Dictionary* (CCD).
    Atoms of the input structure that are not part of the reference
    residue are placed at the end of the respective residue.
    An example for this are optional hydrogen atoms that appear due to
    protonation.
    Residues that are not found in the CCD keep their current atom
    order and a warning is issued for them.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        Input structure with atoms that are potentially not in the
        *standard* order.

    Returns
    -------
    indices : ndarray, dtype=int, shape=(n,)
        When this index array is applied on the input `atoms`,
        the atoms of each residue are reordered to obtain the
        standard *RCSB PDB* atom order.

    Raises
    ------
    BadStructureError
        If the input `atoms` have duplicate atoms (same atom name)
        within a residue.

    Examples
    --------

    Use a single residue as example.

    >>> residue = atom_array[atom_array.res_id == 1]
    >>> print(residue)
    A 1 ASN N N -8.901 4.127 -0.555
    A 1 ASN CA C -8.608 3.135 -1.618
    A 1 ASN C C -7.117 2.964 -1.897
    A 1 ASN O O -6.634 1.849 -1.758
    A 1 ASN CB C -9.437 3.396 -2.889
    A 1 ASN CG C -10.915 3.130 -2.611
    A 1 ASN OD1 O -11.269 2.700 -1.524
    A 1 ASN ND2 N -11.806 3.406 -3.543
    A 1 ASN H1 H -8.330 3.957 0.261
    A 1 ASN H2 H -8.740 5.068 -0.889
    A 1 ASN H3 H -9.877 4.041 -0.293
    A 1 ASN HA H -8.930 2.162 -1.239
    A 1 ASN HB2 H -9.310 4.417 -3.193
    A 1 ASN HB3 H -9.108 2.719 -3.679
    A 1 ASN HD21 H -11.572 3.791 -4.444
    A 1 ASN HD22 H -12.757 3.183 -3.294

    Reverse the atom array.
    Consequently, this also changes the atom order within the residue.

    >>> reordered = residue[np.arange(len(residue))[::-1]]
    >>> print(reordered)
    A 1 ASN HD22 H -12.757 3.183 -3.294
    A 1 ASN HD21 H -11.572 3.791 -4.444
    A 1 ASN HB3 H -9.108 2.719 -3.679
    A 1 ASN HB2 H -9.310 4.417 -3.193
    A 1 ASN HA H -8.930 2.162 -1.239
    A 1 ASN H3 H -9.877 4.041 -0.293
    A 1 ASN H2 H -8.740 5.068 -0.889
    A 1 ASN H1 H -8.330 3.957 0.261
    A 1 ASN ND2 N -11.806 3.406 -3.543
    A 1 ASN OD1 O -11.269 2.700 -1.524
    A 1 ASN CG C -10.915 3.130 -2.611
    A 1 ASN CB C -9.437 3.396 -2.889
    A 1 ASN O O -6.634 1.849 -1.758
    A 1 ASN C C -7.117 2.964 -1.897
    A 1 ASN CA C -8.608 3.135 -1.618
    A 1 ASN N N -8.901 4.127 -0.555

    The order is restored with the exception of the N-terminus
    protonation.

    >>> restored = reordered[info.standardize_order(reordered)]
    >>> print(restored)
    A 1 ASN N N -8.901 4.127 -0.555
    A 1 ASN CA C -8.608 3.135 -1.618
    A 1 ASN C C -7.117 2.964 -1.897
    A 1 ASN O O -6.634 1.849 -1.758
    A 1 ASN CB C -9.437 3.396 -2.889
    A 1 ASN CG C -10.915 3.130 -2.611
    A 1 ASN OD1 O -11.269 2.700 -1.524
    A 1 ASN ND2 N -11.806 3.406 -3.543
    A 1 ASN H2 H -8.740 5.068 -0.889
    A 1 ASN HA H -8.930 2.162 -1.239
    A 1 ASN HB2 H -9.310 4.417 -3.193
    A 1 ASN HB3 H -9.108 2.719 -3.679
    A 1 ASN HD21 H -11.572 3.791 -4.444
    A 1 ASN HD22 H -12.757 3.183 -3.294
    A 1 ASN H3 H -9.877 4.041 -0.293
    A 1 ASN H1 H -8.330 3.957 0.261
    """
    reordered_indices = np.zeros(atoms.array_length(), dtype=int)

    # Consecutive entries are used as the [start, stop) bounds of one
    # residue; the exclusive stop closes the last residue
    residue_bounds = get_residue_starts(atoms, add_exclusive_stop=True)
    for start, stop in zip(residue_bounds[:-1], residue_bounds[1:]):
        res_name = atoms.res_name[start]
        standard_atom_names = get_from_ccd(
            "chem_comp_atom", res_name, "atom_id"
        )
        if standard_atom_names is not None:
            # `_reorder()` returns indices relative to the residue
            # -> shift them to positions in the entire structure
            residue_order = _reorder(
                atoms.atom_name[start:stop], standard_atom_names
            ) + start
        else:
            # The residue is not in the CCD -> keep the current order
            warnings.warn(
                f"Residue '{res_name}' is not in the CCD, "
                f"keeping current atom order"
            )
            residue_order = np.arange(start, stop)
        reordered_indices[start:stop] = residue_order

    return reordered_indices
141
+
142
+
143
+ def _reorder(origin, target):
144
+ """
145
+ Create indices to `origin`, that changes the order of `origin`,
146
+ so that the order is the same as in `target`.
147
+
148
+ Indices for elements of `target` that are not in `origin`
149
+ are ignored.
150
+ Indices for elements of `origin` that are not in `target`
151
+ are appended to the end of the returned array.
152
+
153
+
154
+ Parameters
155
+ ----------
156
+ origin : ndarray, dtype=str
157
+ The atom names to reorder.
158
+ target : ndarray, dtype=str
159
+ The atom names in target order.
160
+
161
+ Returns
162
+ -------
163
+ indices : ndarray, dtype=int
164
+ Indices for `origin` that that changes the order of `origin`
165
+ to the order of `target`.
166
+ """
167
+ target_hits, origin_hits = np.where(
168
+ target[:, np.newaxis] == origin[np.newaxis, :]
169
+ )
170
+
171
+ counts = np.bincount(target_hits, minlength=len(target))
172
+ if (counts > 1).any():
173
+ counts = np.bincount(target_hits, minlength=len(target))
174
+ # Identify which atom is duplicate
175
+ duplicate_i = np.where(
176
+ counts > 1
177
+ )[0][0]
178
+ duplicate_name = target[duplicate_i]
179
+ raise BadStructureError(
180
+ f"Input structure has duplicate atom '{duplicate_name}'"
181
+ )
182
+
183
+ if len(origin_hits) < len(origin):
184
+ # The origin structure has additional atoms
185
+ # to the target structure
186
+ # -> Identify which atoms are missing in the target structure
187
+ # and append these to the end of the residue
188
+ missing_atom_mask = np.bincount(
189
+ origin_hits, minlength=len(origin)
190
+ ).astype(bool)
191
+ return np.concatenate([
192
+ origin_hits,
193
+ np.where(~missing_atom_mask)[0]
194
+ ])
195
+ else:
196
+ return origin_hits
@@ -0,0 +1,268 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module allows checking of atom arrays and atom array stacks for
7
+ errors in the structure.
8
+ """
9
+
10
+ __name__ = "biotite.structure"
11
+ __author__ = "Patrick Kunzmann, Daniel Bauer"
12
+ __all__ = ["check_id_continuity", "check_atom_id_continuity",
13
+ "check_res_id_continuity", "check_backbone_continuity",
14
+ "check_duplicate_atoms", "check_bond_continuity",
15
+ "check_linear_continuity"]
16
+
17
+ import numpy as np
18
+ import warnings
19
+ from .atoms import AtomArray, AtomArrayStack
20
+ from .filter import (
21
+ filter_peptide_backbone, filter_phosphate_backbone, filter_linear_bond_continuity)
22
+ from .box import coord_to_fraction
23
+
24
+
25
+ def _check_continuity(array):
26
+ diff = np.diff(array)
27
+ discontinuity = np.where( ((diff != 0) & (diff != 1)) )
28
+ return discontinuity[0] + 1
29
+
30
+
31
def check_id_continuity(array):
    """
    Check if the residue IDs are incremented by more than 1 or
    decremented, from one atom to the next one.

    An increment by more than 1 is a strong clue for missing residues,
    a decrement probably means the start of a new chain.

    DEPRECATED: Use :func:`check_res_id_continuity()` instead.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be checked.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        Contains the indices of atoms after a discontinuity.
    """
    warnings.warn(
        "'check_id_continuity()' is deprecated, "
        "use 'check_res_id_continuity()' instead",
        DeprecationWarning,
        # Attribute the warning to the calling code instead of this
        # module, so that it is not hidden by the default filters
        stacklevel=2
    )
    return check_res_id_continuity(array)
57
+
58
+
59
def check_atom_id_continuity(array):
    """
    Find atoms whose atom ID is decremented or incremented by more
    than 1 compared to the preceding atom.

    An increment by more than 1 is a strong clue for missing atoms.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be checked.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        Contains the indices of atoms after a discontinuity.
    """
    return _check_continuity(array.atom_id)
78
+
79
+
80
def check_res_id_continuity(array):
    """
    Find atoms whose residue ID is decremented or incremented by more
    than 1 compared to the preceding atom.

    An increment by more than 1 is a strong clue for missing residues,
    a decrement probably means the start of a new chain.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be checked.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        Contains the indices of atoms after a discontinuity.
    """
    return _check_continuity(array.res_id)
100
+
101
+
102
def check_bond_continuity(array, min_len=1.2, max_len=1.8):
    """
    Check if the peptide or phosphate backbone atoms have a
    non-reasonable distance to the next residue.

    A large or very small distance is a very strong clue that there is
    no bond between those atoms, therefore the chain is discontinued.

    DEPRECATED: Please use :func:`check_backbone_continuity` for the
    same functionality.

    Parameters
    ----------
    array : AtomArray
        The array to be checked.
    min_len, max_len : float, optional
        The interval in which the atom-atom distance is evaluated as
        bond.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        Contains the indices of atoms after a discontinuity.
    """
    warnings.warn(
        "Reimplemented into `check_backbone_continuity()`",
        DeprecationWarning,
        # Attribute the warning to the calling code instead of this
        # module, so that it is not hidden by the default filters
        stacklevel=2
    )
    return check_backbone_continuity(array, min_len, max_len)
130
+
131
+
132
def check_linear_continuity(array, min_len=1.2, max_len=1.8):
    """
    Check linear (consecutive) bond continuity of atoms in an atom
    array.

    Parameters
    ----------
    array : AtomArray
        Arbitrary structure.
    min_len : float, optional
        Minimum bond length.
    max_len : float, optional
        Maximum bond length.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        Indices of `array` corresponding to atoms where the bond
        with the preceding atom is beyond the provided bounds.

    See Also
    --------
    biotite.structure.filter.filter_linear_bond_continuity :
        A function to filter for atoms preserving the continuity
        (used here).
    biotite.structure.bonds.BondList :
        A class that doesn't depend on the atoms' order to identify
        bonds.
    """
    # The mask marks the atoms whose *next* atom is continuous
    continuous_to_next = filter_linear_bond_continuity(
        array, min_len, max_len
    )
    # An atom that is not continuous to its successor makes the
    # successor the first atom after a break -> shift indices by one.
    # The last atom has no successor and is therefore left out
    return np.where(~continuous_to_next[:-1])[0] + 1
165
+
166
+
167
def check_backbone_continuity(array, min_len=1.2, max_len=1.8):
    """
    Check if the (peptide or phosphate) backbone atoms have a
    non-reasonable distance to the next backbone atom.

    A large or very small distance is a very strong clue that there is
    no bond between those atoms, therefore the chain is discontinued.

    Parameters
    ----------
    array : AtomArray
        The array to be checked.
    min_len, max_len : float, optional
        The interval in which the atom-atom distance is evaluated as
        bond.

    Returns
    -------
    discontinuity : ndarray, dtype=int
        Contains the indices of atoms after a discontinuity.

    See Also
    --------
    filter_linear_bond_continuity :
        A function to filter for atoms preserving the continuity.
    filter_peptide_backbone :
        A function to filter for peptide backbone atoms.
    filter_phosphate_backbone :
        A function to filter for phosphate backbone atoms.
    """
    is_peptide = filter_peptide_backbone(array)
    is_phosphate = filter_phosphate_backbone(array)
    backbone_mask = is_peptide | is_phosphate
    # Positions of the backbone atoms in the complete structure
    backbone_indices = np.where(backbone_mask)[0]

    # The mask marks the backbone atoms whose *next* backbone atom
    # is continuous
    continuous_to_next = filter_linear_bond_continuity(
        array[backbone_mask], min_len, max_len
    )
    # A backbone atom that is not continuous to its successor makes
    # the successor the first atom after a break
    # -> shift by one within the backbone atoms
    break_positions = np.where(~continuous_to_next[:-1])[0] + 1

    # Map positions within the backbone back to the entire structure
    return backbone_indices[break_positions]
203
+
204
+
205
def check_duplicate_atoms(array):
    """
    Check if a structure contains duplicate atoms, i.e. two atoms in a
    structure have the same annotations (coordinates may be different).

    Duplicate atoms may appear when a structure has occupancy for an
    atom at two or more positions or when the *altloc* positions are
    improperly read.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be checked.

    Returns
    -------
    duplicate : ndarray, dtype=int
        Contains the indices of duplicate atoms.
        The first occurrence of an atom is not counted as duplicate.
        The array is empty if there are no duplicate atoms.
    """
    duplicates = []
    annots = [
        array.get_annotation(category)
        for category in array.get_annotation_categories()
    ]
    for i in range(1, array.array_length()):
        # Start with the assumption that all atoms in the array
        # before index i are duplicates of the atom at index i
        is_duplicate = np.full(i, True, dtype=bool)
        for annot in annots:
            # For each annotation array rule out the atoms before
            # index i that have an unequal annotation
            # to the atom at index i
            is_duplicate &= (annot[:i] == annot[i])
        # After checking all annotation arrays,
        # if there still is any duplicate of the atom at index i,
        # add i to the list of duplicate atom indices
        if is_duplicate.any():
            duplicates.append(i)
    # An explicit dtype is required: an empty list would otherwise
    # become a float array, which is not usable as index array
    return np.array(duplicates, dtype=int)
243
+
244
+
245
def check_in_box(array):
    r"""
    Check if a structure contains atoms whose position is outside the
    box.

    Coordinates are outside the box when they cannot be represented by
    a linear combination of the box vectors with scalar factors
    :math:`0 \le a_i < 1`.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be checked.

    Returns
    -------
    outside : ndarray, dtype=int
        Contains the indices of atoms outside the atom array's box.

    Raises
    ------
    TypeError
        If the structure has no box.
    """
    if array.box is None:
        raise TypeError("Structure has no box")
    # NOTE(review): the structure itself is passed here, assuming that
    # `coord_to_fraction()` accepts it in place of coordinates - confirm
    fractions = coord_to_fraction(array, array.box)
    # An atom is inside the box if all of its fractional coordinates
    # are in the interval [0, 1)
    is_inside = ((fractions >= 0) & (fractions < 1)).all(axis=-1)
    # The atoms *outside* the box are requested -> invert the mask
    return np.where(~is_inside)[0]
@@ -0,0 +1,30 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for reading and writing structure related data.
7
+
8
+ Macromolecular structure files (PDB, PDBx/mmCIF, BinaryCIF, etc.) and
9
+ small molecule files (MOL, SDF, etc.) can be used
10
+ to load an :class:`AtomArray` or :class:`AtomArrayStack`.
11
+
12
+ Since the data model for the :class:`AtomArray` and
13
+ :class:`AtomArrayStack` class does not support duplicate atoms,
14
+ only one *altloc* can be chosen for each atom. Hence, the number of
15
+ atoms may be lower in the atom array (stack) than in the respective
16
+ structure file.
17
+
18
+ The recommended format for reading structure files is *BinaryCIF*.
19
+ It has by far the shortest parsing time and file size.
20
+
21
+ Besides the mentioned structure formats, Gromacs trajectory files can be
22
+ loaded, if `mdtraj` is installed.
23
+ """
24
+
25
+ __name__ = "biotite.structure.io"
26
+ __author__ = "Patrick Kunzmann"
27
+
28
+ from .ctab import *
29
+ from .general import *
30
+ from .trajfile import *
@@ -0,0 +1,72 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]
8
+
9
+ import warnings
10
+ from ..bonds import BondType
11
+
12
+
13
def read_structure_from_ctab(ctab_lines):
    """
    Parse a *MDL* connection table (Ctab) to obtain an
    :class:`AtomArray`. :footcite:`Dalby1992`.

    DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.

    Parameters
    ----------
    ctab_lines : lines of str
        The lines containing the *ctab*.
        Must begin with the *counts* line and end with the `M END` line.

    Returns
    -------
    atoms : AtomArray
        This :class:`AtomArray` contains the optional ``charge``
        annotation and has an associated :class:`BondList`.

    References
    ----------

    .. footbibliography::
    """
    # stacklevel=2 attributes the warning to the code calling this
    # deprecated wrapper instead of to the wrapper itself
    warnings.warn(
        "Moved to biotite.structure.io.mol.ctab",
        DeprecationWarning,
        stacklevel=2
    )
    # Imported inside the function and aliased, so the new
    # implementation does not shadow this wrapper's own name
    from biotite.structure.io.mol.ctab import (
        read_structure_from_ctab as _read_structure_from_ctab
    )
    return _read_structure_from_ctab(ctab_lines)
40
+
41
+
42
def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
    """
    Convert an :class:`AtomArray` into a
    *MDL* connection table (Ctab). :footcite:`Dalby1992`

    DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.

    Parameters
    ----------
    atoms : AtomArray
        The array must have an associated :class:`BondList`.
    default_bond_type : BondType
        Bond type fallback in the *Bond block* if a bond has no bond_type
        defined in *atoms* array. By default, each bond is treated as
        :attr:`BondType.ANY`.

    Returns
    -------
    ctab_lines : lines of str
        The lines containing the *ctab*.
        The lines begin with the *counts* line and end with the `M END`
        line.

    References
    ----------

    .. footbibliography::
    """
    # stacklevel=2 attributes the warning to the code calling this
    # deprecated wrapper instead of to the wrapper itself
    warnings.warn(
        "Moved to biotite.structure.io.mol.ctab",
        DeprecationWarning,
        stacklevel=2
    )
    # Imported inside the function and aliased, so the new
    # implementation does not shadow this wrapper's own name
    from biotite.structure.io.mol.ctab import (
        write_structure_to_ctab as _write_structure_to_ctab
    )
    return _write_structure_to_ctab(atoms, default_bond_type)
@@ -0,0 +1,13 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This subpackage is used for reading and writing trajectories in the
7
+ DCD format used by software like *CHARMM*, *OpenMM* and *NAMD*.
8
+ """
9
+
10
+ __name__ = "biotite.structure.io.dcd"
11
+ __author__ = "Patrick Kunzmann"
12
+
13
+ from .file import *
@@ -0,0 +1,65 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.dcd"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["DCDFile"]
8
+
9
+ import numpy as np
10
+ from ..trajfile import TrajectoryFile
11
+ from ...box import vectors_from_unitcell, unitcell_from_vectors
12
+
13
+
14
class DCDFile(TrajectoryFile):
    """
    This file class represents a DCD trajectory file.

    The methods below convert between the values exchanged with the
    ``mdtraj`` ``DCDTrajectoryFile`` class and coordinates/box vectors.
    """

    @classmethod
    def traj_type(cls):
        """
        Return the ``mdtraj`` class that handles DCD files.
        """
        # The import is performed inside the method body
        import mdtraj.formats as formats
        return formats.DCDTrajectoryFile

    @classmethod
    def process_read_values(cls, read_values):
        """
        Convert the values read from the file into coordinates, box
        vectors and time.

        Parameters
        ----------
        read_values : sequence
            Element 0 is used as coordinates, element 1 as unit cell
            lengths and element 2 as unit cell angles.

        Returns
        -------
        coord, box, time : tuple
            The coordinates are passed through unchanged.
            The box is ``None``, if the cell lengths or cell angles are
            ``None``.
            The time is always ``None``.
        """
        coord = read_values[0]
        lengths, angles = read_values[1], read_values[2]
        if lengths is None or angles is None:
            return coord, None, None
        # The angles are converted with deg2rad before they are
        # handed to 'vectors_from_unitcell()'
        model_boxes = []
        for (a, b, c), (alpha, beta, gamma) in zip(
            lengths, np.deg2rad(angles)
        ):
            model_boxes.append(
                vectors_from_unitcell(a, b, c, alpha, beta, gamma)
            )
        return coord, np.stack(model_boxes, axis=0), None

    @classmethod
    def prepare_write_values(cls, coord, box, time):
        """
        Build the dictionary of values that is written to the file.

        Parameters
        ----------
        coord : ndarray or None
            The coordinates; cast to ``float32`` if not ``None``.
        box : iterable of box vectors or None
            One box per model; converted into unit cell lengths and
            unit cell angles.
        time : object
            Not used by this method.

        Returns
        -------
        values : dict
            Contains the keys ``"xyz"``, ``"cell_lengths"`` and
            ``"cell_angles"``.
        """
        xyz = None if coord is None else coord.astype(np.float32, copy=False)
        cell_lengths = None
        cell_angles = None
        if box is not None:
            n_models = len(box)
            cell_lengths = np.zeros((n_models, 3), dtype=np.float32)
            cell_angles = np.zeros((n_models, 3), dtype=np.float32)
            for index, model_box in enumerate(box):
                a, b, c, alpha, beta, gamma = unitcell_from_vectors(model_box)
                cell_lengths[index] = np.array((a, b, c))
                # The angles are converted with rad2deg before storing
                cell_angles[index] = np.rad2deg((alpha, beta, gamma))
        return {
            "xyz" : xyz,
            "cell_lengths" : cell_lengths,
            "cell_angles" : cell_angles,
        }

    def set_time(self, time):
        """
        Reject any simulation time other than ``None``.

        Raises
        ------
        NotImplementedError
            If `time` is not ``None``.
        """
        if time is None:
            return
        raise NotImplementedError(
            "This trajectory file does not support writing simulation time"
        )