biotite 0.41.1__cp312-cp312-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-312-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,95 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["get_ccd", "get_from_ccd"]
8
+
9
+ from pathlib import Path
10
+ import numpy as np
11
+
12
+
13
+ CCD_DIR = Path(__file__).parent / "ccd"  # The "ccd" directory located next to this module
14
+ INDEX_COLUMN_NAME = {  # Per category: the column whose values identify the residue of a row
15
+ "chem_comp": "id",
16
+ "chem_comp_atom": "comp_id",
17
+ "chem_comp_bond": "comp_id",
18
+ }
19
+
20
+ _ccd_block = None  # Cache for the CCD block; filled by get_ccd() on its first call
21
+ # Maps a category name to the residue index of that category, which in turn
22
+ _residue_index = {}  # maps each residue ID to its (start, stop) row range; filled by get_from_ccd()
23
+
24
+
25
def get_ccd():
    """
    Get the PDB *Chemical Component Dictionary* (CCD).

    The file ``components.bcif`` in ``CCD_DIR`` is read on the first
    call only.
    Its ``block`` is kept in a module-level cache and the same object
    is handed out by every subsequent call.

    Returns
    -------
    ccd
        The ``block`` of the :class:`BinaryCIFFile` read from
        ``components.bcif``.
    """
    # Imported here instead of at module level to avoid a circular import
    from ..io.pdbx.bcif import BinaryCIFFile

    global _ccd_block
    if _ccd_block is not None:
        # Already loaded by an earlier call
        return _ccd_block

    ccd_file = BinaryCIFFile.read(CCD_DIR / "components.bcif")
    _ccd_block = ccd_file.block
    return _ccd_block
42
+
43
+
44
def get_from_ccd(category_name, comp_id, column_name=None):
    """
    Look up the rows belonging to a residue in one category of the
    PDB *Chemical Component Dictionary* (CCD).

    The row range of each residue is determined once per category and
    cached, so repeated lookups in the same category only need a
    dictionary access.

    Parameters
    ----------
    category_name : str
        The category in the CCD.
        Must be one of the keys of ``INDEX_COLUMN_NAME``.
    comp_id : str
        The residue identifier, i.e. the ``res_name``.
    column_name : str, optional
        The name of the column to be retrieved.
        If None, all columns are returned as dictionary.
        By default None.

    Returns
    -------
    value : ndarray or dict or None
        The array of the given column or all columns as dictionary
        (column name -> array).
        ``None`` if the `comp_id` is not found in the category.
    """
    category = get_ccd()[category_name]

    # Build the residue index for this category on first use
    if category_name not in _residue_index:
        id_column = category[INDEX_COLUMN_NAME[category_name]].as_array()
        _residue_index[category_name] = _index_residues(id_column)

    row_range = _residue_index[category_name].get(comp_id)
    if row_range is None:
        # The residue does not appear in this category
        return None
    start, stop = row_range

    if column_name is not None:
        return category[column_name].as_array()[start:stop]
    return {
        name: category[name].as_array()[start:stop]
        for name in category.keys()
    }
85
+
86
+
87
+ def _index_residues(id_column):
88
+ residue_starts = np.where(id_column[:-1] != id_column[1:])[0] + 1
89
+ # The final start is the exclusive stop of last residue
90
+ residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
91
+ index = {}
92
+ for i in range(len(residue_starts)-1):
93
+ comp_id = id_column[residue_starts[i]].item()
94
+ index[comp_id] = (residue_starts[i], residue_starts[i+1])
95
+ return index
@@ -0,0 +1,90 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Tom David Müller, Patrick Kunzmann"
7
+ __all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
8
+
9
+ from pathlib import Path
10
+ import copy
11
+
12
+
13
+ CCD_DIR = Path(__file__).parent / "ccd"  # The "ccd" directory located next to this module
14
+
15
+
16
+ group_lists = {}  # Cache: group name -> tuple of residue names; filled by _get_group_members()
17
+
18
+
19
def amino_acid_names():
    """
    Get the three-letter codes of all amino acids according to the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    Returns
    -------
    amino_acid_names : tuple of str
        The three-letter codes of all residues that are
        peptide monomers.

    References
    ----------

    .. footbibliography::
    """
    group_name = "amino_acids"
    return _get_group_members(group_name)
39
+
40
+
41
def nucleotide_names():
    """
    Get the three-letter codes of all nucleotides according to the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    Returns
    -------
    nucleotide_names : tuple of str
        The three-letter codes of all residues that are
        DNA/RNA monomers.

    References
    ----------

    .. footbibliography::
    """
    group_name = "nucleotides"
    return _get_group_members(group_name)
61
+
62
+
63
def carbohydrate_names():
    """
    Get the three-letter codes of all carbohydrates according to the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    Returns
    -------
    carbohydrate_names : tuple of str
        The three-letter codes of all residues that are
        saccharide monomers.

    References
    ----------

    .. footbibliography::
    """
    group_name = "carbohydrates"
    return _get_group_members(group_name)
83
+
84
+
85
def _get_group_members(group_name):
    """
    Get the residue names listed in the file ``<group_name>.txt``
    inside ``CCD_DIR``.

    The file is read on the first request for a group only; the result
    is stored in ``group_lists`` and reused afterwards.

    Parameters
    ----------
    group_name : str
        The name of the group, i.e. the file name without the ``.txt``
        suffix.

    Returns
    -------
    members : tuple of str
        The whitespace-separated entries of the group file.
    """
    try:
        return group_lists[group_name]
    except KeyError:
        # Not cached yet -> read the group file below
        pass

    with open(CCD_DIR / f"{group_name}.txt", "r") as file:
        members = tuple(file.read().split())
    group_lists[group_name] = members
    return members
@@ -0,0 +1,123 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["mass"]
8
+
9
+ import json
10
+ from pathlib import Path
11
+ from ..atoms import Atom, AtomArray, AtomArrayStack
12
+ from .ccd import get_from_ccd
13
+
14
+
15
+ # Masses are taken from http://www.sbcs.qmul.ac.uk/iupac/AtWt/ (2018/03/01)
16
+ ATOM_MASSES_FILE = Path(__file__).parent / "atom_masses.json"  # JSON file located next to this module
17
+ _atom_masses = None  # Holds the parsed content of ATOM_MASSES_FILE once mass() has loaded it
18
+
19
+
20
def mass(item, is_residue=None):
    """
    Calculate the mass for the given object.
    :footcite:`Meija2016`

    If a residue name is given, the mass values refer to the masses of
    the complete molecule without additional or missing protons.
    In case of residues in a longer chain, some atoms might be missing
    from the molecule.
    For example non-terminal residues in a protein or nucleotide chain
    miss the mass of a water molecule.

    Parameters
    ----------
    item : str or Atom or AtomArray or AtomArrayStack
        The atom or molecule to get the mass for.
        If a string is given, it is interpreted as residue name or
        chemical element.
        If an :class:`Atom` is given the mass is taken from its element.
        If an :class:`AtomArray` or :class:`AtomArrayStack` is given the
        mass is the sum of the mass of its atoms.
    is_residue : bool, optional
        If set to true and a string is given for `item`, the string
        will be strictly interpreted as residue.
        If set to false, the string is strictly interpreted as element.
        By default the string will be interpreted as element at first
        and secondly as residue name, if the element is unknown.

    Returns
    -------
    mass : float
        The mass of the given object in *u*.

    Raises
    ------
    KeyError
        If the element or residue is unknown.
    TypeError
        If `item` has an unsupported type.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> print(mass(atom_array))
    2170.438
    >>> first_residue = list(residue_iter(atom_array))[0]
    >>> print(first_residue)
    A 1 ASN N N -8.901 4.127 -0.555
    A 1 ASN CA C -8.608 3.135 -1.618
    A 1 ASN C C -7.117 2.964 -1.897
    A 1 ASN O O -6.634 1.849 -1.758
    A 1 ASN CB C -9.437 3.396 -2.889
    A 1 ASN CG C -10.915 3.130 -2.611
    A 1 ASN OD1 O -11.269 2.700 -1.524
    A 1 ASN ND2 N -11.806 3.406 -3.543
    A 1 ASN H1 H -8.330 3.957 0.261
    A 1 ASN H2 H -8.740 5.068 -0.889
    A 1 ASN H3 H -9.877 4.041 -0.293
    A 1 ASN HA H -8.930 2.162 -1.239
    A 1 ASN HB2 H -9.310 4.417 -3.193
    A 1 ASN HB3 H -9.108 2.719 -3.679
    A 1 ASN HD21 H -11.572 3.791 -4.444
    A 1 ASN HD22 H -12.757 3.183 -3.294
    >>> print(mass("ASN"))
    132.118
    >>> first_atom = first_residue[0]
    >>> print(first_atom)
    A 1 ASN N N -8.901 4.127 -0.555
    >>> print(mass(first_atom))
    14.007
    >>> print(mass("N"))
    14.007
    """
    global _atom_masses
    if _atom_masses is None:
        # Load the element masses once and cache them for subsequent
        # calls; summing over an atom array calls this function for
        # every single atom
        with open(ATOM_MASSES_FILE, "r") as file:
            _atom_masses = json.load(file)

    if isinstance(item, str):
        name = item.upper()
        if is_residue is None:
            # Try the element first and fall back to the residue name
            result_mass = _atom_masses.get(name)
            if result_mass is None:
                result_mass = _residue_mass(name)
        elif not is_residue:
            result_mass = _atom_masses.get(name)
        else:
            result_mass = _residue_mass(name)

    elif isinstance(item, Atom):
        result_mass = mass(item.element, is_residue=False)
    elif isinstance(item, (AtomArray, AtomArrayStack)):
        result_mass = sum(
            mass(element, is_residue=False) for element in item.element
        )

    else:
        raise TypeError(
            f"Cannot calculate mass for {type(item).__name__} objects"
        )

    if result_mass is None:
        raise KeyError(f"{item} is not known")
    return result_mass


def _residue_mass(res_name):
    """
    Get the ``formula_weight`` of a residue from the ``chem_comp``
    category of the CCD, or ``None`` if the residue is not found there.
    """
    weight = get_from_ccd("chem_comp", res_name, "formula_weight")
    if weight is None:
        return None
    return weight.item()
@@ -0,0 +1,144 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["all_residues", "full_name", "link_type", "one_letter_code"]
8
+
9
+ from .ccd import get_ccd, get_from_ccd
10
+
11
+
12
def all_residues():
    """
    List the names of all residues/compounds contained in the
    PDB chemical components dictionary.

    Returns
    -------
    residues : list of str
        The up to 3-letter residue names of all available compounds.

    Examples
    --------

    >>> print(all_residues()[1000 : 1010])
    ['0V9', '0VA', '0VB', '0VC', '0VD', '0VE', '0VF', '0VG', '0VH', '0VI']
    """
    id_column = get_ccd()["chem_comp"]["id"]
    return id_column.as_array().tolist()
29
+
30
+
31
def full_name(res_name):
    """
    Look up the full name of a residue/compound in the PDB chemical
    components dictionary, given its up to 3-letter residue name.

    Parameters
    ----------
    res_name : str
        The up to 3-letter residue name.
        The lookup is case-insensitive, as the name is converted to
        upper case.

    Returns
    -------
    name : str or None
        The full name of the residue.
        If the residue is unknown to the chemical components dictionary,
        ``None`` is returned.

    Examples
    --------

    >>> print(full_name("MAN"))
    alpha-D-mannopyranose
    """
    names = get_from_ccd("chem_comp", res_name.upper(), "name")
    return None if names is None else names.item()
58
+
59
+
60
def link_type(res_name):
    """
    Look up the linking type of a residue/compound in the PDB chemical
    components dictionary.

    Parameters
    ----------
    res_name : str
        The up to 3-letter residue name.
        The lookup is case-insensitive, as the name is converted to
        upper case.

    Returns
    -------
    link_type : str or None
        The link type.
        If the residue is unknown to the chemical components dictionary,
        ``None`` is returned.

    Examples
    --------

    >>> print(link_type("MAN"))
    D-saccharide, alpha linking
    >>> print(link_type("TRP"))
    L-PEPTIDE LINKING
    >>> print(link_type("HOH"))
    NON-POLYMER
    """
    types = get_from_ccd("chem_comp", res_name.upper(), "type")
    return None if types is None else types.item()
91
+
92
+
93
def one_letter_code(res_name):
    """
    Look up the one-letter code of a residue/compound in the PDB
    chemical components dictionary.

    The one-letter code is only defined for amino acids and nucleotides
    and for compounds that are structurally similar to them.

    Parameters
    ----------
    res_name : str
        The up to 3-letter residue name.
        The lookup is case-insensitive, as the name is converted to
        upper case.

    Returns
    -------
    one_letter_code : str or None
        The one-letter code.
        None if the compound is not present in the CCD or if no
        one-letter code is defined for this compound.

    Examples
    --------

    Get the one letter code for an amino acid (or a nucleotide).

    >>> print(full_name("ALA"))
    ALANINE
    >>> print(one_letter_code("ALA"))
    A

    For similar compounds, the one-letter code is also defined.

    >>> print(full_name("DAL"))
    D-ALANINE
    >>> print(one_letter_code("DAL"))
    A

    For other compounds, the one-letter code is not defined.

    >>> print(full_name("MAN"))
    alpha-D-mannopyranose
    >>> print(one_letter_code("MAN"))
    None

    """
    codes = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
    if codes is None:
        # The compound is not part of the CCD
        return None
    code = codes.item()
    # An empty entry means that no one-letter code is defined
    return None if code == "" else code
@@ -0,0 +1,197 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["vdw_radius_protor", "vdw_radius_single"]
8
+
9
+ from .bonds import bonds_in_residue
10
+
11
+
12
# ProtOr radii, keyed by the ProtOr group of a non-hydrogen atom.
# Each key is a tuple (element, valency, H count):
#   element - element symbol of the atom
#   valency - number of bonds the atom takes part in
#   H count - number of those bonds that go to a hydrogen atom
# The groups are assembled in `_calculate_protor_radii()`.
# NOTE(review): `_calculate_protor_radii()` only builds groups for the
# elements C, N, O and S, so the halogen entries below look unreachable
# through `vdw_radius_protor()` - confirm.
_PROTOR_RADII = {
    ("C", 3, 0) : 1.61,
    ("C", 3, 1) : 1.76,
    ("C", 4, 1) : 1.88,
    ("C", 4, 2) : 1.88,
    ("C", 4, 3) : 1.88,
    ("N", 3, 0) : 1.64,
    ("N", 3, 1) : 1.64,
    ("N", 3, 2) : 1.64,
    ("N", 4, 3) : 1.64,
    ("O", 1, 0) : 1.42,
    ("O", 2, 1) : 1.46,
    ("S", 1, 0) : 1.77,
    ("S", 2, 0) : 1.77, # Not official, added for completeness (MET)
    ("S", 2, 1) : 1.77,
    ("F", 1, 0) : 1.47, # Taken from _SINGLE_RADII
    ("CL", 1, 0) : 1.75, # Taken from _SINGLE_RADII
    ("BR", 1, 0) : 1.85, # Taken from _SINGLE_RADII
    ("I", 1, 0) : 1.98, # Taken from _SINGLE_RADII
}

# Van-der-Waals radii of single atoms, keyed by the upper case element
# symbol.
# `vdw_radius_single()` converts its input to upper case before the
# lookup, hence two-letter symbols are written as e.g. "CL" here.
_SINGLE_RADII = {
    "H": 1.20,
    "HE": 1.40,

    "C": 1.70,
    "N": 1.55,
    "O": 1.52,
    "F": 1.47,
    "NE": 1.54,

    "SI": 2.10,
    "P": 1.80,
    "S": 1.80,
    "CL": 1.75,
    "AR": 1.88,

    "AS": 1.85,
    "SE": 1.90,
    "BR": 1.85,
    "KR": 2.02,

    "TE": 2.06,
    "I": 1.98,
    "XE": 2.16,
}

# A dictionary that caches radii for each residue:
# upper case residue name -> {atom name: ProtOr radius or None}
# It is filled lazily by `vdw_radius_protor()` on the first request for
# a residue.
_protor_radii = {}
63
+
64
+
65
def vdw_radius_protor(res_name, atom_name):
    """
    Estimate the Van-der-Waals radius of a non-hydrogen atom,
    including the radius contributed by hydrogen atoms that are
    potentially bonded to it.
    The respective radii are taken from the ProtOr dataset.
    :footcite:`Tsai1999`

    This is especially useful for macromolecular structures where no
    hydrogen atoms are resolved, e.g. crystal structures.
    The valency of the non-hydrogen atom and the number of normally
    bonded hydrogen atoms are derived from the bonds of the residue in
    the chemical compound dictionary dataset.

    The radii of all atoms of a residue are computed on the first
    request for that residue and are cached for later calls.

    Parameters
    ----------
    res_name : str
        The up to 3-letter residue name the non-hydrogen atom belongs
        to.
        The name is converted to upper case.
    atom_name : str
        The name of the non-hydrogen atom.

    Returns
    -------
    radius : float or None
        The Van-der-Waals radius of the given atom.
        If the radius cannot be estimated for the atom, `None` is
        returned.

    Raises
    ------
    ValueError
        If `atom_name` starts with ``H``, i.e. the atom is taken to be
        a hydrogen atom.
    KeyError
        If the radii computed for the residue contain no entry for
        `atom_name`.

    See also
    --------
    vdw_radius_single

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> print(vdw_radius_protor("GLY", "CA"))
    1.88
    """
    res_name = res_name.upper()
    if atom_name[0] == "H":
        raise ValueError(
            f"Calculating the ProtOr radius for the hydrogen atom "
            f"'{atom_name}' is not meaningful"
        )

    # Compute the radii for the entire residue only once and keep them
    if res_name not in _protor_radii:
        _protor_radii[res_name] = _calculate_protor_radii(res_name)
    residue_radii = _protor_radii[res_name]

    if atom_name not in residue_radii:
        raise KeyError(
            f"Residue '{res_name}' does not contain an atom named "
            f"'{atom_name}'"
        )
    # The entry may be 'None', if no ProtOr group matches the atom
    return residue_radii[atom_name]
126
+
127
def _calculate_protor_radii(res_name):
    """
    Calculate the ProtOr VdW radii for all atoms (atom names) in
    a residue.

    Parameters
    ----------
    res_name : str
        The residue name, passed on to `bonds_in_residue()`.

    Returns
    -------
    radii : dict (str -> float or None)
        Maps the name of each non-hydrogen atom that appears in a bond
        of the residue to its ProtOr radius.
        The value is ``None``, if `_PROTOR_RADII` has no entry for the
        ProtOr group of the atom.
    """
    # Maps atom names to a ProtOr group
    # -> [element, valency, H count]
    # An empty tuple marks an atom for which no ProtOr group exists
    groups = {}
    for first, second in bonds_in_residue(res_name):
        # Each bond counts for both of its atoms:
        # every atom is the central atom once and the partner once
        for center, partner in ((first, second), (second, first)):
            # The first character of the atom name is used as element
            symbol = center[0]
            if symbol == "H":
                # Calculating ProtOr radii for hydrogens is not meaningful
                continue
            if symbol not in ("C", "N", "O", "S"):
                # ProtOr groups are only built for these four elements
                groups[center] = ()
                continue
            if center not in groups:
                groups[center] = [symbol, 0, 0]
            entry = groups[center]
            # Every bond entry increases the valency by one
            entry[1] += 1
            # Bonds to hydrogen additionally increase the H count
            if partner[0] == "H":
                entry[2] += 1

    # Choose the radius based on the ProtOr group of each atom
    radii = {}
    for name, group in groups.items():
        radii[name] = _PROTOR_RADII.get(tuple(group))
    return radii
165
+
166
+
167
def vdw_radius_single(element):
    """
    Look up the Van-der-Waals radius of a single atom of the given
    element.
    :footcite:`Bondi1964`

    Parameters
    ----------
    element : str
        The chemical element of the atom.
        The symbol is converted to upper case before the lookup.

    Returns
    -------
    radius : float or None
        The Van-der-Waals radius of the atom.
        If the radius is unknown for the element, `None` is returned.

    See also
    --------
    vdw_radius_protor

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> print(vdw_radius_single("C"))
    1.7
    """
    symbol = element.upper()
    if symbol in _SINGLE_RADII:
        return _SINGLE_RADII[symbol]
    # No radius is tabulated for this element
    return None