biotite 1.1.0__cp313-cp313-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cpython-313-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cpython-313-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cpython-313-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-313-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cpython-313-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-313-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cpython-313-darwin.so +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cpython-313-darwin.so +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cpython-313-darwin.so +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cpython-313-darwin.so +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cpython-313-darwin.so +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cpython-313-darwin.so +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cpython-313-darwin.so +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cpython-313-darwin.so +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cpython-313-darwin.so +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cpython-313-darwin.so +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cpython-313-darwin.so +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cpython-313-darwin.so +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cpython-313-darwin.so +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cpython-313-darwin.so +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cpython-313-darwin.so +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +4 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,149 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["bond_type", "bonds_in_residue"]
8
+
9
+ import functools
10
+ from biotite.structure.bonds import BondType
11
+ from biotite.structure.info.ccd import get_from_ccd
12
+
13
# Lookup table translating one row of the CCD ``chem_comp_bond`` category
# into a :class:`BondType`.
# The key is the pair ``(value_order, pdbx_aromatic_flag)``.
BOND_TYPES = {
    # Non-aromatic bonds
    ("SING", "N"): BondType.SINGLE,
    ("DOUB", "N"): BondType.DOUBLE,
    ("TRIP", "N"): BondType.TRIPLE,
    ("QUAD", "N"): BondType.QUADRUPLE,
    # Aromatic bonds (there is no aromatic quadruple bond entry)
    ("SING", "Y"): BondType.AROMATIC_SINGLE,
    ("DOUB", "Y"): BondType.AROMATIC_DOUBLE,
    ("TRIP", "Y"): BondType.AROMATIC_TRIPLE,
}
22
+
23
+ _intra_bonds = {}
24
+
25
+
26
def bond_type(res_name, atom_name1, atom_name2):
    """
    Look up the :class:`BondType` connecting two atoms of one residue
    in the PDB chemical components dictionary.

    Parameters
    ----------
    res_name : str
        The up to 3-letter name of the residue
        `atom_name1` and `atom_name2` belong to.
    atom_name1, atom_name2 : str
        The names of the two atoms to get the bond order from.
        The order of the two names is irrelevant.

    Returns
    -------
    order : BondType or None
        The :class:`BondType` of the bond between `atom_name1` and
        `atom_name2`.
        `None` is returned, if the atoms form no bond, if any of the
        two atoms does not exist in the context of the residue or if
        the residue is unknown to the chemical components dictionary.

    Examples
    --------

    >>> print(repr(bond_type("PHE", "CA", "CB")))
    <BondType.SINGLE: 1>
    >>> print(repr(bond_type("PHE", "CG", "CD1")))
    <BondType.AROMATIC_DOUBLE: 6>
    >>> print(repr(bond_type("PHE", "CA", "CG")))
    None
    >>> print(repr(bond_type("PHE", "FOO", "BAR")))
    None
    """
    residue_bonds = bonds_in_residue(res_name)
    if residue_bonds is None:
        return None

    # The dictionary stores each bond in one direction only,
    # so fall back to the reversed atom pair
    forward_pair = (atom_name1, atom_name2)
    if forward_pair in residue_bonds:
        found = residue_bonds[forward_pair]
    else:
        found = residue_bonds.get((atom_name2, atom_name1))

    return None if found is None else BondType(found)
71
+
72
+
73
@functools.cache
def bonds_in_residue(res_name):
    """
    Get a dictionary containing all atoms inside a given residue
    that form a bond.

    Parameters
    ----------
    res_name : str
        The up to 3-letter name of the residue to get the bonds for.

    Returns
    -------
    bonds : dict ((str, str) -> int)
        A dictionary that maps tuples of two atom names to their
        respective bond types (represented as integer).
        Empty, if the residue is unknown to the
        chemical components dictionary.

    Warnings
    --------
    Treat the returned dictionary as immutable.
    Modifying the dictionary may lead to unexpected behavior
    in other functionalities throughout *Biotite* that use this
    function.

    Notes
    -----
    The returned values are cached for faster access in subsequent calls.

    Examples
    --------
    >>> bonds = bonds_in_residue("PHE")
    >>> for atoms, bond_type_int in sorted(bonds.items()):
    ...     atom1, atom2 = sorted(atoms)
    ...     print(f"{atom1:3} + {atom2:3} -> {BondType(bond_type_int).name}")
    C   + O   -> DOUBLE
    C   + OXT -> SINGLE
    C   + CA  -> SINGLE
    CA  + CB  -> SINGLE
    CA  + HA  -> SINGLE
    CB  + CG  -> SINGLE
    CB  + HB2 -> SINGLE
    CB  + HB3 -> SINGLE
    CD1 + CE1 -> AROMATIC_SINGLE
    CD1 + HD1 -> SINGLE
    CD2 + CE2 -> AROMATIC_DOUBLE
    CD2 + HD2 -> SINGLE
    CE1 + CZ  -> AROMATIC_DOUBLE
    CE1 + HE1 -> SINGLE
    CE2 + CZ  -> AROMATIC_SINGLE
    CE2 + HE2 -> SINGLE
    CD1 + CG  -> AROMATIC_DOUBLE
    CD2 + CG  -> AROMATIC_SINGLE
    CZ  + HZ  -> SINGLE
    CA  + N   -> SINGLE
    H   + N   -> SINGLE
    H2  + N   -> SINGLE
    HXT + OXT -> SINGLE
    """
    # ``functools.cache`` is the only cache used here: a second hand-written
    # cache would survive ``cache_clear()`` and keep serving bonds from a
    # previously loaded CCD
    chem_comp_bond = get_from_ccd("chem_comp_bond", res_name)
    if chem_comp_bond is None:
        # Residue is not part of the CCD
        return {}

    return {
        (atom1.item(), atom2.item()): BOND_TYPES[order, aromatic_flag]
        for atom1, atom2, order, aromatic_flag in zip(
            chem_comp_bond["atom_id_1"].as_array(),
            chem_comp_bond["atom_id_2"].as_array(),
            chem_comp_bond["value_order"].as_array(),
            chem_comp_bond["pdbx_aromatic_flag"].as_array(),
        )
    }
@@ -0,0 +1,202 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["get_ccd", "set_ccd_path", "get_from_ccd"]
8
+
9
+ import functools
10
+ import importlib
11
+ import inspect
12
+ import pkgutil
13
+ from pathlib import Path
14
+ import numpy as np
15
+
16
# Location of the CCD subset that is shipped next to this module
_CCD_FILE = Path(__file__).parent / "components.bcif"
# Categories whose component ID column is not named like the default below
_SPECIAL_ID_COLUMN_NAMES = {
    "chem_comp": "id",
}
# Name of the component ID column in all other categories
_DEFAULT_ID_COLUMN_NAME = "comp_id"
21
+
22
+
23
@functools.cache
def get_ccd():
    """
    Get the internal subset of the PDB
    *Chemical Component Dictionary* (CCD).
    :footcite:`Westbrook2015`

    Returns
    -------
    ccd : BinaryCIFBlock
        The CCD.
        It contains the categories `chem_comp`, `chem_comp_atom` and `chem_comp_bond`.

    Raises
    ------
    RuntimeError
        If the CCD file does not exist.

    Warnings
    --------
    Consider the return value as read-only.
    As other functions cache data from it, changing data may lead to undefined
    behavior.

    Notes
    -----
    The returned block is cached, so the file is read only once until the
    cache is cleared.

    References
    ----------

    .. footbibliography::

    """
    # Avoid circular import
    from biotite.structure.io.pdbx.bcif import BinaryCIFFile

    try:
        return BinaryCIFFile.read(_CCD_FILE).block
    except FileNotFoundError as err:
        # Chain explicitly, so the traceback marks the missing file as the cause
        raise RuntimeError(
            "Internal CCD not found. Please run 'python -m biotite.setup_ccd'."
        ) from err
58
+
59
+
60
def set_ccd_path(ccd_path):
    """
    Replace the internal *Chemical Component Dictionary* (CCD) with a custom one.

    Afterwards the caches of all functions in ``biotite.structure.info`` are
    cleared, so that subsequent calls read from the new CCD.

    Parameters
    ----------
    ccd_path : path-like
        The path to the custom CCD in BinaryCIF format, prepared with the
        ``setup_ccd.py`` module.

    Notes
    -----
    This function is intended for advanced users who need to add information for
    compounds, which are not part of the internal CCD.
    The reason might be that an updated version already exists upstream or that
    the user wants to add custom compounds to the CCD.
    """
    global _CCD_FILE
    _CCD_FILE = Path(ccd_path)

    # Import all modules located next to this file first...
    package_dir = str(Path(__file__).parent)
    info_modules = []
    for _finder, mod_name, _is_pkg in pkgutil.iter_modules([package_dir]):
        info_modules.append(
            importlib.import_module(f"biotite.structure.info.{mod_name}")
        )

    # ...then reset every callable that exposes a ``cache_clear()`` method
    for module in info_modules:
        for _member_name, member in inspect.getmembers(module, callable):
            if hasattr(member, "cache_clear"):
                member.cache_clear()
92
+
93
+
94
@functools.cache
def get_from_ccd(category_name, comp_id, column_name=None):
    """
    Extract the rows belonging to a residue from a category of the
    internal subset of the PDB *Chemical Component Dictionary* (CCD).
    :footcite:`Westbrook2015`

    Parameters
    ----------
    category_name : str
        The category in the CCD.
    comp_id : str
        The residue identifier, i.e. the ``res_name``.
    column_name : str, optional
        The name of the column to be retrieved.
        If None, the entire category is returned.
        By default None.

    Returns
    -------
    slice : BinaryCIFCategory or BinaryCIFColumn or None
        The category or column (if `column_name` is provided) containing only the rows
        for the given residue.
        None, if the lookup of the residue fails with a :class:`KeyError`.

    Notes
    -----
    The returned values are cached for faster access in subsequent calls.

    References
    ----------

    .. footbibliography::

    """
    try:
        row_range = slice(*_residue_index(category_name)[comp_id])
    except KeyError:
        return None

    category = get_ccd()[category_name]
    if column_name is not None:
        return _filter_column(category[column_name], row_range)
    return _filter_category(category, row_range)
138
+
139
+
140
@functools.cache
def _residue_index(category_name):
    """
    Get the start and stop index for each component name in the given
    CCD category.

    Parameters
    ----------
    category_name : str
        The category to determine start and stop indices for each component in.

    Returns
    -------
    index : dict (str -> (int, int))
        The index maps each present component name to the corresponding
        start and exclusive stop index in the ID column of the category.
        Empty, if the category contains no rows.

    Notes
    -----
    Rows are grouped by runs of equal consecutive IDs.
    If the same ID appeared in several separate runs, only the last run
    would be kept.
    """
    category = get_ccd()[category_name]
    id_column_name = _SPECIAL_ID_COLUMN_NAMES.get(
        category_name, _DEFAULT_ID_COLUMN_NAME
    )
    id_column = category[id_column_name].as_array()
    if len(id_column) == 0:
        # Without this guard the lookup of the first ID below would fail
        return {}

    # Positions where the ID differs from the one in the preceding row
    change_points = np.where(id_column[:-1] != id_column[1:])[0] + 1
    # The final boundary is the exclusive stop of the last residue;
    # ``tolist()`` yields plain Python integers
    boundaries = np.concatenate(([0], change_points, [len(id_column)])).tolist()
    return {
        id_column[start].item(): (start, stop)
        for start, stop in zip(boundaries[:-1], boundaries[1:])
    }
171
+
172
+
173
def _filter_category(category, index):
    """
    Create a new category, where each column of the given category is
    reduced to the values for the given index.
    """
    # Avoid circular import
    from biotite.structure.io.pdbx.bcif import BinaryCIFCategory

    filtered_columns = {}
    for column_name, column in category.items():
        filtered_columns[column_name] = _filter_column(column, index)
    return BinaryCIFCategory(filtered_columns)
183
+
184
+
185
def _filter_column(column, index):
    """
    Create a new column containing only the values of the given column
    at the given index.

    The mask is dropped, if all selected values are marked as present.
    """
    # Avoid circular import
    from biotite.structure.io.pdbx.bcif import BinaryCIFColumn, BinaryCIFData
    from biotite.structure.io.pdbx.component import MaskValue

    selected_data = column.data.array[index]
    selected_mask = None
    if column.mask is not None:
        selected_mask = column.mask.array[index]

    filtered_data = BinaryCIFData(selected_data)
    if selected_mask is not None and (selected_mask != MaskValue.PRESENT).any():
        filtered_mask = BinaryCIFData(selected_mask)
    else:
        filtered_mask = None
    return BinaryCIFColumn(filtered_data, filtered_mask)
Binary file
@@ -0,0 +1,131 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Tom David Müller, Patrick Kunzmann"
7
+ __all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
8
+
9
+ import functools
10
+ import numpy as np
11
+ from biotite.structure.info.ccd import get_ccd
12
+
13
# Values of the ``type`` column in the CCD ``chem_comp`` category,
# grouped by the kind of monomer they describe.
# They are compared case-insensitively against the CCD.

# Peptide monomers
_AMINO_ACID_TYPES = [
    "D-beta-peptide, C-gamma linking",
    "D-gamma-peptide, C-delta linking",
    "D-peptide COOH carboxy terminus",
    "D-peptide NH3 amino terminus",
    "D-peptide linking",
    "L-beta-peptide, C-gamma linking",
    "L-gamma-peptide, C-delta linking",
    "L-peptide COOH carboxy terminus",
    "L-peptide NH3 amino terminus",
    "L-peptide linking",
    "peptide linking",
]
# DNA/RNA monomers
_NUCLEOTIDE_TYPES = [
    "DNA OH 3 prime terminus",
    "DNA OH 5 prime terminus",
    "DNA linking",
    "L-DNA linking",
    "L-RNA linking",
    "RNA OH 3 prime terminus",
    "RNA OH 5 prime terminus",
    "RNA linking",
]
# Saccharide monomers
_CARBOHYDRATE_TYPES = [
    "D-saccharide",
    "D-saccharide, alpha linking",
    "D-saccharide, beta linking",
    "L-saccharide",
    "L-saccharide, alpha linking",
    "L-saccharide, beta linking",
    "saccharide",
]
45
+
46
+
47
@functools.cache
def amino_acid_names():
    """
    Get a tuple of amino acid three-letter codes according to the
    PDB *Chemical Component Dictionary*.
    :footcite:`Westbrook2015`

    Returns
    -------
    amino_acid_names : tuple of str
        A tuple of three-letter-codes containing residues that are
        peptide monomers.

    Notes
    -----
    The returned value is cached for faster access in subsequent calls.

    References
    ----------

    .. footbibliography::

    """
    # The cached result is shared between all callers:
    # an immutable tuple prevents one caller from altering it for the others
    return tuple(_get_group_members(_AMINO_ACID_TYPES))
67
+
68
+
69
@functools.cache
def nucleotide_names():
    """
    Get a tuple of nucleotide three-letter codes according to the
    PDB *Chemical Component Dictionary*.
    :footcite:`Westbrook2015`

    Returns
    -------
    nucleotide_names : tuple of str
        A tuple of three-letter-codes containing residues that are
        DNA/RNA monomers.

    Notes
    -----
    The returned value is cached for faster access in subsequent calls.

    References
    ----------

    .. footbibliography::

    """
    # The cached result is shared between all callers:
    # an immutable tuple prevents one caller from altering it for the others
    return tuple(_get_group_members(_NUCLEOTIDE_TYPES))
89
+
90
+
91
@functools.cache
def carbohydrate_names():
    """
    Get a tuple of carbohydrate three-letter codes according to the
    PDB *Chemical Component Dictionary*.
    :footcite:`Westbrook2015`

    Returns
    -------
    carbohydrate_names : tuple of str
        A tuple of three-letter-codes containing residues that are
        saccharide monomers.

    Notes
    -----
    The returned value is cached for faster access in subsequent calls.

    References
    ----------

    .. footbibliography::

    """
    # The cached result is shared between all callers:
    # an immutable tuple prevents one caller from altering it for the others
    return tuple(_get_group_members(_CARBOHYDRATE_TYPES))
111
+
112
+
113
def _get_group_members(match_types):
    """
    Identify the component IDs in the CCD, whose component *type* is one
    of the given types.

    Parameters
    ----------
    match_types : list of str
        The component types to extract.
        The comparison is case-insensitive.

    Returns
    -------
    comp_ids : list of str
        The extracted component IDs.
    """
    chem_comp = get_ccd()["chem_comp"]
    component_ids = chem_comp["id"].as_array()
    component_types = chem_comp["type"].as_array()
    # Ignore case by comparing the lower case variants
    is_member = np.isin(
        np.char.lower(component_types), np.char.lower(match_types)
    )
    return component_ids[is_member].tolist()
@@ -0,0 +1,121 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["mass"]
8
+
9
+ import json
10
+ from pathlib import Path
11
+ from biotite.structure.atoms import Atom, AtomArray, AtomArrayStack
12
+ from biotite.structure.info.ccd import get_from_ccd
13
+
14
# Location of the element mass table, which is shipped alongside this module.
# Masses are taken from http://www.sbcs.qmul.ac.uk/iupac/AtWt/ (2018/03/01)
ATOM_MASSES_FILE = Path(__file__).parent.joinpath("atom_masses.json")
# Parsed content of ATOM_MASSES_FILE; it is filled in by `mass()`
_atom_masses = None
17
+
18
+
19
+ def mass(item, is_residue=None):
20
+ """
21
+ Calculate the mass for the given object.
22
+ :footcite:`Meija2016`
23
+
24
+ If a residue name is given, the mass values refer to the masses of
25
+ the complete molecule without additional or missing protons.
26
+ In case of residues in a longer chain, some atoms might be missing
27
+ from the molecule.
28
+ For example non-terminal residues in a protein or nucleotide chain
29
+ miss the mass of a water molecule.
30
+
31
+ Parameters
32
+ ----------
33
+ item : str or Atom or AtomArray or AtomArrayStack
34
+ The atom or molecule to get the mass for.
35
+ If a string is given, it is interpreted as residue name or
36
+ chemical element.
37
+ If an :class:`Atom` is given the mass is taken from its element.
38
+ If an :class:`AtomArray` or :class:`AtomArrayStack` is given the
39
+ mass is the sum of the mass of its atoms.
40
+ is_residue : bool, optional
41
+ If set to true and a string is given for `item`, the string
42
+ will be strictly interpreted as residue.
43
+ If set to false, the string is strictly interpreted as element.
44
+ By default the string will be interpreted as element at first
45
+ and secondly as residue name, if the element is unknown.
46
+
47
+ Returns
48
+ -------
49
+ mass : float or None
50
+ The mass of the given object in *u*. None if the mass is unknown.
51
+
52
+ References
53
+ ----------
54
+
55
+ .. footbibliography::
56
+
57
+ Examples
58
+ --------
59
+
60
+ >>> print(mass(atom_array))
61
+ 2170.438
62
+ >>> first_residue = list(residue_iter(atom_array))[0]
63
+ >>> print(first_residue)
64
+ A 1 ASN N N -8.901 4.127 -0.555
65
+ A 1 ASN CA C -8.608 3.135 -1.618
66
+ A 1 ASN C C -7.117 2.964 -1.897
67
+ A 1 ASN O O -6.634 1.849 -1.758
68
+ A 1 ASN CB C -9.437 3.396 -2.889
69
+ A 1 ASN CG C -10.915 3.130 -2.611
70
+ A 1 ASN OD1 O -11.269 2.700 -1.524
71
+ A 1 ASN ND2 N -11.806 3.406 -3.543
72
+ A 1 ASN H1 H -8.330 3.957 0.261
73
+ A 1 ASN H2 H -8.740 5.068 -0.889
74
+ A 1 ASN H3 H -9.877 4.041 -0.293
75
+ A 1 ASN HA H -8.930 2.162 -1.239
76
+ A 1 ASN HB2 H -9.310 4.417 -3.193
77
+ A 1 ASN HB3 H -9.108 2.719 -3.679
78
+ A 1 ASN HD21 H -11.572 3.791 -4.444
79
+ A 1 ASN HD22 H -12.757 3.183 -3.294
80
+ >>> print(mass("ASN"))
81
+ 132.118
82
+ >>> first_atom = first_residue[0]
83
+ >>> print(first_atom)
84
+ A 1 ASN N N -8.901 4.127 -0.555
85
+ >>> print(mass(first_atom))
86
+ 14.007
87
+ >>> print(mass("N"))
88
+ 14.007
89
+ """
90
+ global _atom_masses
91
+ with open(ATOM_MASSES_FILE, "r") as file:
92
+ _atom_masses = json.load(file)
93
+
94
+ if isinstance(item, str):
95
+ if is_residue is None:
96
+ result_mass = _atom_masses.get(item.upper())
97
+ if result_mass is None:
98
+ result_mass = _mass_for_residue(item)
99
+ elif not is_residue:
100
+ result_mass = _atom_masses.get(item.upper())
101
+ else:
102
+ result_mass = _mass_for_residue(item)
103
+
104
+ elif isinstance(item, Atom):
105
+ result_mass = mass(item.element, is_residue=False)
106
+ elif isinstance(item, AtomArray) or isinstance(item, AtomArrayStack):
107
+ result_mass = sum((mass(element, is_residue=False) for element in item.element))
108
+
109
+ else:
110
+ raise TypeError(f"Cannot calculate mass for {type(item).__name__} objects")
111
+
112
+ if result_mass is None:
113
+ raise KeyError(f"{item} is not known")
114
+ return result_mass
115
+
116
+
117
def _mass_for_residue(res_name):
    """
    Look up the formula weight of a residue in the ``chem_comp``
    category of the CCD.

    Parameters
    ----------
    res_name : str
        The residue name. It is converted to upper case for the lookup.

    Returns
    -------
    mass
        The ``formula_weight`` entry for the residue.

    Raises
    ------
    KeyError
        If the CCD lookup yields no entry for the residue.
    """
    weight_column = get_from_ccd("chem_comp", res_name.upper(), "formula_weight")
    if weight_column is not None:
        return weight_column.as_item()
    raise KeyError(f"Residue '{res_name}' is not known")