biotite 0.41.1__cp311-cp311-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-311-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,178 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["apply_segment_wise", "spread_segment_wise", "get_segment_masks",
8
+ "get_segment_starts_for", "get_segment_positions", "segment_iter"]
9
+
10
+ import numpy as np
11
+
12
+
13
def apply_segment_wise(starts, data, function, axis):
    """
    Generalized version of :func:`apply_residue_wise()` for
    residues and chains.

    The `function` is evaluated exactly once for each segment of
    `data` and the return values are collected in a single array.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    data : ndarray
        The data to be sliced into segments along its first axis.
    function : callable
        The function applied to each segment.
        It is called as ``function(segment)`` if `axis` is ``None``,
        otherwise as ``function(segment, axis=axis)``.
    axis : int or None
        The value forwarded to the `axis` keyword of `function`.
        If ``None``, the keyword is not passed at all.

    Returns
    -------
    processed_data : ndarray or None
        The function return values, one entry for each segment.
        If `function` returns an :class:`ndarray`, the result has the
        shape *(number of segments, \\*value.shape)*, otherwise it is
        one-dimensional.
        ``None`` is returned, if `starts` describes no segment at all,
        as the result shape and dtype cannot be inferred in this case.
    """
    n_segments = len(starts) - 1
    # The result array, allocated lazily as soon as the shape and dtype
    # of the first function return value is known
    processed_data = None
    for i in range(n_segments):
        segment = data[starts[i]:starts[i+1]]
        # Call the function only once per segment and only pass
        # the 'axis' keyword if the caller actually requested an axis
        if axis is None:
            value = function(segment)
        else:
            value = function(segment, axis=axis)
        # Identify the shape of the resulting array by evaluation
        # of the function return value for the first segment
        if processed_data is None:
            if isinstance(value, np.ndarray):
                processed_data = np.zeros(
                    (n_segments,) + value.shape, dtype=value.dtype
                )
            else:
                # Scalar value -> one dimensional result array
                processed_data = np.zeros(n_segments, dtype=type(value))
        # Write values into result arrays
        processed_data[i] = value
    return processed_data
52
+
53
+
54
def spread_segment_wise(starts, input_data):
    """
    Generalized version of :func:`spread_residue_wise()`
    for residues and chains.

    Each segment-wise value is copied to every position that belongs
    to the respective segment.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    input_data : array-like
        The segment-wise data, one entry along the first axis for each
        segment.
        Entries may be scalars or arrays: trailing dimensions are
        preserved in the output.

    Returns
    -------
    output_data : ndarray
        The spread data.
        The length of the first axis is ``starts[-1]``, the remaining
        dimensions and the dtype are taken from `input_data`.
        Positions before ``starts[0]`` are left as zeros.
    """
    input_data = np.asarray(input_data)
    # Keep the trailing dimensions, so that per-segment vectors or
    # matrices can be spread in the same way as per-segment scalars
    output_data = np.zeros(
        (starts[-1],) + input_data.shape[1:], dtype=input_data.dtype
    )
    for i, (start, stop) in enumerate(zip(starts[:-1], starts[1:])):
        output_data[start:stop] = input_data[i]
    return output_data
72
+
73
+
74
def get_segment_masks(starts, indices):
    """
    Generalized version of :func:`get_residue_masks()`
    for residues and chains.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    indices : array-like, dtype=int
        The indices to get the segment masks for.
        Negative indices are not supported.

    Returns
    -------
    masks : ndarray, dtype=bool, shape=(k,n)
        One mask for each of the *k* given indices.
        A mask is true at all positions of the segment, that contains
        the respective index.
        *n* is equal to ``starts[-1]``.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than ``starts[-1]``.
    """
    indices = np.asarray(indices)
    length = starts[-1]

    if (indices < 0).any():
        raise ValueError("This function does not support negative indices")
    out_of_range = indices >= length
    if out_of_range.any():
        # Report the first offending index value itself,
        # not its position within the 'indices' array
        index = indices[out_of_range][0]
        raise ValueError(
            f"Index {index} is out of range for "
            f"an atom array with length {length}"
        )

    # Allocate the masks only after the input has been validated
    masks = np.zeros((len(indices), length), dtype=bool)
    # For each index, find the segment whose start is the last one
    # that is not greater than the index
    insertion_points = np.searchsorted(starts, indices, side="right") - 1
    for mask, point in zip(masks, insertion_points):
        mask[starts[point] : starts[point+1]] = True

    return masks
104
+
105
+
106
def get_segment_starts_for(starts, indices):
    """
    Generalized version of :func:`get_residue_starts_for()`
    for residues and chains.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    indices : array-like, dtype=int
        The indices to get the segment starts for.
        Negative indices are not supported.

    Returns
    -------
    segment_starts : ndarray, dtype=int
        For each given index, the start index of the segment that
        contains this index.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than ``starts[-1]``.
    """
    indices = np.asarray(indices)
    length = starts[-1]
    # Remove exclusive stop
    starts = starts[:-1]

    if (indices < 0).any():
        raise ValueError("This function does not support negative indices")
    out_of_range = indices >= length
    if out_of_range.any():
        # Report the first offending index value itself,
        # not its position within the 'indices' array
        index = indices[out_of_range][0]
        raise ValueError(
            f"Index {index} is out of range for "
            f"an atom array with length {length}"
        )

    # The containing segment is the one with the last start
    # that is not greater than the index
    insertion_points = np.searchsorted(starts, indices, side="right") - 1
    return starts[insertion_points]
134
+
135
+
136
def get_segment_positions(starts, indices):
    """
    Generalized version of :func:`get_residue_positions()`
    for residues and chains.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    indices : array-like, dtype=int
        The indices to get the segment positions for.
        Negative indices are not supported.

    Returns
    -------
    positions : ndarray, dtype=int
        For each given index, the zero-based position of the segment
        that contains this index, i.e. the index into `starts`.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than ``starts[-1]``.
    """
    indices = np.asarray(indices)
    length = starts[-1]
    # Remove exclusive stop
    starts = starts[:-1]

    if (indices < 0).any():
        raise ValueError("This function does not support negative indices")
    out_of_range = indices >= length
    if out_of_range.any():
        # Report the first offending index value itself,
        # not its position within the 'indices' array
        index = indices[out_of_range][0]
        raise ValueError(
            f"Index {index} is out of range for "
            f"an atom array with length {length}"
        )

    # The containing segment is the one with the last start
    # that is not greater than the index
    return np.searchsorted(starts, indices, side="right") - 1
163
+
164
+
165
def segment_iter(array, starts):
    """
    Generalized version of :func:`residue_iter()`
    for residues and chains.

    Parameters
    ----------
    array : object
        The object to be split into segments.
        It is sliced along its last axis.
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.

    Yields
    ------
    segment
        The slice of `array` between two consecutive values of
        `starts`, one slice for each segment.
    """
    # Pair each segment start with the start of the following segment,
    # which acts as exclusive stop
    for seg_start, seg_stop in zip(starts[:-1], starts[1:]):
        yield array[..., seg_start:seg_stop]
@@ -0,0 +1,322 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Use this module to calculate the Solvent Accessible Surface Area (SASA) of
7
+ a protein or single atoms.
8
+ """
9
+
10
+ __name__ = "biotite.structure"
11
+ __author__ = "Patrick Kunzmann"
12
+ __all__ = ["sasa"]
13
+
14
+ cimport cython
15
+ cimport numpy as np
16
+ from libc.stdlib cimport malloc, free
17
+
18
+ import numpy as np
19
+ from .celllist import CellList
20
+ from .filter import filter_solvent, filter_monoatomic_ions
21
+ from .info.radii import vdw_radius_protor, vdw_radius_single
22
+
23
+ ctypedef np.uint8_t np_bool
24
+ ctypedef np.int64_t int64
25
+ ctypedef np.float32_t float32
26
+
27
+
28
+ @cython.boundscheck(False)
29
+ @cython.wraparound(False)
30
+ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
31
+ bint ignore_ions=True, int point_number=1000,
32
+ point_distr="Fibonacci", vdw_radii="ProtOr"):
33
+ """
34
+ sasa(array, probe_radius=1.4, atom_filter=None, ignore_ions=True,
35
+ point_number=1000, point_distr="Fibonacci", vdw_radii="ProtOr")
36
+
37
+ Calculate the Solvent Accessible Surface Area (SASA) of a protein.
38
+
39
+ This function uses the Shrake-Rupley ("rolling probe")
40
+ algorithm :footcite:`Shrake1973`:
41
+ Every atom is occupied by an evenly distributed point mesh. The
42
+ points that can be reached by the "rolling probe", are surface
43
+ accessible.
44
+
45
+ Parameters
46
+ ----------
47
+ array : AtomArray
48
+ The protein model to calculate the SASA for.
49
+ probe_radius : float, optional
50
+ The VdW-radius of the solvent molecules (default: 1.4).
51
+ atom_filter : ndarray, dtype=bool, optional
52
+ If this parameter is given, SASA is only calculated for the
53
+ filtered atoms.
54
+ ignore_ions : bool, optional
55
+ If true, all monoatomic ions are removed before SASA calculation
56
+ (default: True).
57
+ point_number : int, optional
58
+ The number of points in the mesh occupying each atom for SASA
59
+ calculation (default: 1000). The SASA calculation time is
60
+ proportional to the amount of sphere points.
61
+ point_distr : str or function, optional
62
+ If a function is given, the function is used to calculate the
63
+ point distribution for the mesh (the function must take `float`
64
+ *n* as parameter and return a *(n x 3)* :class:`ndarray`).
65
+ Alternatively a string can be given to choose a built-in
66
+ distribution:
67
+
68
+ - **Fibonacci** - Distribute points using a golden section
69
+ spiral.
70
+
71
+ By default *Fibonacci* is used.
72
+ vdw_radii : str or ndarray, dtype=float, optional
73
+ Indicates the set of VdW radii to be used. If an `array`-length
74
+ :class:`ndarray` is given, each atom gets the radius at the
75
+ corresponding index. Radii given for atoms that are not used in
76
+ SASA calculation (e.g. solvent atoms) can have arbitrary values
77
+ (e.g. `NaN`). If instead a string is given, one of the
78
+ built-in sets is used:
79
+
80
+ - **ProtOr** - A set, which does not require hydrogen atoms
81
+ in the model. Suitable for crystal structures.
82
+ :footcite:`Tsai1999`
83
+ - **Single** - A set, which uses a defined VdW radius for
84
+ every single atom, therefore hydrogen atoms are required
85
+ in the model (e.g. NMR elucidated structures).
86
+ :footcite:`Bondi1964`
87
+
88
+ By default *ProtOr* is used.
89
+
90
+
91
+ Returns
92
+ -------
93
+ sasa : ndarray, dtype=float, shape=(n,)
94
+ Atom-wise SASA. `NaN` for atoms where SASA has not been
95
+ calculated
96
+ (solvent atoms, hydrogen atoms (ProtOr), atoms not in `filter`).
97
+
98
+ References
99
+ ----------
100
+
101
+ .. footbibliography::
102
+
103
+ """
104
+ cdef int i=0, j=0, k=0, adj_atom_i=0, rel_atom_i=0
105
+
106
+ cdef np.ndarray sasa_filter
107
+ cdef np.ndarray occl_filter
108
+ if atom_filter is not None:
109
+ # Filter for all atoms to calculate SASA for
110
+ sasa_filter = np.array(atom_filter, dtype=bool)
111
+ else:
112
+ sasa_filter = np.ones(len(array), dtype=bool)
113
+ # Filter for all atoms that are considered for occlusion calculation
114
+ # sasa_filter is subfilter of occlusion_filter
115
+ occl_filter = np.ones(len(array), dtype=bool)
116
+ # Remove water residues, since it is the solvent
117
+ filter = ~filter_solvent(array)
118
+ sasa_filter = sasa_filter & filter
119
+ occl_filter = occl_filter & filter
120
+ if ignore_ions:
121
+ filter = ~filter_monoatomic_ions(array)
122
+ sasa_filter = sasa_filter & filter
123
+ occl_filter = occl_filter & filter
124
+
125
+ cdef np.ndarray sphere_points
126
+ if callable(point_distr):
127
+ sphere_points = point_distr(point_number)
128
+ elif point_distr == "Fibonacci":
129
+ sphere_points = _create_fibonacci_points(point_number)
130
+ else:
131
+ raise ValueError(f"'{point_distr}' is not a valid point distribution")
132
+ sphere_points = sphere_points.astype(np.float32)
133
+
134
+ cdef np.ndarray radii
135
+ if isinstance(vdw_radii, np.ndarray):
136
+ radii = vdw_radii.astype(np.float32)
137
+ if len(radii) != array.array_length():
138
+ raise ValueError(
139
+ f"Amount VdW radii ({len(radii)}) and "
140
+ f"amount of atoms ({array.array_length()}) are not equal"
141
+ )
142
+ elif vdw_radii == "ProtOr":
143
+ filter = (array.element != "H")
144
+ sasa_filter = sasa_filter & filter
145
+ occl_filter = occl_filter & filter
146
+ radii = np.full(len(array), np.nan, dtype=np.float32)
147
+ for i in np.arange(len(radii))[occl_filter]:
148
+ rad = vdw_radius_protor(array.res_name[i], array.atom_name[i])
149
+ # 1.8 is default radius
150
+ radii[i] = rad if rad is not None else 1.8
151
+ elif vdw_radii == "Single":
152
+ radii = np.full(len(array), np.nan, dtype=np.float32)
153
+ for i in np.arange(len(radii))[occl_filter]:
154
+ rad = vdw_radius_single(array.element[i])
155
+ # 1.5 is default radius
156
+ radii[i] = rad if rad is not None else 1.8
157
+ else:
158
+ raise KeyError(f"'{vdw_radii}' is not a valid radii set")
159
+ # Increase atom radii by probe size ("rolling probe")
160
+ radii += probe_radius
161
+
162
+ # Memoryview for filter
163
+ # Problem with creating boolean memoryviews
164
+ # -> Type uint8 is used
165
+ cdef np_bool[:] sasa_filter_view = np.frombuffer(sasa_filter,
166
+ dtype=np.uint8)
167
+
168
+ cdef np.ndarray occl_r = radii[occl_filter]
169
+ # Atom array containing occluding atoms
170
+ occl_array = array[occl_filter]
171
+
172
+ # Memoryviews for coordinates of entire (main) array
173
+ # and for coordinates of occluding atom array
174
+ cdef float32[:,:] main_coord = array.coord.astype(np.float32,
175
+ copy=False)
176
+ cdef float32[:,:] occl_coord = occl_array.coord.astype(np.float32,
177
+ copy=False)
178
+ # Memoryviews for sphere points
179
+ cdef float32[:,:] sphere_coord = sphere_points
180
+ # Check if any of these arrays are empty to prevent segfault
181
+ if main_coord.shape[0] == 0 \
182
+ or occl_coord.shape[0] == 0 \
183
+ or sphere_coord.shape[0] == 0:
184
+ raise ValueError("Coordinates are empty")
185
+ # Memoryviews for radii of SASA and occluding atoms
186
+ # their squares and their sum of sqaures
187
+ cdef float32[:] atom_radii = radii
188
+ cdef float32[:] atom_radii_sq = radii * radii
189
+ cdef float32[:] occl_radii = occl_r
190
+ cdef float32[:] occl_radii_sq = occl_r * occl_r
191
+ # Memoryview for atomwise SASA
192
+ cdef float32[:] sasa = np.full(len(array), np.nan, dtype=np.float32)
193
+
194
+ # Area of a sphere point on a unit sphere
195
+ cdef float32 area_per_point = 4.0 * np.pi / point_number
196
+
197
+ # Define further statically typed variables
198
+ # that are needed for SASA calculation
199
+ cdef int n_accesible = 0
200
+ cdef float32 radius = 0
201
+ cdef float32 radius_sq = 0
202
+ cdef float32 adj_radius = 0
203
+ cdef float32 adj_radius_sq = 0
204
+ cdef float32 dist_sq = 0
205
+ cdef float32 point_x = 0
206
+ cdef float32 point_y = 0
207
+ cdef float32 point_z = 0
208
+ cdef float32 atom_x = 0
209
+ cdef float32 atom_y = 0
210
+ cdef float32 atom_z = 0
211
+ cdef float32 occl_x = 0
212
+ cdef float32 occl_y = 0
213
+ cdef float32 occl_z = 0
214
+ cdef float32[:,:] relevant_occl_coord = None
215
+
216
+ # Cell size is as large as the maximum distance,
217
+ # where two atom can intersect.
218
+ # Therefore intersecting atoms are always in the same or adjacent cell.
219
+ cell_list = CellList(occl_array, np.max(radii[occl_filter])*2)
220
+ cdef np.ndarray cell_indices
221
+ cdef int[:,:] cell_indices_view
222
+ cdef int length
223
+ cdef int max_adj_list_length = 0
224
+ cdef int array_length = array.array_length()
225
+
226
+ cell_indices = cell_list.get_atoms_in_cells(array.coord)
227
+ cell_indices_view = cell_indices
228
+ max_adj_list_length = cell_indices.shape[0]
229
+
230
+ # Later on, this array stores coordinates for actual
231
+ # occluding atoms for a certain atom to calculate the
232
+ # SASA for
233
+ # The first three indices of the second axis
234
+ # are x, y and z, the last one is the squared radius
235
+ # This list is as long as the maximal length of a list of
236
+ # adjacent atoms
237
+ relevant_occl_coord = np.zeros((max_adj_list_length, 4),
238
+ dtype=np.float32)
239
+
240
+ # Actual SASA calculation
241
+ for i in range(array_length):
242
+ # First level: The atoms to calculate SASA for
243
+ if not sasa_filter_view[i]:
244
+ # SASA is not calculated for this atom
245
+ continue
246
+ n_accesible = point_number
247
+ atom_x = main_coord[i,0]
248
+ atom_y = main_coord[i,1]
249
+ atom_z = main_coord[i,2]
250
+ radius = atom_radii[i]
251
+ radius_sq = atom_radii_sq[i]
252
+ # Find occluding atoms from list of adjacent atoms
253
+ rel_atom_i = 0
254
+ for j in range(max_adj_list_length):
255
+ # Remove all atoms, where the distance to the relevant atom
256
+ # is larger than the sum of the radii,
257
+ # since those atoms do not touch
258
+ # If distance is 0, it is the same atom,
259
+ # and the atom is removed from the list as well
260
+ adj_atom_i = cell_indices_view[i,j]
261
+ if adj_atom_i == -1:
262
+ # -1 means end of list
263
+ break
264
+ occl_x = occl_coord[adj_atom_i,0]
265
+ occl_y = occl_coord[adj_atom_i,1]
266
+ occl_z = occl_coord[adj_atom_i,2]
267
+ adj_radius = occl_radii[adj_atom_i]
268
+ adj_radius_sq = occl_radii_sq[adj_atom_i]
269
+ dist_sq = distance_sq(atom_x, atom_y, atom_z,
270
+ occl_x, occl_y, occl_z)
271
+ if dist_sq != 0 \
272
+ and dist_sq < (adj_radius+radius) * (adj_radius+radius):
273
+ relevant_occl_coord[rel_atom_i,0] = occl_x
274
+ relevant_occl_coord[rel_atom_i,1] = occl_y
275
+ relevant_occl_coord[rel_atom_i,2] = occl_z
276
+ relevant_occl_coord[rel_atom_i,3] = adj_radius_sq
277
+ rel_atom_i += 1
278
+ for j in range(sphere_coord.shape[0]):
279
+ # Second level: The sphere points for that atom
280
+ # Transform sphere point to sphere of current atom
281
+ point_x = sphere_coord[j,0] * radius + atom_x
282
+ point_y = sphere_coord[j,1] * radius + atom_y
283
+ point_z = sphere_coord[j,2] * radius + atom_z
284
+ for k in range(rel_atom_i):
285
+ # Third level: Compare point to occluding atoms
286
+ dist_sq = distance_sq(point_x, point_y, point_z,
287
+ relevant_occl_coord[k, 0],
288
+ relevant_occl_coord[k, 1],
289
+ relevant_occl_coord[k, 2])
290
+ # Compare squared distance
291
+ # to squared radius of occluding atom
292
+ # (Radius is relevant_occl_coord[3])
293
+ if dist_sq < relevant_occl_coord[k, 3]:
294
+ # Point is occluded
295
+ # -> Continue with next point
296
+ n_accesible -= 1
297
+ break
298
+ sasa[i] = area_per_point * n_accesible * radius_sq
299
+ return np.asarray(sasa)
300
+
301
+
302
cdef inline float32 distance_sq(float32 x1, float32 y1, float32 z1,
                                float32 x2, float32 y2, float32 z2):
    # Squared Euclidean distance between the points (x1, y1, z1) and
    # (x2, y2, z2).
    # The square root is deliberately omitted: the result is only
    # compared against squared radii / squared radius sums.
    # The per-axis differences are kept as float32 intermediates.
    cdef float32 delta_x
    cdef float32 delta_y
    cdef float32 delta_z
    delta_x = x2 - x1
    delta_y = y2 - y1
    delta_z = z2 - z1
    return delta_x*delta_x + delta_y*delta_y + delta_z*delta_z
308
+
309
+
310
def _create_fibonacci_points(n):
    """
    Get an array of approximately equidistant points on the surface of
    the unit sphere using a golden section spiral.

    Parameters
    ----------
    n : int
        The number of points to create.

    Returns
    -------
    coords : ndarray, shape=(n,3), dtype=float
        The cartesian coordinates of the points.
        Each point has a distance of 1 to the origin.
    """
    # Each consecutive point is rotated by the golden angle
    # around the z-axis
    golden_angle = (3 - np.sqrt(5)) * np.pi
    azimuth = golden_angle * np.arange(n)
    # The z-coordinates are evenly spaced, omitting the poles themselves
    height = np.linspace(1 - 1.0/n, 1.0/n - 1, n)
    # Radius of the circle obtained by cutting the unit sphere
    # at the respective z-coordinate
    ring_radius = np.sqrt(1 - height*height)
    return np.column_stack((
        ring_radius * np.cos(azimuth),
        ring_radius * np.sin(azimuth),
        height,
    ))
@@ -0,0 +1,112 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Function for converting a structure into a sequence.
7
+ """
8
+
9
+ __name__ = "biotite.structure"
10
+ __author__ = "Patrick Kunzmann"
11
+ __all__ = ["to_sequence"]
12
+
13
+ import numpy as np
14
+ from .info.misc import one_letter_code
15
+ from .info.groups import amino_acid_names, nucleotide_names
16
+ from .residues import get_residues
17
+ from .chains import get_chain_starts
18
+ from .error import BadStructureError
19
+ from ..sequence.seqtypes import ProteinSequence, NucleotideSequence
20
+
21
+
22
+ HETERO_PLACEHOLDER = "."
23
+
24
+
25
def to_sequence(atoms, allow_hetero=False):
    """
    Convert each chain in a structure into a sequence.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure.
        May contain multiple chains.
        Each chain must be either a peptide or a nucleic acid.
    allow_hetero : bool, optional
        If true, residues inside an amino acid or nucleotide chain,
        that have no one-letter code, are replaced by the respective
        '*any*' symbol (`"X"` or `"N"`, respectively).
        The same is true for amino acids in nucleotide chains and vice
        versa.
        By default, an exception is raised.

    Returns
    -------
    sequences : list of Sequence, length=n
        The sequence for each chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    Raises
    ------
    BadStructureError
        If a chain contains neither amino acids nor nucleotides, or if
        `allow_hetero` is false and a chain contains a residue that
        does not fit the type of the chain.

    Notes
    -----
    Residues are considered amino acids or nucleotides based on their
    appearance in :func:`info.amino_acid_names()` or
    :func:`info.nucleotide_names()`, respectively.
    A chain is treated as peptide, if it contains more amino acids
    than nucleotides, otherwise it is treated as nucleic acid.

    In peptides selenocysteine and pyrrolysine are replaced by
    cysteine and lysine, respectively.
    In nucleic acids uracil is replaced by thymine.

    Examples
    --------

    >>> sequences, chain_starts = to_sequence(atom_array)
    >>> print(sequences)
    [ProteinSequence("NLYIQWLKDGGPSSGRPPPS")]

    """
    # The reference residue names are the same for every chain
    aa_names = amino_acid_names()
    nuc_names = nucleotide_names()

    sequences = []
    chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)
    for start, stop in zip(chain_start_indices[:-1], chain_start_indices[1:]):
        # The ellipsis ensures that the atom dimension is indexed for
        # both, AtomArray and AtomArrayStack:
        # A plain slice would select models in an AtomArrayStack
        chain = atoms[..., start:stop]
        _, residues = get_residues(chain)
        one_letter_symbols = np.array(
            [one_letter_code(res) or HETERO_PLACEHOLDER for res in residues]
        )
        hetero_mask = one_letter_symbols == HETERO_PLACEHOLDER

        aa_mask = np.isin(residues, aa_names)
        nuc_mask = np.isin(residues, nuc_names)
        aa_count = np.count_nonzero(aa_mask)
        nuc_count = np.count_nonzero(nuc_mask)
        if aa_count == 0 and nuc_count == 0:
            raise BadStructureError(
                f"Chain {chain.chain_id[0]} contains neither amino acids "
                "nor nucleotides"
            )
        elif aa_count > nuc_count:
            # Chain is a peptide
            hetero_mask |= ~aa_mask
            if not allow_hetero and np.any(hetero_mask):
                raise BadStructureError(
                    f"Hetero residue(s) "
                    f"{', '.join(residues[hetero_mask])} in peptide"
                )
            one_letter_symbols[hetero_mask] = "X"
            # Replace selenocysteine and pyrrolysine
            one_letter_symbols[one_letter_symbols == "U"] = "C"
            one_letter_symbols[one_letter_symbols == "O"] = "K"
            sequences.append(ProteinSequence("".join(one_letter_symbols)))
        else:
            # Chain is a nucleic acid
            hetero_mask |= ~nuc_mask
            if not allow_hetero and np.any(hetero_mask):
                raise BadStructureError(
                    f"Hetero residue(s) "
                    f"{', '.join(residues[hetero_mask])} in nucleic acid"
                )
            one_letter_symbols[hetero_mask] = "N"
            # Replace uracil
            one_letter_symbols[one_letter_symbols == "U"] = "T"
            sequences.append(NucleotideSequence("".join(one_letter_symbols)))

    # Remove exclusive stop
    return sequences, chain_start_indices[:-1]