biotite 1.3.0__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +191 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +160 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1226 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  266. biotite/structure/bonds.pyx +1975 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +276 -0
  271. biotite/structure/charges.cpython-312-darwin.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +681 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +590 -0
  278. biotite/structure/geometry.py +655 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +90 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +388 -0
  311. biotite/structure/io/pdb/file.py +1356 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +671 -0
  319. biotite/structure/io/pdbx/cif.py +1088 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +358 -0
  322. biotite/structure/io/pdbx/convert.py +2097 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1047 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +544 -0
  337. biotite/structure/rings.py +335 -0
  338. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +292 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +168 -0
  349. biotite/version.py +21 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.3.0.dist-info/METADATA +162 -0
  352. biotite-1.3.0.dist-info/RECORD +354 -0
  353. biotite-1.3.0.dist-info/WHEEL +6 -0
  354. biotite-1.3.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,292 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = [
8
+ "get_segment_starts",
9
+ "apply_segment_wise",
10
+ "spread_segment_wise",
11
+ "get_segment_masks",
12
+ "get_segment_starts_for",
13
+ "get_segment_positions",
14
+ "segment_iter",
15
+ ]
16
+
17
+ import numpy as np
18
+
19
+
20
def get_segment_starts(
    array, add_exclusive_stop, continuous_categories=(), equal_categories=()
):
    """
    Locate the atom indices at which a new segment (e.g. residue or chain)
    begins.

    A boundary between two consecutive atoms is detected from the values of
    the selected annotation categories.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) whose segments are determined.
    add_exclusive_stop : bool
        If true, ``array.array_length()`` is appended to the returned start
        indices as last element.
    continuous_categories : tuple of str, optional
        Annotation categories whose values must not decrease within a
        segment.
        A decrease from one atom to the next starts a new segment.
    equal_categories : tuple of str, optional
        Annotation categories whose values must stay the same within a
        segment.
        Any change from one atom to the next starts a new segment.

    Returns
    -------
    starts : ndarray, dtype=int
        The start indices of segments in `array`.
        For an `array` without atoms, an empty array is returned,
        irrespective of `add_exclusive_stop`.
    """
    n_atoms = array.array_length()
    if n_atoms == 0:
        return np.array([], dtype=int)

    # Element i is true, if a new segment starts between atom i and i+1
    is_boundary = np.zeros(n_atoms - 1, dtype=bool)
    for category in continuous_categories:
        values = array.get_annotation(category)
        is_boundary |= np.diff(values) < 0
    for category in equal_categories:
        values = array.get_annotation(category)
        is_boundary |= values[1:] != values[:-1]

    # A boundary after atom i means that the next segment starts at i+1
    inner_starts = np.where(is_boundary)[0] + 1

    # The very first segment always starts at index 0
    pieces = [[0], inner_starts]
    if add_exclusive_stop:
        pieces.append([n_atoms])
    return np.concatenate(pieces)
72
+
73
+
74
def apply_segment_wise(starts, data, function, axis=None):
    """
    Evaluate `function` separately on the slice of `data` that belongs to
    each segment (e.g. residue or chain).

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    data : ndarray
        The data that is sliced segment by segment along its first
        dimension; each slice is passed to `function`.
    function : function
        Called as *f(segment_data)*, or as *f(segment_data, axis=axis)*
        if `axis` is given.
        Every call must return a value with the same shape and data type.
    axis : int, optional
        If given, this value is forwarded to the `axis` parameter of
        `function`.

    Returns
    -------
    processed_data : ndarray or None
        The return values of `function`, one entry per segment along the
        first dimension.
        ``None``, if `starts` describes no segment at all.
    """
    n_segments = len(starts) - 1
    extra_kwargs = {} if axis is None else {"axis": axis}

    results = None
    for seg_index, (begin, end) in enumerate(zip(starts[:-1], starts[1:])):
        outcome = function(data[begin:end], **extra_kwargs)
        if results is None:
            # Shape and dtype of the result are only known after the first
            # evaluation, hence the array is allocated lazily
            if isinstance(outcome, np.ndarray):
                results = np.zeros(
                    (n_segments,) + outcome.shape, dtype=outcome.dtype
                )
            else:
                # Scalar return value -> one-dimensional result
                results = np.zeros(n_segments, dtype=type(outcome))
        results[seg_index] = outcome
    return results
126
+
127
+
128
def spread_segment_wise(starts, input_data):
    """
    Expand per-segment data to per-atom data by repeating each entry for
    every atom of the respective segment (e.g. residue or chain).

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    input_data : ndarray
        The data to be spread.
        The length of its 0-th axis must be equal to the number of
        segments described by `starts`.

    Returns
    -------
    output_data : ndarray
        Segment-wise spread `input_data`.
        The length of its 0-th axis equals the sum of all segment lengths.
    """
    segment_begins = starts[:-1]
    segment_ends = starts[1:]
    # Each entry is repeated as often as its segment has atoms
    return np.repeat(input_data, segment_ends - segment_begins, axis=0)
152
+
153
+
154
def get_segment_masks(starts, indices):
    """
    Generalized version of :func:`get_residue_masks()`
    for residues and chains.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    indices : ndarray, dtype=int, shape=(k,)
        These indices indicate the atoms to get the corresponding
        segments for.
        Negative indices are not allowed.

    Returns
    -------
    residues_masks : ndarray, dtype=bool, shape=(k,n)
        Multiple boolean masks, one for each given index in `indices`.
        Each array masks the atoms that belong to the same segment as
        the atom at the given index.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the exclusive stop
        given as last element of `starts`.
    """
    indices = np.asarray(indices)
    # The exclusive stop is equal to the length of the atom array
    length = starts[-1]

    # Validate before allocating the (potentially large) mask matrix
    if (indices < 0).any():
        raise ValueError("This function does not support negative indices")
    out_of_range = indices >= length
    if out_of_range.any():
        # Report the offending index value itself,
        # not its position within `indices`
        index = indices[out_of_range][0]
        raise ValueError(
            f"Index {index} is out of range for an atom array with length {length}"
        )

    masks = np.zeros((len(indices), length), dtype=bool)
    # 'side="right"' ensures that an index that is itself a segment start
    # is assigned to the segment it starts
    insertion_points = np.searchsorted(starts, indices, side="right") - 1
    for i, point in enumerate(insertion_points):
        masks[i, starts[point] : starts[point + 1]] = True

    return masks
194
+
195
+
196
def get_segment_starts_for(starts, indices):
    """
    Generalized version of :func:`get_residue_starts_for()`
    for residues and chains.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    indices : ndarray, dtype=int, shape=(k,)
        These indices point to the atoms to get the corresponding
        segment starts for.
        Negative indices are not allowed.

    Returns
    -------
    start_indices : ndarray, dtype=int, shape=(k,)
        The indices that point to the segment starts for the input
        `indices`.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the exclusive stop
        given as last element of `starts`.
    """
    indices = np.asarray(indices)
    # The exclusive stop is equal to the length of the atom array
    length = starts[-1]
    # Remove exclusive stop
    starts = starts[:-1]

    if (indices < 0).any():
        raise ValueError("This function does not support negative indices")
    out_of_range = indices >= length
    if out_of_range.any():
        # Report the offending index value itself,
        # not its position within `indices`
        index = indices[out_of_range][0]
        raise ValueError(
            f"Index {index} is out of range for an atom array with length {length}"
        )

    # 'side="right"' ensures that an index that is itself a segment start
    # is assigned to the segment it starts
    insertion_points = np.searchsorted(starts, indices, side="right") - 1
    return starts[insertion_points]
233
+
234
+
235
def get_segment_positions(starts, indices):
    """
    Generalized version of :func:`get_residue_positions()`
    for residues and chains.

    Parameters
    ----------
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    indices : ndarray, shape=(k,)
        These indices point to the atoms to get the corresponding
        segment positions for.
        Negative indices are not allowed.

    Returns
    -------
    segment_indices : ndarray, shape=(k,)
        The indices that point to the position of the segments,
        i.e. ``0`` for atoms in the first segment, ``1`` for atoms in the
        second segment and so on.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the exclusive stop
        given as last element of `starts`.
    """
    indices = np.asarray(indices)
    # The exclusive stop is equal to the length of the atom array
    length = starts[-1]
    # Remove exclusive stop
    starts = starts[:-1]

    if (indices < 0).any():
        raise ValueError("This function does not support negative indices")
    out_of_range = indices >= length
    if out_of_range.any():
        # Report the offending index value itself,
        # not its position within `indices`
        index = indices[out_of_range][0]
        raise ValueError(
            f"Index {index} is out of range for an atom array with length {length}"
        )

    # 'side="right"' ensures that an index that is itself a segment start
    # is assigned to the segment it starts
    return np.searchsorted(starts, indices, side="right") - 1
270
+
271
+
272
def segment_iter(array, starts):
    """
    Iterate over the segments (e.g. residues or chains) of a structure.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to iterate over.
    starts : ndarray, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.

    Yields
    ------
    segment : AtomArray or AtomArrayStack
        Each segment of the structure, obtained by slicing the last
        dimension of `array`.
    """
    # Consecutive pairs of entries are the bounds of one segment
    for begin, end in zip(starts[:-1], starts[1:]):
        yield array[..., begin:end]
@@ -0,0 +1,110 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Function for converting a structure into a sequence.
7
+ """
8
+
9
+ __name__ = "biotite.structure"
10
+ __author__ = "Patrick Kunzmann"
11
+ __all__ = ["to_sequence"]
12
+
13
+ import numpy as np
14
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
15
+ from biotite.structure.chains import get_chain_starts
16
+ from biotite.structure.error import BadStructureError
17
+ from biotite.structure.info.groups import amino_acid_names, nucleotide_names
18
+ from biotite.structure.info.misc import one_letter_code
19
+ from biotite.structure.residues import get_residues
20
+
21
+ HETERO_PLACEHOLDER = "."
22
+
23
+
24
def to_sequence(atoms, allow_hetero=False):
    """
    Convert each chain in a structure into a sequence.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure.
        May contain multiple chains.
        Each chain must be either a peptide or a nucleic acid.
    allow_hetero : bool, optional
        If true, residues inside an amino acid or nucleotide chain
        that have no one-letter code are replaced by the respective
        '*any*' symbol (`"X"` or `"N"`, respectively).
        The same is true for amino acids in nucleotide chains and vice
        versa.
        By default, an exception is raised.

    Returns
    -------
    sequences : list of Sequence, length=n
        The sequence for each chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    Raises
    ------
    BadStructureError
        If a chain contains neither amino acids nor nucleotides, or if
        a chain contains hetero residues while `allow_hetero` is false.

    Notes
    -----
    Residues are considered amino acids or nucleotides based on their
    appearance in :func:`info.amino_acid_names()` or
    :func:`info.nucleotide_names()`, respectively.
    A chain is treated as peptide, if it contains more amino acids than
    nucleotides, otherwise it is treated as nucleic acid.

    Examples
    --------

    >>> sequences, chain_starts = to_sequence(atom_array)
    >>> print(sequences)
    [ProteinSequence("NLYIQWLKDGGPSSGRPPPS")]
    """
    # The exclusive stop allows forming (start, stop) pairs for all chains
    chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)

    sequences = []
    for start, stop in zip(chain_start_indices[:-1], chain_start_indices[1:]):
        chain = atoms[start:stop]
        _, res_names = get_residues(chain)
        symbols = np.array(
            [one_letter_code(name) or HETERO_PLACEHOLDER for name in res_names]
        )

        is_amino_acid = np.isin(res_names, amino_acid_names())
        is_nucleotide = np.isin(res_names, nucleotide_names())
        n_amino_acids = np.count_nonzero(is_amino_acid)
        n_nucleotides = np.count_nonzero(is_nucleotide)
        if n_amino_acids == 0 and n_nucleotides == 0:
            raise BadStructureError(
                f"Chain {chain.chain_id[0]} contains neither amino acids "
                "nor nucleotides"
            )

        if n_amino_acids > n_nucleotides:
            # Chain is a peptide:
            # selenocysteine and pyrrolysine are replaced
            is_member = is_amino_acid
            chain_type = "peptide"
            any_symbol = "X"
            replacements = (("U", "C"), ("O", "K"))
            sequence_class = ProteinSequence
        else:
            # Chain is a nucleic acid:
            # uracil is replaced
            is_member = is_nucleotide
            chain_type = "nucleic acid"
            any_symbol = "N"
            replacements = (("U", "T"),)
            sequence_class = NucleotideSequence

        # A residue is hetero, if it has no one-letter code
        # or does not belong to the residue type of the chain
        hetero_mask = (symbols == HETERO_PLACEHOLDER) | ~is_member
        if np.any(hetero_mask) and not allow_hetero:
            hetero_names = res_names[hetero_mask]
            raise BadStructureError(
                f"Hetero residue(s) {', '.join(hetero_names)} in {chain_type}"
            )
        symbols[hetero_mask] = any_symbol
        for old_symbol, new_symbol in replacements:
            symbols[symbols == old_symbol] = new_symbol
        sequences.append(sequence_class("".join(symbols)))

    # Remove exclusive stop
    return sequences, chain_start_indices[:-1]