biotite 1.5.0__cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-314-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-314-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-314-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-314-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-314-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-314-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-314-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-314-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-314-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-314-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-314-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-314-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-314-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-314-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-314-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-314-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-314-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-314-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-314-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-314-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-314-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,573 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.io.genbank"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["GenBankFile", "MultiFile"]
8
+
9
+ # import textwrap
10
+ import copy
11
+
12
+ # import re
13
+ import io
14
+ from collections import OrderedDict
15
+ from biotite.file import InvalidFileError, TextFile
16
+
17
+ # from ...annotation import Location, Feature, Annotation, AnnotatedSequence
18
+ # from ...seqtypes import NucleotideSequence, ProteinSequence
19
+
20
+
21
+ class GenBankFile(TextFile):
22
+ """
23
+ This class represents a file in GenBank format (including GenPept).
24
+
25
+ A GenBank file annotates a reference sequence with features such as
26
+ positions of genes, promoters, etc.
27
+ Additionally, it provides metadata further describing the file.
28
+
29
+ A file is divided into separate fields, e.g. the *DEFINITION*
30
+ field contains a description of the file.
31
+ The field name starts at the beginning of a line,
32
+ followed by the content.
33
+ A field may contain subfields, whose name is indented.
34
+ For example, the *SOURCE* field contains the *ORGANISM* subfield.
35
+ Some fields may occur multiple times, e.g. the *REFERENCE* field.
36
+ A sample GenBank file can be viewed at
37
+ `<https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html>`_.
38
+
39
+ This class provides a low-level interface for parsing, editing and
40
+ writing GenBank files.
41
+ It works like a list of field entries, where a field consists of the
42
+ field name, the field content and the subfields.
43
+ The field content is separated into the lines belonging to the
44
+ content.
45
+ While the content of metadata fields starts at the standard
46
+ GenBank indentation of 12, the content of the *FEATURES*
47
+ (contains the annotation) and *ORIGIN* (contains the sequence)
48
+ fields starts without indentation.
49
+ The subfields are represented by a dictionary, with subfield names
50
+ being keys and the corresponding lines being values.
51
+ The *FEATURES* and *ORIGIN* fields have no subfields.
52
+
53
+ Every entry can be obtained, set and deleted via the index operator.
54
+
55
+ Notes
56
+ -----
57
+ This class does not support location identifiers with references
58
+ to other Entrez database entries, e.g.
59
+ ``join(1..100,J00194.1:100..202)``.
60
+
61
+ Examples
62
+ --------
63
+ Create a GenBank file from scratch:
64
+
65
+ >>> file = GenBankFile()
66
+ >>> file.append(
67
+ ... "SOMEFIELD", ["One line", "A second line"],
68
+ ... subfields={"SUBFIELD1": ["Single Line"], "SUBFIELD2": ["Two", "lines"]}
69
+ ... )
70
+ >>> print(file)
71
+ SOMEFIELD One line
72
+ A second line
73
+ SUBFIELD1 Single Line
74
+ SUBFIELD2 Two
75
+ lines
76
+ //
77
+ >>> name, content, subfields = file[0]
78
+ >>> print(name)
79
+ SOMEFIELD
80
+ >>> print(content)
81
+ ['One line', 'A second line']
82
+ >>> print(subfields)
83
+ OrderedDict({'SUBFIELD1': ['Single Line'], 'SUBFIELD2': ['Two', 'lines']})
84
+
85
+ Adding an additional field:
86
+
87
+ >>> file.insert(0, "OTHERFIELD", ["Another line"])
88
+ >>> print(len(file))
89
+ 2
90
+ >>> print(file)
91
+ OTHERFIELD Another line
92
+ SOMEFIELD One line
93
+ A second line
94
+ SUBFIELD1 Single Line
95
+ SUBFIELD2 Two
96
+ lines
97
+ //
98
+
99
+ Overwriting and deleting an existing field:
100
+
101
+ >>> file[1] = "NEWFIELD", ["Yet another line"]
102
+ >>> print(file)
103
+ OTHERFIELD Another line
104
+ NEWFIELD Yet another line
105
+ //
106
+ >>> file[1] = "NEWFIELD", ["Yet another line"], {"NEWSUB": ["Subfield line"]}
107
+ >>> print(file)
108
+ OTHERFIELD Another line
109
+ NEWFIELD Yet another line
110
+ NEWSUB Subfield line
111
+ //
112
+ >>> del file[1]
113
+ >>> print(file)
114
+ OTHERFIELD Another line
115
+ //
116
+
117
+ Parsing fields from a real GenBank file:
118
+
119
+ >>> import os.path
120
+ >>> file = GenBankFile.read(os.path.join(path_to_sequences, "gg_avidin.gb"))
121
+ >>> print(file)
122
+ LOCUS AJ311647 1224 bp DNA linear VRT 14-NOV-2006
123
+ DEFINITION Gallus gallus AVD gene for avidin, exons 1-4.
124
+ ACCESSION AJ311647
125
+ VERSION AJ311647.1 GI:13397825
126
+ KEYWORDS AVD gene; avidin.
127
+ SOURCE Gallus gallus (chicken)
128
+ ORGANISM Gallus gallus
129
+ Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
130
+ Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda;
131
+ Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes;
132
+ Phasianidae; Phasianinae; Gallus.
133
+ REFERENCE 1
134
+ AUTHORS Wallen,M.J., Laukkanen,M.O. and Kulomaa,M.S.
135
+ TITLE Cloning and sequencing of the chicken egg-white avidin-encoding
136
+ gene and its relationship with the avidin-related genes Avr1-Avr5
137
+ JOURNAL Gene 161 (2), 205-209 (1995)
138
+ PUBMED 7665080
139
+ REFERENCE 2
140
+ AUTHORS Ahlroth,M.K., Kola,E.H., Ewald,D., Masabanda,J., Sazanov,A.,
141
+ Fries,R. and Kulomaa,M.S.
142
+ TITLE Characterization and chromosomal localization of the chicken avidin
143
+ gene family
144
+ JOURNAL Anim. Genet. 31 (6), 367-375 (2000)
145
+ PUBMED 11167523
146
+ REFERENCE 3 (bases 1 to 1224)
147
+ AUTHORS Ahlroth,M.K.
148
+ TITLE Direct Submission
149
+ JOURNAL Submitted (09-MAR-2001) Ahlroth M.K., Department of Biological and
150
+ Environmental Science, University of Jyvaskyla, PO Box 35,
151
+ FIN-40351 Jyvaskyla, FINLAND
152
+ FEATURES Location/Qualifiers
153
+ source 1..1224
154
+ /organism="Gallus gallus"
155
+ /mol_type="genomic DNA"
156
+ ...
157
+ >>> name, content, _ = file[3]
158
+ >>> print(name)
159
+ VERSION
160
+ >>> print(content)
161
+ ['AJ311647.1 GI:13397825']
162
+ >>> name, content, subfields = file[5]
163
+ >>> print(name)
164
+ SOURCE
165
+ >>> print(content)
166
+ ['Gallus gallus (chicken)']
167
+ >>> print(dict(subfields))
168
+ {'ORGANISM': ['Gallus gallus', 'Eukaryota; Metazoa; Chordata; ...', ...]}
169
+ """
170
+
171
def __init__(self):
    """
    Create a new :class:`GenBankFile` that holds no content yet.

    After construction the file consists of the ``//`` terminator
    line only.
    """
    super().__init__()
    # A GenBank file is always closed by the '//' terminator line
    self.lines = ["//"]
    # One (start, stop, name) tuple per field:
    # start/stop are indices into 'self.lines',
    # name is the field name
    self._field_pos = []
    # NOTE(review): presumably (re)builds '_field_pos' from 'self.lines';
    # the helper is defined outside this section
    self._find_field_indices()
179
+
180
@classmethod
def read(cls, file):
    """
    Read a GenBank file.

    Parameters
    ----------
    file : file-like object or str
        The file to be read.
        Alternatively a file path can be supplied.

    Returns
    -------
    file_object : GenBankFile
        The parsed file.
    """
    # The parent class reads the file,
    # afterwards the fields are indexed
    parsed_file = super().read(file)
    parsed_file._find_field_indices()
    return parsed_file
199
+
200
def get_fields(self, name):
    """
    Get all *GenBank* fields associated with a given field name.

    Parameters
    ----------
    name : str
        The field name.

    Returns
    -------
    fields : list of (list of str, OrderedDict of str -> list of str)
        A list containing the fields.
        For most field names, the list will only contain one
        element, but fields like *REFERENCE* are an exception.
        Each field is represented by a tuple.
        Each tuple contains as first element the content lines and
        as second element the subfields as dictionary.
        If the field has no subfields, the dictionary is empty.
    """
    fields = []
    for field_index in self.get_indices(name):
        # The field name is already known to the caller -> drop it
        _, content, subfields = self[field_index]
        fields.append((content, subfields))
    return fields
223
+
224
+ def get_indices(self, name):
225
+ """
226
+ Get the indices to all *GenBank* fields associated with a given
227
+ field name.
228
+
229
+ Parameters
230
+ ----------
231
+ name : str
232
+ The field name.
233
+
234
+ Returns
235
+ -------
236
+ fields : list of int
237
+ A list of indices.
238
+ For most field names, the list will only contain one
239
+ element, but fields like *REFERENCE* are an exception.
240
+ """
241
+ name = name.upper()
242
+ indices = []
243
+ for i, (_, _, fname) in enumerate(self._field_pos):
244
+ if fname == name:
245
+ indices.append(i)
246
+ return indices
247
+
248
def set_field(self, name, content, subfield_dict=None):
    """
    Set a *GenBank* field with the given content.

    An already existing field with this name is overwritten.
    If no such field exists, the field is appended to the end of
    the file.

    Parameters
    ----------
    name : str
        The field name.
    content : list of str
        The content lines.
    subfield_dict : dict of str -> list of str, optional
        The subfields of the field.
        The dictionary maps subfield names to the content lines of
        the respective subfield.

    Raises
    ------
    InvalidFileError
        If the field occurs multiple times in the file.
        In this case it is ambiguous which field to overwrite.
    """
    name = name.upper()
    existing = self.get_indices(name)

    if not existing:
        # The field is not part of the file yet -> add it
        self.append(name, content, subfield_dict)
        return

    if len(existing) > 1:
        raise InvalidFileError(f"File contains multiple '{name}' fields")

    # Exactly one field with this name -> overwrite it
    self[existing[0]] = name, content, subfield_dict
284
+
285
def __getitem__(self, index):
    """
    Get the field at the given index.

    Parameters
    ----------
    index : int
        The index of the field.

    Returns
    -------
    name : str
        The field name.
    content : list of str
        The content lines of the field.
        Lines belonging to a subfield are not included.
    subfield_dict : OrderedDict
        Maps each subfield name to the content of that subfield.
        Always empty for the *FEATURES* and *ORIGIN* fields.
    """
    index = self._translate_idx(index)
    start, stop, name = self._field_pos[index]
    subfield_dict = OrderedDict()

    if name in ("FEATURES", "ORIGIN"):
        # These two fields have no subfields:
        # Return the complete lines following the field name line
        content = self._get_field_content(start + 1, stop, indent=0)
        return name, content, subfield_dict

    # All other (metadata) fields use the
    # standard GenBank indentation (=12):
    # A line after the first one, that has text within the first
    # 12 columns, is the header of a new subfield
    headers = []
    for line_index in range(start + 1, stop):
        label = self.lines[line_index][:12].strip()
        if label:
            headers.append((line_index, label))

    # Each subfield ends where the next subfield starts,
    # the last subfield ends with the field itself
    boundaries = [line_index for line_index, _ in headers[1:]] + [stop]
    for (subfield_start, label), subfield_stop in zip(headers, boundaries):
        subfield_dict[label] = self._get_field_content(
            subfield_start, subfield_stop, indent=12
        )

    # The field content comprises only the lines
    # in front of the first subfield
    content_stop = headers[0][0] if headers else stop
    content = self._get_field_content(start, content_stop, indent=12)
    return name, content, subfield_dict
328
+
329
def __setitem__(self, index, item):
    """
    Replace the field at the given index.

    Parameters
    ----------
    index : int
        The index of the field to be replaced.
    item : tuple
        Either ``(name, content)`` or ``(name, content, subfields)``.
        If the subfields are omitted, the new field has no subfields.

    Raises
    ------
    TypeError
        If `item` is not a tuple with two or three elements.
    """
    index = self._translate_idx(index)
    if not isinstance(item, tuple):
        raise TypeError(
            "Expected a tuple of name, content and optionally subfields"
        )
    if len(item) == 2:
        name, content = item
        subfields = None
    elif len(item) == 3:
        name, content, subfields = item
    else:
        raise TypeError(
            "Expected a tuple of name, content and optionally subfields"
        )
    inserted_lines = self._to_lines(name, content, subfields)

    # The new lines take the place of the lines of the replaced field
    start, old_stop, _ = self._field_pos[index]
    # Slicing at the end of a list gives an empty list,
    # so the last field needs no special handling.
    # (The integer indices must not be compared by identity)
    self.lines = self.lines[:start] + inserted_lines + self.lines[old_stop:]
    # Shift the start/stop indices of the following fields
    # by the difference in the number of lines
    shift = len(inserted_lines) - (old_stop - start)
    for i in range(index + 1, len(self._field_pos)):
        next_start, next_stop, fname = self._field_pos[i]
        self._field_pos[i] = next_start + shift, next_stop + shift, fname
    # Update the entry of the replaced field
    self._field_pos[index] = start, start + len(inserted_lines), name.upper()
363
+
364
def __delitem__(self, index):
    """
    Remove the field at the given index together with its lines.

    Parameters
    ----------
    index : int
        The index of the field to be removed.
        The index is checked and translated by ``_translate_idx()``.
    """
    position = self._translate_idx(index)
    first_line, end_line, _ = self._field_pos[position]
    removed_count = end_line - first_line
    # All fields behind the removed one move up
    # by the number of removed lines
    for k in range(position + 1, len(self._field_pos)):
        begin, end, field_name = self._field_pos[k]
        self._field_pos[k] = (
            begin - removed_count,
            end - removed_count,
            field_name,
        )
    del self.lines[first_line:end_line]
    del self._field_pos[position]
375
+
376
def __len__(self):
    """
    Get the number of entries in the field position table,
    i.e. the number of fields found in the file.
    """
    field_positions = self._field_pos
    return len(field_positions)
378
+
379
def insert(self, index, name, content, subfields=None):
    """
    Insert a *GenBank* field at the given position.

    Parameters
    ----------
    index : int
        The new field is inserted before the current field at this
        index.
        If the index is equal to the number of fields, the new field
        is placed directly after the last field.
    name : str
        The field name.
    content : list of str
        The content lines.
    subfields : dict of str -> list of str, optional
        The subfields of the field.
        The dictionary maps subfield names to the content lines of
        the respective subfield.

    Raises
    ------
    IndexError
        If ``_translate_idx()`` rejects the index, i.e. the index is
        greater than the number of fields.
    """
    index = self._translate_idx(index, length_exclusive=False)
    inserted_lines = self._to_lines(name, content, subfields)

    # Stop of previous field is start of new field
    start = 0 if index == 0 else self._field_pos[index - 1][1]
    # Slicing from 'start' yields an empty list when inserting at the
    # end of the lines, so no special case
    # (and no identity comparison of integers) is required
    self.lines = self.lines[:start] + inserted_lines + self.lines[start:]
    # Shift the start/stop indices of the following fields
    # by the number of inserted lines
    shift = len(inserted_lines)
    for i in range(index, len(self._field_pos)):
        old_start, old_stop, fname = self._field_pos[i]
        self._field_pos[i] = old_start + shift, old_stop + shift, fname
    # Store the name in the same normalized form that is written
    # into the lines (stripped and upper case)
    self._field_pos.insert(
        index, (start, start + shift, name.strip().upper())
    )
424
+
425
def append(self, name, content, subfields=None):
    """
    Add a new *GenBank* field behind the current last field.

    This is a shortcut for calling :meth:`insert()` with the current
    number of fields as index.

    Parameters
    ----------
    name : str
        The field name.
    content : list of str
        The content lines.
    subfields : dict of str -> list of str, optional
        The subfields of the field.
        The dictionary maps subfield names to the content lines of
        the respective subfield.
    """
    end_index = len(self)
    self.insert(end_index, name, content, subfields)
441
+
442
def _find_field_indices(self):
    """
    Scan all lines and rebuild the table of field positions.

    Each table entry is a tuple of the start line index, the
    exclusive stop line index and the name of a field.
    A field is only stored when a following field header or a
    ``//`` line is encountered.
    """
    field_start = None
    field_name = ""
    self._field_pos = []
    for line_index, text in enumerate(self.lines):
        # Only lines with a character in the first column are
        # either the header of a major field or the '//' terminator
        if len(text) == 0 or text[0] == " ":
            continue
        # In both cases the field that is currently open ends here
        if field_start is not None:
            self._field_pos.append((field_start, line_index, field_name))
        if text[:2] == "//":
            # The terminator does not open a new field
            continue
        field_start = line_index
        field_name = text[0:12].strip()
467
+
468
def _get_field_content(self, start, stop, indent):
    """
    Get the lines in the given range with the leading columns
    removed.

    Parameters
    ----------
    start, stop : int
        The start and exclusive stop index of the requested lines.
    indent : int
        The number of leading characters to remove from each line.
        If this value is 0, the lines are returned unchanged.

    Returns
    -------
    content : list of str
        The selected lines without the first `indent` characters.
    """
    selected = self.lines[start:stop]
    if indent == 0:
        return selected
    # Respect the requested indentation instead of a fixed width
    return [line[indent:] for line in selected]
473
+
474
def _to_lines(self, name, content, subfields):
    """
    Convert the field name, field content and subfield dictionary
    into text lines.

    Parameters
    ----------
    name : str
        The field name.
        It is stripped and converted to upper case.
    content : list of str
        The content lines of the field.
    subfields : dict of str -> list of str or None
        Maps subfield names to the content lines of the respective
        subfield.
        The names are stripped and converted to upper case.
        The subfields are ignored for the ``FEATURES`` and ``ORIGIN``
        field.

    Returns
    -------
    lines : list of str
        The text lines representing the field.

    Raises
    ------
    ValueError
        If the name is empty after stripping.
    """
    if subfields is None:
        subfields = {}

    name = name.strip().upper()
    if len(name) == 0:
        raise ValueError("Must give a non emtpy name")
    # A dictionary keeps the insertion order of the subfields
    subfields = {
        subfield_name.upper().strip(): subfield_lines
        for subfield_name, subfield_lines in subfields.items()
    }

    if name == "FEATURES":
        # Header line plus all actual feature lines
        return ["FEATURES" + " " * 13 + "Location/Qualifiers"] + list(content)
    if name == "ORIGIN":
        # Header line plus all actual sequence lines
        return ["ORIGIN"] + list(content)

    # NOTE(review): the subfield prefix literal is reproduced as found;
    # GenBank subfield keywords are conventionally indented by two
    # columns - confirm that the whitespace of this literal is intact
    sections = [(name, content)]
    sections += [
        (" " + subfield_name, subfield_lines)
        for subfield_name, subfield_lines in subfields.items()
    ]
    name_column = []
    content_column = []
    for header, section_lines in sections:
        # Every header needs at least one line, otherwise the name
        # column and the content column would get out of step and
        # headers would be paired with the wrong content
        section_lines = list(section_lines) or [""]
        name_column += [header] + [""] * (len(section_lines) - 1)
        content_column += section_lines
    return [
        f"{n_col:12}{c_col}"
        for n_col, c_col in zip(name_column, content_column)
    ]
517
+
518
def _translate_idx(self, index, length_exclusive=True):
    """
    Check index boundaries and convert negative index to positive
    index.

    Parameters
    ----------
    index : int
        The index to be checked.
        A negative index counts from the end.
    length_exclusive : bool, optional
        If true, the index must be smaller than the number of fields.
        Otherwise the number of fields itself is also accepted.

    Returns
    -------
    new_index : int
        The corresponding non-negative index.

    Raises
    ------
    IndexError
        If the index is out of range.
    """
    length = len(self)
    new_index = length + index if index < 0 else index
    upper_bound = length - 1 if length_exclusive else length
    # The lower bound needs to be checked as well: A negative index
    # whose magnitude exceeds the length would otherwise be returned
    # as a still negative index
    if new_index < 0 or new_index > upper_bound:
        raise IndexError(f"Index {index} is out of range")
    return new_index
534
+
535
+
536
class MultiFile(TextFile):
    """
    This class represents a file in *GenBank* or *GenPept* format,
    that contains multiple entries, for more than one UID.

    The entries for the individual UIDs are appended to each other in
    such a file.
    Objects of this class can be iterated to obtain a
    :class:`GenBankFile` for each entry in the file.

    Examples
    --------

    >>> import os.path
    >>> file_name = fetch_single_file(
    ...     ["1L2Y_A", "3O5R_A", "5UGO_A"],
    ...     os.path.join(path_to_directory, "multifile.gp"),
    ...     "protein", "gp"
    ... )
    >>> multi_file = MultiFile.read(file_name)
    >>> for gp_file in multi_file:
    ...     print(get_accession(gp_file))
    1L2Y_A
    3O5R_A
    5UGO_A
    """

    def __iter__(self):
        """
        Iterate over the entries of the file.

        Yields
        ------
        file : GenBankFile
            A file created from the lines of one entry, including its
            ``//`` terminator line.
        """
        start_i = 0
        for i, line in enumerate(self.lines):
            if line.strip() != "//":
                continue
            # Create file with the lines corresponding to this entry
            file_content = "\n".join(self.lines[start_i : i + 1])
            # The next entry begins behind the terminator line,
            # otherwise each following entry would start with the
            # '//' of its predecessor
            start_i = i + 1
            yield GenBankFile.read(io.StringIO(file_content))