biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,251 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module contains internally abstract classes for representing parts
7
+ of CIF/BinaryCIF files, such as categories and columns.
8
+ """
9
+
10
+ __name__ = "biotite.structure.io.pdbx"
11
+ __author__ = "Patrick Kunzmann"
12
+ __all__ = ["MaskValue"]
13
+
14
+ from abc import ABCMeta, abstractmethod
15
+ from collections.abc import MutableMapping
16
+ from enum import IntEnum
17
+ from biotite.file import DeserializationError, SerializationError
18
+
19
+
20
class MaskValue(IntEnum):
    """
    Marker used in a mask array to describe the state of a row.

    - `PRESENT` : The row holds an actual value.
    - `INAPPLICABLE` : No value is applicable or appropriate for the row
      (``.`` in *CIF*).
      In some cases it may also stand for a default value of the
      respective column.
    - `MISSING` : The value of the row is missing or unknown
      (``?`` in *CIF*).
    """

    # A value is present
    PRESENT = 0
    # ``.`` in *CIF*
    INAPPLICABLE = 1
    # ``?`` in *CIF*
    MISSING = 2
36
+
37
+
38
class _Component(metaclass=ABCMeta):
    """
    Abstract root of all components of a CIF/BinaryCIF file.
    """

    @staticmethod
    def subcomponent_class():
        """
        Tell which class the components stored in this component have.

        Returns
        -------
        subcomponent_class : type
            The class of the subcomponent.
            ``None`` is returned, if this component already represents
            the lowest level, i.e. it does not contain subcomponents.
            This base implementation always returns ``None``.
        """
        return None

    @staticmethod
    def supercomponent_class():
        """
        Tell which class the component containing this component has.

        Returns
        -------
        supercomponent_class : type
            The class of the supercomponent.
            ``None`` is returned, if this component already represents
            the highest level, i.e. it is not contained in another
            component.
            This base implementation always returns ``None``.
        """
        return None

    @staticmethod
    @abstractmethod
    def deserialize(content):
        """
        Build this component from its serialized form.

        Parameters
        ----------
        content : str or dict
            The serialized content.
            Its type depends on the file format:
            For *CIF* files it is the text of the lines representing
            this component.
            For *BinaryCIF* files it is a dictionary parsed from the
            *MessagePack* data.
        """
        raise NotImplementedError()

    @abstractmethod
    def serialize(self):
        """
        Turn this component into a Python object that can be written
        to a file.

        Returns
        -------
        content : str or dict
            The serialized content.
            Its type depends on the file format:
            For *CIF* files it is the text of the lines representing
            this component.
            For *BinaryCIF* files it is a dictionary that can be
            encoded into *MessagePack*.
        """
        raise NotImplementedError()

    def __str__(self):
        # The string representation is simply the serialized form
        serialized = self.serialize()
        return str(serialized)
111
+
112
+
113
class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
    """
    A container for hierarchical data in BinaryCIF files.
    For example, the file contains multiple blocks, each block contains
    multiple categories and each category contains multiple columns.

    It uses lazy deserialization:
    A component is only deserialized from the serialized data, if it
    is accessed.
    The deserialized component is then cached in the container.

    Parameters
    ----------
    elements : dict, optional
        The initial elements of the container.
        Each value must either be an instance of the class returned by
        :meth:`subcomponent_class()` or a ``dict``, i.e. a still
        serialized element.
        By default no initial elements are added.

    Raises
    ------
    TypeError
        If a value in `elements` is neither a ``dict`` nor an instance
        of the subcomponent class.
    """

    def __init__(self, elements=None):
        if elements is None:
            elements = {}
        subcomponent_class = self.subcomponent_class()
        for element in elements.values():
            if not isinstance(element, (dict, subcomponent_class)):
                raise TypeError(
                    f"Expected '{subcomponent_class.__name__}', "
                    f"but got '{type(element).__name__}'"
                )
        # The given dictionary is stored as it is, i.e. it is not copied
        self._elements = elements

    @staticmethod
    def _deserialize_elements(content, take_key_from):
        """
        Lazily deserialize the elements of this container.

        Parameters
        ----------
        content : iterable of dict
            The serialized content describing the elements for this
            container.
        take_key_from : str
            The key in each element of `content`, whose value is used as
            the key for the respective element.

        Returns
        -------
        elements : dict
            The elements that should be stored in this container.
            This return value can be given to the constructor.
        """
        # Lazy deserialization
        # -> keep serialized for now and deserialize later if needed
        return {
            serialized_element[take_key_from]: serialized_element
            for serialized_element in content
        }

    def _serialize_elements(self, store_key_in=None):
        """
        Serialize the elements that are stored in this container.

        Each element that is still serialized (due to lazy
        deserialization), is kept as it is.

        Parameters
        ----------
        store_key_in : str, optional
            If given, the key of each element is stored as value in the
            serialized element.
            This is basically the reverse operation of `take_key_from` in
            :meth:`_deserialize_elements()`.

        Returns
        -------
        serialized_elements : list
            The serialized elements in the order they are stored in this
            container.

        Raises
        ------
        SerializationError
            If the serialization of an element fails.
            The original exception is attached as its cause.
        """
        subcomponent_class = self.subcomponent_class()
        serialized_elements = []
        for key, element in self._elements.items():
            if isinstance(element, subcomponent_class):
                try:
                    serialized_element = element.serialize()
                except Exception as err:
                    # Chain explicitly to keep the root cause visible
                    raise SerializationError(
                        f"Failed to serialize element '{key}'"
                    ) from err
            else:
                # Element is already stored in serialized form
                serialized_element = element
            if store_key_in is not None:
                # For a still serialized element this writes into the
                # object that is stored in this container
                serialized_element[store_key_in] = key
            serialized_elements.append(serialized_element)
        return serialized_elements

    def __getitem__(self, key):
        """
        Get the element for the given key.
        A still serialized element is deserialized first and the
        result replaces the serialized form in this container.

        Raises
        ------
        KeyError
            If the key is not in this container.
        DeserializationError
            If the deserialization of the element fails.
            The original exception is attached as its cause.
        """
        element = self._elements[key]
        subcomponent_class = self.subcomponent_class()
        if not isinstance(element, subcomponent_class):
            # Element is stored in serialized form
            # -> must be deserialized first
            try:
                element = subcomponent_class.deserialize(element)
            except Exception as err:
                raise DeserializationError(
                    f"Failed to deserialize element '{key}'"
                ) from err
            # Update container with deserialized object
            self._elements[key] = element
        return element

    def __setitem__(self, key, element):
        """
        Store an element under the given key.
        If the value is not an instance of the subcomponent class, it
        is passed to the ``deserialize()`` method of that class first.

        Raises
        ------
        TypeError
            If the value is a container, but not of the subcomponent
            class.
        DeserializationError
            If the given value cannot be deserialized.
            The original exception is attached as its cause.
        """
        subcomponent_class = self.subcomponent_class()
        if not isinstance(element, subcomponent_class):
            if isinstance(element, _HierarchicalContainer):
                # A common mistake may be to use the wrong container type
                raise TypeError(
                    f"Expected '{subcomponent_class.__name__}', "
                    f"but got '{type(element).__name__}'"
                )
            try:
                element = subcomponent_class.deserialize(element)
            except Exception as err:
                raise DeserializationError(
                    "Failed to deserialize given value"
                ) from err
        self._elements[key] = element

    def __delitem__(self, key):
        del self._elements[key]

    # Implement `__contains__()` explicitly,
    # because the mixin method unnecessarily deserializes the value, if available
    def __contains__(self, key):
        return key in self._elements

    def __iter__(self):
        return iter(self._elements)

    def __len__(self):
        return len(self._elements)

    def __eq__(self, other):
        """
        Two containers are equal, if `other` is an instance of the type
        of this container, both have the same keys and the elements for
        each key are equal.
        The comparison accesses each element via ``[]`` and therefore
        deserializes elements that are still serialized.
        """
        if not isinstance(other, type(self)):
            return False
        if set(self.keys()) != set(other.keys()):
            return False
        return not any(self[key] != other[key] for key in self.keys())
@@ -0,0 +1,362 @@
1
# Only `compress()` is exported by a wildcard import of this module
__all__ = ["compress"]
# Override the module name with a fixed dotted name
# NOTE(review): presumably the package this module belongs to -- confirm
__name__ = "biotite.structure.io.pdbx"
__author__ = "Patrick Kunzmann"
4
+
5
+ import itertools
6
+ import warnings
7
+ import msgpack
8
+ import numpy as np
9
+ import biotite.structure.io.pdbx.bcif as bcif
10
+ from biotite.structure.io.pdbx.bcif import _encode_numpy as encode_numpy
11
+ from biotite.structure.io.pdbx.encoding import (
12
+ ByteArrayEncoding,
13
+ DeltaEncoding,
14
+ FixedPointEncoding,
15
+ IntegerPackingEncoding,
16
+ RunLengthEncoding,
17
+ StringArrayEncoding,
18
+ )
19
+
20
+
21
+ def compress(data, float_tolerance=None, rtol=1e-6, atol=1e-4):
22
+ """
23
+ Try to reduce the size of a *BinaryCIF* file (or block, category, etc.) by testing
24
+ different data encodings for each data array and selecting the one, which results in
25
+ the smallest size.
26
+
27
+ Parameters
28
+ ----------
29
+ data : BinaryCIFFile or BinaryCIFBlock or BinaryCIFCategory or BinaryCIFColumn or BinaryCIFData
30
+ The data to compress.
31
+ float_tolerance : float, optional
32
+ The relative error that is accepted when compressing floating point numbers.
33
+ DEPRECATED: Use `rtol` instead.
34
+ rtol, atol : float, optional
35
+ The compression factor of floating point numbers is chosen such that
36
+ either the relative (`rtol`) or absolute (`atol`) tolerance is fulfilled
37
+ for each value, i.e. the difference between the compressed and uncompressed
38
+ value is smaller than the tolerance.
39
+
40
+ Returns
41
+ -------
42
+ compressed_file : BinaryCIFFile or BinaryCIFBlock or BinaryCIFCategory or BinaryCIFColumn or BinaryCIFData
43
+ The compressed data with the same type as the input data.
44
+ If no improved compression is found for a :class:`BinaryCIFData` array,
45
+ the input data is kept.
46
+ Hence, the return value is no deep copy of the input data.
47
+
48
+ Examples
49
+ --------
50
+
51
+ >>> from io import BytesIO
52
+ >>> pdbx_file = BinaryCIFFile()
53
+ >>> set_structure(pdbx_file, atom_array_stack)
54
+ >>> # Write uncompressed file
55
+ >>> uncompressed_file = BytesIO()
56
+ >>> pdbx_file.write(uncompressed_file)
57
+ >>> _ = uncompressed_file.seek(0)
58
+ >>> print(f"{len(uncompressed_file.read()) // 1000} KB")
59
+ 937 KB
60
+ >>> # Write compressed file
61
+ >>> pdbx_file = compress(pdbx_file)
62
+ >>> compressed_file = BytesIO()
63
+ >>> pdbx_file.write(compressed_file)
64
+ >>> _ = compressed_file.seek(0)
65
+ >>> print(f"{len(compressed_file.read()) // 1000} KB")
66
+ 114 KB
67
+ """
68
+ if float_tolerance is not None:
69
+ warnings.warn(
70
+ "The 'float_tolerance' parameter is deprecated, use 'rtol' instead",
71
+ DeprecationWarning,
72
+ )
73
+
74
+ match type(data):
75
+ case bcif.BinaryCIFFile:
76
+ return _compress_file(data, rtol, atol)
77
+ case bcif.BinaryCIFBlock:
78
+ return _compress_block(data, rtol, atol)
79
+ case bcif.BinaryCIFCategory:
80
+ return _compress_category(data, rtol, atol)
81
+ case bcif.BinaryCIFColumn:
82
+ return _compress_column(data, rtol, atol)
83
+ case bcif.BinaryCIFData:
84
+ return _compress_data(data, rtol, atol)
85
+ case _:
86
+ raise TypeError(f"Unsupported type {type(data).__name__}")
87
+
88
+
89
def _compress_file(bcif_file, rtol, atol):
    """
    Compress each block of a *BinaryCIF* file.

    Parameters
    ----------
    bcif_file : BinaryCIFFile
        The file to compress.
    rtol, atol : float
        The relative and absolute tolerance passed on to the compression of
        each block.

    Returns
    -------
    compressed_file : BinaryCIFFile
        A new file containing the compressed blocks under their original names.

    Raises
    ------
    ValueError
        If the compression of a block fails for any reason.
        The original exception is attached as direct cause.
    """
    compressed_file = bcif.BinaryCIFFile()
    for block_name, bcif_block in bcif_file.items():
        try:
            compressed_block = _compress_block(bcif_block, rtol, atol)
        except Exception as err:
            # Chain explicitly, so the root cause is reported as direct cause
            raise ValueError(f"Failed to compress block '{block_name}'") from err
        compressed_file[block_name] = compressed_block
    return compressed_file
98
+
99
+
100
def _compress_block(bcif_block, rtol, atol):
    """
    Compress each category of a *BinaryCIF* block.

    Parameters
    ----------
    bcif_block : BinaryCIFBlock
        The block to compress.
    rtol, atol : float
        The relative and absolute tolerance passed on to the compression of
        each category.

    Returns
    -------
    compressed_block : BinaryCIFBlock
        A new block containing the compressed categories under their original
        names.

    Raises
    ------
    ValueError
        If the compression of a category fails for any reason.
        The original exception is attached as direct cause.
    """
    compressed_block = bcif.BinaryCIFBlock()
    for category_name, bcif_category in bcif_block.items():
        try:
            compressed_category = _compress_category(bcif_category, rtol, atol)
        except Exception as err:
            # Chain explicitly, so the root cause is reported as direct cause
            raise ValueError(
                f"Failed to compress category '{category_name}'"
            ) from err
        compressed_block[category_name] = compressed_category
    return compressed_block
109
+
110
+
111
def _compress_category(bcif_category, rtol, atol):
    """
    Compress each column of a *BinaryCIF* category.

    Parameters
    ----------
    bcif_category : BinaryCIFCategory
        The category to compress.
    rtol, atol : float
        The relative and absolute tolerance passed on to the compression of
        each column.

    Returns
    -------
    compressed_category : BinaryCIFCategory
        A new category containing the compressed columns under their original
        names.

    Raises
    ------
    ValueError
        If the compression of a column fails for any reason.
        The original exception is attached as direct cause.
    """
    compressed_category = bcif.BinaryCIFCategory()
    for column_name, bcif_column in bcif_category.items():
        try:
            compressed_column = _compress_column(bcif_column, rtol, atol)
        except Exception as err:
            # Chain explicitly, so the root cause is reported as direct cause
            raise ValueError(f"Failed to compress column '{column_name}'") from err
        compressed_category[column_name] = compressed_column
    return compressed_category
120
+
121
+
122
def _compress_column(bcif_column, rtol, atol):
    """
    Compress the data array and, if present, the mask array of a *BinaryCIF*
    column and assemble both into a new column.

    Parameters
    ----------
    bcif_column : BinaryCIFColumn
        The column to compress.
    rtol, atol : float
        The tolerances passed on to the compression of the arrays.

    Returns
    -------
    compressed_column : BinaryCIFColumn
        The new column created from the compressed arrays.
    """
    compressed_values = _compress_data(bcif_column.data, rtol, atol)
    # A column without mask stays without mask
    compressed_mask = (
        None
        if bcif_column.mask is None
        else _compress_data(bcif_column.mask, rtol, atol)
    )
    return bcif.BinaryCIFColumn(compressed_values, compressed_mask)
129
+
130
+
131
+ def _compress_data(bcif_data, rtol, atol):
132
+ array = bcif_data.array
133
+ if len(array) == 1:
134
+ # No need to compress a single value -> Use default uncompressed encoding
135
+ return bcif.BinaryCIFData(array)
136
+
137
+ if np.issubdtype(array.dtype, np.str_):
138
+ # Leave encoding empty for now, as it is explicitly set later
139
+ encoding = StringArrayEncoding(data_encoding=[], offset_encoding=[])
140
+ # Run encode to initialize the data and offset arrays
141
+ indices = encoding.encode(array)
142
+ offsets = np.cumsum([0] + [len(s) for s in encoding.strings])
143
+ encoding.data_encoding = _find_best_integer_compression(indices)
144
+ encoding.offset_encoding = _find_best_integer_compression(offsets)
145
+ return bcif.BinaryCIFData(array, [encoding])
146
+
147
+ elif np.issubdtype(array.dtype, np.floating):
148
+ if not np.isfinite(array).all():
149
+ # NaN/inf values cannot be represented by integers
150
+ # -> do not use integer encoding
151
+ return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
152
+ to_integer_encoding = FixedPointEncoding(
153
+ 10 ** _get_decimal_places(array, rtol, atol)
154
+ )
155
+ try:
156
+ integer_array = to_integer_encoding.encode(array)
157
+ except ValueError:
158
+ # With the given tolerances integer underflow/overflow would occur
159
+ # -> do not use integer encoding
160
+ return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
161
+ else:
162
+ best_encoding = _find_best_integer_compression(integer_array)
163
+ compressed_data = bcif.BinaryCIFData(
164
+ array, [to_integer_encoding] + best_encoding
165
+ )
166
+ uncompressed_data = bcif.BinaryCIFData(array, [ByteArrayEncoding()])
167
+ if _data_size_in_file(compressed_data) < _data_size_in_file(
168
+ uncompressed_data
169
+ ):
170
+ return compressed_data
171
+ else:
172
+ # The float array is smaller -> encode it directly as bytes
173
+ return uncompressed_data
174
+
175
+ elif np.issubdtype(array.dtype, np.integer):
176
+ array = _to_smallest_integer_type(array)
177
+ encodings = _find_best_integer_compression(array)
178
+ return bcif.BinaryCIFData(array, encodings)
179
+
180
+ else:
181
+ raise TypeError(f"Unsupported data type {array.dtype}")
182
+
183
+
184
def _find_best_integer_compression(array):
    """
    Test combinations of delta, run-length and integer packing encoding on an
    integer array and return the encoding sequence with the smallest serialized
    size.

    Each candidate sequence ends with a byte array encoding.
    """
    winner = None
    winner_size = np.inf

    for with_delta in (False, True):
        if with_delta:
            delta = DeltaEncoding()
            delta_stage_array = delta.encode(array)
            delta_stage_encodings = [delta]
        else:
            delta_stage_array = array
            delta_stage_encodings = []

        for with_run_length in (False, True):
            # Build upon the result of the previous stage instead of re-encoding
            if with_run_length:
                run_length = RunLengthEncoding()
                rle_stage_array = run_length.encode(delta_stage_array)
                rle_stage_encodings = delta_stage_encodings + [run_length]
            else:
                rle_stage_array = delta_stage_array
                rle_stage_encodings = delta_stage_encodings

            for byte_count in (None, 1, 2):
                if byte_count is None:
                    packed_array = rle_stage_array
                    packed_encodings = rle_stage_encodings
                else:
                    # Cheap heuristic, that avoids computing a packed array
                    # which would explode in size
                    estimate = _estimate_packed_length(rle_stage_array, byte_count)
                    if estimate >= rle_stage_array.nbytes:
                        # Packing would not make the array smaller
                        continue
                    packing = IntegerPackingEncoding(byte_count)
                    packed_array = packing.encode(rle_stage_array)
                    packed_encodings = rle_stage_encodings + [packing]

                to_bytes = ByteArrayEncoding()
                raw_bytes = to_bytes.encode(packed_array)
                candidate = packed_encodings + [to_bytes]
                # The encoded bytes are already at hand, so the serialized form
                # is assembled directly instead of letting 'BinaryCIFData'
                # encode the array a second time
                candidate_size = _data_size_in_file(
                    {
                        "data": raw_bytes,
                        "encoding": [enc.serialize() for enc in candidate],
                    }
                )
                if candidate_size < winner_size:
                    winner = candidate
                    winner_size = candidate_size
    return winner
241
+
242
+
243
+ def _estimate_packed_length(array, packed_byte_count):
244
+ """
245
+ Estimate the length of an integer array after packing it with a given number of
246
+ bytes.
247
+
248
+ Parameters
249
+ ----------
250
+ array : numpy.ndarray
251
+ The array to pack.
252
+ packed_byte_count : int
253
+ The number of bytes used for packing.
254
+
255
+ Returns
256
+ -------
257
+ length : int
258
+ The estimated length of the packed array.
259
+ """
260
+ # Use int64 to avoid integer overflow in the following line
261
+ max_val_per_element = np.int64(2 ** (8 * packed_byte_count))
262
+ n_bytes_per_element = packed_byte_count * (np.abs(array // max_val_per_element) + 1)
263
+ return np.sum(n_bytes_per_element, dtype=np.int64)
264
+
265
+
266
+ def _to_smallest_integer_type(array):
267
+ """
268
+ Convert an integer array to the smallest possible integer type, that is still able
269
+ to represent all values in the array.
270
+
271
+ Parameters
272
+ ----------
273
+ array : numpy.ndarray
274
+ The array to convert.
275
+
276
+ Returns
277
+ -------
278
+ array : numpy.ndarray
279
+ The converted array.
280
+ """
281
+ if array.min() >= 0:
282
+ for dtype in [np.uint8, np.uint16, np.uint32, np.uint64]:
283
+ if np.all(array <= np.iinfo(dtype).max):
284
+ return array.astype(dtype)
285
+ for dtype in [np.int8, np.int16, np.int32, np.int64]:
286
+ if np.all(array >= np.iinfo(dtype).min) and np.all(
287
+ array <= np.iinfo(dtype).max
288
+ ):
289
+ return array.astype(dtype)
290
+ raise ValueError("Array is out of bounds for all integer types")
291
+
292
+
293
def _data_size_in_file(data):
    """
    Measure the number of bytes the data occupies after packing it with
    *MessagePack*.

    Parameters
    ----------
    data : BinaryCIFData or dict
        The data array whose size is measured.
        A :class:`BinaryCIFData` object is serialized first, any other object is
        taken as already serialized data.

    Returns
    -------
    size : int
        The length of the packed bytes.
    """
    serialized = data.serialize() if isinstance(data, bcif.BinaryCIFData) else data
    return len(msgpack.packb(serialized, use_bin_type=True, default=encode_numpy))
312
+
313
+
314
+ def _get_decimal_places(array, rtol, atol):
315
+ """
316
+ Get the number of decimal places in a floating point array.
317
+
318
+ Parameters
319
+ ----------
320
+ array : numpy.ndarray
321
+ The array to analyze.
322
+ rtol, atol : float, optional
323
+ The relative and absolute tolerance allowed when the values are cut off after
324
+ the returned number of decimal places.
325
+
326
+ Returns
327
+ -------
328
+ decimals : int
329
+ The number of decimal places.
330
+ """
331
+ if rtol <= 0 and atol <= 0:
332
+ raise ValueError("At least one of 'rtol' and 'atol' must be greater than 0")
333
+ # 0 would give NaN when rounding on decimals
334
+ array = array[array != 0]
335
+ for decimals in itertools.count(start=min(0, -_order_magnitude(array))):
336
+ error = np.abs(np.round(array, decimals) - array)
337
+ if decimals == 100:
338
+ raise
339
+ if np.all((error < rtol * np.abs(array)) | (error < atol)):
340
+ return decimals
341
+
342
+
343
+ def _order_magnitude(array):
344
+ """
345
+ Get the order of magnitude of floating point values.
346
+
347
+ Parameters
348
+ ----------
349
+ array : ndarray, dtype=float
350
+ The value to analyze.
351
+
352
+ Returns
353
+ -------
354
+ magnitude : int
355
+ The order of magnitude, i.e. the maximum exponent a number in the array would
356
+ have in scientific notation, if only one digit is left of the decimal point.
357
+ """
358
+ array = array[array != 0]
359
+ if len(array) == 0:
360
+ # No non-zero values -> define order of magnitude as 0
361
+ return 0
362
+ return int(np.max(np.floor(np.log10(np.abs(array)))).item())