biotite 1.1.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cp313-win_amd64.pyd +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cp313-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cp313-win_amd64.pyd +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cp313-win_amd64.pyd +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cp313-win_amd64.pyd +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cp313-win_amd64.pyd +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cp313-win_amd64.pyd +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cp313-win_amd64.pyd +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cp313-win_amd64.pyd +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cp313-win_amd64.pyd +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cp313-win_amd64.pyd +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cp313-win_amd64.pyd +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cp313-win_amd64.pyd +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cp313-win_amd64.pyd +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cp313-win_amd64.pyd +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cp313-win_amd64.pyd +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cp313-win_amd64.pyd +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cp313-win_amd64.pyd +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cp313-win_amd64.pyd +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cp313-win_amd64.pyd +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cp313-win_amd64.pyd +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +4 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,245 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module contains internally abstract classes for representing parts
7
+ of CIF/BinaryCIF files, such as categories and columns.
8
+ """
9
+
10
+ __name__ = "biotite.structure.io.pdbx"
11
+ __author__ = "Patrick Kunzmann"
12
+ __all__ = ["MaskValue"]
13
+
14
+ from abc import ABCMeta, abstractmethod
15
+ from collections.abc import MutableMapping
16
+ from enum import IntEnum
17
+ from biotite.file import DeserializationError, SerializationError
18
+
19
+
20
class MaskValue(IntEnum):
    """
    Enumeration of the states an entry of a mask array can take.

    - `PRESENT` : The row holds an actual value.
    - `INAPPLICABLE` : No value is applicable or appropriate for the row
      (``.`` in *CIF*).
      Depending on the column, this may also stand for a default value.
    - `MISSING` : The value of the row is missing or unknown
      (``?`` in *CIF*).
    """

    PRESENT = 0
    INAPPLICABLE = 1
    MISSING = 2
36
+
37
+
38
class _Component(metaclass=ABCMeta):
    """
    Abstract root of every component of a CIF/BinaryCIF file.
    """

    @staticmethod
    def subcomponent_class():
        """
        Tell which class the components stored inside this component have.

        Returns
        -------
        subcomponent_class : type
            The class of the subcomponent.
            ``None`` is returned, if this component is already the
            lowest level, i.e. it holds no subcomponents.
        """
        return None

    @staticmethod
    def supercomponent_class():
        """
        Tell which class the component containing this component has.

        Returns
        -------
        supercomponent_class : type
            The class of the supercomponent.
            ``None`` is returned, if this component already represents
            the highest level, i.e. no other component contains it.
        """
        return None

    @staticmethod
    @abstractmethod
    def deserialize(content):
        """
        Build this component from its serialized form.

        Parameters
        ----------
        content : str or dict
            The content to be deserialized.
            Its type depends on the file format:
            For *CIF* files it is the text of the lines representing
            this component, for *BinaryCIF* files it is a dictionary
            parsed from the *MessagePack* data.
        """
        raise NotImplementedError

    @abstractmethod
    def serialize(self):
        """
        Turn this component into a Python object that can be written
        to a file.

        Returns
        -------
        content : str or dict
            The serialized content.
            Its type depends on the file format:
            For *CIF* files it is the text of the lines representing
            this component, for *BinaryCIF* files it is a dictionary
            that can be encoded into *MessagePack*.
        """
        raise NotImplementedError

    def __str__(self):
        # The string representation is simply the serialized form
        serialized = self.serialize()
        return str(serialized)
111
+
112
+
113
class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
    """
    A container for hierarchical data in BinaryCIF files.
    For example, the file contains multiple blocks, each block contains
    multiple categories and each category contains multiple columns.

    It uses lazy deserialization:
    A component is only deserialized from the serialized data, if it
    is accessed.
    The deserialized component is then cached in the container.

    Parameters
    ----------
    elements : dict, optional
        The initial elements of the container.
        Each value is either a still serialized element (a ``dict``)
        or an instance of the class returned by
        :meth:`subcomponent_class()`.
        The dictionary is stored as given, i.e. it is not copied.
        By default the container is empty.
    """

    def __init__(self, elements=None):
        if elements is None:
            elements = {}
        for element in elements.values():
            if not isinstance(element, (dict, self.subcomponent_class())):
                raise TypeError(
                    f"Expected '{self.subcomponent_class().__name__}', "
                    f"but got '{type(element).__name__}'"
                )
        self._elements = elements

    @staticmethod
    def _deserialize_elements(content, take_key_from):
        """
        Lazily deserialize the elements of this container.

        Parameters
        ----------
        content : dict
            The serialized content describing the elements for this
            container.
        take_key_from : str
            The key in each element of `content`, whose value is used as
            the key for the respective element.

        Returns
        -------
        elements : dict
            The elements that should be stored in this container.
            This return value can be given to the constructor.
        """
        elements = {}
        for serialized_element in content:
            key = serialized_element[take_key_from]
            # Lazy deserialization
            # -> keep serialized for now and deserialize later if needed
            elements[key] = serialized_element
        return elements

    def _serialize_elements(self, store_key_in=None):
        """
        Serialize the elements that are stored in this container.

        Each element that is still serialized (due to lazy
        deserialization), is kept as it is.

        Parameters
        ----------
        store_key_in : str, optional
            If given, the key of each element is stored as value in the
            serialized element.
            This is basically the reverse operation of `take_key_from` in
            :meth:`_deserialize_elements()`.

        Returns
        -------
        serialized_elements : list
            The serialized form of each element, in the order of the
            elements in this container.

        Raises
        ------
        SerializationError
            If the serialization of an element fails.
            The original exception is attached as cause.
        """
        subcomponent_class = self.subcomponent_class()
        serialized_elements = []
        for key, element in self._elements.items():
            if isinstance(element, subcomponent_class):
                try:
                    serialized_element = element.serialize()
                except Exception as err:
                    # Chain explicitly, so the actual reason stays visible
                    raise SerializationError(
                        f"Failed to serialize element '{key}'"
                    ) from err
            else:
                # Element is already stored in serialized form
                serialized_element = element
            if store_key_in is not None:
                serialized_element[store_key_in] = key
            serialized_elements.append(serialized_element)
        return serialized_elements

    def __getitem__(self, key):
        """
        Get the element stored under `key`.

        If the element is still in serialized form, it is deserialized
        and the deserialized object replaces the serialized one in the
        container.

        Raises
        ------
        KeyError
            If `key` is not in the container.
        DeserializationError
            If the deserialization of the element fails.
            The original exception is attached as cause.
        """
        element = self._elements[key]
        if not isinstance(element, self.subcomponent_class()):
            # Element is stored in serialized form
            # -> must be deserialized first
            try:
                element = self.subcomponent_class().deserialize(element)
            except Exception as err:
                raise DeserializationError(
                    f"Failed to deserialize element '{key}'"
                ) from err
            # Update container with deserialized object
            self._elements[key] = element
        return element

    def __setitem__(self, key, element):
        """
        Store `element` under `key`.

        An instance of the subcomponent class is stored directly.
        Any other value, except for containers of the wrong type, is
        passed to ``deserialize()`` of the subcomponent class first.

        Raises
        ------
        TypeError
            If `element` is a :class:`_HierarchicalContainer`, but not
            an instance of the expected subcomponent class.
        DeserializationError
            If the given value cannot be deserialized.
            The original exception is attached as cause.
        """
        if isinstance(element, self.subcomponent_class()):
            pass
        elif isinstance(element, _HierarchicalContainer):
            # A common mistake may be to use the wrong container type
            raise TypeError(
                f"Expected '{self.subcomponent_class().__name__}', "
                f"but got '{type(element).__name__}'"
            )
        else:
            try:
                element = self.subcomponent_class().deserialize(element)
            except Exception as err:
                raise DeserializationError(
                    "Failed to deserialize given value"
                ) from err
        self._elements[key] = element

    def __delitem__(self, key):
        del self._elements[key]

    # Implement `__contains__()` explicitly,
    # because the mixin method unnecessarily deserializes the value, if available
    def __contains__(self, key):
        return key in self._elements

    def __iter__(self):
        return iter(self._elements)

    def __len__(self):
        return len(self._elements)

    def __eq__(self, other):
        """
        Two containers are equal, if `other` is an instance of the type
        of this container, both have the same keys and the elements
        under each key compare equal.
        Comparing the elements deserializes them, if necessary.
        """
        if not isinstance(other, type(self)):
            return False
        if set(self.keys()) != set(other.keys()):
            return False
        for key in self.keys():
            if self[key] != other[key]:
                return False
        return True
@@ -0,0 +1,321 @@
1
+ __all__ = ["compress"]
2
+ __name__ = "biotite.structure.io.pdbx"
3
+ __author__ = "Patrick Kunzmann"
4
+
5
+ import itertools
6
+ import msgpack
7
+ import numpy as np
8
+ import biotite.structure.io.pdbx.bcif as bcif
9
+ from biotite.structure.io.pdbx.bcif import _encode_numpy as encode_numpy
10
+ from biotite.structure.io.pdbx.encoding import (
11
+ ByteArrayEncoding,
12
+ DeltaEncoding,
13
+ FixedPointEncoding,
14
+ IntegerPackingEncoding,
15
+ RunLengthEncoding,
16
+ StringArrayEncoding,
17
+ )
18
+
19
+
20
+ def compress(data, float_tolerance=1e-6):
21
+ """
22
+ Try to reduce the size of a *BinaryCIF* file (or block, category, etc.) by testing
23
+ different data encodings for each data array and selecting the one, which results in
24
+ the smallest size.
25
+
26
+ Parameters
27
+ ----------
28
+ data : BinaryCIFFile or BinaryCIFBlock or BinaryCIFCategory or BinaryCIFColumn or BinaryCIFData
29
+ The data to compress.
30
+
31
+ Returns
32
+ -------
33
+ compressed_file : BinaryCIFFile or BinaryCIFBlock or BinaryCIFCategory or BinaryCIFColumn or BinaryCIFData
34
+ The compressed data with the same type as the input data.
35
+ If no improved compression is found for a :class:`BinaryCIFData` array,
36
+ the input data is kept.
37
+ Hence, the return value is no deep copy of the input data.
38
+ float_tolerance : float, optional
39
+ The relative error that is accepted when compressing floating point numbers.
40
+
41
+ Examples
42
+ --------
43
+
44
+ >>> from io import BytesIO
45
+ >>> pdbx_file = BinaryCIFFile()
46
+ >>> set_structure(pdbx_file, atom_array_stack)
47
+ >>> # Write uncompressed file
48
+ >>> uncompressed_file = BytesIO()
49
+ >>> pdbx_file.write(uncompressed_file)
50
+ >>> _ = uncompressed_file.seek(0)
51
+ >>> print(f"{len(uncompressed_file.read()) // 1000} KB")
52
+ 927 KB
53
+ >>> # Write compressed file
54
+ >>> pdbx_file = compress(pdbx_file)
55
+ >>> compressed_file = BytesIO()
56
+ >>> pdbx_file.write(compressed_file)
57
+ >>> _ = compressed_file.seek(0)
58
+ >>> print(f"{len(compressed_file.read()) // 1000} KB")
59
+ 111 KB
60
+ """
61
+ match type(data):
62
+ case bcif.BinaryCIFFile:
63
+ return _compress_file(data, float_tolerance)
64
+ case bcif.BinaryCIFBlock:
65
+ return _compress_block(data, float_tolerance)
66
+ case bcif.BinaryCIFCategory:
67
+ return _compress_category(data, float_tolerance)
68
+ case bcif.BinaryCIFColumn:
69
+ return _compress_column(data, float_tolerance)
70
+ case bcif.BinaryCIFData:
71
+ return _compress_data(data, float_tolerance)
72
+ case _:
73
+ raise TypeError(f"Unsupported type {type(data).__name__}")
74
+
75
+
76
def _compress_file(bcif_file, float_tolerance):
    """
    Create a new :class:`BinaryCIFFile` that holds the compressed
    version of each block of the given file under the same name.
    """
    result = bcif.BinaryCIFFile()
    for name, block in bcif_file.items():
        result[name] = _compress_block(block, float_tolerance)
    return result
82
+
83
+
84
def _compress_block(bcif_block, float_tolerance):
    """
    Create a new :class:`BinaryCIFBlock` that holds the compressed
    version of each category of the given block under the same name.
    """
    result = bcif.BinaryCIFBlock()
    for name, category in bcif_block.items():
        result[name] = _compress_category(category, float_tolerance)
    return result
90
+
91
+
92
def _compress_category(bcif_category, float_tolerance):
    """
    Create a new :class:`BinaryCIFCategory` that holds the compressed
    version of each column of the given category under the same name.
    """
    result = bcif.BinaryCIFCategory()
    for name, column in bcif_category.items():
        result[name] = _compress_column(column, float_tolerance)
    return result
98
+
99
+
100
def _compress_column(bcif_column, float_tolerance):
    """
    Create a new :class:`BinaryCIFColumn` from the compressed data and,
    if the column has one, the compressed mask of the given column.
    """
    compressed_data = _compress_data(bcif_column.data, float_tolerance)
    # A column without mask stays without mask
    compressed_mask = (
        None
        if bcif_column.mask is None
        else _compress_data(bcif_column.mask, float_tolerance)
    )
    return bcif.BinaryCIFColumn(compressed_data, compressed_mask)
107
+
108
+
109
def _compress_data(bcif_data, float_tolerance):
    """
    Select encodings for the array of the given :class:`BinaryCIFData`
    based on its data type and return a new :class:`BinaryCIFData`
    using them.

    String arrays get a :class:`StringArrayEncoding`, floating point
    arrays are either converted to integers via
    :class:`FixedPointEncoding` or stored directly as bytes, and
    integer arrays are given the best found integer encoding.
    Any other data type raises a :class:`TypeError`.
    """
    array = bcif_data.array
    if len(array) == 1:
        # No need to compress a single value -> Use default uncompressed encoding
        return bcif.BinaryCIFData(array)

    dtype = array.dtype

    if np.issubdtype(dtype, np.str_):
        # Leave encoding empty for now, as it is explicitly set later
        string_encoding = StringArrayEncoding(data_encoding=[], offset_encoding=[])
        # Run encode to initialize the data and offset arrays
        indices = string_encoding.encode(array)
        string_lengths = [len(string) for string in string_encoding.strings]
        offsets = np.cumsum([0] + string_lengths)
        string_encoding.data_encoding, _ = _find_best_integer_compression(indices)
        string_encoding.offset_encoding, _ = _find_best_integer_compression(offsets)
        return bcif.BinaryCIFData(array, [string_encoding])

    if np.issubdtype(dtype, np.floating):
        decimal_places = _get_decimal_places(array, float_tolerance)
        fixed_point_encoding = FixedPointEncoding(10**decimal_places)
        as_integers = fixed_point_encoding.encode(array)
        integer_encodings, compressed_size = _find_best_integer_compression(
            as_integers
        )
        uncompressed_size = _data_size_in_file(bcif.BinaryCIFData(array))
        if compressed_size < uncompressed_size:
            return bcif.BinaryCIFData(
                array, [fixed_point_encoding] + integer_encodings
            )
        # The float array is smaller -> encode it directly as bytes
        return bcif.BinaryCIFData(array, [ByteArrayEncoding()])

    if np.issubdtype(dtype, np.integer):
        compact_array = _to_smallest_integer_type(array)
        integer_encodings, _ = _find_best_integer_compression(compact_array)
        return bcif.BinaryCIFData(compact_array, integer_encodings)

    raise TypeError(f"Unsupported data type {array.dtype}")
144
+
145
+
146
def _find_best_integer_compression(array):
    """
    Try different data encodings on an integer array and return the one that
    results in the smallest size.

    Every combination of optional delta encoding, optional run-length
    encoding and optional integer packing (1 or 2 bytes) is tried, each
    terminated by a byte array encoding.

    Parameters
    ----------
    array : numpy.ndarray
        The integer array to find the encoding sequence for.

    Returns
    -------
    best_encoding_sequence : list
        The encoding objects of the smallest candidate, in application order.
    smallest_size : int
        The size of that candidate as measured by :func:`_data_size_in_file`.
    """
    winner = None
    winner_size = np.inf

    for with_delta in (False, True):
        if not with_delta:
            delta_chain = []
            delta_output = array
        else:
            delta = DeltaEncoding()
            delta_output = delta.encode(array)
            delta_chain = [delta]

        for with_run_length in (False, True):
            # Build upon the output of the previous stage instead of
            # re-encoding from scratch
            if not with_run_length:
                rle_chain = delta_chain
                rle_output = delta_output
            else:
                run_length = RunLengthEncoding()
                rle_output = run_length.encode(delta_output)
                rle_chain = delta_chain + [run_length]

            for byte_count in (None, 1, 2):
                if byte_count is None:
                    packing_chain = rle_chain
                    packing_output = rle_output
                else:
                    # Cheap heuristic that prevents computing a packed array
                    # which would blow up in size
                    estimated = _estimate_packed_length(rle_output, byte_count)
                    if estimated >= rle_output.nbytes:
                        # Packing is not expected to shrink the data
                        continue
                    packing = IntegerPackingEncoding(byte_count)
                    packing_output = packing.encode(rle_output)
                    packing_chain = rle_chain + [packing]

                to_bytes = ByteArrayEncoding()
                payload = to_bytes.encode(packing_output)
                candidate = packing_chain + [to_bytes]
                # Assemble the serialized form by hand: going through
                # BinaryCIFData would encode the array a second time,
                # although the result is already at hand in 'payload'
                candidate_size = _data_size_in_file(
                    {
                        "data": payload,
                        "encoding": [step.serialize() for step in candidate],
                    }
                )
                if candidate_size < winner_size:
                    winner = candidate
                    winner_size = candidate_size

    return winner, winner_size
203
+
204
+
205
def _estimate_packed_length(array, packed_byte_count):
    """
    Estimate the length of an integer array after packing it with a given
    number of bytes.

    Parameters
    ----------
    array : numpy.ndarray
        The array to pack.
    packed_byte_count : int
        The number of bytes used for packing.

    Returns
    -------
    length : int
        The estimated length of the packed array.
    """
    # Keep the divisor as int64, so the division below does not overflow
    values_per_slot = np.int64(2 ** (8 * packed_byte_count))
    # Number of times each element exceeds the range of one packed slot
    extra_slots = np.abs(array // values_per_slot)
    bytes_per_element = packed_byte_count * (extra_slots + 1)
    return np.sum(bytes_per_element, dtype=np.int64)
226
+
227
+
228
def _to_smallest_integer_type(array):
    """
    Convert an integer array to the smallest possible integer type, that is
    still able to represent all values in the array.

    Unsigned types are preferred, if the array contains no negative values.

    Parameters
    ----------
    array : numpy.ndarray
        The array to convert.

    Returns
    -------
    array : numpy.ndarray
        The converted array.
        An empty input is returned as empty ``uint8`` array.

    Raises
    ------
    ValueError
        If no 8 to 64 bit integer type is able to hold all values.
    """
    if array.size == 0:
        # min()/max() are undefined for empty arrays,
        # and the smallest type trivially holds 'no values'
        return array.astype(np.uint8)

    # Compute the value range only once and compare as Python integers,
    # so the comparison itself cannot overflow a fixed-width type
    lowest = int(array.min())
    highest = int(array.max())

    if lowest >= 0:
        for dtype in [np.uint8, np.uint16, np.uint32, np.uint64]:
            if highest <= np.iinfo(dtype).max:
                return array.astype(dtype)
    for dtype in [np.int8, np.int16, np.int32, np.int64]:
        limits = np.iinfo(dtype)
        if limits.min <= lowest and highest <= limits.max:
            return array.astype(dtype)
    raise ValueError("Array is out of bounds for all integer types")
253
+
254
+
255
def _data_size_in_file(data):
    """
    Get the size the data would have when written into a *BinaryCIF* file.

    Parameters
    ----------
    data : BinaryCIFData or dict
        The data array whose size is measured.
        Can be either a :class:`BinaryCIFData` object or already serialized data.

    Returns
    -------
    size : int
        The size of the data array in the file in bytes.
    """
    # Objects are serialized first, already serialized data is taken as is
    serialized = data.serialize() if isinstance(data, bcif.BinaryCIFData) else data
    return len(msgpack.packb(serialized, use_bin_type=True, default=encode_numpy))
274
+
275
+
276
def _get_decimal_places(array, tol):
    """
    Get the number of decimal places in a floating point array.

    Parameters
    ----------
    array : numpy.ndarray
        The array to analyze.
    tol : float
        The relative tolerance allowed when the values are cut off after the
        returned number of decimal places.

    Returns
    -------
    decimals : int
        The number of decimal places.
        May be negative, as the search starts at the negated order of
        magnitude of the values.
    """
    # NaN and infinite values have no meaningful decimals
    # and 0 cannot fulfill the relative tolerance check below
    relevant = array[np.isfinite(array) & (array != 0)]
    magnitudes = np.abs(relevant)
    decimals = -_order_magnitude(relevant)
    # NOTE(review): the search has no upper bound; it relies on the tolerance
    # being eventually met -- confirm callers never pass a non-positive 'tol'
    while True:
        deviation = np.abs(np.round(relevant, decimals) - relevant)
        if np.all(deviation < tol * magnitudes):
            return decimals
        decimals += 1
300
+
301
+
302
def _order_magnitude(array):
    """
    Get the order of magnitude of floating point values.

    Parameters
    ----------
    array : ndarray, dtype=float
        The values to analyze.

    Returns
    -------
    magnitude : int
        The order of magnitude, i.e. the maximum exponent a number in the
        array would have in scientific notation, if only one digit is left
        of the decimal point.
        If the array contains no non-zero value, 0 is returned.
    """
    nonzero = array[array != 0]
    if len(nonzero) == 0:
        # Without any non-zero value the order of magnitude is defined as 0
        return 0
    exponents = np.floor(np.log10(np.abs(nonzero)))
    largest_exponent = np.max(exponents)
    return int(largest_exponent.item())