biotite 1.1.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cp313-win_amd64.pyd +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cp313-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cp313-win_amd64.pyd +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cp313-win_amd64.pyd +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cp313-win_amd64.pyd +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cp313-win_amd64.pyd +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cp313-win_amd64.pyd +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cp313-win_amd64.pyd +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cp313-win_amd64.pyd +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cp313-win_amd64.pyd +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cp313-win_amd64.pyd +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cp313-win_amd64.pyd +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cp313-win_amd64.pyd +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cp313-win_amd64.pyd +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cp313-win_amd64.pyd +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cp313-win_amd64.pyd +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cp313-win_amd64.pyd +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cp313-win_amd64.pyd +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cp313-win_amd64.pyd +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cp313-win_amd64.pyd +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cp313-win_amd64.pyd +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +4 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,23 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This subpackage provides support for the modern PDBx file formats.
7
+ The :class:`CIFFile` class provides dictionary-like access to
8
+ every field in text-based *mmCIF* files.
9
+ :class:`BinaryCIFFile` provides analogous functionality for the
10
+ *BinaryCIF* format.
11
+ Additional utility functions allow reading and writing structures
12
+ from/to these files.
13
+ """
14
+
15
+ __name__ = "biotite.structure.io.pdbx"
16
+ __author__ = "Patrick Kunzmann"
17
+
18
+ from .bcif import *
19
+ from .cif import *
20
+ from .component import *
21
+ from .compress import *
22
+ from .convert import *
23
+ from .encoding import *
@@ -0,0 +1,656 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.pdbx"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = [
8
+ "BinaryCIFFile",
9
+ "BinaryCIFBlock",
10
+ "BinaryCIFCategory",
11
+ "BinaryCIFColumn",
12
+ "BinaryCIFData",
13
+ ]
14
+
15
+ from collections.abc import Sequence
16
+ import msgpack
17
+ import numpy as np
18
+ from biotite.file import File, SerializationError, is_binary, is_open_compatible
19
+ from biotite.structure.io.pdbx.component import (
20
+ MaskValue,
21
+ _Component,
22
+ _HierarchicalContainer,
23
+ )
24
+ from biotite.structure.io.pdbx.encoding import (
25
+ create_uncompressed_encoding,
26
+ decode_stepwise,
27
+ deserialize_encoding,
28
+ encode_stepwise,
29
+ )
30
+
31
+
32
class BinaryCIFData(_Component):
    r"""
    This class represents the data in a :class:`BinaryCIFColumn`.

    Parameters
    ----------
    array : array_like or int or float or str
        The data array to be stored.
        If a single item (including a zero-dimensional array) is given,
        it is converted into a one-dimensional array.
    encoding : list of Encoding, optional
        The encoding steps that are successively applied to the data.
        By default, the data is stored uncompressed directly as bytes.

    Attributes
    ----------
    array : ndarray
        The stored data array.
    encoding : list of Encoding
        The encoding steps.

    Examples
    --------

    >>> data = BinaryCIFData([1, 2, 3])
    >>> print(data.array)
    [1 2 3]
    >>> print(len(data))
    3
    >>> # A single item is converted into an array
    >>> data = BinaryCIFData("apple")
    >>> print(data.array)
    ['apple']

    A well-chosen encoding can significantly reduce the serialized data
    size:

    >>> # Default uncompressed encoding
    >>> array = np.arange(100)
    >>> uncompressed_bytes = BinaryCIFData(array).serialize()["data"]
    >>> print(len(uncompressed_bytes))
    400
    >>> # Delta encoding followed by run-length encoding
    >>> # [0, 1, 2, ...] -> [0, 1, 1, ...] -> [0, 1, 1, 99]
    >>> compressed_bytes = BinaryCIFData(
    ...     array,
    ...     encoding = [
    ...         # [0, 1, 2, ...] -> [0, 1, 1, ...]
    ...         DeltaEncoding(),
    ...         # [0, 1, 1, ...] -> [0, 1, 1, 99]
    ...         RunLengthEncoding(),
    ...         # [0, 1, 1, 99] -> b"\x00\x00..."
    ...         ByteArrayEncoding()
    ...     ]
    ... ).serialize()["data"]
    >>> print(len(compressed_bytes))
    16
    """

    def __init__(self, array, encoding=None):
        # Scalars and single strings are wrapped so that they become
        # a one-element array
        if not isinstance(array, (Sequence, np.ndarray)) or isinstance(array, str):
            array = [array]
        # A zero-dimensional array would pass the check above unwrapped
        # and later break 'len()' -> promote it to one dimension
        array = np.atleast_1d(np.asarray(array))
        if np.issubdtype(array.dtype, np.object_):
            raise ValueError("Object arrays are not supported")

        self._array = array
        if encoding is None:
            self._encoding = create_uncompressed_encoding(array)
        else:
            # Copy into a list, so any iterable of encodings is accepted
            self._encoding = list(encoding)

    @property
    def array(self):
        return self._array

    @property
    def encoding(self):
        return self._encoding

    @staticmethod
    def subcomponent_class():
        # The data is the lowest level of the hierarchy
        return None

    @staticmethod
    def supercomponent_class():
        return BinaryCIFColumn

    @staticmethod
    def deserialize(content):
        """
        Create a :class:`BinaryCIFData` from its serialized form.

        Parameters
        ----------
        content : dict
            The serialized data, containing the ``"data"`` and
            ``"encoding"`` entries.

        Returns
        -------
        data : BinaryCIFData
            The decoded data, keeping the encoding steps it was
            decoded with.
        """
        encoding = [deserialize_encoding(enc) for enc in content["encoding"]]
        return BinaryCIFData(decode_stepwise(content["data"], encoding), encoding)

    def serialize(self):
        """
        Convert this object into its serialized form.

        Returns
        -------
        content : dict
            The encoded ``"data"`` and the serialized ``"encoding"``
            steps.

        Raises
        ------
        SerializationError
            If the encoding steps do not end with ``bytes``.
        """
        serialized_data = encode_stepwise(self._array, self._encoding)
        if not isinstance(serialized_data, bytes):
            raise SerializationError("Final encoding must return 'bytes'")
        serialized_encoding = [enc.serialize() for enc in self._encoding]
        return {"data": serialized_data, "encoding": serialized_encoding}

    def __len__(self):
        return len(self._array)

    def __eq__(self, other):
        # Equal only if both the array content and the encoding agree
        if not isinstance(other, type(self)):
            return False
        if not np.array_equal(self._array, other._array):
            return False
        if self._encoding != other._encoding:
            return False
        return True
142
+
143
+
144
class BinaryCIFColumn(_Component):
    """
    This class represents a single column in a
    :class:`BinaryCIFCategory`.

    Parameters
    ----------
    data : BinaryCIFData or array_like or int or float or str
        The data to be stored.
        Anything that is not a :class:`BinaryCIFData` is coerced
        into one.
    mask : BinaryCIFData or array_like, dtype=int or int
        The mask to be stored.
        If given, the mask indicates whether the `data` is
        inapplicable (``.``) or missing (``?``) in some rows.
        The data presence is indicated by values from the
        :class:`MaskValue` enum.
        Anything that is not a :class:`BinaryCIFData` is coerced
        into one.
        By default, no mask is created.

    Attributes
    ----------
    data : BinaryCIFData
        The stored data.
    mask : BinaryCIFData
        The mask that indicates whether certain data elements are
        inapplicable or missing.
        If no mask is present, this attribute is ``None``.

    Examples
    --------

    >>> print(BinaryCIFColumn([1, 2, 3]).as_array())
    [1 2 3]
    >>> mask = [MaskValue.PRESENT, MaskValue.INAPPLICABLE, MaskValue.MISSING]
    >>> # Mask values are only inserted into string arrays
    >>> print(BinaryCIFColumn([1, 2, 3], mask).as_array(int))
    [1 2 3]
    >>> print(BinaryCIFColumn([1, 2, 3], mask).as_array(str))
    ['1' '.' '?']
    >>> print(BinaryCIFColumn([1]).as_item())
    1
    >>> print(BinaryCIFColumn([1], mask=[MaskValue.MISSING]).as_item())
    ?
    """

    def __init__(self, data, mask=None):
        data = data if isinstance(data, BinaryCIFData) else BinaryCIFData(data)
        if mask is not None:
            mask = mask if isinstance(mask, BinaryCIFData) else BinaryCIFData(mask)
            # Data and mask must describe the same rows
            if len(data) != len(mask):
                raise IndexError(
                    f"Data has length {len(data)}, but mask has length {len(mask)}"
                )
        self._data = data
        self._mask = mask

    @property
    def data(self):
        return self._data

    @property
    def mask(self):
        return self._mask

    @staticmethod
    def subcomponent_class():
        return BinaryCIFData

    @staticmethod
    def supercomponent_class():
        return BinaryCIFCategory

    def as_item(self):
        """
        Get the only item in the data of this column.

        If the data is masked as inapplicable or missing, ``'.'`` or
        ``'?'`` is returned, respectively.
        If the data contains more than one item, an exception is raised.

        Returns
        -------
        item : str or int or float
            The item in the data.
        """
        values = self._data.array
        if self._mask is None:
            return values.item()

        flag = self._mask.array.item()
        if flag is None or flag == MaskValue.PRESENT:
            return values.item()
        if flag == MaskValue.INAPPLICABLE:
            return "."
        if flag == MaskValue.MISSING:
            return "?"
        # Any other mask value falls through without a result
        return None

    def as_array(self, dtype=None, masked_value=None):
        """
        Get the data of this column as an :class:`ndarray`.

        This is a shortcut to get ``BinaryCIFColumn.data.array``.
        Furthermore, the mask is applied to the data.

        Parameters
        ----------
        dtype : dtype-like, optional
            The data type the array should be converted to.
            By default, the original type is used.
        masked_value : str or int or float, optional
            The value that should be used for masked elements, i.e.
            ``MaskValue.INAPPLICABLE`` or ``MaskValue.MISSING``.
            By default, masked elements in a string array are
            converted to ``'.'`` or ``'?'`` depending on the
            :class:`MaskValue`.
            For a non-string `dtype` of the same kind as the stored
            data, the stored values are returned unchanged, and for
            any other `dtype` masked elements are set to zero.

        Returns
        -------
        array : ndarray
            The column data with the mask applied.
        """
        values = self._data.array
        if dtype is None:
            dtype = values.dtype

        if self._mask is None:
            return values.astype(dtype, copy=False)

        flags = self._mask.array

        if np.issubdtype(dtype, np.str_):
            # Work on a copy, so the stored data is not overwritten
            # with the mask symbols
            result = values.astype(dtype, copy=True)
            if masked_value is None:
                inapplicable_fill, missing_fill = ".", "?"
            else:
                inapplicable_fill = missing_fill = masked_value
            result[flags == MaskValue.INAPPLICABLE] = inapplicable_fill
            result[flags == MaskValue.MISSING] = missing_fill
            return result

        if np.dtype(dtype).kind == values.dtype.kind:
            if masked_value is None:
                return values.astype(dtype, copy=False)
            result = values.astype(dtype, copy=True)
            result[flags == MaskValue.INAPPLICABLE] = masked_value
            result[flags == MaskValue.MISSING] = masked_value
            return result

        # A conversion is required, but the values below masked elements
        # are not necessarily convertible (e.g. '' cannot be converted
        # to int) -> only convert the elements that are present
        if masked_value is None:
            result = np.zeros(len(self._data), dtype=dtype)
        else:
            result = np.full(len(self._data), masked_value, dtype=dtype)
        present = flags == MaskValue.PRESENT
        result[present] = values[present].astype(dtype)
        return result

    @staticmethod
    def deserialize(content):
        """
        Create a :class:`BinaryCIFColumn` from its serialized form.

        Parameters
        ----------
        content : dict
            The serialized column, containing the ``"data"`` and
            ``"mask"`` entries, where the mask may be ``None``.

        Returns
        -------
        column : BinaryCIFColumn
            The deserialized column.
        """
        data = BinaryCIFData.deserialize(content["data"])
        serialized_mask = content["mask"]
        if serialized_mask is None:
            mask = None
        else:
            mask = BinaryCIFData.deserialize(serialized_mask)
        return BinaryCIFColumn(data, mask)

    def serialize(self):
        """
        Convert this column into its serialized form.

        Returns
        -------
        content : dict
            The serialized ``"data"`` and ``"mask"``.
            The mask entry is ``None``, if the column has no mask.
        """
        serialized_mask = None if self._mask is None else self._mask.serialize()
        return {
            "data": self._data.serialize(),
            "mask": serialized_mask,
        }

    def __len__(self):
        return len(self._data)

    def __eq__(self, other):
        # Columns are equal, if both their data and their mask agree
        if not isinstance(other, type(self)):
            return False
        return not (self._data != other._data or self._mask != other._mask)
326
+
327
+
328
+ class BinaryCIFCategory(_HierarchicalContainer):
329
+ """
330
+ This class represents a category in a :class:`BinaryCIFBlock`.
331
+
332
+ Columns can be accessed and modified like a dictionary.
333
+ The values are :class:`BinaryCIFColumn` objects.
334
+
335
+ Parameters
336
+ ----------
337
+ columns : dict, optional
338
+ The columns of the category.
339
+ The keys are the column names and the values are the
340
+ :class:`BinaryCIFColumn` objects (or objects that can be coerced
341
+ into a :class:`BinaryCIFColumn`).
342
+ By default, an empty category is created.
343
+ Each column must have the same length.
344
+
345
+ Attributes
346
+ ----------
347
+ row_count : int
348
+ The number of rows in the category, i.e. the length of each
349
+ column.
350
+
351
+ Examples
352
+ --------
353
+
354
+ >>> # Add column on creation
355
+ >>> category = BinaryCIFCategory({"fruit": ["apple", "banana"]})
356
+ >>> # Add column later on
357
+ >>> category["taste"] = ["delicious", "tasty"]
358
+ >>> # Add column the formal way
359
+ >>> category["color"] = BinaryCIFColumn(BinaryCIFData(["red", "yellow"]))
360
+ >>> # Access a column
361
+ >>> print(category["fruit"].as_array())
362
+ ['apple' 'banana']
363
+ """
364
+
365
+ def __init__(self, columns=None, row_count=None):
366
+ if columns is None:
367
+ columns = {}
368
+ else:
369
+ columns = {
370
+ key: BinaryCIFColumn(col)
371
+ if not isinstance(col, (BinaryCIFColumn, dict))
372
+ else col
373
+ for key, col in columns.items()
374
+ }
375
+
376
+ self._row_count = row_count
377
+ super().__init__(columns)
378
+
379
+ @property
380
+ def row_count(self):
381
+ if self._row_count is None:
382
+ # Row count is not determined yet
383
+ # -> check the length of the first column
384
+ self._row_count = len(next(iter(self.values())))
385
+ return self._row_count
386
+
387
+ @staticmethod
388
+ def subcomponent_class():
389
+ return BinaryCIFColumn
390
+
391
+ @staticmethod
392
+ def supercomponent_class():
393
+ return BinaryCIFBlock
394
+
395
+ @staticmethod
396
+ def deserialize(content):
397
+ return BinaryCIFCategory(
398
+ BinaryCIFCategory._deserialize_elements(content["columns"], "name"),
399
+ content["rowCount"],
400
+ )
401
+
402
def serialize(self):
    """
    Convert this category into its serialized dictionary form.

    Returns
    -------
    content : dict
        A dictionary with the entries ``"rowCount"`` and
        ``"columns"``.

    Raises
    ------
    SerializationError
        If the category has no column or if the columns do not all
        have the same length.
    """
    if not len(self):
        raise SerializationError("At least one column is required")

    for name, col in self.items():
        length = len(col)
        if self._row_count is None:
            # No row count known yet -> the first column defines it
            self._row_count = length
            continue
        if length == self._row_count:
            continue
        raise SerializationError(
            f"All columns must have the same length, "
            f"but '{name}' has length {length}, "
            f"while the first column has row_count {self._row_count}"
        )

    return {
        "rowCount": self.row_count,
        "columns": self._serialize_elements("name"),
    }
420
+
421
def __setitem__(self, key, element):
    """
    Store a column under the name `key`.

    An `element` that is neither a :class:`BinaryCIFColumn` nor a
    ``dict`` is wrapped into a :class:`BinaryCIFColumn` first.
    """
    is_ready = isinstance(element, (BinaryCIFColumn, dict))
    column = element if is_ready else BinaryCIFColumn(element)
    super().__setitem__(key, column)
425
+
426
+
427
class BinaryCIFBlock(_HierarchicalContainer):
    """
    This class represents a block in a :class:`BinaryCIFFile`.

    Categories can be accessed and modified like a dictionary.
    The values are :class:`BinaryCIFCategory` objects.

    Parameters
    ----------
    categories : dict, optional
        The categories of the block.
        The keys are the category names and the values are the
        :class:`BinaryCIFCategory` objects.
        By default, an empty block is created.

    Notes
    -----
    The category names do not include the leading underscore character.
    This character is automatically added when the category is
    serialized.

    Examples
    --------

    >>> # Add category on creation
    >>> block = BinaryCIFBlock({"foo": BinaryCIFCategory({"some_column": 1})})
    >>> # Add category later on
    >>> block["bar"] = BinaryCIFCategory({"another_column": [2, 3]})
    >>> # Access a column
    >>> print(block["bar"]["another_column"].as_array())
    [2 3]
    """

    def __init__(self, categories=None):
        if categories is None:
            categories = {}
        super().__init__(
            # Actual bcif files use leading '_' as category names
            {"_" + name: category for name, category in categories.items()}
        )

    @staticmethod
    def _strip_prefix(name):
        """
        Remove the single leading ``'_'`` that this class prepends to
        category names.

        Only one character is removed (unlike ``str.lstrip("_")``), so
        that a category name that itself starts with an underscore
        survives the round trip.
        Names without a leading underscore are returned unchanged.
        """
        return name[1:] if name.startswith("_") else name

    @staticmethod
    def subcomponent_class():
        return BinaryCIFCategory

    @staticmethod
    def supercomponent_class():
        return BinaryCIFFile

    @staticmethod
    def deserialize(content):
        """
        Create a :class:`BinaryCIFBlock` from its serialized form.

        Parameters
        ----------
        content : dict
            The serialized block.
            Its ``"categories"`` entry is read.

        Returns
        -------
        block : BinaryCIFBlock
            The block built from `content`.
        """
        return BinaryCIFBlock(
            {
                # The superclass uses leading '_' in category names,
                # but on the level of this class, the leading '_' is omitted
                BinaryCIFBlock._strip_prefix(name): category
                for name, category in BinaryCIFBlock._deserialize_elements(
                    content["categories"], "name"
                ).items()
            }
        )

    def serialize(self):
        """
        Convert this block into its serialized dictionary form.

        Returns
        -------
        content : dict
            A dictionary with the single entry ``"categories"``.
        """
        return {"categories": self._serialize_elements("name")}

    def __getitem__(self, key):
        try:
            return super().__getitem__("_" + key)
        except KeyError:
            # Report the key as given by the caller, i.e. without '_'
            raise KeyError(key)

    def __setitem__(self, key, element):
        try:
            return super().__setitem__("_" + key, element)
        except KeyError:
            raise KeyError(key)

    def __delitem__(self, key):
        try:
            # Delegate to the deletion method of the superclass:
            # calling '__setitem__()' here would fail with a 'TypeError'
            # due to the missing value and would never remove the category
            return super().__delitem__("_" + key)
        except KeyError:
            raise KeyError(key)

    def __iter__(self):
        return (BinaryCIFBlock._strip_prefix(key) for key in super().__iter__())

    def __contains__(self, key):
        return super().__contains__("_" + key)
515
+
516
+
517
class BinaryCIFFile(File, _HierarchicalContainer):
    """
    This class represents a *BinaryCIF* file.

    The blocks of the file can be accessed and modified like a
    dictionary.
    The values are :class:`BinaryCIFBlock` objects.

    To parse or write a structure from/to a :class:`BinaryCIFFile`
    object, use the high-level :func:`get_structure()` or
    :func:`set_structure()` function respectively.

    Parameters
    ----------
    blocks : dict, optional
        The blocks of the file.
        It is passed on unchanged to the container base class.

    Notes
    -----
    The content of *BinaryCIF* files is lazily deserialized:
    Only when a column is accessed, the time consuming data decoding
    is performed.
    The decoded :class:`BinaryCIFBlock`/:class:`BinaryCIFCategory`
    objects are cached for subsequent accesses.

    Attributes
    ----------
    block : BinaryCIFBlock
        The sole block of the file.
        If the file contains no block or multiple blocks, an exception
        is raised.

    Examples
    --------
    Read a *BinaryCIF* file and access its content:

    >>> import os.path
    >>> file = BinaryCIFFile.read(os.path.join(path_to_structures, "1l2y.bcif"))
    >>> print(file["1L2Y"]["citation_author"]["name"].as_array())
    ['Neidigh, J.W.' 'Fesinmeyer, R.M.' 'Andersen, N.H.']
    >>> # Access the only block in the file
    >>> print(file.block["entity"]["pdbx_description"].as_item())
    TC5b

    Create a *BinaryCIF* file and write it to disk:

    >>> category = BinaryCIFCategory({"some_column": "some_value"})
    >>> block = BinaryCIFBlock({"some_category": category})
    >>> file = BinaryCIFFile({"some_block": block})
    >>> file.write(os.path.join(path_to_directory, "some_file.bcif"))
    """

    def __init__(self, blocks=None):
        File.__init__(self)
        _HierarchicalContainer.__init__(self, blocks)

    @property
    def block(self):
        block_count = len(self)
        # Distinguish the two failure modes, so that an empty file is
        # not reported as having too many blocks
        if block_count == 0:
            raise ValueError("There are no blocks in the file")
        if block_count > 1:
            raise ValueError("There are multiple blocks in the file")
        return self[next(iter(self))]

    @staticmethod
    def subcomponent_class():
        return BinaryCIFBlock

    @staticmethod
    def supercomponent_class():
        return None

    @staticmethod
    def deserialize(content):
        """
        Create a :class:`BinaryCIFFile` from its serialized form.

        Parameters
        ----------
        content : dict
            The serialized file.
            Its ``"dataBlocks"`` entry is read.

        Returns
        -------
        file_object : BinaryCIFFile
            The file built from `content`.
        """
        return BinaryCIFFile(
            BinaryCIFFile._deserialize_elements(content["dataBlocks"], "header")
        )

    def serialize(self):
        """
        Convert this file into its serialized dictionary form.

        Returns
        -------
        content : dict
            A dictionary with the single entry ``"dataBlocks"``.
        """
        return {"dataBlocks": self._serialize_elements("header")}

    @classmethod
    def read(cls, file):
        """
        Read a *BinaryCIF* file.

        Parameters
        ----------
        file : file-like object or str
            The file to be read.
            Alternatively a file path can be supplied.

        Returns
        -------
        file_object : BinaryCIFFile
            The parsed file.

        Raises
        ------
        TypeError
            If a file object is given that is not opened in binary mode.
        """
        # File name
        if is_open_compatible(file):
            with open(file, "rb") as f:
                packed_bytes = f.read()
        # File object
        else:
            if not is_binary(file):
                raise TypeError("A file opened in 'binary' mode is required")
            packed_bytes = file.read()
        # The entire content is in memory at this point,
        # so the file does not need to stay open for unpacking
        return BinaryCIFFile.deserialize(
            msgpack.unpackb(packed_bytes, use_list=True, raw=False)
        )

    def write(self, file):
        """
        Write contents into a *BinaryCIF* file.

        Parameters
        ----------
        file : file-like object or str
            The file to be written to.
            Alternatively, a file path can be supplied.

        Raises
        ------
        TypeError
            If a file object is given that is not opened in binary mode.
        """
        serialized_content = self.serialize()
        serialized_content["encoder"] = "biotite"
        serialized_content["version"] = "0.3.0"
        # NumPy scalars are converted via '_encode_numpy()',
        # as Msgpack cannot pack them on its own
        packed_bytes = msgpack.packb(
            serialized_content, use_bin_type=True, default=_encode_numpy
        )
        if is_open_compatible(file):
            with open(file, "wb") as f:
                f.write(packed_bytes)
        else:
            if not is_binary(file):
                raise TypeError("A file opened in 'binary' mode is required")
            file.write(packed_bytes)
643
+
644
+
645
def _encode_numpy(item):
    """
    Turn a NumPy scalar into the corresponding native Python object.

    *Msgpack* cannot handle NumPy types (e.g. float32) on its own,
    hence this function is given to the Msgpack packer as value for
    the `default` parameter.

    Raises
    ------
    TypeError
        If `item` is not a NumPy scalar.
    """
    if not isinstance(item, np.generic):
        raise TypeError(f"can not serialize '{type(item).__name__}' object")
    return item.item()