biotite 0.41.1__cp312-cp312-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-312-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,501 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.mmtf"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["get_model_count", "get_structure"]
8
+
9
+ cimport cython
10
+ cimport numpy as np
11
+
12
+ import numpy as np
13
+ from .file import MMTFFile
14
+ from ...atoms import Atom, AtomArray, AtomArrayStack
15
+ from ...bonds import BondList
16
+ from ...error import BadStructureError
17
+ from ...filter import filter_first_altloc, filter_highest_occupancy_altloc
18
+ from ...residues import get_residue_starts
19
+ from ...box import vectors_from_unitcell
20
+ from ....file import InvalidFileError
21
+
22
+ ctypedef np.int8_t int8
23
+ ctypedef np.int16_t int16
24
+ ctypedef np.int32_t int32
25
+ ctypedef np.uint8_t uint8
26
+ ctypedef np.uint16_t uint16
27
+ ctypedef np.uint32_t uint32
28
+ ctypedef np.uint64_t uint64
29
+ ctypedef np.float32_t float32
30
+
31
+
32
def get_model_count(file):
    """
    Look up how many models an MMTF file holds.

    Parameters
    ----------
    file : MMTFFile
        The file object.

    Returns
    -------
    model_count : int
        The value stored in the ``"numModels"`` field of the file.
    """
    model_count = file["numModels"]
    return model_count
47
+
48
+
49
def get_structure(file, model=None, altloc="first",
                  extra_fields=[], include_bonds=False):
    """
    get_structure(file, model=None, altloc="first", extra_fields=[],
                  include_bonds=False)

    Get an :class:`AtomArray` or :class:`AtomArrayStack` from the MMTF file.

    Parameters
    ----------
    file : MMTFFile
        The file object.
    model : int, optional
        If this parameter is given, the function will return an
        :class:`AtomArray` from the atoms corresponding to the given
        model number (starting at 1).
        Negative values are used to index models starting from the last
        model instead of the first model.
        If this parameter is omitted, an :class:`AtomArrayStack`
        containing all models will be returned, even if the structure
        contains only one model.
    altloc : {'first', 'occupancy', 'all'}
        This parameter defines how *altloc* IDs are handled:
            - ``'first'`` - Use atoms that have the first *altloc* ID
              appearing in a residue.
            - ``'occupancy'`` - Use atoms that have the *altloc* ID
              with the highest occupancy for a residue.
              If the file contains no occupancy information,
              ``'first'`` is used instead.
            - ``'all'`` - Use all atoms.
              Note that this leads to duplicate atoms.
              When this option is chosen, the ``altloc_id`` annotation
              array is added to the returned structure.
    extra_fields : list of str, optional
        The strings in the list are optional annotation categories
        that should be stored in the output array or stack.
        These are valid values:
        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` will be created for the
        resulting :class:`AtomArray` containing the bond information
        from the file.

    Returns
    -------
    array : AtomArray or AtomArrayStack
        The return type depends on the `model` parameter.

    Raises
    ------
    InvalidFileError
        If `model` is omitted and the models in the file do not all
        have the same number of atoms.
    ValueError
        If `model` is 0 or refers to a model that does not exist,
        or if `altloc` is not one of the supported options
        (the latter is only checked if the file contains *altloc*
        information).

    Examples
    --------

    >>> import os.path
    >>> file = MMTFFile.read(os.path.join(path_to_structures, "1l2y.mmtf"))
    >>> array = get_structure(file, model=1)
    >>> print(array.array_length())
    304
    >>> stack = get_structure(file)
    >>> print(stack.stack_depth(), stack.array_length())
    38 304
    """
    cdef int i


    # Obtain (and potentially decode) required arrays/values from file
    cdef int atom_count = file["numAtoms"]
    cdef int model_count = file["numModels"]
    cdef np.ndarray chain_names = file["chainNameList"]
    cdef int32[:] chains_per_model = np.array(file["chainsPerModel"], np.int32)
    cdef int32[:] res_per_chain = np.array(file["groupsPerChain"], np.int32)
    cdef int32[:] res_type_i = file["groupTypeList"]
    cdef np.ndarray index_list = file["groupIdList"]
    cdef int32[:] res_ids = index_list
    cdef np.ndarray x_coord = file["xCoordList"]
    cdef np.ndarray y_coord = file["yCoordList"]
    cdef np.ndarray z_coord = file["zCoordList"]
    # Optional field: 'None' if the file has no occupancy information
    cdef np.ndarray occupancy = file.get("occupancyList")
    cdef np.ndarray b_factor
    if "b_factor" in extra_fields:
        b_factor = file["bFactorList"]
    cdef np.ndarray atom_ids
    if "atom_id" in extra_fields:
        atom_ids = file["atomIdList"]
    cdef np.ndarray all_altloc_ids
    cdef np.ndarray inscode
    # Optional fields: 'None' if absent from the file
    all_altloc_ids = file.get("altLocList")
    inscode = file.get("insCodeList")


    # Create arrays from 'groupList' list of dictionaries
    cdef list group_list = file["groupList"]
    cdef list non_hetero_list = ["L-PEPTIDE LINKING", "PEPTIDE LINKING",
                                 "DNA LINKING", "RNA LINKING"]
    # Determine per-residue-count and maximum count
    # of atoms in each residue
    cdef np.ndarray atoms_per_res = np.zeros(len(group_list), dtype=np.int32)
    for i in range(len(group_list)):
        atoms_per_res[i] = len(group_list[i]["atomNameList"])
    cdef int32 max_atoms_per_res = np.max(atoms_per_res)
    # Create the arrays
    # The per-atom arrays are padded to the largest residue type,
    # only the first 'atoms_per_res[i]' entries of row 'i' are filled
    cdef np.ndarray res_names = np.zeros(len(group_list), dtype="U5")
    cdef np.ndarray hetero_res = np.zeros(len(group_list), dtype=bool)
    cdef np.ndarray atom_names = np.zeros((len(group_list), max_atoms_per_res),
                                          dtype="U6")
    cdef np.ndarray elements = np.zeros((len(group_list), max_atoms_per_res),
                                        dtype="U2")
    cdef np.ndarray charges = np.zeros((len(group_list), max_atoms_per_res),
                                       dtype=np.int32)
    # Fill the arrays
    for i in range(len(group_list)):
        residue = group_list[i]
        res_names[i] = residue["groupName"]
        hetero_res[i] = (residue["chemCompType"] not in non_hetero_list)
        atom_names[i, :atoms_per_res[i]] = residue["atomNameList"]
        elements[i, :atoms_per_res[i]] = residue["elementList"]
        charges[i, :atoms_per_res[i]] = residue["formalChargeList"]


    # Create the atom array (stack)
    cdef int depth, length
    cdef int start_i, stop_i
    cdef bint extra_charge
    cdef np.ndarray altloc_ids


    if model is None:
        lengths = _get_model_lengths(res_type_i, chains_per_model,
                                     res_per_chain, atoms_per_res)
        # Check if each model has the same amount of atoms
        # If not, raise exception
        if (lengths != lengths[0]).any():
            raise InvalidFileError("The models in the file have unequal "
                                   "amount of atoms, give an explicit "
                                   "model instead")
        length = lengths[0]

        depth = model_count

        # Per-atom annotations of a stack are taken from the first model
        start_i = 0
        stop_i = length

        array = AtomArrayStack(depth, length)
        array.coord = np.stack(
            [x_coord,
             y_coord,
             z_coord],
            axis=1
        ).reshape(depth, length, 3)

        # Create altloc array for the final filtering
        if all_altloc_ids is not None:
            altloc_ids = all_altloc_ids[:length]
        else:
            altloc_ids = None

        extra_charge = False
        if "ins_code" in extra_fields:
            array.add_annotation("ins_code", "U1")
        if "charge" in extra_fields:
            extra_charge = True
            array.add_annotation("charge", int)
        if "atom_id" in extra_fields:
            array.set_annotation("atom_id", atom_ids[:length])
        if "b_factor" in extra_fields:
            array.set_annotation("b_factor", b_factor[:length])
        if "occupancy" in extra_fields:
            array.set_annotation("occupancy", occupancy[:length])

        _fill_annotations(1, array, extra_charge,
                          chain_names, chains_per_model, res_per_chain,
                          res_type_i, res_ids, inscode, atoms_per_res,
                          res_names, hetero_res, atom_names, elements, charges)

        if include_bonds:
            array.bonds = _create_bond_list(
                1, file["bondAtomList"], file["bondOrderList"],
                0, length, file["numAtoms"], group_list, res_type_i,
                atoms_per_res, res_per_chain, chains_per_model
            )


    else:
        lengths = _get_model_lengths(res_type_i, chains_per_model,
                                     res_per_chain, atoms_per_res)
        if model == 0:
            raise ValueError("The model index must not be 0")
        # Negative models mean model index starting from last model
        requested_model = model
        if model < 0:
            model = len(lengths) + model + 1
        # 'model < 1' catches negative indices beyond the first model,
        # which would otherwise silently wrap around in the indexing below
        if model < 1 or model > len(lengths):
            raise ValueError(
                f"The file has {len(lengths)} models, "
                f"the given model {requested_model} does not exist"
            )

        length = lengths[model-1]
        # Indices to filter coords and some annotations
        # for the specified model
        start_i = np.sum(lengths[:model-1])
        stop_i = start_i + length

        array = AtomArray(length)
        array.coord[:,0] = x_coord[start_i : stop_i]
        array.coord[:,1] = y_coord[start_i : stop_i]
        array.coord[:,2] = z_coord[start_i : stop_i]

        # Create altloc array for the final filtering
        if all_altloc_ids is not None:
            altloc_ids = np.array(all_altloc_ids[start_i : stop_i], dtype="U1")
        else:
            altloc_ids = None

        extra_charge = False
        if "charge" in extra_fields:
            extra_charge = True
            array.add_annotation("charge", int)
        if "atom_id" in extra_fields:
            array.set_annotation("atom_id", atom_ids[start_i : stop_i])
        if "b_factor" in extra_fields:
            array.set_annotation("b_factor", b_factor[start_i : stop_i])
        if "occupancy" in extra_fields:
            array.set_annotation("occupancy", occupancy[start_i : stop_i])

        _fill_annotations(model, array, extra_charge,
                          chain_names, chains_per_model, res_per_chain,
                          res_type_i, res_ids, inscode, atoms_per_res,
                          res_names, hetero_res, atom_names, elements, charges)

        if include_bonds:
            array.bonds = _create_bond_list(
                model, file["bondAtomList"], file["bondOrderList"],
                start_i, stop_i, file["numAtoms"], group_list, res_type_i,
                atoms_per_res, res_per_chain, chains_per_model
            )

    # Get box
    if "unitCell" in file:
        a_len, b_len, c_len, alpha, beta, gamma = file["unitCell"]
        alpha = np.deg2rad(alpha)
        beta  = np.deg2rad(beta)
        gamma = np.deg2rad(gamma)
        box = vectors_from_unitcell(
            a_len, b_len, c_len, alpha, beta, gamma
        )
        if isinstance(array, AtomArrayStack):
            # Every model of the stack gets the same box
            array.box = np.repeat(
                box[np.newaxis, ...], array.stack_depth(), axis=0
            )
        else:
            # AtomArray
            array.box = box


    # Filter altloc IDs and return
    if altloc_ids is None:
        return array
    elif altloc == "occupancy" and occupancy is not None:
        # The occupancies must cover the same atoms as 'altloc_ids',
        # i.e. only those of the selected model
        return array[
            ...,
            filter_highest_occupancy_altloc(
                array, altloc_ids, occupancy[start_i : stop_i]
            )
        ]
    # 'first' is also fallback if file has no occupancy information
    elif altloc in ("first", "occupancy"):
        return array[..., filter_first_altloc(array, altloc_ids)]
    elif altloc == "all":
        array.set_annotation("altloc_id", altloc_ids)
        return array
    else:
        raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
314
+
315
+
316
def _get_model_lengths(int32[:] res_type_i,
                       int32[:] chains_per_model,
                       int32[:] res_per_chain,
                       int32[:] atoms_per_res):
    """
    Count the number of atoms in each model.

    The residues are walked in file order and their atom counts are
    accumulated; the running sum is stored and reset each time the
    chain counter reaches the chain count of the current model.

    Parameters
    ----------
    res_type_i : int32[:]
        For each residue, the index into `atoms_per_res`.
    chains_per_model : int32[:]
        The number of chains in each model.
    res_per_chain : int32[:]
        The number of residues in each chain.
    atoms_per_res : int32[:]
        The number of atoms for each residue type.

    Returns
    -------
    model_lengths : ndarray, dtype=int32
        The accumulated atom count for each entry
        in `chains_per_model`.
    """
    cdef int[:] model_lengths = np.zeros(len(chains_per_model), np.int32)
    cdef int atom_count = 0
    cdef int model_i = 0
    cdef int chain_i = 0
    cdef int res_i
    cdef int res_count_in_chain = 0
    cdef int chain_count_in_model = 0
    # The length of 'res_type_i'
    # is equal to the total number of residues
    for res_i in range(res_type_i.shape[0]):
        # Look up the atom count via the residue type of this residue
        atom_count += atoms_per_res[res_type_i[res_i]]
        res_count_in_chain += 1
        if res_count_in_chain == res_per_chain[chain_i]:
            # Chain is full -> Bump chain index and reset residue count
            res_count_in_chain = 0
            chain_i += 1
            chain_count_in_model += 1
        if chain_count_in_model == chains_per_model[model_i]:
            # Model is full -> Bump model index and reset chain count
            chain_count_in_model = 0
            model_lengths[model_i] = atom_count
            # Restart counting for the next model
            atom_count = 0
            model_i += 1
    # Convert the memoryview back into a NumPy array for the caller
    return np.asarray(model_lengths)
345
+
346
+
347
def _fill_annotations(int model, array,
                      bint extra_charge,
                      np.ndarray chain_names,
                      int32[:] chains_per_model,
                      int32[:] res_per_chain,
                      int32[:] res_type_i,
                      int32[:] res_ids,
                      np.ndarray res_inscodes,
                      np.ndarray atoms_per_res,
                      np.ndarray res_names,
                      np.ndarray hetero_res,
                      np.ndarray atom_names,
                      np.ndarray elements,
                      np.ndarray charges):
    """
    Fill the annotation arrays of `array` in-place with the values
    of a single model.

    Parameters
    ----------
    model : int
        The model to take the annotations from.
        The first model is ``1``.
    array : object
        The atom array (stack), whose ``chain_id``, ``res_id``,
        ``ins_code``, ``res_name``, ``hetero``, ``atom_name``,
        ``element`` (and ``charge``, if `extra_charge` is true)
        arrays are written.
    extra_charge : bool
        If true, the ``charge`` annotation is filled, too.
    chain_names : ndarray
        The chain ID of each chain.
    chains_per_model, res_per_chain : int32 memoryview
        The number of chains in each model and the number of residues
        in each chain, respectively.
    res_type_i : int32 memoryview
        For each residue, the index into the per-residue-type arrays.
    res_ids : int32 memoryview
        The residue ID of each residue.
    res_inscodes : ndarray or None
        The insertion code of each residue.
    atoms_per_res, res_names, hetero_res, atom_names, elements, charges : ndarray
        Per-residue-type data, indexed by the values in `res_type_i`.
    """
    # The annotation arrays of the atom array (stack) to be filled
    cdef np.ndarray chain_id = array.chain_id
    cdef np.ndarray res_id = array.res_id
    cdef np.ndarray ins_code = array.ins_code
    cdef np.ndarray res_name = array.res_name
    cdef np.ndarray hetero = array.hetero
    cdef np.ndarray atom_name = array.atom_name
    cdef np.ndarray element = array.element
    if extra_charge:
        charge = array.charge

    # Positions in the model/chain/residue/atom hierarchy
    cdef int target_model_i = model - 1
    cdef int model_i = 0
    cdef int chain_i = 0
    cdef int res_i
    cdef int atom_i = 0
    cdef int residues_seen = 0
    cdef int chains_seen = 0
    cdef int atom_in_res

    # Values shared by all atoms of the current residue
    cdef current_chain_id
    cdef current_res_name
    cdef current_ins_code
    cdef bint current_hetero
    cdef int current_res_id
    cdef int type_i

    # 'res_type_i' has one element per residue
    for res_i in range(res_type_i.shape[0]):
        if model_i > target_model_i:
            # The requested model is already complete
            # -> nothing left to do
            break

        # Residues of preceding models are only counted
        if model_i == target_model_i:
            current_chain_id = chain_names[chain_i]
            current_res_id = res_ids[res_i]
            # NOTE(review): if 'res_inscodes' is None, the insertion
            # code is never assigned before it is written below
            # -- confirm that this is intended
            if res_inscodes is not None:
                current_ins_code = res_inscodes[res_i]
            type_i = res_type_i[res_i]
            current_res_name = res_names[type_i]
            current_hetero = hetero_res[type_i]

            for atom_in_res in range(atoms_per_res[type_i]):
                chain_id[atom_i] = current_chain_id
                res_id[atom_i] = current_res_id
                ins_code[atom_i] = current_ins_code
                hetero[atom_i] = current_hetero
                res_name[atom_i] = current_res_name
                atom_name[atom_i] = atom_names[type_i][atom_in_res]
                element[atom_i] = elements[type_i][atom_in_res].upper()
                if extra_charge:
                    charge[atom_i] = charges[type_i][atom_in_res]
                atom_i += 1

        residues_seen += 1
        if residues_seen == res_per_chain[chain_i]:
            # Last residue of the chain -> continue with the next chain
            chain_i += 1
            chains_seen += 1
            residues_seen = 0
        if chains_seen == chains_per_model[model_i]:
            # Last chain of the model -> continue with the next model
            model_i += 1
            chains_seen = 0
427
+
428
+
429
def _create_bond_list(int model, np.ndarray bonds, np.ndarray bond_types,
                      int model_start, int model_stop, int atom_count,
                      list group_list, int32[:] res_type_i,
                      int32[:] atoms_per_res,
                      int32[:] res_per_chain, int32[:] chains_per_model):
    """
    Create the bond list for a single model from the intra-residue
    and inter-residue bonds.

    Parameters
    ----------
    model : int
        The model to create the bonds for.
        The first model is ``1``.
    bonds : ndarray
        The atom indices of the inter-residue bonds.
        It is reshaped to ``(len(bond_types), 2)``.
    bond_types : ndarray
        The bond type of each inter-residue bond.
    model_start, model_stop : int
        The atom index range used to slice the inter-residue bonds
        down to the given model.
    atom_count : int
        The atom count the inter-residue bond list is created with.
    group_list : list of dict
        The residue type entries.
        The ``"bondAtomList"`` and ``"bondOrderList"`` values of each
        entry are read.
    res_type_i : int32 memoryview
        For each residue, the index into `group_list` and
        `atoms_per_res`.
    atoms_per_res : int32 memoryview
        The number of atoms for each residue type.
    res_per_chain, chains_per_model : int32 memoryview
        The number of residues in each chain and the number of chains
        in each model, respectively.

    Returns
    -------
    bond_list : BondList
        The merged inter-residue and intra-residue bonds.
    """
    cdef int i = 0

    # Determine per-residue-count and maximum count
    # of bonds in each residue
    cdef int32[:] bonds_per_res = np.zeros(len(group_list), dtype=np.int32)
    for i in range(len(group_list)):
        bonds_per_res[i] = len(group_list[i]["bondOrderList"])
    cdef int32 max_bonds_per_res = np.max(bonds_per_res)

    # Create arrays for intra-residue bonds and bond types
    # Residue types with less bonds than the maximum are zero-padded
    cdef np.ndarray intra_bonds = np.zeros(
        (len(group_list), max_bonds_per_res, 3), dtype=np.uint32
    )
    # Dictionary for groupList entry
    cdef dict residue
    # Fill the array
    for i in range(len(group_list)):
        residue = group_list[i]
        # Convert the flat list of atom indices only once
        bonds_in_residue = np.array(residue["bondAtomList"], dtype=np.uint32)
        intra_bonds[i, :bonds_per_res[i], :2] = \
            bonds_in_residue.reshape((-1, 2))
        intra_bonds[i, :bonds_per_res[i], 2] = residue["bondOrderList"]

    # Unify intra-residue bonds to one BondList
    cdef int model_i = 0
    cdef int chain_i = 0
    cdef int res_i
    cdef int res_count_in_chain = 0
    cdef int chain_count_in_model = 0
    cdef int type_i
    intra_bond_list = BondList(0)
    # The length of 'res_type_i'
    # is equal to the total number of residues
    for res_i in range(res_type_i.shape[0]):
        # Wait for the data of the given model
        if model_i == model-1:
            type_i = res_type_i[res_i]
            # Only the actually existing bonds are taken,
            # the zero-padding is omitted
            bond_list_per_res = BondList(
                atoms_per_res[type_i],
                intra_bonds[type_i, :bonds_per_res[type_i]]
            )
            intra_bond_list += bond_list_per_res

        elif model_i > model-1:
            # The given model has already been parsed
            # -> parsing is finished
            break

        res_count_in_chain += 1
        if res_count_in_chain == res_per_chain[chain_i]:
            # Chain is full -> Bump chain index and reset residue count
            res_count_in_chain = 0
            chain_i += 1
            chain_count_in_model += 1
        if chain_count_in_model == chains_per_model[model_i]:
            # Model is full -> Bump model index and reset chain count
            chain_count_in_model = 0
            model_i += 1

    # Add inter-residue bonds to BondList
    cdef np.ndarray inter_bonds = np.zeros((len(bond_types), 3),
                                           dtype=np.uint32)
    inter_bonds[:,:2] = bonds.reshape((len(bond_types), 2))
    inter_bonds[:,2] = bond_types
    inter_bond_list = BondList(atom_count, inter_bonds)
    # Restrict the inter-residue bonds to the atoms of the given model
    inter_bond_list = inter_bond_list[model_start : model_stop]
    global_bond_list = inter_bond_list.merge(intra_bond_list)
    return global_bond_list
@@ -0,0 +1,152 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.mmtf"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["decode_array"]
8
+
9
+ cimport cython
10
+ cimport numpy as np
11
+
12
+ import numpy as np
13
+
14
+ ctypedef np.int8_t int8
15
+ ctypedef np.int16_t int16
16
+ ctypedef np.int32_t int32
17
+ ctypedef np.uint8_t uint8
18
+ ctypedef np.uint16_t uint16
19
+ ctypedef np.uint32_t uint32
20
+ ctypedef np.uint64_t uint64
21
+ ctypedef np.float32_t float32
22
+
23
+
24
def decode_array(int codec, bytes raw_bytes, int param):
    """
    Decode the raw bytes of an encoded array into a :class:`ndarray`.

    Parameters
    ----------
    codec : int
        The ID of the codec the array is encoded with (``1`` - ``15``).
    raw_bytes : bytes
        The encoded array data.
        Numeric values are read in big-endian byte order.
    param : int
        The codec parameter:
        the string length for codec ``5`` and the divisor for the
        integer encoded codecs (``9`` - ``13``).
        It is ignored by the other codecs.

    Returns
    -------
    array : ndarray
        The decoded array.

    Raises
    ------
    ValueError
        If `codec` is not a known codec ID.
    """
    cdef np.ndarray array
    # Pass-through: 32-bit floating-point number array
    if codec == 1:
        array = np.frombuffer(raw_bytes, dtype=">f4").astype(np.float32)
        return array
    # Pass-through: 8-bit signed integer array
    elif codec == 2:
        array = np.frombuffer(raw_bytes, dtype=">i1").astype(np.int8)
        return array
    # Pass-through: 16-bit signed integer array
    elif codec == 3:
        array = np.frombuffer(raw_bytes, dtype=">i2").astype(np.int16)
        return array
    # Pass-through: 32-bit signed integer array
    elif codec == 4:
        array = np.frombuffer(raw_bytes, dtype=">i4").astype(np.int32)
        return array
    # UTF8/ASCII fixed-length string array
    elif codec == 5:
        array = np.frombuffer(raw_bytes, np.dtype("S" + str(param)))
        return array.astype(np.dtype("U" + str(param)))
    # Run-length encoded character array
    elif codec == 6:
        array = np.frombuffer(raw_bytes, dtype=">i4").astype(np.int32)
        # Reinterpret each decoded 32-bit integer as a single character
        return np.frombuffer(_decode_run_length(array), dtype="U1")
    # Run-length encoded 32-bit signed integer array
    elif codec == 7:
        array = np.frombuffer(raw_bytes, dtype=">i4").astype(np.int32)
        return _decode_run_length(array)
    # Delta & run-length encoded 32-bit signed integer array
    elif codec == 8:
        array = np.frombuffer(raw_bytes, dtype=">i4").astype(np.int32)
        return _decode_delta(
            _decode_run_length(array))
    # Integer & run-length encoded 32-bit floating-point number array
    elif codec == 9:
        array = np.frombuffer(raw_bytes, dtype=">i4").astype(np.int32)
        return _decode_integer(param,
            _decode_run_length(array))
    # Integer & delta encoded
    # & two-byte-packed 32-bit floating-point number array
    elif codec == 10:
        array = np.frombuffer(raw_bytes, dtype=">i2").astype(np.int16)
        return _decode_integer(param,
            _decode_delta(
                _decode_packed(array)))
    # Integer encoded 32-bit floating-point number array
    elif codec == 11:
        array = np.frombuffer(raw_bytes, dtype=">i2").astype(np.int16)
        return _decode_integer(param, array)
    # Integer & two-byte-packed 32-bit floating-point number array
    elif codec == 12:
        array = np.frombuffer(raw_bytes, dtype=">i2").astype(np.int16)
        return _decode_integer(param,
            _decode_packed(array))
    # Integer & one-byte-packed 32-bit floating-point number array
    elif codec == 13:
        array = np.frombuffer(raw_bytes, dtype=">i1").astype(np.int8)
        return _decode_integer(param,
            _decode_packed(array))
    # Two-byte-packed 32-bit signed integer array
    elif codec == 14:
        array = np.frombuffer(raw_bytes, dtype=">i2").astype(np.int16)
        return _decode_packed(array)
    # One-byte-packed 32-bit signed integer array
    elif codec == 15:
        array = np.frombuffer(raw_bytes, dtype=">i1").astype(np.int8)
        return _decode_packed(array)
    else:
        # The f-string is required to report the actual codec ID
        raise ValueError(f"Unknown codec with ID {codec}")
95
+
96
+
97
def _decode_delta(np.ndarray array):
    """
    Decode a delta encoded array.

    Each element of the returned array is the sum of all input
    elements up to and including that position, computed as
    32-bit signed integers.
    """
    decoded = np.cumsum(array, dtype=np.int32)
    return decoded
99
+
100
+
101
def _decode_run_length(int32[:] array):
    """
    Decode a run-length encoded array.

    The input is read as consecutive ``(value, repeat)`` pairs:
    the elements at even positions are the values and the elements
    at odd positions give how often the preceding value is repeated.

    Returns
    -------
    decoded : ndarray, dtype=int32
        The array with each value repeated according to its run
        length.
    """
    cdef int total_length = 0
    cdef int in_i, out_i
    cdef int run_value, run_length
    cdef int32[:] decoded
    # The output size is the sum of all run lengths,
    # which are located at the odd positions
    for in_i in range(1, array.shape[0], 2):
        total_length += array[in_i]
    decoded = np.zeros(total_length, dtype=np.int32)
    # Write one run after the other into the output
    out_i = 0
    for in_i in range(0, array.shape[0], 2):
        run_value = array[in_i]
        run_length = array[in_i + 1]
        decoded[out_i : out_i + run_length] = run_value
        out_i += run_length
    return np.asarray(decoded)
117
+
118
+
119
ctypedef fused PackedType:
    int8
    int16
def _decode_packed(PackedType[:] array):
    """
    Decode a one-byte-packed or two-byte-packed integer array.

    Consecutive input elements are summed up as long as they are
    equal to the minimum or maximum value of the input type.
    The first element that is not equal to one of these limits
    completes the sum, which becomes one element of the output.
    A trailing run of limit values without a completing element
    does not appear in the output.

    Returns
    -------
    unpacked : ndarray, dtype=int32
        The unpacked values.
    """
    cdef int lower_limit, upper_limit
    cdef int in_i
    cdef int out_i = 0
    cdef int element
    cdef int running_sum = 0
    # The output has at most as many elements as the input
    # (which is the case, if no element is equal to a type limit)
    # -> allocate for this case and trim afterwards
    cdef int32[:] unpacked = np.zeros(array.shape[0], dtype=np.int32)

    if PackedType is int8:
        limits = np.iinfo(np.int8)
    else:
        limits = np.iinfo(np.int16)
    lower_limit = limits.min
    upper_limit = limits.max

    for in_i in range(array.shape[0]):
        element = array[in_i]
        running_sum += element
        if element != upper_limit and element != lower_limit:
            # Value is complete -> store it and start the next one
            unpacked[out_i] = running_sum
            running_sum = 0
            out_i += 1
    # Omit the unused part of the output
    return np.asarray(unpacked[:out_i])
149
+
150
+
151
def _decode_integer(int divisor, np.ndarray array):
    """
    Decode an integer encoded floating-point number array.

    Each element of `array` is divided by `divisor`.
    The result is a 32-bit floating-point number array.
    """
    decoded = np.divide(array, divisor, dtype=np.float32)
    return decoded