biotite 1.5.0__cp314-cp314-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-314-darwin.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-314-darwin.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-314-darwin.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-314-darwin.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-314-darwin.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-314-darwin.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-314-darwin.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-314-darwin.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-314-darwin.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-314-darwin.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-314-darwin.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-314-darwin.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-314-darwin.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-314-darwin.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-314-darwin.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-314-darwin.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-314-darwin.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-314-darwin.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-314-darwin.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-314-darwin.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-314-darwin.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,1091 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.pdbx"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["CIFFile", "CIFBlock", "CIFCategory", "CIFColumn", "CIFData"]
8
+
9
+ import itertools
10
+ from collections.abc import MutableMapping, Sequence
11
+ import numpy as np
12
+ from biotite.file import (
13
+ DeserializationError,
14
+ File,
15
+ SerializationError,
16
+ is_open_compatible,
17
+ is_text,
18
+ )
19
+ from biotite.structure.io.pdbx.component import MaskValue, _Component
20
+
21
+ UNICODE_CHAR_SIZE = 4
22
+
23
+
24
+ # Small class without much functionality
25
+ # It exists merely for consistency with BinaryCIFFile
26
class CIFData:
    """
    Container for the raw values of a :class:`CIFColumn`.

    Parameters
    ----------
    array : array_like or int or float or str
        The values to be stored.
        A single item is converted into an array.
    dtype : dtype-like, optional
        If given, the stored array is converted to this *dtype*.

    Attributes
    ----------
    array : ndarray
        The stored data array.

    Notes
    -----
    When a :class:`CIFFile` is written, the data type is automatically
    converted to string.
    The other way around, when a :class:`CIFFile` is read, the data type
    is always a string type.

    Examples
    --------

    >>> data = CIFData([1, 2, 3])
    >>> print(data.array)
    [1 2 3]
    >>> print(len(data))
    3
    >>> # A single item is converted into an array
    >>> data = CIFData("apple")
    >>> print(data.array)
    ['apple']
    """

    def __init__(self, array, dtype=None):
        values = _arrayfy(array)
        # Arbitrary Python objects cannot be stored
        if np.issubdtype(values.dtype, np.object_):
            raise ValueError("Object arrays are not supported")
        self._array = values if dtype is None else values.astype(dtype)

    @property
    def array(self):
        """The stored :class:`ndarray`."""
        return self._array

    @staticmethod
    def subcomponent_class():
        """Return ``None``: there is no component level below the data."""
        return None

    @staticmethod
    def supercomponent_class():
        """Return :class:`CIFColumn`, the component level above the data."""
        return CIFColumn

    def __len__(self):
        return len(self._array)

    def __eq__(self, other):
        # Only objects of the same class can be equal
        same_class = isinstance(other, type(self))
        return same_class and np.array_equal(self._array, other._array)
90
+
91
+
92
class CIFColumn:
    """
    This class represents a single column in a :class:`CIFCategory`.

    Parameters
    ----------
    data : CIFData or array_like or int or float or str
        The data to be stored.
        If no :class:`CIFData` is given, the passed argument is
        coerced into such an object.
    mask : CIFData or array_like, dtype=int or int
        The mask to be stored.
        If given, the mask indicates whether the `data` is
        inapplicable (``.``) or missing (``?``) in some rows.
        The data presence is indicated by values from the
        :class:`MaskValue` enum.
        If no :class:`CIFData` is given, the passed argument is
        coerced into such an object.
        By default, the mask is derived from the ``'.'`` and ``'?'``
        entries in `data`; if there are none, no mask is created.

    Attributes
    ----------
    data : CIFData
        The stored data.
    mask : CIFData
        The mask that indicates whether certain data elements are
        inapplicable or missing.
        If no mask is present, this attribute is ``None``.

    Examples
    --------

    >>> print(CIFColumn([1, 2, 3]).as_array())
    ['1' '2' '3']
    >>> mask = [MaskValue.PRESENT, MaskValue.INAPPLICABLE, MaskValue.MISSING]
    >>> print(CIFColumn([1, 2, 3], mask).as_array())
    ['1' '.' '?']
    >>> print(CIFColumn([1]).as_item())
    1
    >>> print(CIFColumn([1], mask=[MaskValue.MISSING]).as_item())
    ?
    """

    def __init__(self, data, mask=None):
        if not isinstance(data, CIFData):
            data = CIFData(data, str)
        if mask is None:
            # Derive the mask from the placeholder characters in the data
            mask = np.full(len(data), MaskValue.PRESENT, dtype=np.uint8)
            mask[data.array == "."] = MaskValue.INAPPLICABLE
            mask[data.array == "?"] = MaskValue.MISSING
            if np.all(mask == MaskValue.PRESENT):
                # No mask required
                mask = None
            else:
                mask = CIFData(mask)
        else:
            if not isinstance(mask, CIFData):
                mask = CIFData(mask, np.uint8)
            if len(mask) != len(data):
                raise IndexError(
                    f"Data has length {len(data)}, but mask has length {len(mask)}"
                )
        self._data = data
        self._mask = mask

    @property
    def data(self):
        return self._data

    @property
    def mask(self):
        return self._mask

    @staticmethod
    def subcomponent_class():
        return CIFData

    @staticmethod
    def supercomponent_class():
        return CIFCategory

    def as_item(self):
        """
        Get the only item in the data of this column.

        If the data is masked as inapplicable or missing, ``'.'`` or
        ``'?'`` is returned, respectively.
        If the data contains more than one item, an exception is raised.

        Returns
        -------
        item : str
            The item in the data.
            If the column has no mask, the item is returned as stored,
            i.e. without string conversion.
        """
        if self._mask is None:
            return self._data.array.item()
        mask = self._mask.array.item()
        if mask == MaskValue.PRESENT:
            item = self._data.array.item()
            # Limit float precision to 3 decimals
            if isinstance(item, float):
                return f"{item:.3f}"
            else:
                return str(item)
        elif mask == MaskValue.INAPPLICABLE:
            return "."
        elif mask == MaskValue.MISSING:
            return "?"

    def as_array(self, dtype=str, masked_value=None):
        """
        Get the data of this column as an :class:`ndarray`.

        This is a shortcut to get ``CIFColumn.data.array``.
        Furthermore, the mask is applied to the data.

        Parameters
        ----------
        dtype : dtype-like, optional
            The data type the array should be converted to.
            By default, a string type is used.
        masked_value : str, optional
            The value that should be used for masked elements, i.e.
            ``MaskValue.INAPPLICABLE`` or ``MaskValue.MISSING``.
            By default, masked elements are converted to ``'.'`` or
            ``'?'`` depending on the :class:`MaskValue`, if `dtype` is
            a string type.
            For other types, masked elements are set to zero by default.

        Returns
        -------
        array : ndarray
            The column data as array.
        """
        if self._mask is None:
            return self._data.array.astype(dtype, copy=False)

        elif np.issubdtype(dtype, np.str_):
            array = self._as_str_array(dtype)
            if masked_value is None:
                array[self._mask.array == MaskValue.INAPPLICABLE] = "."
                array[self._mask.array == MaskValue.MISSING] = "?"
            else:
                array[self._mask.array == MaskValue.INAPPLICABLE] = masked_value
                array[self._mask.array == MaskValue.MISSING] = masked_value
            return array

        else:
            # Array needs to be converted, but masked values are
            # not necessarily convertible
            # (e.g. '.' cannot be converted to int)
            if masked_value is None:
                array = np.zeros(len(self._data), dtype=dtype)
            else:
                array = np.full(len(self._data), masked_value, dtype=dtype)

            present_mask = self._mask.array == MaskValue.PRESENT
            array[present_mask] = self._data.array[present_mask].astype(dtype)
            return array

    def _as_str_array(self, dtype):
        """
        Get a new array containing the data converted to the string
        type `dtype`.

        Floating point values are limited to 3 decimals.
        The result never shares memory with the stored data, so the
        caller may overwrite elements with mask characters.
        """
        values = self._data.array
        if np.issubdtype(values.dtype, np.floating):
            return np.array([f"{e:.3f}" for e in values], dtype=dtype)
        # Copy, as otherwise original data would be overwritten
        # with mask values
        return values.astype(dtype, copy=True)

    def __len__(self):
        return len(self._data)

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        if self._data != other._data:
            return False
        if self._mask != other._mask:
            return False
        return True
267
+
268
+
269
+ class CIFCategory(_Component, MutableMapping):
270
+ """
271
+ This class represents a category in a :class:`CIFBlock`.
272
+
273
+ Columns can be accessed and modified like a dictionary.
274
+ The values are :class:`CIFColumn` objects.
275
+
276
+ Parameters
277
+ ----------
278
+ columns : dict, optional
279
+ The columns of the category.
280
+ The keys are the column names and the values are the
281
+ :class:`CIFColumn` objects (or objects that can be coerced into
282
+ a :class:`CIFColumn`).
283
+ By default, an empty category is created.
284
+ Each column must have the same length.
285
+ name : str, optional
286
+ The name of the category.
287
+ This is only used for serialization and is automatically set,
288
+ when the :class:`CIFCategory` is added to a :class:`CIFBlock`.
289
+ It only needs to be set manually, when the category is directly
290
+ serialized.
291
+
292
+ Attributes
293
+ ----------
294
+ name : str
295
+ The name of the category.
296
+ row_count : int
297
+ The number of rows in the category, i.e. the length of each
298
+ column.
299
+
300
+ Notes
301
+ -----
302
+ When a column containing strings with line breaks are added, these
303
+ strings are written as multiline strings to the CIF file.
304
+
305
+ Examples
306
+ --------
307
+
308
+ >>> # Add column on creation
309
+ >>> category = CIFCategory({"fruit": ["apple", "banana"]}, name="fruits")
310
+ >>> # Add column later on
311
+ >>> category["taste"] = ["delicious", "tasty"]
312
+ >>> # Add column the formal way
313
+ >>> category["color"] = CIFColumn(CIFData(["red", "yellow"]))
314
+ >>> # Access a column
315
+ >>> print(category["fruit"].as_array())
316
+ ['apple' 'banana']
317
+ >>> print(category.serialize())
318
+ loop_
319
+ _fruits.fruit
320
+ _fruits.taste
321
+ _fruits.color
322
+ apple delicious red
323
+ banana tasty yellow
324
+ """
325
+
326
+ def __init__(self, columns=None, name=None):
327
+ self._name = name
328
+ if columns is None:
329
+ columns = {}
330
+ else:
331
+ columns = {
332
+ key: CIFColumn(col) if not isinstance(col, CIFColumn) else col
333
+ for key, col in columns.items()
334
+ }
335
+
336
+ self._row_count = None
337
+ self._columns = columns
338
+
339
@property
def name(self):
    """The name of the category, as given on creation or set later."""
    return self._name

@name.setter
def name(self, value):
    self._name = value
346
+
347
@property
def row_count(self):
    """
    The number of rows in the category.

    On first access the value is taken from the length of the first
    column and is stored for later accesses.
    """
    if self._row_count is not None:
        return self._row_count
    # Row count is not determined yet
    # -> check the length of the first column
    first_column = next(iter(self.values()))
    self._row_count = len(first_column)
    return self._row_count
354
+
355
@staticmethod
def subcomponent_class():
    """Return :class:`CIFColumn`, the component level below a category."""
    return CIFColumn
358
+
359
@staticmethod
def supercomponent_class():
    """Return :class:`CIFBlock`, the component level above a category."""
    return CIFBlock
362
+
363
@staticmethod
def deserialize(text):
    """
    Create a :class:`CIFCategory` from its CIF text representation.

    Parameters
    ----------
    text : str
        The text of the category.
        If its first non-empty line is recognized by
        ``_is_loop_start()``, the category is parsed as looped
        category, otherwise as category with a single value per field.

    Returns
    -------
    category : CIFCategory
        The parsed category, with its name taken from the first field.

    Raises
    ------
    DeserializationError
        If the text contains no fields or the category name cannot be
        parsed.
    """
    lines = [line.strip() for line in text.splitlines() if not _is_empty(line)]
    if not lines:
        # Avoid an uninformative IndexError on 'lines[0]'
        raise DeserializationError("Category contains no lines")

    if _is_loop_start(lines[0]):
        is_looped = True
        lines.pop(0)
        if not lines:
            raise DeserializationError("Looped category contains no fields")
    else:
        is_looped = False

    category_name = _parse_category_name(lines[0])
    if category_name is None:
        raise DeserializationError("Failed to parse category name")

    # NOTE(review): presumably merges multiline values into single
    # lines - confirm against the definition of '_to_single()'
    lines = _to_single(lines)
    if is_looped:
        category_dict = CIFCategory._deserialize_looped(lines)
    else:
        category_dict = CIFCategory._deserialize_single(lines)
    return CIFCategory(category_dict, category_name)
383
+
384
def serialize(self):
    """
    Convert this category into its CIF text representation.

    A category with a single row is written via
    ``_serialize_single()``, a category with multiple rows via
    ``_serialize_looped()``.
    The returned text ends with a line break.

    Raises
    ------
    SerializationError
        If no category name is set or the columns differ in length.
    ValueError
        If the category has no columns or no rows.
    """
    if self._name is None:
        raise SerializationError("Category name is required")
    if not self._columns:
        raise ValueError("At least one column is required")

    for column_name, column in self.items():
        column_length = len(column)
        if self._row_count is None:
            # The first column seen defines the expected row count
            self._row_count = column_length
            continue
        if column_length != self._row_count:
            raise SerializationError(
                f"All columns must have the same length, "
                f"but '{column_name}' has length {column_length}, "
                f"while the first column has row_count {self._row_count}"
            )

    if self._row_count == 0:
        raise ValueError("At least one row is required")
    if self._row_count == 1:
        lines = self._serialize_single()
    else:
        lines = self._serialize_looped()
    # Enforce terminal line break
    lines.append("")
    return "\n".join(lines)
409
+
410
def __getitem__(self, key):
    """
    Look up the column stored under `key` in the underlying column
    mapping.
    """
    columns = self._columns
    return columns[key]
412
+
413
def __setitem__(self, key, column):
    """
    Store `column` under `key`.

    Objects that are not a :class:`CIFColumn` are wrapped into one.
    """
    if not isinstance(column, CIFColumn):
        column = CIFColumn(column)
    self._columns[key] = column
    # The cached row count was measured on the previous columns
    # -> discard it, so `row_count` is determined again on next access
    self._row_count = None
417
+
418
def __delitem__(self, key):
    """
    Remove the column stored under `key`.

    Raises
    ------
    ValueError
        If only a single column is left in the category.
    """
    columns = self._columns
    if len(columns) == 1:
        raise ValueError("At least one column must remain")
    del columns[key]
422
+
423
def __contains__(self, key):
    """
    Return whether a column is stored under `key`.
    """
    columns = self._columns
    return key in columns
425
+
426
def __iter__(self):
    """
    Iterate over the keys of the underlying column mapping.
    """
    columns = self._columns
    return iter(columns)
428
+
429
def __len__(self):
    """
    Return the number of columns in this category.
    """
    columns = self._columns
    return len(columns)
431
+
432
def __eq__(self, other):
    """
    Two categories are equal, if `other` is an instance of the same
    type, both have the same column names and each pair of columns
    with the same name compares equal.
    """
    # The row count needs no separate comparison,
    # as it is derived from the columns
    if not isinstance(other, type(self)):
        return False
    if set(self.keys()) != set(other.keys()):
        return False
    # Stop at the first differing column
    return not any(self[name] != other[name] for name in self.keys())
442
+
443
@staticmethod
def _deserialize_single(lines):
    """
    Process a category where each field has a single value.

    Parameters
    ----------
    lines : list of str
        The lines of the category, where multiline values are already
        merged into single list elements.

    Returns
    -------
    category_dict : dict (str -> CIFColumn)
        Maps the column names (the part after the ``'.'``) to the
        parsed columns.

    Raises
    ------
    DeserializationError
        If a line cannot be split into a key and a value.
    """
    category_dict = {}
    line_i = 0
    while line_i < len(lines):
        line = lines[line_i]
        parts = list(_split_one_line(line))
        if len(parts) == 2:
            # Standard case -> name and value in one line
            name_part, value_part = parts
            line_i += 1
        elif len(parts) == 1:
            # Value is a multiline value on the next line
            name_part = parts[0]
            if line_i + 1 >= len(lines):
                # The key is the last line -> there is no value for it
                raise DeserializationError(f"Failed to parse line '{line}'")
            parts = list(_split_one_line(lines[line_i + 1]))
            if len(parts) == 1:
                value_part = parts[0]
            else:
                raise DeserializationError(f"Failed to parse line '{line}'")
            line_i += 2
        elif len(parts) == 0:
            raise DeserializationError("Empty line within category")
        else:
            raise DeserializationError(f"Failed to parse line '{line}'")
        name_fields = name_part.split(".")
        if len(name_fields) < 2:
            # Without a '.' there is no column name in the key
            raise DeserializationError(f"Failed to parse line '{line}'")
        category_dict[name_fields[1]] = CIFColumn(value_part)
    return category_dict
472
+
473
@staticmethod
def _deserialize_looped(lines):
    """
    Process a category where each field has multiple values
    (category is a table).

    The leading lines starting with ``'_'`` give the column names,
    all lines after them contain the values.

    Raises
    ------
    DeserializationError
        If the number of values is not a multiple of the number of
        columns.
    """
    # Collect the leading key lines
    column_names = []
    for header in lines:
        if header[0] != "_":
            break
        column_names.append(header.split(".")[1])
    n_key_lines = len(column_names)
    category_dict = {name: [] for name in column_names}

    # Rows may be split over multiple lines -> do not rely on
    # row-line-alignment at all and simply cycle through columns
    column_cycle = itertools.cycle(range(n_key_lines))
    for data_line in lines[n_key_lines:]:
        for value in _split_one_line(data_line):
            target = column_names[next(column_cycle)]
            category_dict[target].append(value)

    # After a complete last row the cycle is back at the first column
    # Otherwise, this would indicate a parsing error or an invalid CIF file
    if next(column_cycle) != 0:
        raise DeserializationError(
            "Category contains columns with different lengths"
        )

    return category_dict
512
+
513
def _serialize_single(self):
    """
    Create the lines for a category with a single row:
    one ``_category.column   value`` line per column.
    """
    prefix = "_" + self._name + "."
    keys = [prefix + name for name in self.keys()]
    # "+3" Because of three whitespace chars after longest key
    req_len = max(map(len, keys)) + 3
    lines = []
    for key, column in zip(keys, self.values()):
        entry = key.ljust(req_len) + _escape(column.as_item())
        # Remove potential terminal newlines from multiline values
        lines.append(entry.strip())
    return lines
523
+
524
def _serialize_looped(self):
    """
    Create the lines for a category with multiple rows:
    a ``loop_`` line, one key line per column and one line per row.
    """
    prefix = "_" + self._name + "."
    key_lines = [prefix + key + " " for key in self.keys()]

    # Quote before measuring the number of chars,
    # as the quote characters modify the length
    column_arrays = [
        np.array([_escape(element) for element in column.as_array(str)])
        for column in self.values()
    ]

    # Number of characters the longest string in the column needs
    # This can be deduced from the dtype
    # The "+1" is for the small whitespace column
    column_n_chars = [
        array.dtype.itemsize // UNICODE_CHAR_SIZE + 1 for array in column_arrays
    ]

    value_lines = []
    for i in range(self._row_count):
        padded = [
            array[i].ljust(width)
            for array, width in zip(column_arrays, column_n_chars)
        ]
        # Remove trailing justification of last column
        # and potential terminal newlines from multiline values
        value_lines.append("".join(padded).strip())

    return ["loop_"] + key_lines + value_lines
550
+
551
+
552
class CIFBlock(_Component, MutableMapping):
    """
    This class represents a block in a :class:`CIFFile`.

    Categories can be accessed and modified like a dictionary.
    The values are :class:`CIFCategory` objects.

    Parameters
    ----------
    categories : dict, optional
        The categories of the block.
        The keys are the category names and the values are the
        :class:`CIFCategory` objects.
        By default, an empty block is created.
    name : str, optional
        The name of the block.
        This is only used for serialization and is automatically set,
        when the :class:`CIFBlock` is added to a :class:`CIFFile`.
        It only needs to be set manually, when the block is directly
        serialized.

    Attributes
    ----------
    name : str
        The name of the block.

    Notes
    -----
    The category names do not include the leading underscore character.
    This character is automatically added when the category is
    serialized.

    Examples
    --------

    >>> # Add category on creation
    >>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})}, name="baz")
    >>> # Add category later on
    >>> block["bar"] = CIFCategory({"another_column": [2, 3]})
    >>> # Access a column
    >>> print(block["bar"]["another_column"].as_array())
    ['2' '3']
    >>> print(block.serialize())
    data_baz
    #
    _foo.some_column   1
    #
    loop_
    _bar.another_column
    2
    3
    #
    """

    def __init__(self, categories=None, name=None):
        self._name = name
        if categories is None:
            categories = {}
        # Values are either `CIFCategory` objects or, for lazily
        # deserialized categories, their text
        self._categories = categories

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, name):
        self._name = name

    @staticmethod
    def subcomponent_class():
        return CIFCategory

    @staticmethod
    def supercomponent_class():
        return CIFFile

    @staticmethod
    def deserialize(text):
        """
        Index the categories in the text of a block.

        The categories themselves are kept as text and are only
        deserialized when they are accessed.

        Parameters
        ----------
        text : str
            The text of the block.

        Returns
        -------
        block : CIFBlock
            The block with lazily deserialized categories.

        Raises
        ------
        DeserializationError
            If a ``loop_`` line is not followed by any content line.
        """
        lines = text.splitlines()
        n_lines = len(lines)
        current_category_name = None
        category_starts = []
        category_names = []
        in_text_field = False
        for i, line in enumerate(lines):
            if line.startswith(";"):
                # A ';' at line start opens/closes a multiline value
                in_text_field = not in_text_field
                continue
            if in_text_field or _is_empty(line):
                # The content of a multiline value is plain data:
                # It must not start a new category, even if a line
                # looks like a key or a 'loop_'
                continue

            if _is_loop_start(line):
                # In case of lines with "loop_" the category is
                # in the next line with content
                j = i + 1
                while j < n_lines and _is_empty(lines[j]):
                    j += 1
                if j == n_lines:
                    raise DeserializationError(
                        "Missing category name after 'loop_'"
                    )
                category_name_in_line = _parse_category_name(lines[j])
            else:
                category_name_in_line = _parse_category_name(line)
                if (
                    category_name_in_line is None
                    or category_name_in_line == current_category_name
                ):
                    # Value line or another key of the current category
                    continue

            # Track the new category
            current_category_name = category_name_in_line
            category_starts.append(i)
            category_names.append(current_category_name)
        return CIFBlock(_create_element_dict(lines, category_names, category_starts))

    def serialize(self):
        """
        Convert this block into CIF text.

        Returns
        -------
        text : str
            The serialized block.

        Raises
        ------
        SerializationError
            If the block has no name or a category cannot be serialized.
        """
        if self._name is None:
            raise SerializationError("Block name is required")
        # The block starts with the block name line followed by a comment line
        text_blocks = ["data_" + self._name + "\n#\n"]
        for category_name, category in self._categories.items():
            if isinstance(category, str):
                # Category is already stored as lines
                text_blocks.append(category)
            else:
                try:
                    category.name = category_name
                    text_blocks.append(category.serialize())
                except Exception as err:
                    # Keep the original error as explicit cause
                    raise SerializationError(
                        f"Failed to serialize category '{category_name}'"
                    ) from err
                # A comment line is set after each category
                text_blocks.append("#\n")
        return "".join(text_blocks)

    def __getitem__(self, key):
        category = self._categories[key]
        if isinstance(category, str):
            # Element is stored in serialized form
            # -> must be deserialized first
            try:
                category = CIFCategory.deserialize(category)
            except Exception as err:
                raise DeserializationError(
                    f"Failed to deserialize category '{key}'"
                ) from err
            # Update with deserialized object
            self._categories[key] = category
        return category

    def __setitem__(self, key, category):
        if not isinstance(category, CIFCategory):
            raise TypeError(
                f"Expected 'CIFCategory', but got '{type(category).__name__}'"
            )
        category.name = key
        self._categories[key] = category

    def __delitem__(self, key):
        del self._categories[key]

    def __contains__(self, key):
        return key in self._categories

    def __iter__(self):
        return iter(self._categories)

    def __len__(self):
        return len(self._categories)

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        if set(self.keys()) != set(other.keys()):
            return False
        for cat_name in self.keys():
            if self[cat_name] != other[cat_name]:
                return False
        return True
715
+
716
+
717
class CIFFile(_Component, File, MutableMapping):
    """
    This class represents a CIF file.

    The blocks of the file can be accessed and modified like a
    dictionary.
    The values are :class:`CIFBlock` objects.

    To parse or write a structure from/to a :class:`CIFFile` object,
    use the high-level :func:`get_structure()` or
    :func:`set_structure()` function respectively.

    Parameters
    ----------
    blocks : dict (str -> CIFBlock), optional
        The initial blocks of the file.
        Maps the block names to the corresponding :class:`CIFBlock` objects.
        By default no initial blocks are added.

    Attributes
    ----------
    block : CIFBlock
        The sole block of the file.
        If the file contains multiple blocks, an exception is raised.

    Notes
    -----
    The content of CIF files are lazily deserialized:
    When reading the file only the line positions of all blocks are
    indexed.
    The time consuming deserialization of a block/category is only
    performed when accessed.
    The deserialized :class:`CIFBlock`/:class:`CIFCategory` objects
    are cached for subsequent accesses.

    Examples
    --------
    Read a CIF file and access its content:

    >>> import os.path
    >>> file = CIFFile.read(os.path.join(path_to_structures, "1l2y.cif"))
    >>> print(file["1L2Y"]["citation_author"]["name"].as_array())
    ['Neidigh, J.W.' 'Fesinmeyer, R.M.' 'Andersen, N.H.']
    >>> # Access the only block in the file
    >>> print(file.block["entity"]["pdbx_description"].as_item())
    TC5b

    Create a CIF file and write it to disk:

    >>> category = CIFCategory(
    ...     {"some_column": "some_value", "another_column": "another_value"}
    ... )
    >>> block = CIFBlock({"some_category": category, "another_category": category})
    >>> file = CIFFile({"some_block": block, "another_block": block})
    >>> print(file.serialize())
    data_some_block
    #
    _some_category.some_column      some_value
    _some_category.another_column   another_value
    #
    _another_category.some_column      some_value
    _another_category.another_column   another_value
    #
    data_another_block
    #
    _some_category.some_column      some_value
    _some_category.another_column   another_value
    #
    _another_category.some_column      some_value
    _another_category.another_column   another_value
    #
    >>> file.write(os.path.join(path_to_directory, "some_file.cif"))
    """

    def __init__(self, blocks=None):
        if blocks is None:
            blocks = {}
        # Values are either `CIFBlock` objects or, for lazily
        # deserialized blocks, their text
        self._blocks = blocks

    @property
    def lines(self):
        return self.serialize().splitlines()

    @property
    def block(self):
        if len(self) == 0:
            raise ValueError("There are no blocks in the file")
        elif len(self) > 1:
            raise ValueError("There are multiple blocks in the file")
        else:
            return self[next(iter(self))]

    @staticmethod
    def subcomponent_class():
        return CIFBlock

    @staticmethod
    def supercomponent_class():
        return None

    @staticmethod
    def deserialize(text):
        """
        Index the blocks in the text of a CIF file.

        The blocks themselves are kept as text and are only
        deserialized when they are accessed.

        Parameters
        ----------
        text : str
            The content of the CIF file.

        Returns
        -------
        file : CIFFile
            The file with lazily deserialized blocks.
        """
        lines = text.splitlines()
        block_starts = []
        block_names = []
        in_text_field = False
        for i, line in enumerate(lines):
            if line.startswith(";"):
                # A ';' at line start opens/closes a multiline value
                in_text_field = not in_text_field
            elif not in_text_field and not _is_empty(line):
                # Lines within a multiline value are plain data and
                # must not start a new block, even if they begin
                # with 'data_'
                data_block_name = _parse_data_block_name(line)
                if data_block_name is not None:
                    block_starts.append(i)
                    block_names.append(data_block_name)
        return CIFFile(_create_element_dict(lines, block_names, block_starts))

    def serialize(self):
        """
        Convert this file into CIF text.

        Returns
        -------
        text : str
            The concatenated text of all blocks.

        Raises
        ------
        SerializationError
            If a block cannot be serialized.
        """
        text_blocks = []
        for block_name, block in self._blocks.items():
            if isinstance(block, str):
                # Block is already stored as text
                text_blocks.append(block)
            else:
                try:
                    block.name = block_name
                    text_blocks.append(block.serialize())
                except Exception as err:
                    # Keep the original error as explicit cause
                    raise SerializationError(
                        f"Failed to serialize block '{block_name}'"
                    ) from err
        # Each block text already ends with a line break,
        # hence no separator is required
        return "".join(text_blocks)

    @classmethod
    def read(cls, file):
        """
        Read a CIF file.

        Parameters
        ----------
        file : file-like object or str
            The file to be read.
            Alternatively a file path can be supplied.

        Returns
        -------
        file_object : CIFFile
            The parsed file.
        """
        # File name
        if is_open_compatible(file):
            with open(file, "r") as f:
                text = f.read()
        # File object
        else:
            if not is_text(file):
                raise TypeError("A file opened in 'text' mode is required")
            text = file.read()
        return CIFFile.deserialize(text)

    def write(self, file):
        """
        Write the contents of this object into a CIF file.

        Parameters
        ----------
        file : file-like object or str
            The file to be written to.
            Alternatively a file path can be supplied.
        """
        if is_open_compatible(file):
            with open(file, "w") as f:
                f.write(self.serialize())
        else:
            if not is_text(file):
                raise TypeError("A file opened in 'text' mode is required")
            file.write(self.serialize())

    def __getitem__(self, key):
        block = self._blocks[key]
        if isinstance(block, str):
            # Element is stored in serialized form
            # -> must be deserialized first
            try:
                block = CIFBlock.deserialize(block)
            except Exception as err:
                raise DeserializationError(
                    f"Failed to deserialize block '{key}'"
                ) from err
            block.name = key
            # Update with deserialized object
            self._blocks[key] = block
        return block

    def __setitem__(self, key, block):
        if not isinstance(block, CIFBlock):
            raise TypeError(f"Expected 'CIFBlock', but got '{type(block).__name__}'")
        block.name = key
        self._blocks[key] = block

    def __delitem__(self, key):
        del self._blocks[key]

    def __contains__(self, key):
        return key in self._blocks

    def __iter__(self):
        return iter(self._blocks)

    def __len__(self):
        return len(self._blocks)

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        if set(self.keys()) != set(other.keys()):
            return False
        for block_name in self.keys():
            if self[block_name] != other[block_name]:
                return False
        return True
934
+
935
+
936
def _is_empty(line):
    """
    Return whether the line carries no content, i.e. it consists only
    of whitespace or its very first character is ``'#'``.
    """
    if not line.strip():
        return True
    return line[0] == "#"
938
+
939
+
940
def _create_element_dict(lines, element_names, element_starts):
    """
    Create a dict mapping the `element_names` to the corresponding
    `lines`, which are located between ``element_starts[i]`` and
    ``element_starts[i+1]``.

    The last element extends to the end of `lines`.
    The text of each element is terminated with a line break.
    The input lists are not modified.
    """
    # The exclusive stop of an element is the start of the next one
    # Use a separate list instead of appending to the caller's list
    element_stops = list(element_starts[1:])
    element_stops.append(len(lines))
    # Lazy deserialization
    # -> keep as text for now and deserialize later if needed
    return {
        name: "\n".join(lines[start:stop]) + "\n"
        for name, start, stop in zip(element_names, element_starts, element_stops)
    }
954
+
955
+
956
def _parse_data_block_name(line):
    """
    If the line defines a data block, return this name,
    i.e. everything after the leading ``data_``.
    Return ``None`` otherwise.
    """
    prefix = "data_"
    return line[len(prefix) :] if line.startswith(prefix) else None
965
+
966
+
967
def _parse_category_name(line):
    """
    If the line defines a category, return this name.
    Return ``None`` otherwise.

    A line defines a category, if its first whitespace-separated field
    has the form ``_<category>.<column>``.
    Empty lines and keys without ``'.'`` give ``None``.
    """
    if not line.startswith("_"):
        # Also covers the empty line
        return None
    # Only inspect the key itself: a '.' in the value behind it
    # (e.g. in a number) must not be mistaken for the separator
    key = line.split(None, 1)[0]
    category, separator, _ = key[1:].partition(".")
    if not separator:
        return None
    return category
976
+
977
+
978
def _is_loop_start(line):
    """
    Return whether the line starts a looped category,
    i.e. whether it begins with ``loop_``.
    """
    marker = "loop_"
    return line[: len(marker)] == marker
983
+
984
+
985
def _to_single(lines):
    r"""
    Merge the lines of each multiline value into a single list element.

    A multiline value is enclosed by lines starting with ``';'``.
    Its lines are joined with ``'\n'`` characters.
    The opening ``';'`` is kept in the merged element, whereas the
    closing ``';'`` line is discarded.
    All other lines are passed through unchanged.
    """
    merged = []
    # Lines of the currently open multiline value, `None` if outside of one
    pending = None
    for line in lines:
        is_delimiter = line[0] == ";"
        if pending is None:
            if is_delimiter:
                # Start of multiline value
                pending = [line]
            else:
                merged.append(line)
        elif is_delimiter:
            # End of multiline value:
            # the closing line itself is not part of the value
            merged.append("\n".join(pending))
            pending = None
        else:
            pending.append(line)
    return merged
1016
+
1017
+
1018
def _escape(value):
    """
    Escape special characters in a value to make it compatible with CIF.

    Parameters
    ----------
    value : str
        The raw value.

    Returns
    -------
    escaped : str
        The value as it needs to be written into the file:
        unchanged, enclosed in quotes or converted into a multiline
        value.
    """
    if "\n" in value:
        # A value with linebreaks must be represented as multiline value
        return _multiline(value)
    if "'" in value and '"' in value:
        # If both quote types are present, you cannot use them for escaping
        return _multiline(value)
    if len(value) == 0:
        return "''"
    if "'" in value:
        return '"' + value + '"'
    if '"' in value or " " in value or "\t" in value:
        return "'" + value + "'"
    # Characters that give an unquoted value a different meaning:
    # key ('_'), comment ('#'), multiline value (';'),
    # save frame reference ('$') and reserved brackets
    if value[0] in "_#;$[]":
        return "'" + value + "'"
    # Reserved words would be read as start of a block, loop etc.
    if value.lower().startswith(("data_", "loop_", "save_", "stop_", "global_")):
        return "'" + value + "'"
    return value
1042
+
1043
+
1044
def _multiline(value):
    """
    Wrap a string that may contain linebreaks into the CIF notation
    for multiline values: the value is put on a new line behind a
    ``';'`` and is closed by a line containing only ``';'``.
    """
    opening = "\n;"
    closing = "\n;\n"
    return opening + value + closing
1050
+
1051
+
1052
def _split_one_line(line):
    """
    Split a line into its fields.
    Supporting embedded quotes (' or "), like `'a dog's life'` to `a dog's life`

    Parameters
    ----------
    line : str
        The line to split.
        A line starting with ``';'`` is taken as a single multiline
        value.

    Yields
    ------
    field : str
        The fields of the line without enclosing quotes.
        An empty line yields nothing.
    """
    if not line:
        return
    # Special case of multiline value, where the line starts with ';'
    if line[0] == ";":
        yield line[1:]
        return
    if "'" not in line and '"' not in line:
        # No quoted values in the line -> simple whitespace split
        yield from line.split()
        return

    # Quoted values in the line -> scan the fields one by one
    length = len(line)
    pos = 0
    while pos < length:
        char = line[pos]
        if char.isspace():
            pos += 1
        elif char in ("'", '"'):
            # A quote only closes the value, if it is followed by
            # whitespace or the end of the line
            # -> quotes embedded in the value are skipped
            start = pos + 1
            end = line.find(char, start)
            while end != -1 and end + 1 < length and not line[end + 1].isspace():
                end = line.find(char, end + 1)
            if end == -1:
                # No closing quote -> the value spans the rest of the line
                yield line[start:]
                return
            yield line[start:end]
            pos = end + 1
        else:
            # Unquoted field -> ends at the next whitespace
            end = pos
            while end < length and not line[end].isspace():
                end += 1
            yield line[pos:end]
            pos = end
1084
+
1085
+
1086
def _arrayfy(data):
    """
    Convert the input into a one-dimensional-compatible array.

    Single values, including strings and zero-dimensional arrays,
    are wrapped into a one-element array.

    Parameters
    ----------
    data : object
        A single value, a sequence or an :class:`ndarray`.

    Returns
    -------
    array : ndarray
        The data as array.

    Raises
    ------
    ValueError
        If `data` is an empty sequence or array.
    """
    is_single_value = (
        isinstance(data, str)
        or not isinstance(data, (Sequence, np.ndarray))
        # A zero-dimensional array has no length -> treat it as scalar
        or (isinstance(data, np.ndarray) and data.ndim == 0)
    )
    if is_single_value:
        data = [data]
    elif len(data) == 0:
        raise ValueError("Array must contain at least one element")
    return np.asarray(data)