biotite 0.41.1__cp312-cp312-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-312-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,1032 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.pdbx"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["CIFFile", "CIFBlock", "CIFCategory", "CIFColumn", "CIFData"]
8
+
9
+ import itertools
10
+ import shlex
11
+ from collections.abc import MutableMapping, Sequence
12
+ import numpy as np
13
+ from .component import _Component, MaskValue
14
+ from ....file import File, is_open_compatible, is_text, DeserializationError, \
15
+ SerializationError
16
+
17
+
18
+ UNICODE_CHAR_SIZE = 4
19
+
20
+
21
# Deliberately minimal wrapper class
# Its only purpose is to mirror the structure of BinaryCIFFile
class CIFData:
    """
    This class represents the data in a :class:`CIFColumn`.

    Parameters
    ----------
    array : array_like or int or float or str
        The data array to be stored.
        If a single item is given, it is converted into an array.
    dtype : dtype-like, optional
        If given, the *dtype* the stored array should be converted to.

    Attributes
    ----------
    array : ndarray
        The stored data array.

    Notes
    -----
    When a :class:`CIFFile` is written, the data type is automatically
    converted to string.
    The other way around, when a :class:`CIFFile` is read, the data type
    is always a string type.

    Examples
    --------

    >>> data = CIFData([1, 2, 3])
    >>> print(data.array)
    [1 2 3]
    >>> print(len(data))
    3
    >>> # A single item is converted into an array
    >>> data = CIFData("apple")
    >>> print(data.array)
    ['apple']
    """

    def __init__(self, array, dtype=None):
        self._array = _arrayfy(array)
        holds_objects = np.issubdtype(self._array.dtype, np.object_)
        if holds_objects:
            raise ValueError("Object arrays are not supported")
        if dtype is None:
            # Keep the array in the data type it came with
            return
        self._array = self._array.astype(dtype)

    @property
    def array(self):
        return self._array

    @staticmethod
    def subcomponent_class():
        # The data is the lowest level of the component hierarchy
        return None

    @staticmethod
    def supercomponent_class():
        return CIFColumn

    def __len__(self):
        return len(self._array)

    def __eq__(self, other):
        # Objects of a different type are never equal to this one
        return (
            isinstance(other, type(self))
            and np.array_equal(self._array, other._array)
        )
87
+
88
+
89
class CIFColumn:
    """
    This class represents a single column in a :class:`CIFCategory`.

    Parameters
    ----------
    data : CIFData or array_like or int or float or str
        The data to be stored.
        If no :class:`CIFData` is given, the passed argument is
        coerced into such an object.
    mask : CIFData or array_like, dtype=int or int
        The mask to be stored.
        If given, the mask indicates whether the `data` is
        inapplicable (``.``) or missing (``?``) in some rows.
        The data presence is indicated by values from the
        :class:`MaskValue` enum.
        If no :class:`CIFData` is given, the passed argument is
        coerced into such an object.
        By default, no mask is created.

    Attributes
    ----------
    data : CIFData
        The stored data.
    mask : CIFData
        The mask that indicates whether certain data elements are
        inapplicable or missing.
        If no mask is present, this attribute is ``None``.

    Examples
    --------

    >>> print(CIFColumn([1, 2, 3]).as_array())
    ['1' '2' '3']
    >>> mask = [MaskValue.PRESENT, MaskValue.INAPPLICABLE, MaskValue.MISSING]
    >>> print(CIFColumn([1, 2, 3], mask).as_array())
    ['1' '.' '?']
    >>> print(CIFColumn([1]).as_item())
    1
    >>> print(CIFColumn([1], mask=[MaskValue.MISSING]).as_item())
    ?
    """

    def __init__(self, data, mask=None):
        if not isinstance(data, CIFData):
            data = CIFData(data, str)
        if mask is None:
            # No explicit mask -> derive it from '.' and '?' entries
            # in the data itself
            mask = np.full(
                len(data), MaskValue.PRESENT, dtype=np.uint8
            )
            mask[data.array == "."] = MaskValue.INAPPLICABLE
            mask[data.array == "?"] = MaskValue.MISSING
            if np.all(mask == MaskValue.PRESENT):
                # No mask required
                mask = None
            else:
                mask = CIFData(mask)
        else:
            if not isinstance(mask, CIFData):
                mask = CIFData(mask, np.uint8)
            if len(mask) != len(data):
                raise IndexError(
                    f"Data has length {len(data)}, "
                    f"but mask has length {len(mask)}"
                )
        self._data = data
        self._mask = mask

    @property
    def data(self):
        return self._data

    @property
    def mask(self):
        return self._mask

    @staticmethod
    def subcomponent_class():
        return CIFData

    @staticmethod
    def supercomponent_class():
        return CIFCategory

    def as_item(self):
        """
        Get the only item in the data of this column.

        If the data is masked as inapplicable or missing, ``'.'`` or
        ``'?'`` is returned, respectively.
        If the data contains more than one item, an exception is raised.

        Returns
        -------
        item : str
            The item in the data.
            If the column has no mask, the item is returned as stored
            in the data array, without conversion.
        """
        if self._mask is None:
            return self._data.array.item()
        mask = self._mask.array.item()
        if mask == MaskValue.PRESENT:
            item = self._data.array.item()
            # Limit float precision to 3 decimals
            if isinstance(item, float):
                return f"{item:.3f}"
            else:
                return str(item)
        elif mask == MaskValue.INAPPLICABLE:
            return "."
        elif mask == MaskValue.MISSING:
            return "?"

    def as_array(self, dtype=str, masked_value=None):
        """
        Get the data of this column as an :class:`ndarray`.

        This is a shortcut to get ``CIFColumn.data.array``.
        Furthermore, the mask is applied to the data.

        Parameters
        ----------
        dtype : dtype-like, optional
            The data type the array should be converted to.
            By default, a string type is used.
        masked_value : str, optional
            The value that should be used for masked elements, i.e.
            ``MaskValue.INAPPLICABLE`` or ``MaskValue.MISSING``.
            By default, masked elements are converted to ``'.'`` or
            ``'?'`` depending on the :class:`MaskValue`.
            If `dtype` is not a string type, masked elements are set
            to zero by default.

        Returns
        -------
        array : ndarray
            The column data in the requested `dtype` with the mask
            applied.
        """
        if self._mask is None:
            return self._data.array.astype(dtype, copy=False)

        elif np.issubdtype(dtype, np.str_):
            # Limit float precision to 3 decimals
            if np.issubdtype(self._data.array.dtype, np.floating):
                # The keyword must be 'dtype':
                # 'type' is not accepted by np.array()
                array = np.array(
                    [f"{e:.3f}" for e in self._data.array], dtype=dtype
                )
            else:
                # Copy, as otherwise original data would be overwritten
                # with mask values
                array = self._data.array.astype(dtype, copy=True)
            if masked_value is None:
                array[self._mask.array == MaskValue.INAPPLICABLE] = "."
                array[self._mask.array == MaskValue.MISSING] = "?"
            else:
                array[self._mask.array == MaskValue.INAPPLICABLE] = masked_value
                array[self._mask.array == MaskValue.MISSING] = masked_value
            return array

        else:
            # Array needs to be converted, but masked values are
            # not necessarily convertible
            # (e.g. '' cannot be converted to int)
            if masked_value is None:
                array = np.zeros(len(self._data), dtype=dtype)
            else:
                array = np.full(len(self._data), masked_value, dtype=dtype)

            # Only convert the elements that actually hold data
            present_mask = self._mask.array == MaskValue.PRESENT
            array[present_mask] = (
                self._data.array[present_mask].astype(dtype)
            )
            return array

    def __len__(self):
        return len(self._data)

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        if self._data != other._data:
            return False
        if self._mask != other._mask:
            return False
        return True
266
+
267
+
268
+ class CIFCategory(_Component, MutableMapping):
269
+ """
270
+ This class represents a category in a :class:`CIFBlock`.
271
+
272
+ Columns can be accessed and modified like a dictionary.
273
+ The values are :class:`CIFColumn` objects.
274
+
275
+ Parameters
276
+ ----------
277
+ columns : dict, optional
278
+ The columns of the category.
279
+ The keys are the column names and the values are the
280
+ :class:`CIFColumn` objects (or objects that can be coerced into
281
+ a :class:`CIFColumn`).
282
+ By default, an empty category is created.
283
+ Each column must have the same length.
284
+ name : str, optional
285
+ The name of the category.
286
+ This is only used for serialization and is automatically set,
287
+ when the :class:`CIFCategory` is added to a :class:`CIFBlock`.
288
+ It only needs to be set manually, when the category is directly
289
+ serialized.
290
+
291
+ Attributes
292
+ ----------
293
+ name : str
294
+ The name of the category.
295
+ row_count : int
296
+ The number of rows in the category, i.e. the length of each
297
+ column.
298
+
299
+ Notes
300
+ -----
301
+ When a column containing strings with line breaks are added, these
302
+ strings are written as multiline strings to the CIF file.
303
+
304
+ Examples
305
+ --------
306
+
307
+ >>> # Add column on creation
308
+ >>> category = CIFCategory({"fruit": ["apple", "banana"]}, name="fruits")
309
+ >>> # Add column later on
310
+ >>> category["taste"] = ["delicious", "tasty"]
311
+ >>> # Add column the formal way
312
+ >>> category["color"] = CIFColumn(CIFData(["red", "yellow"]))
313
+ >>> # Access a column
314
+ >>> print(category["fruit"].as_array())
315
+ ['apple' 'banana']
316
+ >>> print(category.serialize())
317
+ loop_
318
+ _fruits.fruit
319
+ _fruits.taste
320
+ _fruits.color
321
+ apple delicious red
322
+ banana tasty yellow
323
+ """
324
+
325
def __init__(self, columns=None, name=None):
    """
    Set up the category from an optional mapping of column names to
    columns.

    Values that are not :class:`CIFColumn` objects are coerced into
    such an object.
    """
    self._name = name
    coerced = {}
    if columns is not None:
        for key, col in columns.items():
            coerced[key] = col if isinstance(col, CIFColumn) else CIFColumn(col)

    # The row count is not computed at construction
    self._row_count = None
    self._columns = coerced
337
+
338
@property
def name(self):
    """
    The name of the category, as passed on construction or assigned
    afterwards.
    """
    return self._name

@name.setter
def name(self, name):
    # No validation is performed on the assigned name
    self._name = name
345
+
346
@property
def row_count(self):
    """
    The number of rows in the category.

    The value is taken from the length of the first column on first
    access and is cached afterwards.
    """
    cached = self._row_count
    if cached is not None:
        return cached
    # Not determined yet -> the first column defines the row count
    first_column = next(iter(self.values()))
    self._row_count = len(first_column)
    return self._row_count
353
+
354
@staticmethod
def subcomponent_class():
    """Return the class of the components a category contains."""
    component_class = CIFColumn
    return component_class
357
+
358
@staticmethod
def supercomponent_class():
    """Return the class of the component that contains categories."""
    component_class = CIFBlock
    return component_class
361
+
362
@staticmethod
def deserialize(text, expect_whitespace=True):
    """
    Parse the text of a single category into a :class:`CIFCategory`.

    Parameters
    ----------
    text : str
        The text of the category.
    expect_whitespace : bool, optional
        Only relevant for looped categories.
        If true, values are split with :func:`shlex.split()`, so that
        quote protected values may contain whitespace.
        Otherwise a faster plain whitespace split is used.

    Returns
    -------
    category : CIFCategory
        The parsed category.

    Raises
    ------
    DeserializationError
        If the text contains no content lines or the category name
        cannot be parsed.
    """
    lines = [
        line.strip() for line in text.splitlines() if not _is_empty(line)
    ]
    if not lines:
        # Fail with a meaningful error instead of an IndexError
        raise DeserializationError("Category contains no content")

    is_looped = _is_loop_start(lines[0])
    if is_looped:
        # The loop start line carries no field information
        lines.pop(0)
        if not lines:
            raise DeserializationError("Looped category contains no fields")

    category_name = _parse_category_name(lines[0])
    if category_name is None:
        raise DeserializationError(
            "Failed to parse category name"
        )

    lines = _to_single(lines, is_looped)
    if is_looped:
        category_dict = CIFCategory._deserialize_looped(
            lines, expect_whitespace
        )
    else:
        category_dict = CIFCategory._deserialize_single(lines)
    return CIFCategory(category_dict, category_name)
388
+
389
def serialize(self):
    """
    Convert this category into its CIF text representation.

    A category with a single row is written via
    ``_serialize_single()``, a category with multiple rows via
    ``_serialize_looped()``.

    Returns
    -------
    text : str
        The serialized category, terminated by a line break.

    Raises
    ------
    SerializationError
        If the category has no name or the columns differ in length.
    ValueError
        If the category has no columns or no rows.
    """
    if self._name is None:
        raise SerializationError("Category name is required")
    if not self._columns:
        raise ValueError("At least one column is required")

    # Always derive the row count from the current columns:
    # a previously cached value may be stale, if columns were
    # replaced in the meantime
    row_count = None
    for column_name, column in self.items():
        if row_count is None:
            row_count = len(column)
        elif len(column) != row_count:
            raise SerializationError(
                f"All columns must have the same length, "
                f"but '{column_name}' has length {len(column)}, "
                f"while the first column has row_count {row_count}"
            )
    self._row_count = row_count

    if row_count == 0:
        raise ValueError("At least one row is required")
    elif row_count == 1:
        lines = self._serialize_single()
    else:
        lines = self._serialize_looped()
    # Enforce terminal line break
    lines.append("")
    return "\n".join(lines)
414
+
415
def __getitem__(self, key):
    """Return the column stored under the name `key`."""
    columns = self._columns
    return columns[key]
417
+
418
def __setitem__(self, key, column):
    """
    Store `column` under the name `key`.

    Anything that is not a :class:`CIFColumn` is coerced into one.
    """
    self._columns[key] = (
        column if isinstance(column, CIFColumn) else CIFColumn(column)
    )
422
+
423
def __delitem__(self, key):
    """
    Remove the column with the name `key`.

    The last remaining column cannot be removed:
    in this case a :class:`ValueError` is raised.
    """
    is_last_column = len(self._columns) == 1
    if is_last_column:
        raise ValueError("At least one column must remain")
    del self._columns[key]
427
+
428
def __iter__(self):
    """Iterate over the column names of this category."""
    columns = self._columns
    return iter(columns)
430
+
431
def __len__(self):
    """Return the number of columns (not rows) in this category."""
    columns = self._columns
    return len(columns)
433
+
434
def __eq__(self, other):
    """
    Two categories are equal, if they have the same column names and
    the columns under each name are equal.
    """
    # Row count can be omitted here, as it is based on the columns
    if not isinstance(other, type(self)):
        return False
    if set(self.keys()) != set(other.keys()):
        return False
    # Equal only if no column differs from its counterpart
    return not any(
        self[col_name] != other[col_name] for col_name in self.keys()
    )
444
+
445
@staticmethod
def _deserialize_single(lines):
    """
    Process a category where each field has a single value.

    Each line is split with :func:`shlex.split()`:
    The first token is the ``_category.column`` key and the second
    token is the value.
    """
    category_dict = {}
    for line in lines:
        tokens = shlex.split(line)
        key_token = tokens[0]
        value_token = tokens[1]
        # Only the part after the category name is the column name
        column_name = key_token.split(".")[1]
        category_dict[column_name] = CIFColumn(value_token)
    return category_dict
457
+
458
+ @staticmethod
459
+ def _deserialize_looped(lines, expect_whitespace):
460
+ """
461
+ Process a category where each field has multiple values
462
+ (category is a table).
463
+ """
464
+ category_dict = {}
465
+ column_names = []
466
+ i = 0
467
+ for key_line in lines:
468
+ if key_line[0] == "_":
469
+ # Key line
470
+ key = key_line.split(".")[1]
471
+ column_names.append(key)
472
+ category_dict[key] = []
473
+ i += 1
474
+ else:
475
+ break
476
+
477
+ data_lines = lines[i:]
478
+ # Rows may be split over multiple lines -> do not rely on
479
+ # row-line-alignment at all and simply cycle through columns
480
+ column_names = itertools.cycle(column_names)
481
+ for data_line in data_lines:
482
+ # If whitespace is expected in quote protected values,
483
+ # use standard shlex split
484
+ # Otherwise use much more faster whitespace split
485
+ # and quote removal if applicable,
486
+ # bypassing the slow shlex module
487
+ if expect_whitespace:
488
+ values = shlex.split(data_line)
489
+ else:
490
+ values = data_line.split()
491
+ for k in range(len(values)):
492
+ # Remove quotes
493
+ if (values[k][0] == '"' and values[k][-1] == '"') or (
494
+ values[k][0] == "'" and values[k][-1] == "'"
495
+ ):
496
+ values[k] = values[k][1:-1]
497
+ for val in values:
498
+ column_name = next(column_names)
499
+ category_dict[column_name].append(val)
500
+
501
+ return category_dict
502
+
503
def _serialize_single(self):
    """
    Serialize a category with a single row into key-value lines.

    The values are aligned three characters after the longest key.
    """
    prefix = "_" + self._name + "."
    keys = [prefix + name for name in self.keys()]
    # "+3" Because of three whitespace chars after longest key
    req_len = max(map(len, keys)) + 3
    lines = []
    for key, column in zip(keys, self.values()):
        value = _multiline(_quote(column.as_item()))
        lines.append(key.ljust(req_len) + value)
    return lines
512
+
513
def _serialize_looped(self):
    """
    Serialize a category with multiple rows into a ``loop_`` table.

    Returns
    -------
    lines : list of str
        The ``loop_`` line, followed by one line per column key and
        one element per row.
        Within a row each value is left-justified to the width of its
        column.
    """
    key_lines = [
        "_" + self._name + "." + key + " "
        for key in self.keys()
    ]

    column_arrays = []
    for column in self.values():
        array = column.as_array(str)
        # Quote before measuring the number of chars,
        # as the quote characters modify the length
        array = np.array(
            [_multiline(_quote(element)) for element in array]
        )
        column_arrays.append(array)

    # Number of characters the longest string in the column needs
    # This can be deduced from the dtype
    # The "+1" is for the small whitespace column
    column_n_chars = [
        array.dtype.itemsize // UNICODE_CHAR_SIZE + 1
        for array in column_arrays
    ]
    value_lines = []
    for i in range(self._row_count):
        row = "".join(
            array[i].ljust(n_chars)
            for array, n_chars in zip(column_arrays, column_n_chars)
        )
        # Remove trailing justification of last column:
        # Strings are immutable, so the stripped string must be stored.
        # Only spaces are stripped to keep the line break that
        # terminates a multiline value
        value_lines.append(row.rstrip(" "))

    return ["loop_"] + key_lines + value_lines
544
+
545
+
546
class CIFBlock(_Component, MutableMapping):
    """
    This class represents a block in a :class:`CIFFile`.

    Categories can be accessed and modified like a dictionary.
    The values are :class:`CIFCategory` objects.

    Parameters
    ----------
    categories : dict, optional
        The categories of the block.
        The keys are the category names and the values are the
        :class:`CIFCategory` objects.
        By default, an empty block is created.

    Notes
    -----
    The category names do not include the leading underscore character.
    This character is automatically added when the category is
    serialized.

    Categories are lazily deserialized:
    A category that was not accessed yet is stored as text and is only
    converted into a :class:`CIFCategory` on first access.

    Examples
    --------

    >>> # Add category on creation
    >>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})})
    >>> # Add category later on
    >>> block["bar"] = CIFCategory({"another_column": [2, 3]})
    >>> # Access a column
    >>> print(block["bar"]["another_column"].as_array())
    ['2' '3']
    >>> print(block.serialize())
    _foo.some_column   1
    #
    loop_
    _bar.another_column
    2
    3
    #
    """

    def __init__(self, categories=None):
        if categories is None:
            categories = {}
        self._categories = categories

    @staticmethod
    def subcomponent_class():
        return CIFCategory

    @staticmethod
    def supercomponent_class():
        return CIFFile

    @staticmethod
    def deserialize(text):
        """
        Create a block from CIF formatted text.

        Only the start line of each category is located here, the
        categories themselves are kept as text until they are accessed.
        """
        lines = text.splitlines()
        current_category_name = None
        category_starts = []
        category_names = []
        for i, line in enumerate(lines):
            if not _is_empty(line):
                is_loop_in_line = _is_loop_start(line)
                category_name_in_line = _parse_category_name(line)
                if is_loop_in_line or (
                    category_name_in_line != current_category_name
                    and category_name_in_line is not None
                ):
                    # Track the new category
                    if is_loop_in_line:
                        # In case of lines with "loop_" the category is
                        # in the next line
                        category_name_in_line = _parse_category_name(
                            lines[i + 1]
                        )
                    current_category_name = category_name_in_line
                    category_starts.append(i)
                    category_names.append(current_category_name)
        return CIFBlock(_create_element_dict(
            lines, category_names, category_starts
        ))

    def serialize(self):
        """
        Convert this block into CIF formatted text.

        Raises
        ------
        SerializationError
            If a category cannot be serialized.
        """
        text_blocks = []
        for category_name, category in self._categories.items():
            if isinstance(category, str):
                # Category is already stored as lines
                text_blocks.append(category)
                if not category.endswith("\n"):
                    # Without a line break the following '#' would be
                    # glued to the last value of the category
                    text_blocks.append("\n")
            else:
                try:
                    category.name = category_name
                    text_blocks.append(category.serialize())
                # Do not intercept 'KeyboardInterrupt' and 'SystemExit'
                except Exception as e:
                    raise SerializationError(
                        f"Failed to serialize category '{category_name}'"
                    ) from e
            # A comment line is set after each category
            text_blocks.append("#\n")
        return "".join(text_blocks)

    def __getitem__(self, key):
        category = self._categories[key]
        if isinstance(category, str):
            # Element is stored in serialized form
            # -> must be deserialized first
            try:
                # Special optimization for "atom_site":
                # Even if the values are quote protected,
                # no whitespace is expected in escaped values
                # Therefore slow shlex.split() call is not necessary
                expect_whitespace = key != "atom_site"
                category = CIFCategory.deserialize(category, expect_whitespace)
            # Do not intercept 'KeyboardInterrupt' and 'SystemExit'
            except Exception as e:
                raise DeserializationError(
                    f"Failed to deserialize category '{key}'"
                ) from e
            # Update with deserialized object
            self._categories[key] = category
        return category

    def __setitem__(self, key, category):
        if not isinstance(category, CIFCategory):
            raise TypeError(
                f"Expected 'CIFCategory', but got '{type(category).__name__}'"
            )
        category.name = key
        self._categories[key] = category

    def __delitem__(self, key):
        del self._categories[key]

    def __iter__(self):
        return iter(self._categories)

    def __len__(self):
        return len(self._categories)

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        if set(self.keys()) != set(other.keys()):
            return False
        for cat_name in self.keys():
            if self[cat_name] != other[cat_name]:
                return False
        return True
695
+
696
+
697
class CIFFile(_Component, File, MutableMapping):
    """
    This class represents a CIF file.

    The blocks of the file can be accessed and modified like a
    dictionary.
    The values are :class:`CIFBlock` objects.

    To parse or write a structure from/to a :class:`CIFFile` object,
    use the high-level :func:`get_structure()` or
    :func:`set_structure()` function respectively.

    Notes
    -----
    The content of CIF files is lazily deserialized:
    When reading the file only the line positions of all blocks are
    indexed.
    The time consuming deserialization of a block/category is only
    performed when accessed.
    The deserialized :class:`CIFBlock`/:class:`CIFCategory` objects
    are cached for subsequent accesses.

    Attributes
    ----------
    block : CIFBlock
        The sole block of the file.
        If the file contains no block or multiple blocks,
        an exception is raised.

    Examples
    --------
    Read a CIF file and access its content:

    >>> import os.path
    >>> file = CIFFile.read(os.path.join(path_to_structures, "1l2y.cif"))
    >>> print(file["1L2Y"]["citation_author"]["name"].as_array())
    ['Neidigh, J.W.' 'Fesinmeyer, R.M.' 'Andersen, N.H.']
    >>> # Access the only block in the file
    >>> print(file.block["entity"]["pdbx_description"].as_item())
    TC5b

    Create a CIF file and write it to disk:

    >>> category = CIFCategory(
    ...     {"some_column": "some_value", "another_column": "another_value"}
    ... )
    >>> block = CIFBlock({"some_category": category, "another_category": category})
    >>> file = CIFFile({"some_block": block, "another_block": block})
    >>> print(file.serialize())
    data_some_block
    #
    _some_category.some_column      some_value
    _some_category.another_column   another_value
    #
    _another_category.some_column      some_value
    _another_category.another_column   another_value
    #
    data_another_block
    #
    _some_category.some_column      some_value
    _some_category.another_column   another_value
    #
    _another_category.some_column      some_value
    _another_category.another_column   another_value
    #
    >>> file.write(os.path.join(path_to_directory, "some_file.cif"))
    """

    def __init__(self, blocks=None):
        if blocks is None:
            blocks = {}
        self._blocks = blocks

    @property
    def lines(self):
        return self.serialize().splitlines()

    @property
    def block(self):
        n_blocks = len(self)
        if n_blocks == 0:
            raise ValueError("There are no blocks in the file")
        if n_blocks > 1:
            raise ValueError("There are multiple blocks in the file")
        return self[next(iter(self))]

    @staticmethod
    def subcomponent_class():
        return CIFBlock

    @staticmethod
    def supercomponent_class():
        return None

    @staticmethod
    def deserialize(text):
        """
        Create a file object from CIF formatted text.

        Only the start line of each block is located here, the blocks
        themselves are kept as text until they are accessed.
        """
        lines = text.splitlines()
        block_starts = []
        block_names = []
        for i, line in enumerate(lines):
            if not _is_empty(line):
                data_block_name = _parse_data_block_name(line)
                if data_block_name is not None:
                    block_starts.append(i)
                    block_names.append(data_block_name)
        return CIFFile(_create_element_dict(lines, block_names, block_starts))

    def serialize(self):
        """
        Convert this file into CIF formatted text.

        Raises
        ------
        SerializationError
            If a block cannot be serialized.
        """
        text_blocks = []
        for block_name, block in self._blocks.items():
            if isinstance(block, str):
                # Block is already stored as text:
                # The text kept by 'deserialize()' starts with the
                # 'data_' line of the block
                # -> do not write the block indicator a second time
                first_line = block.split("\n", 1)[0]
                if _parse_data_block_name(first_line) != block_name:
                    text_blocks.append("data_" + block_name + "\n")
                    text_blocks.append("#\n")
                text_blocks.append(block)
                if not block.endswith("\n"):
                    # Separate the last line from the following block
                    # and enforce terminal line break
                    text_blocks.append("\n")
            else:
                text_blocks.append("data_" + block_name + "\n")
                # A comment line is set after the block indicator
                text_blocks.append("#\n")
                try:
                    text_blocks.append(block.serialize())
                # Do not intercept 'KeyboardInterrupt' and 'SystemExit'
                except Exception as e:
                    raise SerializationError(
                        f"Failed to serialize block '{block_name}'"
                    ) from e
        return "".join(text_blocks)

    @classmethod
    def read(cls, file):
        """
        Read a CIF file.

        Parameters
        ----------
        file : file-like object or str
            The file to be read.
            Alternatively a file path can be supplied.

        Returns
        -------
        file_object : CIFFile
            The parsed file.
        """
        # File name
        if is_open_compatible(file):
            with open(file, "r") as f:
                text = f.read()
        # File object
        else:
            if not is_text(file):
                raise TypeError("A file opened in 'text' mode is required")
            text = file.read()
        return CIFFile.deserialize(text)

    def write(self, file):
        """
        Write the contents of this object into a CIF file.

        Parameters
        ----------
        file : file-like object or str
            The file to be written to.
            Alternatively a file path can be supplied.
        """
        if is_open_compatible(file):
            with open(file, "w") as f:
                f.write(self.serialize())
        else:
            if not is_text(file):
                raise TypeError("A file opened in 'text' mode is required")
            file.write(self.serialize())

    def __getitem__(self, key):
        block = self._blocks[key]
        if isinstance(block, str):
            # Element is stored in serialized form
            # -> must be deserialized first
            try:
                block = CIFBlock.deserialize(block)
            # Do not intercept 'KeyboardInterrupt' and 'SystemExit'
            except Exception as e:
                raise DeserializationError(
                    f"Failed to deserialize block '{key}'"
                ) from e
            # Update with deserialized object
            self._blocks[key] = block
        return block

    def __setitem__(self, key, block):
        if not isinstance(block, CIFBlock):
            raise TypeError(
                f"Expected 'CIFBlock', but got '{type(block).__name__}'"
            )
        self._blocks[key] = block

    def __delitem__(self, key):
        del self._blocks[key]

    def __iter__(self):
        return iter(self._blocks)

    def __len__(self):
        return len(self._blocks)

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        if set(self.keys()) != set(other.keys()):
            return False
        for block_name in self.keys():
            if self[block_name] != other[block_name]:
                return False
        return True
905
+
906
+
907
def _is_empty(line):
    """
    Return whether the line contains only whitespace or starts with
    the comment character ``#``.
    """
    if not line.strip():
        return True
    return line.startswith("#")
909
+
910
+
911
def _create_element_dict(lines, element_names, element_starts):
    """
    Create a dict mapping the `element_names` to the corresponding
    `lines`, which are located between ``element_starts[i]`` and
    ``element_starts[i+1]``.

    Parameters
    ----------
    lines : list of str
        All lines of the parent element.
    element_names : list of str
        The name of each element.
    element_starts : list of int
        The index of the first line of each element.
        The list is not modified.

    Returns
    -------
    element_dict : dict of (str -> str)
        Maps each name to the text of the element.
        Each text is terminated by a line break, so that texts can be
        concatenated without merging lines.
    """
    # The exclusive stop of an element is the start of the next one,
    # the last element extends to the end of the lines.
    # A new list is built instead of appending to the caller's list
    element_stops = list(element_starts[1:]) + [len(lines)]
    # Lazy deserialization
    # -> keep as text for now and deserialize later if needed
    return {
        element_name: "\n".join(lines[start:stop]) + "\n"
        for element_name, start, stop
        in zip(element_names, element_starts, element_stops)
    }
925
+
926
+
927
def _parse_data_block_name(line):
    """
    If the line defines a data block, return this name.
    Return ``None`` otherwise.
    """
    prefix = "data_"
    if not line.startswith(prefix):
        return None
    # The name is everything after the prefix
    return line[len(prefix):]
936
+
937
+
938
def _parse_category_name(line):
    """
    If the line defines a category, return this name.
    Return ``None`` otherwise.

    A line defines a category, if it starts with a data name of the
    form ``_<category>.<column>``.
    Hence, ``None`` is also returned for an empty line and for a data
    name without a ``.``.
    """
    # 'startswith()' also copes with an empty line
    if not line.startswith("_"):
        return None
    # Only inspect the data name, i.e. the first token:
    # A '.' in the value must not be mistaken for the separator
    data_name = line.split(None, 1)[0]
    category_name, separator, _ = data_name[1:].partition(".")
    if not separator:
        return None
    return category_name
947
+
948
+
949
def _is_loop_start(line):
    """
    Return whether the line starts a looped category.
    """
    keyword = "loop_"
    return line[:len(keyword)] == keyword
954
+
955
+
956
def _to_single(lines, is_looped):
    """
    Convert multiline values into singleline values
    (in terms of 'lines' list elements).
    Linebreaks are preserved.

    Parameters
    ----------
    lines : list of str
        The lines of a category.
        The first character of each line is inspected, so the lines
        must not be empty.
    is_looped : bool
        If true, each multiline value gets its own element in the
        returned list.
        If false, each value that is not in the same line as its key
        is appended to the preceding element, separated by a
        whitespace.

    Returns
    -------
    processed_lines : list of str
        The converted lines.
        Multiline values are quoted via :func:`shlex.quote()`.
    """
    # The output never has more elements than the input
    # -> preallocate and drop the unused elements at the end
    processed_lines = [None] * len(lines)
    # 'in_i' points to the next line to read,
    # 'out_i' to the next element to write
    in_i = 0
    out_i = 0
    while in_i < len(lines):
        if lines[in_i][0] == ";":
            # Multiline value
            multi_line_str = lines[in_i][1:]
            j = in_i + 1
            # NOTE(review): if the closing ';' line is missing, 'j' runs
            # past the end of 'lines' and an IndexError is raised
            while lines[j] != ";":
                # Preserve linebreaks
                multi_line_str += "\n" + lines[j]
                j += 1
            if is_looped:
                # Create a line for the multiline string only
                processed_lines[out_i] = shlex.quote(multi_line_str)
                out_i += 1
            else:
                # Append multiline string to previous line
                processed_lines[out_i - 1] += " " + shlex.quote(multi_line_str)
            # Continue after the closing ';' line
            in_i = j + 1

        elif not is_looped and lines[in_i][0] != "_":
            # Singleline value in the line after the corresponding key
            processed_lines[out_i - 1] += " " + lines[in_i]
            in_i += 1

        else:
            # Normal singleline value in the same row as the key
            processed_lines[out_i] = lines[in_i]
            in_i += 1
            out_i += 1

    return [line for line in processed_lines if line is not None]
995
+
996
+
997
def _quote(value):
    """
    A less secure but much quicker version of ``shlex.quote()``.

    Parameters
    ----------
    value : str
        The value to be quoted.

    Returns
    -------
    quoted : str
        The value, enclosed in quotes if required:
        An empty value, a value starting with ``_`` and a value
        containing a double quote, a space or a tab is enclosed in
        single quotes.
        A value containing a single quote is enclosed in double quotes.
        A value containing a line break is returned unchanged.
    """
    if len(value) == 0:
        return "''"
    if "\n" in value:
        # The value must be written as multiline value, which needs no
        # quotes: Quotes around it would become part of the value
        return value
    if value[0] == "_":
        return "'" + value + "'"
    if "'" in value:
        return '"' + value + '"'
    if any(char in value for char in ('"', " ", "\t")):
        return "'" + value + "'"
    return value
1015
+
1016
+
1017
def _multiline(value):
    """
    Convert a string containing linebreaks into CIF-compatible
    multiline string.

    A string without linebreaks is returned unchanged.
    """
    if "\n" not in value:
        return value
    # The value is enclosed by lines starting with ';'
    return "".join(("\n;", value, "\n;\n"))
1025
+
1026
+
1027
def _arrayfy(data):
    """
    Convert the given data into an array.

    A string or an object that is neither a sequence nor an array
    becomes an array with a single element.

    Raises
    ------
    ValueError
        If the data is an empty sequence or array.
    """
    is_collection = (
        isinstance(data, (Sequence, np.ndarray))
        and not isinstance(data, str)
    )
    if not is_collection:
        # A single value is wrapped into a list
        data = [data]
    elif len(data) == 0:
        raise ValueError("Array must contain at least one element")
    return np.asarray(data)