biotite 1.1.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cp313-win_amd64.pyd +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cp313-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cp313-win_amd64.pyd +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cp313-win_amd64.pyd +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cp313-win_amd64.pyd +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cp313-win_amd64.pyd +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cp313-win_amd64.pyd +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cp313-win_amd64.pyd +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cp313-win_amd64.pyd +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cp313-win_amd64.pyd +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cp313-win_amd64.pyd +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cp313-win_amd64.pyd +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cp313-win_amd64.pyd +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cp313-win_amd64.pyd +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cp313-win_amd64.pyd +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cp313-win_amd64.pyd +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cp313-win_amd64.pyd +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cp313-win_amd64.pyd +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cp313-win_amd64.pyd +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cp313-win_amd64.pyd +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cp313-win_amd64.pyd +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +4 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,1075 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.pdbx"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["CIFFile", "CIFBlock", "CIFCategory", "CIFColumn", "CIFData"]
8
+
9
+ import itertools
10
+ from collections.abc import MutableMapping, Sequence
11
+ import numpy as np
12
+ from biotite.file import (
13
+ DeserializationError,
14
+ File,
15
+ SerializationError,
16
+ is_open_compatible,
17
+ is_text,
18
+ )
19
+ from biotite.structure.io.pdbx.component import MaskValue, _Component
20
+
21
+ UNICODE_CHAR_SIZE = 4
22
+
23
+
24
+ # Small class without much functionality
25
+ # It exists merely for consistency with BinaryCIFFile
26
class CIFData:
    """
    Container for the values of a :class:`CIFColumn`.

    Parameters
    ----------
    array : array_like or int or float or str
        The values to be stored.
        A single item is wrapped into an array.
    dtype : dtype-like, optional
        If given, the stored array is converted to this *dtype*.

    Attributes
    ----------
    array : ndarray
        The stored data array.

    Notes
    -----
    When a :class:`CIFFile` is written, the data type is automatically
    converted to string.
    The other way around, when a :class:`CIFFile` is read, the data type
    is always a string type.

    Examples
    --------

    >>> data = CIFData([1, 2, 3])
    >>> print(data.array)
    [1 2 3]
    >>> print(len(data))
    3
    >>> # A single item is converted into an array
    >>> data = CIFData("apple")
    >>> print(data.array)
    ['apple']
    """

    def __init__(self, array, dtype=None):
        converted = _arrayfy(array)
        # Arrays of arbitrary Python objects are rejected
        if np.issubdtype(converted.dtype, np.object_):
            raise ValueError("Object arrays are not supported")
        self._array = converted if dtype is None else converted.astype(dtype)

    @property
    def array(self):
        """The stored :class:`ndarray`."""
        return self._array

    @staticmethod
    def subcomponent_class():
        """Return ``None``: this class reports no subcomponent class."""
        return None

    @staticmethod
    def supercomponent_class():
        """Return :class:`CIFColumn`, reported as the supercomponent class."""
        return CIFColumn

    def __len__(self):
        return len(self._array)

    def __eq__(self, other):
        # Two objects are equal, if they have the same type
        # and their arrays are equal in shape and content
        same_type = isinstance(other, type(self))
        return same_type and np.array_equal(self._array, other._array)
90
+
91
+
92
class CIFColumn:
    """
    This class represents a single column in a :class:`CIFCategory`.

    Parameters
    ----------
    data : CIFData or array_like or int or float or str
        The data to be stored.
        If no :class:`CIFData` is given, the passed argument is
        coerced into such an object.
    mask : CIFData or array_like, dtype=int or int
        The mask to be stored.
        If given, the mask indicates whether the `data` is
        inapplicable (``.``) or missing (``?``) in some rows.
        The data presence is indicated by values from the
        :class:`MaskValue` enum.
        If no :class:`CIFData` is given, the passed argument is
        coerced into such an object.
        By default, the mask is derived from the ``'.'`` and ``'?'``
        entries in `data`; if there are none, no mask is created.

    Attributes
    ----------
    data : CIFData
        The stored data.
    mask : CIFData
        The mask that indicates whether certain data elements are
        inapplicable or missing.
        If no mask is present, this attribute is ``None``.

    Examples
    --------

    >>> print(CIFColumn([1, 2, 3]).as_array())
    ['1' '2' '3']
    >>> mask = [MaskValue.PRESENT, MaskValue.INAPPLICABLE, MaskValue.MISSING]
    >>> print(CIFColumn([1, 2, 3], mask).as_array())
    ['1' '.' '?']
    >>> print(CIFColumn([1]).as_item())
    1
    >>> print(CIFColumn([1], mask=[MaskValue.MISSING]).as_item())
    ?
    """

    def __init__(self, data, mask=None):
        if not isinstance(data, CIFData):
            data = CIFData(data, str)
        if mask is None:
            # Derive the mask from the placeholder strings in the data
            mask = np.full(len(data), MaskValue.PRESENT, dtype=np.uint8)
            mask[data.array == "."] = MaskValue.INAPPLICABLE
            mask[data.array == "?"] = MaskValue.MISSING
            if np.all(mask == MaskValue.PRESENT):
                # No mask required
                mask = None
            else:
                mask = CIFData(mask)
        else:
            if not isinstance(mask, CIFData):
                mask = CIFData(mask, np.uint8)
            if len(mask) != len(data):
                raise IndexError(
                    f"Data has length {len(data)}, but mask has length {len(mask)}"
                )
        self._data = data
        self._mask = mask

    @property
    def data(self):
        return self._data

    @property
    def mask(self):
        return self._mask

    @staticmethod
    def subcomponent_class():
        return CIFData

    @staticmethod
    def supercomponent_class():
        return CIFCategory

    def as_item(self):
        """
        Get the only item in the data of this column.

        If the data is masked as inapplicable or missing, ``'.'`` or
        ``'?'`` is returned, respectively.
        If the data contains more than one item, an exception is raised.

        Returns
        -------
        item : str
            The item in the data.
            If the column has a mask and the item is present, the item
            is converted into a string (floats with 3 decimals).
            If the column has no mask, the item is returned as it is
            stored in the data array.
        """
        if self._mask is None:
            return self._data.array.item()

        mask = self._mask.array.item()
        if mask == MaskValue.PRESENT:
            item = self._data.array.item()
            # Limit float precision to 3 decimals
            if isinstance(item, float):
                return f"{item:.3f}"
            return str(item)
        if mask == MaskValue.INAPPLICABLE:
            return "."
        if mask == MaskValue.MISSING:
            return "?"
        # Any other mask value is not handled and gives no item
        return None

    def as_array(self, dtype=str, masked_value=None):
        """
        Get the data of this column as an :class:`ndarray`.

        This is a shortcut to get ``CIFColumn.data.array``.
        Furthermore, the mask is applied to the data.

        Parameters
        ----------
        dtype : dtype-like, optional
            The data type the array should be converted to.
            By default, a string type is used.
        masked_value : str, optional
            The value that should be used for masked elements, i.e.
            ``MaskValue.INAPPLICABLE`` or ``MaskValue.MISSING``.
            By default, masked elements are converted to ``'.'`` or
            ``'?'`` depending on the :class:`MaskValue`, if `dtype` is
            a string type.
            For any other `dtype` masked elements are ``0`` by default.

        Returns
        -------
        array : ndarray
            The column data converted to `dtype`.
            If the column has no mask, this may be the stored array
            itself instead of a copy.
        """
        if self._mask is None:
            return self._data.array.astype(dtype, copy=False)

        elif np.issubdtype(dtype, np.str_):
            # Limit float precision to 3 decimals
            if np.issubdtype(self._data.array.dtype, np.floating):
                # The keyword must be 'dtype':
                # 'np.array()' has no 'type' parameter
                array = np.array([f"{e:.3f}" for e in self._data.array], dtype=dtype)
            else:
                # Copy, as otherwise original data would be overwritten
                # with mask values
                array = self._data.array.astype(dtype, copy=True)
            if masked_value is None:
                array[self._mask.array == MaskValue.INAPPLICABLE] = "."
                array[self._mask.array == MaskValue.MISSING] = "?"
            else:
                array[self._mask.array == MaskValue.INAPPLICABLE] = masked_value
                array[self._mask.array == MaskValue.MISSING] = masked_value
            return array

        else:
            # Array needs to be converted, but masked values are
            # not necessarily convertible
            # (e.g. '' cannot be converted to int)
            if masked_value is None:
                array = np.zeros(len(self._data), dtype=dtype)
            else:
                array = np.full(len(self._data), masked_value, dtype=dtype)

            # Only convert the elements that actually contain data
            present_mask = self._mask.array == MaskValue.PRESENT
            array[present_mask] = self._data.array[present_mask].astype(dtype)
            return array

    def __len__(self):
        return len(self._data)

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        if self._data != other._data:
            return False
        if self._mask != other._mask:
            return False
        return True
262
+
263
+
264
+ class CIFCategory(_Component, MutableMapping):
265
+ """
266
+ This class represents a category in a :class:`CIFBlock`.
267
+
268
+ Columns can be accessed and modified like a dictionary.
269
+ The values are :class:`CIFColumn` objects.
270
+
271
+ Parameters
272
+ ----------
273
+ columns : dict, optional
274
+ The columns of the category.
275
+ The keys are the column names and the values are the
276
+ :class:`CIFColumn` objects (or objects that can be coerced into
277
+ a :class:`CIFColumn`).
278
+ By default, an empty category is created.
279
+ Each column must have the same length.
280
+ name : str, optional
281
+ The name of the category.
282
+ This is only used for serialization and is automatically set,
283
+ when the :class:`CIFCategory` is added to a :class:`CIFBlock`.
284
+ It only needs to be set manually, when the category is directly
285
+ serialized.
286
+
287
+ Attributes
288
+ ----------
289
+ name : str
290
+ The name of the category.
291
+ row_count : int
292
+ The number of rows in the category, i.e. the length of each
293
+ column.
294
+
295
+ Notes
296
+ -----
297
+ When a column containing strings with line breaks are added, these
298
+ strings are written as multiline strings to the CIF file.
299
+
300
+ Examples
301
+ --------
302
+
303
+ >>> # Add column on creation
304
+ >>> category = CIFCategory({"fruit": ["apple", "banana"]}, name="fruits")
305
+ >>> # Add column later on
306
+ >>> category["taste"] = ["delicious", "tasty"]
307
+ >>> # Add column the formal way
308
+ >>> category["color"] = CIFColumn(CIFData(["red", "yellow"]))
309
+ >>> # Access a column
310
+ >>> print(category["fruit"].as_array())
311
+ ['apple' 'banana']
312
+ >>> print(category.serialize())
313
+ loop_
314
+ _fruits.fruit
315
+ _fruits.taste
316
+ _fruits.color
317
+ apple delicious red
318
+ banana tasty yellow
319
+ """
320
+
321
def __init__(self, columns=None, name=None):
    self._name = name

    # Every value that is not a CIFColumn yet is coerced into one
    coerced_columns = {}
    if columns is not None:
        for key, col in columns.items():
            if isinstance(col, CIFColumn):
                coerced_columns[key] = col
            else:
                coerced_columns[key] = CIFColumn(col)

    # The row count is determined later, when it is first required
    self._row_count = None
    self._columns = coerced_columns
333
+
334
@property
def name(self):
    """
    The name of the category, ``None`` if no name was set.
    """
    return self._name

@name.setter
def name(self, name):
    # The new name is stored as given, without any validation
    self._name = name
341
+
342
@property
def row_count(self):
    """
    The number of rows, taken from the length of the first column.

    The value is computed on first access and cached afterwards.
    """
    if self._row_count is not None:
        return self._row_count
    # Row count is not determined yet
    # -> the first column defines the number of rows
    first_column = next(iter(self.values()))
    self._row_count = len(first_column)
    return self._row_count
349
+
350
@staticmethod
def subcomponent_class():
    """
    Return :class:`CIFColumn`, the class of the values of a category.
    """
    return CIFColumn
353
+
354
@staticmethod
def supercomponent_class():
    """
    Return :class:`CIFBlock`, the class a category is part of.
    """
    return CIFBlock
357
+
358
@staticmethod
def deserialize(text):
    """
    Create a :class:`CIFCategory` from its CIF text representation.

    Parameters
    ----------
    text : str
        The text of a single category, either in looped form or
        with a single value for each field.

    Returns
    -------
    category : CIFCategory
        The parsed category including its name.

    Raises
    ------
    DeserializationError
        If the text contains no content lines or the category name
        cannot be parsed.
    """
    lines = [line.strip() for line in text.splitlines() if not _is_empty(line)]
    if not lines:
        # Without this check 'lines[0]' would raise a bare IndexError
        raise DeserializationError("Category contains no content")

    is_looped = _is_loop_start(lines[0])
    if is_looped:
        # Drop the loop start line, the column names follow
        lines = lines[1:]
        if not lines:
            raise DeserializationError("Loop contains no columns")

    category_name = _parse_category_name(lines[0])
    if category_name is None:
        raise DeserializationError("Failed to parse category name")

    lines = _to_single(lines)
    if is_looped:
        category_dict = CIFCategory._deserialize_looped(lines)
    else:
        category_dict = CIFCategory._deserialize_single(lines)
    return CIFCategory(category_dict, category_name)
378
+
379
def serialize(self):
    """
    Convert this category into its CIF text representation.

    A category with a single row is written via
    ``_serialize_single()``, otherwise ``_serialize_looped()`` is used.

    Returns
    -------
    text : str
        The serialized category, terminated by a line break.
    """
    if self._name is None:
        raise SerializationError("Category name is required")
    if not self._columns:
        raise ValueError("At least one column is required")

    for column_name, column in self.items():
        column_length = len(column)
        if self._row_count is None:
            # The first column defines the expected number of rows
            self._row_count = column_length
            continue
        if column_length != self._row_count:
            raise SerializationError(
                f"All columns must have the same length, "
                f"but '{column_name}' has length {column_length}, "
                f"while the first column has row_count {self._row_count}"
            )

    if self._row_count == 0:
        raise ValueError("At least one row is required")
    if self._row_count == 1:
        lines = self._serialize_single()
    else:
        lines = self._serialize_looped()
    # Enforce terminal line break
    lines.append("")
    return "\n".join(lines)
404
+
405
def __getitem__(self, key):
    # Look up the column stored under the given column name
    column = self._columns[key]
    return column
407
+
408
def __setitem__(self, key, column):
    # Anything that is not a CIFColumn yet is coerced into one
    self._columns[key] = (
        column if isinstance(column, CIFColumn) else CIFColumn(column)
    )
412
+
413
def __delitem__(self, key):
    # Refuse to remove the last remaining column
    is_last_column = len(self._columns) == 1
    if is_last_column:
        raise ValueError("At least one column must remain")
    del self._columns[key]
417
+
418
def __contains__(self, key):
    # Membership is checked against the column names
    is_column_name = key in self._columns
    return is_column_name
420
+
421
def __iter__(self):
    # Iterate over the column names
    column_names = iter(self._columns)
    return column_names
423
+
424
def __len__(self):
    # The length is the number of columns, not the number of rows
    column_count = len(self._columns)
    return column_count
426
+
427
def __eq__(self, other):
    # Row count can be omitted here, as it is based on the columns
    if not isinstance(other, type(self)):
        return False
    # Both categories need the same column names...
    if set(self.keys()) != set(other.keys()):
        return False
    # ...and each pair of columns with the same name must be equal
    return not any(self[col_name] != other[col_name] for col_name in self.keys())
437
+
438
@staticmethod
def _deserialize_single(lines):
    """
    Process a category where each field has a single value.

    Parameters
    ----------
    lines : list of str
        The lines of the category.
        Each line contains either a field name and its value or
        only a field name, whose value is on the following line.

    Returns
    -------
    category_dict : dict of (str -> CIFColumn)
        The parsed columns, mapped from the column names.

    Raises
    ------
    DeserializationError
        If a line cannot be split into name and value, a field name
        contains no ``.`` or the value of the last field is missing.
    """
    category_dict = {}
    line_i = 0
    while line_i < len(lines):
        line = lines[line_i]
        parts = list(_split_one_line(line))
        if len(parts) == 2:
            # Standard case -> name and value in one line
            name_part, value_part = parts
            line_i += 1
        elif len(parts) == 1:
            # Value is a multiline value on the next line
            name_part = parts[0]
            if line_i + 1 >= len(lines):
                # The name is on the last line -> there is no value line
                raise DeserializationError(
                    f"Missing value for line '{line}'"
                )
            parts = list(_split_one_line(lines[line_i + 1]))
            if len(parts) == 1:
                value_part = parts[0]
            else:
                raise DeserializationError(f"Failed to parse line '{line}'")
            line_i += 2
        elif len(parts) == 0:
            raise DeserializationError("Empty line within category")
        else:
            raise DeserializationError(f"Failed to parse line '{line}'")

        # The column name is the part after the first '.'
        name_fields = name_part.split(".")
        if len(name_fields) < 2:
            raise DeserializationError(f"Failed to parse line '{line}'")
        category_dict[name_fields[1]] = CIFColumn(value_part)
    return category_dict
467
+
468
@staticmethod
def _deserialize_looped(lines):
    """
    Process a category where each field has multiple values
    (category is a table).

    Parameters
    ----------
    lines : list of str
        The lines of the category: first the lines starting with
        ``'_'``, each naming one column, then the lines containing the
        values.

    Returns
    -------
    category_dict : dict
        Maps each column name to the list of its values.

    Raises
    ------
    DeserializationError
        If the number of values is not a multiple of the number of
        columns.
    """
    category_dict = {}
    column_names = []
    # The leading run of lines starting with '_' declares the columns
    for entry in lines:
        if entry[0] != "_":
            break
        name = entry.split(".")[1]
        column_names.append(name)
        category_dict[name] = []

    # Rows may be split over multiple lines -> do not rely on
    # row-line-alignment at all and simply cycle through columns
    column_cycle = itertools.cycle(range(len(column_names)))
    # One name was appended per key line, so the values start right after
    for data_line in lines[len(column_names) :]:
        for value in _split_one_line(data_line):
            target_column = column_names[next(column_cycle)]
            category_dict[target_column].append(value)

    # After the last complete row the cycle is at the first column again
    # Anything else indicates a parsing error or an invalid CIF file
    if next(column_cycle) != 0:
        raise DeserializationError(
            "Category contains columns with different lengths"
        )

    return category_dict
507
+
508
def _serialize_single(self):
    """
    Serialize a category, where each column contains a single value.

    Returns
    -------
    lines : list of str
        One line per column, consisting of the full field name
        (``_<category>.<column>``) and the escaped value.
    """
    full_names = [
        "_" + self._name + "." + column_name for column_name in self.keys()
    ]
    # All values start in the same text column:
    # three whitespace characters after the longest field name
    width = max(map(len, full_names)) + 3
    serialized = []
    for full_name, column in zip(full_names, self.values()):
        entry = full_name.ljust(width) + _escape(column.as_item())
        # Remove potential terminal newlines from multiline values
        serialized.append(entry.strip())
    return serialized
518
+
519
def _serialize_looped(self):
    """
    Serialize a category, where each column contains multiple values.

    Returns
    -------
    lines : list of str
        The ``loop_`` line, followed by one line per column name and
        one line per row.
    """
    header_lines = ["_" + self._name + "." + key + " " for key in self.keys()]

    # Quote before measuring the number of chars,
    # as the quote characters modify the length
    escaped_columns = [
        np.array([_escape(element) for element in column.as_array(str)])
        for column in self.values()
    ]

    # Number of characters the longest string in the column needs
    # This can be deduced from the dtype
    # The "+1" is for the small whitespace column
    widths = [
        escaped.dtype.itemsize // UNICODE_CHAR_SIZE + 1
        for escaped in escaped_columns
    ]

    row_lines = []
    for row_i in range(self._row_count):
        row = "".join(
            escaped[row_i].ljust(width)
            for escaped, width in zip(escaped_columns, widths)
        )
        # Remove trailing justification of last column
        # and potential terminal newlines from multiline values
        row_lines.append(row.strip())

    return ["loop_"] + header_lines + row_lines
545
+
546
+
547
class CIFBlock(_Component, MutableMapping):
    """
    This class represents a block in a :class:`CIFFile`.

    Categories can be accessed and modified like a dictionary.
    The values are :class:`CIFCategory` objects.

    Parameters
    ----------
    categories : dict, optional
        The categories of the block.
        The keys are the category names and the values are the
        :class:`CIFCategory` objects.
        By default, an empty block is created.
    name : str, optional
        The name of the block.
        This is only used for serialization and is automatically set,
        when the :class:`CIFBlock` is added to a :class:`CIFFile`.
        It only needs to be set manually, when the block is directly
        serialized.

    Attributes
    ----------
    name : str
        The name of the block.

    Notes
    -----
    The category names do not include the leading underscore character.
    This character is automatically added when the category is
    serialized.

    Categories are deserialized lazily: after :meth:`deserialize()` each
    category is kept as text and is only converted into a
    :class:`CIFCategory` when it is accessed for the first time.

    Examples
    --------

    >>> # Add category on creation
    >>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})}, name="baz")
    >>> # Add category later on
    >>> block["bar"] = CIFCategory({"another_column": [2, 3]})
    >>> # Access a column
    >>> print(block["bar"]["another_column"].as_array())
    ['2' '3']
    >>> print(block.serialize())
    data_baz
    #
    _foo.some_column   1
    #
    loop_
    _bar.another_column
    2
    3
    #
    """

    def __init__(self, categories=None, name=None):
        self._name = name
        if categories is None:
            categories = {}
        # Values are either 'CIFCategory' objects or their serialized text
        self._categories = categories

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, name):
        self._name = name

    @staticmethod
    def subcomponent_class():
        return CIFCategory

    @staticmethod
    def supercomponent_class():
        return CIFFile

    @staticmethod
    def deserialize(text):
        """
        Create a :class:`CIFBlock` from the text of a block.

        Only the line ranges of the categories are determined here.
        The categories themselves are kept as text until they are
        accessed.

        Parameters
        ----------
        text : str
            The text of the block.

        Returns
        -------
        block : CIFBlock
            The block with its categories in serialized form.

        Raises
        ------
        DeserializationError
            If a ``loop_`` line is not followed by another line.
        """
        lines = text.splitlines()
        current_category_name = None
        category_starts = []
        category_names = []
        for i, line in enumerate(lines):
            if _is_empty(line):
                continue
            is_loop_in_line = _is_loop_start(line)
            category_name_in_line = _parse_category_name(line)
            if is_loop_in_line or (
                category_name_in_line != current_category_name
                and category_name_in_line is not None
            ):
                # Track the new category
                if is_loop_in_line:
                    # In case of lines with "loop_" the category is
                    # in the next line
                    next_line = lines[i + 1] if i + 1 < len(lines) else ""
                    if not next_line:
                        # Previously this surfaced as a bare 'IndexError'
                        raise DeserializationError(
                            "'loop_' is not followed by a category"
                        )
                    category_name_in_line = _parse_category_name(next_line)
                current_category_name = category_name_in_line
                category_starts.append(i)
                category_names.append(current_category_name)
        return CIFBlock(_create_element_dict(lines, category_names, category_starts))

    def serialize(self):
        """
        Convert the block into its text representation.

        Returns
        -------
        text : str
            The ``data_<name>`` line, followed by all categories, each
            one terminated by a ``#`` comment line.

        Raises
        ------
        SerializationError
            If the block has no name or a category cannot be
            serialized.
        """
        if self._name is None:
            raise SerializationError("Block name is required")
        # The block starts with the block name line followed by a comment line
        text_blocks = ["data_" + self._name + "\n#\n"]
        for category_name, category in self._categories.items():
            if isinstance(category, str):
                # Category is already stored as lines
                text_blocks.append(category)
            else:
                try:
                    category.name = category_name
                    text_blocks.append(category.serialize())
                except Exception as err:
                    # Keep the original error as explicit cause
                    raise SerializationError(
                        f"Failed to serialize category '{category_name}'"
                    ) from err
            # A comment line is set after each category
            text_blocks.append("#\n")
        return "".join(text_blocks)

    def __getitem__(self, key):
        category = self._categories[key]
        if isinstance(category, str):
            # Element is stored in serialized form
            # -> must be deserialized first
            try:
                category = CIFCategory.deserialize(category)
            except Exception as err:
                raise DeserializationError(
                    f"Failed to deserialize category '{key}'"
                ) from err
            # Update with deserialized object
            self._categories[key] = category
        return category

    def __setitem__(self, key, category):
        if not isinstance(category, CIFCategory):
            raise TypeError(
                f"Expected 'CIFCategory', but got '{type(category).__name__}'"
            )
        category.name = key
        self._categories[key] = category

    def __delitem__(self, key):
        del self._categories[key]

    def __contains__(self, key):
        return key in self._categories

    def __iter__(self):
        return iter(self._categories)

    def __len__(self):
        return len(self._categories)

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        if set(self.keys()) != set(other.keys()):
            return False
        # Item access deserializes lazily stored categories on both sides
        for cat_name in self.keys():
            if self[cat_name] != other[cat_name]:
                return False
        return True
710
+
711
+
712
class CIFFile(_Component, File, MutableMapping):
    """
    This class represents a CIF file.

    The blocks of the file can be accessed and modified like a
    dictionary.
    The values are :class:`CIFBlock` objects.

    To parse or write a structure from/to a :class:`CIFFile` object,
    use the high-level :func:`get_structure()` or
    :func:`set_structure()` function respectively.

    Parameters
    ----------
    blocks : dict, optional
        The blocks of the file.
        The keys are the block names and the values are the
        :class:`CIFBlock` objects.
        By default, an empty file is created.

    Notes
    -----
    The content of CIF files are lazily deserialized:
    When reading the file only the line positions of all blocks are
    indexed.
    The time consuming deserialization of a block/category is only
    performed when accessed.
    The deserialized :class:`CIFBlock`/:class:`CIFCategory` objects
    are cached for subsequent accesses.

    Attributes
    ----------
    block : CIFBlock
        The sole block of the file.
        If the file contains no block or multiple blocks, an exception
        is raised.

    Examples
    --------
    Read a CIF file and access its content:

    >>> import os.path
    >>> file = CIFFile.read(os.path.join(path_to_structures, "1l2y.cif"))
    >>> print(file["1L2Y"]["citation_author"]["name"].as_array())
    ['Neidigh, J.W.' 'Fesinmeyer, R.M.' 'Andersen, N.H.']
    >>> # Access the only block in the file
    >>> print(file.block["entity"]["pdbx_description"].as_item())
    TC5b

    Create a CIF file and write it to disk:

    >>> category = CIFCategory(
    ...     {"some_column": "some_value", "another_column": "another_value"}
    ... )
    >>> block = CIFBlock({"some_category": category, "another_category": category})
    >>> file = CIFFile({"some_block": block, "another_block": block})
    >>> print(file.serialize())
    data_some_block
    #
    _some_category.some_column      some_value
    _some_category.another_column   another_value
    #
    _another_category.some_column      some_value
    _another_category.another_column   another_value
    #
    data_another_block
    #
    _some_category.some_column      some_value
    _some_category.another_column   another_value
    #
    _another_category.some_column      some_value
    _another_category.another_column   another_value
    #
    >>> file.write(os.path.join(path_to_directory, "some_file.cif"))
    """

    def __init__(self, blocks=None):
        if blocks is None:
            blocks = {}
        # Values are either 'CIFBlock' objects or their serialized text
        self._blocks = blocks

    @property
    def lines(self):
        return self.serialize().splitlines()

    @property
    def block(self):
        n_blocks = len(self)
        if n_blocks == 0:
            # An empty file used to be reported as containing multiple blocks
            raise ValueError("There are no blocks in the file")
        if n_blocks > 1:
            raise ValueError("There are multiple blocks in the file")
        return self[next(iter(self))]

    @staticmethod
    def subcomponent_class():
        return CIFBlock

    @staticmethod
    def supercomponent_class():
        return None

    @staticmethod
    def deserialize(text):
        """
        Create a :class:`CIFFile` from the text of a CIF file.

        Only the line ranges of the blocks are determined here.
        The blocks themselves are kept as text until they are accessed.

        Parameters
        ----------
        text : str
            The content of the CIF file.

        Returns
        -------
        file_object : CIFFile
            The file with its blocks in serialized form.
        """
        lines = text.splitlines()
        block_starts = []
        block_names = []
        for i, line in enumerate(lines):
            if _is_empty(line):
                continue
            data_block_name = _parse_data_block_name(line)
            if data_block_name is not None:
                block_starts.append(i)
                block_names.append(data_block_name)
        return CIFFile(_create_element_dict(lines, block_names, block_starts))

    def serialize(self):
        """
        Convert the file into its text representation.

        Returns
        -------
        text : str
            The concatenated text of all blocks.

        Raises
        ------
        SerializationError
            If a block cannot be serialized.
        """
        text_blocks = []
        for block_name, block in self._blocks.items():
            if isinstance(block, str):
                # Block is already stored as text
                text_blocks.append(block)
            else:
                try:
                    block.name = block_name
                    text_blocks.append(block.serialize())
                except Exception as err:
                    # Keep the original error as explicit cause
                    raise SerializationError(
                        f"Failed to serialize block '{block_name}'"
                    ) from err
        # Each block already ends with a line break,
        # hence no terminal line break needs to be added here
        return "".join(text_blocks)

    @classmethod
    def read(cls, file):
        """
        Read a CIF file.

        Parameters
        ----------
        file : file-like object or str
            The file to be read.
            Alternatively a file path can be supplied.

        Returns
        -------
        file_object : CIFFile
            The parsed file.
        """
        # File name
        if is_open_compatible(file):
            with open(file, "r") as f:
                text = f.read()
        # File object
        else:
            if not is_text(file):
                raise TypeError("A file opened in 'text' mode is required")
            text = file.read()
        return CIFFile.deserialize(text)

    def write(self, file):
        """
        Write the contents of this object into a CIF file.

        Parameters
        ----------
        file : file-like object or str
            The file to be written to.
            Alternatively a file path can be supplied.
        """
        if is_open_compatible(file):
            with open(file, "w") as f:
                f.write(self.serialize())
        else:
            if not is_text(file):
                raise TypeError("A file opened in 'text' mode is required")
            file.write(self.serialize())

    def __getitem__(self, key):
        block = self._blocks[key]
        if isinstance(block, str):
            # Element is stored in serialized form
            # -> must be deserialized first
            try:
                block = CIFBlock.deserialize(block)
            except Exception as err:
                raise DeserializationError(
                    f"Failed to deserialize block '{key}'"
                ) from err
            # Update with deserialized object
            self._blocks[key] = block
        return block

    def __setitem__(self, key, block):
        if not isinstance(block, CIFBlock):
            raise TypeError(f"Expected 'CIFBlock', but got '{type(block).__name__}'")
        block.name = key
        self._blocks[key] = block

    def __delitem__(self, key):
        del self._blocks[key]

    def __contains__(self, key):
        return key in self._blocks

    def __iter__(self):
        return iter(self._blocks)

    def __len__(self):
        return len(self._blocks)

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        if set(self.keys()) != set(other.keys()):
            return False
        # Item access deserializes lazily stored blocks on both sides
        for block_name in self.keys():
            if self[block_name] != other[block_name]:
                return False
        return True
918
+
919
+
920
+ def _is_empty(line):
921
+ return len(line.strip()) == 0 or line[0] == "#"
922
+
923
+
924
+ def _create_element_dict(lines, element_names, element_starts):
925
+ """
926
+ Create a dict mapping the `element_names` to the corresponding
927
+ `lines`, which are located between ``element_starts[i]`` and
928
+ ``element_starts[i+1]``.
929
+ """
930
+ # Add exclusive stop to indices for easier slicing
931
+ element_starts.append(len(lines))
932
+ # Lazy deserialization
933
+ # -> keep as text for now and deserialize later if needed
934
+ return {
935
+ element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]]) + "\n"
936
+ for i, element_name in enumerate(element_names)
937
+ }
938
+
939
+
940
+ def _parse_data_block_name(line):
941
+ """
942
+ If the line defines a data block, return this name.
943
+ Return ``None`` otherwise.
944
+ """
945
+ if line.startswith("data_"):
946
+ return line[5:]
947
+ else:
948
+ return None
949
+
950
+
951
+ def _parse_category_name(line):
952
+ """
953
+ If the line defines a category, return this name.
954
+ Return ``None`` otherwise.
955
+ """
956
+ if line[0] != "_":
957
+ return None
958
+ else:
959
+ return line[1 : line.find(".")]
960
+
961
+
962
+ def _is_loop_start(line):
963
+ """
964
+ Return whether the line starts a looped category.
965
+ """
966
+ return line.startswith("loop_")
967
+
968
+
969
+ def _to_single(lines):
970
+ r"""
971
+ Convert multiline values into singleline values
972
+ (in terms of 'lines' list elements).
973
+ Linebreaks are preserved as ``'\n'`` characters within a list element.
974
+ The initial ``';'`` character is also preserved, while the final ``';'`` character
975
+ is removed.
976
+ """
977
+ processed_lines = []
978
+ in_multi_line = False
979
+ mutli_line_value = []
980
+ for line in lines:
981
+ # Multiline value are enclosed by ';' at the start of the beginning and end line
982
+ if line[0] == ";":
983
+ if not in_multi_line:
984
+ # Start of multiline value
985
+ in_multi_line = True
986
+ mutli_line_value.append(line)
987
+ else:
988
+ # End of multiline value
989
+ in_multi_line = False
990
+ # The current line contains only the end character ';'
991
+ # Hence this line is not added to the processed lines
992
+ processed_lines.append("\n".join(mutli_line_value))
993
+ mutli_line_value = []
994
+ else:
995
+ if in_multi_line:
996
+ mutli_line_value.append(line)
997
+ else:
998
+ processed_lines.append(line)
999
+ return processed_lines
1000
+
1001
+
1002
+ def _escape(value):
1003
+ """
1004
+ Escape special characters in a value to make it compatible with CIF.
1005
+ """
1006
+ if "\n" in value:
1007
+ # A value with linebreaks must be represented as multiline value
1008
+ return _multiline(value)
1009
+ elif "'" in value and '"' in value:
1010
+ # If both quote types are present, you cannot use them for escaping
1011
+ return _multiline(value)
1012
+ elif len(value) == 0:
1013
+ return "''"
1014
+ elif value[0] == "_":
1015
+ return "'" + value + "'"
1016
+ elif "'" in value:
1017
+ return '"' + value + '"'
1018
+ elif '"' in value:
1019
+ return "'" + value + "'"
1020
+ elif " " in value:
1021
+ return "'" + value + "'"
1022
+ elif "\t" in value:
1023
+ return "'" + value + "'"
1024
+ else:
1025
+ return value
1026
+
1027
+
1028
+ def _multiline(value):
1029
+ """
1030
+ Convert a string that may contain linebreaks into CIF-compatible
1031
+ multiline string.
1032
+ """
1033
+ return "\n;" + value + "\n;\n"
1034
+
1035
+
1036
+ def _split_one_line(line):
1037
+ """
1038
+ Split a line into its fields.
1039
+ Supporting embedded quotes (' or "), like `'a dog's life'` to `a dog's life`
1040
+ """
1041
+ # Special case of multiline value, where the line starts with ';'
1042
+ if line[0] == ";":
1043
+ yield line[1:]
1044
+ elif "'" in line or '"' in line:
1045
+ # Quoted values in the line
1046
+ while line:
1047
+ # Strip leading whitespace(s)
1048
+ stripped_line = line.lstrip()
1049
+ # Split the line on whitespace
1050
+ word, _, line = stripped_line.partition(" ")
1051
+ # Handle the case where the word start with a quote
1052
+ if word.startswith(("'", '"')):
1053
+ # Set the separator to the quote found
1054
+ separator = word[0]
1055
+ # Handle the case of a quoted word without space
1056
+ if word.endswith(separator) and len(word) > 1:
1057
+ # Yield the word without the opening and closing quotes
1058
+ yield word[1:-1]
1059
+ continue
1060
+ # split the word on the separator
1061
+ word, _, line = stripped_line[1:].partition(separator)
1062
+
1063
+ yield word
1064
+ else:
1065
+ # No quoted values in the line -> simple whitespace split
1066
+ for line in line.split():
1067
+ yield line
1068
+
1069
+
1070
+ def _arrayfy(data):
1071
+ if not isinstance(data, (Sequence, np.ndarray)) or isinstance(data, str):
1072
+ data = [data]
1073
+ elif len(data) == 0:
1074
+ raise ValueError("Array must contain at least one element")
1075
+ return np.asarray(data)