biotite 1.1.0__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +6 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,914 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.mol"
6
+ __author__ = "Patrick Kunzmann, Benjamin Mayer"
7
+ __all__ = ["SDFile", "SDRecord", "Metadata"]
8
+
9
+ import re
10
+ import warnings
11
+ from collections.abc import Mapping, MutableMapping
12
+ from dataclasses import dataclass
13
+ import numpy as np
14
+ from biotite.file import (
15
+ DeserializationError,
16
+ File,
17
+ InvalidFileError,
18
+ SerializationError,
19
+ is_open_compatible,
20
+ is_text,
21
+ )
22
+ from biotite.structure.atoms import AtomArray
23
+ from biotite.structure.bonds import BondList, BondType
24
+ from biotite.structure.io.mol.ctab import (
25
+ read_structure_from_ctab,
26
+ write_structure_to_ctab,
27
+ )
28
+ from biotite.structure.io.mol.header import Header
29
+
30
+ _N_HEADER = 3
31
+ # Number of header lines
32
+ _RECORD_DELIMITER = "$$$$"
33
+
34
+
35
+ class Metadata(MutableMapping):
36
+ r"""
37
+ Additional nonstructural data in an SD record.
38
+
39
+ The metadata is stored as key-value pairs.
40
+ As SDF allows multiple different identifiers for keys,
41
+ the keys are represented by :class:`Metadata.Key`.
42
+
43
+ Parameters
44
+ ----------
45
+ metadata : dict, optional
46
+ The metadata as key-value pairs.
47
+ Keys are instances of :class:`Metadata.Key`.
48
+ Alternatively, keys can be given as strings, in which case the
49
+ string is used as the :attr:`Metadata.Key.name`.
50
+ Values are strings.
51
+ Line breaks in values are allowed.
52
+
53
+ Notes
54
+ -----
55
+ Key names may only contain alphanumeric characters, underscores and
56
+ periods.
57
+
58
+ Examples
59
+ --------
60
+
61
+ >>> metadata = Metadata({
62
+ ... "foo": "Lorem ipsum",
63
+ ... Metadata.Key(number=42, name="bar"): "dolor sit amet,\nconsectetur"
64
+ ... })
65
+ >>> print(metadata)
66
+ > <foo>
67
+ Lorem ipsum
68
+ <BLANKLINE>
69
+ > DT42 <bar>
70
+ dolor sit amet,
71
+ consectetur
72
+ <BLANKLINE>
73
+ >>> print(metadata["foo"])
74
+ Lorem ipsum
75
+ >>> # Strings can be only used for access, if the key contains only a name
76
+ >>> print("bar" in metadata)
77
+ False
78
+ >>> print(metadata[Metadata.Key(number=42, name="bar")])
79
+ dolor sit amet,
80
+ consectetur
81
+
82
+ """
83
+
84
+ @dataclass(frozen=True, kw_only=True)
85
+ class Key:
86
+ """
87
+ A metadata key.
88
+
89
+ Parameters
90
+ ----------
91
+ number : int, optional
92
+ number of the field in the database.
93
+ name : str, optional
94
+ Name of the field.
95
+ May only contain alphanumeric characters, underscores and
96
+ periods.
97
+ registry_internal : int, optional
98
+ Internal registry number.
99
+ registry_external : str, optional
100
+ External registry number.
101
+
102
+ Attributes
103
+ ----------
104
+ number, name, registry_internal, registry_external
105
+ The same as the parameters.
106
+ """
107
+
108
+ # The characters that can be given as input to `name`
109
+ # First character must be alphanumeric,
110
+ # following characters may include underscores and periods
111
+ # Although the V3000 specification forbids the use of periods,
112
+ # they are still used in practice and therefore allowed here
113
+ _NAME_INPUT_REGEX = re.compile(r"^[a-zA-Z0-9][\w.]*$")
114
+ # These regexes are used to parse the key from a line
115
+ _COMPONENT_REGEX = {
116
+ "number": re.compile(r"^DT(\d+)$"),
117
+ "name": re.compile(r"^<([a-zA-Z0-9][\w.]*)>$"),
118
+ "registry_internal": re.compile(r"^(\d+)$"),
119
+ "registry_external": re.compile(r"^\(([\w.-]*)\)$"),
120
+ }
121
+
122
+ number: ... = None
123
+ name: ... = None
124
+ registry_internal: ... = None
125
+ registry_external: ... = None
126
+
127
+ def __post_init__(self):
128
+ if self.name is None and self.number is None:
129
+ raise ValueError("At least the field number or name must be set")
130
+ if self.name is not None:
131
+ if not Metadata.Key._NAME_INPUT_REGEX.match(self.name):
132
+ raise ValueError(
133
+ f"Invalid name '{self.name}', must only contains "
134
+ "alphanumeric characters, underscores and periods"
135
+ )
136
+ if self.number is not None:
137
+ # Cannot set field directly as 'frozen=True'
138
+ object.__setattr__(self, "number", int(self.number))
139
+ if self.registry_internal is not None:
140
+ object.__setattr__(
141
+ self, "registry_internal", int(self.registry_internal)
142
+ )
143
+
144
+ @staticmethod
145
+ def deserialize(text):
146
+ """
147
+ Create an object by deserializing the given text content.
148
+
149
+ Parameters
150
+ ----------
151
+ content : str
152
+ The content to be deserialized.
153
+ """
154
+ # Omit the leading '>'
155
+ key_components = text[1:].split()
156
+ parsed_component_dict = {}
157
+ for component in key_components:
158
+ # For each component in each the key,
159
+ # try to match it with each of the regexes
160
+ for attr_name, regex in Metadata.Key._COMPONENT_REGEX.items():
161
+ pattern_match = regex.match(component)
162
+ if pattern_match is None:
163
+ # Try next pattern
164
+ continue
165
+ if attr_name in parsed_component_dict:
166
+ raise DeserializationError(
167
+ f"Duplicate key component for '{attr_name}'"
168
+ )
169
+ value = pattern_match.group(1)
170
+ parsed_component_dict[attr_name] = value
171
+ break
172
+ else:
173
+ # There is no matching pattern
174
+ raise DeserializationError(f"Invalid key component '{component}'")
175
+ return Metadata.Key(**parsed_component_dict)
176
+
177
+ def serialize(self):
178
+ """
179
+ Convert this object into text content.
180
+
181
+ Returns
182
+ -------
183
+ content : str
184
+ The serialized content.
185
+ """
186
+ key_string = "> "
187
+ if self.number is not None:
188
+ key_string += f"DT{self.number} "
189
+ if self.name is not None:
190
+ key_string += f"<{self.name}> "
191
+ if self.registry_internal is not None:
192
+ key_string += f"{self.registry_internal} "
193
+ if self.registry_external is not None:
194
+ key_string += f"({self.registry_external}) "
195
+ return key_string
196
+
197
+ def __str__(self):
198
+ return self.serialize()
199
+
200
+ def __init__(self, metadata=None):
201
+ if metadata is None:
202
+ metadata = {}
203
+ self._metadata = {}
204
+ for key, value in metadata.items():
205
+ self._metadata[_to_metadata_key(key)] = value
206
+
207
+ @staticmethod
208
+ def deserialize(text):
209
+ """
210
+ Create an object by deserializing the given text content.
211
+
212
+ Parameters
213
+ ----------
214
+ content : str
215
+ The content to be deserialized.
216
+ """
217
+ metadata = {}
218
+ current_key = None
219
+ current_value = None
220
+ for line in text.splitlines():
221
+ line = line.strip()
222
+ if len(line) == 0:
223
+ # Skip empty lines
224
+ continue
225
+ if line.startswith(">"):
226
+ _add_key_value_pair(metadata, current_key, current_value)
227
+ current_key = Metadata.Key.deserialize(line)
228
+ current_value = None
229
+ else:
230
+ if current_key is None:
231
+ raise DeserializationError("Value found before metadata key")
232
+ if current_value is None:
233
+ current_value = line
234
+ else:
235
+ current_value += "\n" + line
236
+ # Add final pair
237
+ _add_key_value_pair(metadata, current_key, current_value)
238
+ return Metadata(metadata)
239
+
240
+ def serialize(self):
241
+ """
242
+ Convert this object into text content.
243
+
244
+ Returns
245
+ -------
246
+ content : str
247
+ The serialized content.
248
+ """
249
+ text_blocks = []
250
+ for key, value in self._metadata.items():
251
+ text_blocks.append(key.serialize())
252
+ # Add empty line after value
253
+ text_blocks.append(value + "\n")
254
+ return _join_with_terminal_newline(text_blocks)
255
+
256
+ def __getitem__(self, key):
257
+ return self._metadata[_to_metadata_key(key)]
258
+
259
+ def __setitem__(self, key, value):
260
+ if len(value) == 0:
261
+ raise ValueError("Metadata value must not be empty")
262
+ self._metadata[_to_metadata_key(key)] = value
263
+
264
+ def __delitem__(self, key):
265
+ del self._metadata[_to_metadata_key(key)]
266
+
267
+ def __iter__(self):
268
+ return iter(self._metadata)
269
+
270
+ def __len__(self):
271
+ return len(self._metadata)
272
+
273
+ def __eq__(self, other):
274
+ if not isinstance(other, type(self)):
275
+ return False
276
+ if set(self.keys()) != set(other.keys()):
277
+ return False
278
+ for key in self.keys():
279
+ if self[key] != other[key]:
280
+ return False
281
+ return True
282
+
283
+ def __str__(self):
284
+ return self.serialize()
285
+
286
+
287
+ class SDRecord:
288
+ """
289
+ A record in a SD file.
290
+
291
+ Parameters
292
+ ----------
293
+ header : Header, optional
294
+ The header of the record.
295
+ By default, an empty header is created.
296
+ ctab : str, optional
297
+ The connection table (atoms and bonds) in the record.
298
+ By default, an empty structure is created.
299
+ metadata : Metadata, Mapping or str, optional
300
+ The metadata of the record.
301
+ Can be given as dictionary mapping :attr:`Metadata.Key.name`
302
+ to the respective metadata value.
303
+ By default, no metadata is appended to the record.
304
+
305
+ Attributes
306
+ ----------
307
+ header, ctab, metadata
308
+ The same as the parameters.
309
+
310
+ Examples
311
+ --------
312
+
313
+ >>> atoms = residue("ALA")
314
+ >>> record = SDRecord(header=Header(mol_name="ALA", dimensions="3D"))
315
+ >>> record.set_structure(atoms)
316
+ >>> print(record.get_structure())
317
+ 0 N -0.966 0.493 1.500
318
+ 0 C 0.257 0.418 0.692
319
+ 0 C -0.094 0.017 -0.716
320
+ 0 O -1.056 -0.682 -0.923
321
+ 0 C 1.204 -0.620 1.296
322
+ 0 O 0.661 0.439 -1.742
323
+ 0 H -1.383 -0.425 1.482
324
+ 0 H -0.676 0.661 2.452
325
+ 0 H 0.746 1.392 0.682
326
+ 0 H 1.459 -0.330 2.316
327
+ 0 H 0.715 -1.594 1.307
328
+ 0 H 2.113 -0.676 0.697
329
+ 0 H 0.435 0.182 -2.647
330
+ >>> # Add the record to an SD file
331
+ >>> file = SDFile()
332
+ >>> file["ALA"] = record
333
+ >>> print(file)
334
+ ALA
335
+ 3D
336
+ <BLANKLINE>
337
+ 13 12 0 0 0 0 0 0 0 1 V2000
338
+ -0.9660 0.4930 1.5000 N 0 0 0 0 0 0 0 0 0 0 0 0
339
+ 0.2570 0.4180 0.6920 C 0 0 0 0 0 0 0 0 0 0 0 0
340
+ -0.0940 0.0170 -0.7160 C 0 0 0 0 0 0 0 0 0 0 0 0
341
+ -1.0560 -0.6820 -0.9230 O 0 0 0 0 0 0 0 0 0 0 0 0
342
+ 1.2040 -0.6200 1.2960 C 0 0 0 0 0 0 0 0 0 0 0 0
343
+ 0.6610 0.4390 -1.7420 O 0 0 0 0 0 0 0 0 0 0 0 0
344
+ -1.3830 -0.4250 1.4820 H 0 0 0 0 0 0 0 0 0 0 0 0
345
+ -0.6760 0.6610 2.4520 H 0 0 0 0 0 0 0 0 0 0 0 0
346
+ 0.7460 1.3920 0.6820 H 0 0 0 0 0 0 0 0 0 0 0 0
347
+ 1.4590 -0.3300 2.3160 H 0 0 0 0 0 0 0 0 0 0 0 0
348
+ 0.7150 -1.5940 1.3070 H 0 0 0 0 0 0 0 0 0 0 0 0
349
+ 2.1130 -0.6760 0.6970 H 0 0 0 0 0 0 0 0 0 0 0 0
350
+ 0.4350 0.1820 -2.6470 H 0 0 0 0 0 0 0 0 0 0 0 0
351
+ 1 2 1 0 0 0 0
352
+ 1 7 1 0 0 0 0
353
+ 1 8 1 0 0 0 0
354
+ 2 3 1 0 0 0 0
355
+ 2 5 1 0 0 0 0
356
+ 2 9 1 0 0 0 0
357
+ 3 4 2 0 0 0 0
358
+ 3 6 1 0 0 0 0
359
+ 5 10 1 0 0 0 0
360
+ 5 11 1 0 0 0 0
361
+ 5 12 1 0 0 0 0
362
+ 6 13 1 0 0 0 0
363
+ M END
364
+ $$$$
365
+ <BLANKLINE>
366
+ """
367
+
368
def __init__(self, header=None, ctab=None, metadata=None):
    """
    Set up the three sections of the record.

    A missing `header` is replaced by a default :class:`Header` and
    missing `metadata` by an empty :class:`Metadata`.
    `ctab` is stored as given, including ``None``.
    A `metadata` string is kept in its serialized form and only
    deserialized on first access of the ``metadata`` property.
    """
    self._header = Header() if header is None else header
    self._ctab = ctab

    if metadata is None:
        metadata = Metadata()
    elif isinstance(metadata, Mapping) and not isinstance(metadata, Metadata):
        # Plain mapping -> convert into a proper 'Metadata' object
        metadata = Metadata(metadata)
    elif not isinstance(metadata, (Metadata, str)):
        raise TypeError(
            "Expected 'Metadata', Mapping or str, "
            f"but got '{type(metadata).__name__}'"
        )
    # Either a 'Metadata' object or the serialized form,
    # which will be lazily deserialized
    self._metadata = metadata
390
+
391
+ @property
392
+ def header(self):
393
+ if isinstance(self._header, str):
394
+ try:
395
+ self._header = Header.deserialize(self._header)
396
+ except Exception:
397
+ raise DeserializationError("Failed to deserialize header")
398
+ return self._header
399
+
400
+ @header.setter
401
+ def header(self, header):
402
+ self._header = header
403
+
404
+ @property
405
+ def ctab(self):
406
+ # CTAB string cannot be changed directly -> no setter
407
+ return self._ctab
408
+
409
+ @property
410
+ def metadata(self):
411
+ if isinstance(self._metadata, str):
412
+ try:
413
+ self._metadata = Metadata.deserialize(self._metadata)
414
+ except Exception:
415
+ raise DeserializationError("Failed to deserialize metadata")
416
+ return self._metadata
417
+
418
+ @metadata.setter
419
+ def metadata(self, metadata):
420
+ if isinstance(metadata, Metadata):
421
+ self._metadata = metadata
422
+ elif isinstance(metadata, Mapping):
423
+ self._metadata = Metadata(metadata)
424
+ else:
425
+ raise TypeError(
426
+ "Expected 'Metadata' or Mapping, "
427
+ f"but got '{type(metadata).__name__}'"
428
+ )
429
+
430
@staticmethod
def deserialize(text):
    """
    Create an object by deserializing the given text content.

    The sections are only split here and passed on as strings.

    Parameters
    ----------
    text : str
        The content to be deserialized.

    Returns
    -------
    record : SDRecord
        The record built from the header, CTAB and metadata section
        of the text.
    """
    record_lines = text.splitlines()
    # The CTAB ends where the metadata section starts
    metadata_start = _get_ctab_stop(record_lines)

    sections = (
        record_lines[:_N_HEADER],
        record_lines[_N_HEADER:metadata_start],
        record_lines[metadata_start:],
    )
    header, ctab, metadata = [
        _join_with_terminal_newline(section) for section in sections
    ]
    return SDRecord(header, ctab, metadata)
447
+
448
def serialize(self):
    """
    Convert this record into text content.

    Sections that are still stored as text are used as they are,
    the other ones are serialized.
    A missing CTAB is replaced by the text of an empty CTAB.

    Returns
    -------
    content : str
        The serialized content.
    """
    stored_header = self._header
    header_part = (
        stored_header if isinstance(stored_header, str) else stored_header.serialize()
    )

    stored_ctab = self._ctab
    ctab_part = _empty_ctab() if stored_ctab is None else stored_ctab

    stored_metadata = self._metadata
    metadata_part = (
        stored_metadata
        if isinstance(stored_metadata, str)
        else stored_metadata.serialize()
    )

    return header_part + ctab_part + metadata_part
473
+
474
def get_structure(self):
    """
    Parse the structural data in the SD record.

    Returns
    -------
    array : AtomArray
        This :class:`AtomArray` contains the optional ``charge``
        annotation and has an associated :class:`BondList`.
        All other annotation categories, except ``element`` are
        empty.

    Raises
    ------
    InvalidFileError
        If the record has no CTAB, i.e. the CTAB is either ``None``
        or contains no lines.
    """
    # A record may hold no CTAB at all (``None``):
    # treat this the same way as an empty CTAB text
    ctab_lines = [] if self._ctab is None else self._ctab.splitlines()
    if len(ctab_lines) == 0:
        raise InvalidFileError("File does not contain structure data")
    return read_structure_from_ctab(ctab_lines)
490
+
491
def set_structure(self, atoms, default_bond_type=BondType.ANY, version=None):
    """
    Set the structural data in the SD record.

    The structure is converted into CTAB lines, which replace the
    CTAB text currently stored in this record.

    Parameters
    ----------
    atoms : AtomArray
        The array to be saved into this record.
        Must have an associated :class:`BondList`.
    default_bond_type : BondType, optional
        Bond type fallback for the *Bond block*, if a
        :class:`BondType` has no CTAB counterpart.
        By default, each such bond is treated as
        :attr:`BondType.ANY`.
    version : {"V2000", "V3000"}, optional
        The version of the CTAB format.
        ``"V2000"`` uses the *Atom* and *Bond* block, while
        ``"V3000"`` uses the *Properties* block.
        By default, ``"V2000"`` is used, unless the number of atoms
        or bonds exceeds 999, in which case ``"V3000"`` is used.
    """
    ctab_lines = write_structure_to_ctab(atoms, default_bond_type, version)
    self._ctab = _join_with_terminal_newline(ctab_lines)
515
+
516
def __eq__(self, other):
    """
    Two records are equal, if they have the same type and their
    header, CTAB and metadata are equal.
    """
    if not isinstance(other, type(self)):
        return False
    # Compared via the properties, in the order header, CTAB, metadata;
    # the comparison stops at the first difference
    return all(
        getattr(self, section) == getattr(other, section)
        for section in ("header", "ctab", "metadata")
    )
526
+
527
def __str__(self):
    """
    The string representation is the serialized text content.
    """
    return self.serialize()
529
+
530
+
531
+ class SDFile(File, MutableMapping):
532
+ """
533
+ This class represents an SD file for storing small molecule
534
+ structures.
535
+
536
+ The records for each molecule in the file can be accessed and
537
+ modified like a dictionary.
538
+ The structures can be parsed and written from/to each
539
+ :class:`SDRecord` object via :func:`get_structure()` or
540
+ :func:`set_structure()`, respectively.
541
+
542
+ Attributes
543
+ ----------
544
+ record : SDRecord
545
+ The sole record of the file.
546
+ If the file contains multiple records, an exception is raised.
547
+
548
+ Examples
549
+ --------
550
+ Read a SD file and parse the molecular structure:
551
+
552
+ >>> import os.path
553
+ >>> file = SDFile.read(os.path.join(path_to_structures, "molecules", "TYR.sdf"))
554
+ >>> molecule = file.record.get_structure()
555
+ >>> print(molecule)
556
+ 0 N 1.320 0.952 1.428
557
+ 0 C -0.018 0.429 1.734
558
+ 0 C -0.103 0.094 3.201
559
+ 0 O 0.886 -0.254 3.799
560
+ 0 C -0.274 -0.831 0.907
561
+ 0 C -0.189 -0.496 -0.559
562
+ 0 C 1.022 -0.589 -1.219
563
+ 0 C -1.324 -0.102 -1.244
564
+ 0 C 1.103 -0.282 -2.563
565
+ 0 C -1.247 0.210 -2.587
566
+ 0 C -0.032 0.118 -3.252
567
+ 0 O 0.044 0.420 -4.574
568
+ 0 O -1.279 0.184 3.842
569
+ 0 H 1.977 0.225 1.669
570
+ 0 H 1.365 1.063 0.426
571
+ 0 H -0.767 1.183 1.489
572
+ 0 H 0.473 -1.585 1.152
573
+ 0 H -1.268 -1.219 1.134
574
+ 0 H 1.905 -0.902 -0.683
575
+ 0 H -2.269 -0.031 -0.727
576
+ 0 H 2.049 -0.354 -3.078
577
+ 0 H -2.132 0.523 -3.121
578
+ 0 H -0.123 -0.399 -5.059
579
+ 0 H -1.333 -0.030 4.784
580
+
581
+ Note that important atom annotations may be missing.
582
+ These can be set afterwards:
583
+
584
+ >>> molecule.res_name[:] = "TYR"
585
+ >>> molecule.atom_name[:] = create_atom_names(molecule)
586
+ >>> print(molecule)
587
+ 0 TYR N1 N 1.320 0.952 1.428
588
+ 0 TYR C1 C -0.018 0.429 1.734
589
+ 0 TYR C2 C -0.103 0.094 3.201
590
+ 0 TYR O1 O 0.886 -0.254 3.799
591
+ 0 TYR C3 C -0.274 -0.831 0.907
592
+ 0 TYR C4 C -0.189 -0.496 -0.559
593
+ 0 TYR C5 C 1.022 -0.589 -1.219
594
+ 0 TYR C6 C -1.324 -0.102 -1.244
595
+ 0 TYR C7 C 1.103 -0.282 -2.563
596
+ 0 TYR C8 C -1.247 0.210 -2.587
597
+ 0 TYR C9 C -0.032 0.118 -3.252
598
+ 0 TYR O2 O 0.044 0.420 -4.574
599
+ 0 TYR O3 O -1.279 0.184 3.842
600
+ 0 TYR H1 H 1.977 0.225 1.669
601
+ 0 TYR H2 H 1.365 1.063 0.426
602
+ 0 TYR H3 H -0.767 1.183 1.489
603
+ 0 TYR H4 H 0.473 -1.585 1.152
604
+ 0 TYR H5 H -1.268 -1.219 1.134
605
+ 0 TYR H6 H 1.905 -0.902 -0.683
606
+ 0 TYR H7 H -2.269 -0.031 -0.727
607
+ 0 TYR H8 H 2.049 -0.354 -3.078
608
+ 0 TYR H9 H -2.132 0.523 -3.121
609
+ 0 TYR H10 H -0.123 -0.399 -5.059
610
+ 0 TYR H11 H -1.333 -0.030 4.784
611
+
612
+ Create a SD file and write it to disk:
613
+
614
+ >>> another_molecule = residue("ALA")
615
+ >>> file = SDFile()
616
+ >>> record = SDRecord()
617
+ >>> record.set_structure(molecule)
618
+ >>> file["TYR"] = record
619
+ >>> record = SDRecord()
620
+ >>> record.set_structure(another_molecule)
621
+ >>> file["ALA"] = record
622
+ >>> file.write(os.path.join(path_to_directory, "some_file.sdf"))
623
+ >>> print(file)
624
+ TYR
625
+ <BLANKLINE>
626
+ <BLANKLINE>
627
+ 24 24 0 0 0 0 0 0 0 1 V2000
628
+ 1.3200 0.9520 1.4280 N 0 0 0 0 0 0 0 0 0 0 0 0
629
+ -0.0180 0.4290 1.7340 C 0 0 0 0 0 0 0 0 0 0 0 0
630
+ -0.1030 0.0940 3.2010 C 0 0 0 0 0 0 0 0 0 0 0 0
631
+ 0.8860 -0.2540 3.7990 O 0 0 0 0 0 0 0 0 0 0 0 0
632
+ -0.2740 -0.8310 0.9070 C 0 0 0 0 0 0 0 0 0 0 0 0
633
+ -0.1890 -0.4960 -0.5590 C 0 0 0 0 0 0 0 0 0 0 0 0
634
+ 1.0220 -0.5890 -1.2190 C 0 0 0 0 0 0 0 0 0 0 0 0
635
+ -1.3240 -0.1020 -1.2440 C 0 0 0 0 0 0 0 0 0 0 0 0
636
+ 1.1030 -0.2820 -2.5630 C 0 0 0 0 0 0 0 0 0 0 0 0
637
+ -1.2470 0.2100 -2.5870 C 0 0 0 0 0 0 0 0 0 0 0 0
638
+ -0.0320 0.1180 -3.2520 C 0 0 0 0 0 0 0 0 0 0 0 0
639
+ 0.0440 0.4200 -4.5740 O 0 0 0 0 0 0 0 0 0 0 0 0
640
+ -1.2790 0.1840 3.8420 O 0 0 0 0 0 0 0 0 0 0 0 0
641
+ 1.9770 0.2250 1.6690 H 0 0 0 0 0 0 0 0 0 0 0 0
642
+ 1.3650 1.0630 0.4260 H 0 0 0 0 0 0 0 0 0 0 0 0
643
+ -0.7670 1.1830 1.4890 H 0 0 0 0 0 0 0 0 0 0 0 0
644
+ 0.4730 -1.5850 1.1520 H 0 0 0 0 0 0 0 0 0 0 0 0
645
+ -1.2680 -1.2190 1.1340 H 0 0 0 0 0 0 0 0 0 0 0 0
646
+ 1.9050 -0.9020 -0.6830 H 0 0 0 0 0 0 0 0 0 0 0 0
647
+ -2.2690 -0.0310 -0.7270 H 0 0 0 0 0 0 0 0 0 0 0 0
648
+ 2.0490 -0.3540 -3.0780 H 0 0 0 0 0 0 0 0 0 0 0 0
649
+ -2.1320 0.5230 -3.1210 H 0 0 0 0 0 0 0 0 0 0 0 0
650
+ -0.1230 -0.3990 -5.0590 H 0 0 0 0 0 0 0 0 0 0 0 0
651
+ -1.3330 -0.0300 4.7840 H 0 0 0 0 0 0 0 0 0 0 0 0
652
+ 1 2 1 0 0 0 0
653
+ 1 14 1 0 0 0 0
654
+ 1 15 1 0 0 0 0
655
+ 2 3 1 0 0 0 0
656
+ 2 5 1 0 0 0 0
657
+ 2 16 1 0 0 0 0
658
+ 3 4 2 0 0 0 0
659
+ 3 13 1 0 0 0 0
660
+ 5 6 1 0 0 0 0
661
+ 5 17 1 0 0 0 0
662
+ 5 18 1 0 0 0 0
663
+ 6 7 2 0 0 0 0
664
+ 6 8 1 0 0 0 0
665
+ 7 9 1 0 0 0 0
666
+ 7 19 1 0 0 0 0
667
+ 8 10 2 0 0 0 0
668
+ 8 20 1 0 0 0 0
669
+ 9 11 2 0 0 0 0
670
+ 9 21 1 0 0 0 0
671
+ 10 11 1 0 0 0 0
672
+ 10 22 1 0 0 0 0
673
+ 11 12 1 0 0 0 0
674
+ 12 23 1 0 0 0 0
675
+ 13 24 1 0 0 0 0
676
+ M END
677
+ $$$$
678
+ ALA
679
+ <BLANKLINE>
680
+ <BLANKLINE>
681
+ 13 12 0 0 0 0 0 0 0 1 V2000
682
+ -0.9660 0.4930 1.5000 N 0 0 0 0 0 0 0 0 0 0 0 0
683
+ 0.2570 0.4180 0.6920 C 0 0 0 0 0 0 0 0 0 0 0 0
684
+ -0.0940 0.0170 -0.7160 C 0 0 0 0 0 0 0 0 0 0 0 0
685
+ -1.0560 -0.6820 -0.9230 O 0 0 0 0 0 0 0 0 0 0 0 0
686
+ 1.2040 -0.6200 1.2960 C 0 0 0 0 0 0 0 0 0 0 0 0
687
+ 0.6610 0.4390 -1.7420 O 0 0 0 0 0 0 0 0 0 0 0 0
688
+ -1.3830 -0.4250 1.4820 H 0 0 0 0 0 0 0 0 0 0 0 0
689
+ -0.6760 0.6610 2.4520 H 0 0 0 0 0 0 0 0 0 0 0 0
690
+ 0.7460 1.3920 0.6820 H 0 0 0 0 0 0 0 0 0 0 0 0
691
+ 1.4590 -0.3300 2.3160 H 0 0 0 0 0 0 0 0 0 0 0 0
692
+ 0.7150 -1.5940 1.3070 H 0 0 0 0 0 0 0 0 0 0 0 0
693
+ 2.1130 -0.6760 0.6970 H 0 0 0 0 0 0 0 0 0 0 0 0
694
+ 0.4350 0.1820 -2.6470 H 0 0 0 0 0 0 0 0 0 0 0 0
695
+ 1 2 1 0 0 0 0
696
+ 1 7 1 0 0 0 0
697
+ 1 8 1 0 0 0 0
698
+ 2 3 1 0 0 0 0
699
+ 2 5 1 0 0 0 0
700
+ 2 9 1 0 0 0 0
701
+ 3 4 2 0 0 0 0
702
+ 3 6 1 0 0 0 0
703
+ 5 10 1 0 0 0 0
704
+ 5 11 1 0 0 0 0
705
+ 5 12 1 0 0 0 0
706
+ 6 13 1 0 0 0 0
707
+ M END
708
+ $$$$
709
+ <BLANKLINE>
710
+ """
711
+
712
def __init__(self, records=None):
    """
    Create a file from an optional mapping of record names to records.

    Parameters
    ----------
    records : dict, optional
        Maps each record name to its record.
        For :class:`SDRecord` values the molecule name in the header
        is overwritten with the record name.
        Other values are stored unchanged.
    """
    self._records = {}
    if records is None:
        return
    for mol_name, record in records.items():
        if isinstance(record, SDRecord):
            # Keep the header name in sync with the key
            record.header.mol_name = mol_name
        self._records[mol_name] = record
719
+
720
@property
def lines(self):
    """
    The serialized content of this file, split into lines.
    """
    content = self.serialize()
    return content.splitlines()
723
+
724
@property
def record(self):
    """
    The sole record of the file.

    Raises
    ------
    ValueError
        If the file contains no record or more than one record.
    """
    n_records = len(self)
    if n_records == 1:
        sole_name = next(iter(self))
        return self[sole_name]
    if n_records == 0:
        raise ValueError("There are no records in the file")
    raise ValueError("There are multiple records in the file")
731
+
732
@staticmethod
def deserialize(text):
    """
    Create an object by deserializing the given text content.

    The text is split into records at the record delimiter lines.
    The records are stored as text under the name found in their
    first line.

    Parameters
    ----------
    text : str
        The content to be deserialized.

    Returns
    -------
    file_object : SDFile
        The file containing the records found in the text.
        It contains no records, if the text has no lines.

    Warns
    -----
    UserWarning
        If the text contains lines but no record delimiter.
        In this case the entire text is treated as a single record.
    """
    lines = text.splitlines()
    if len(lines) == 0:
        # No lines -> no records
        # (an empty file also serializes to an empty string)
        return SDFile()
    record_ends = np.array(
        [i for i, line in enumerate(lines) if line.startswith(_RECORD_DELIMITER)],
        dtype=int,
    )
    if len(record_ends) == 0:
        warnings.warn(
            "Final record delimiter missing, "
            "maybe this is a MOL file instead of a SD file"
        )
        # The ends are used as exclusive stop indices below
        # -> place the missing delimiter after the last line,
        # so that the last line remains part of the record
        record_ends = np.array([len(lines)], dtype=int)
    # The first record starts at the first line and the last
    # delimiter is at the end of the file
    # Records in the middle start directly after the delimiter
    record_starts = np.concatenate(([0], record_ends[:-1] + 1), dtype=int)
    record_names = [lines[start].strip() for start in record_starts]
    return SDFile(
        {
            # Do not include the delimiter
            # -> stop at end (instead of end + 1)
            name: _join_with_terminal_newline(lines[start:end])
            for name, start, end in zip(record_names, record_starts, record_ends)
        }
    )
766
+
767
def serialize(self):
    """
    Convert this object into text content.

    Each record is followed by a record delimiter line.
    Records that are still stored as text are written as they are.

    Returns
    -------
    content : str
        The serialized content.

    Raises
    ------
    SerializationError
        If a record object cannot be serialized.
    """
    delimiter_line = _RECORD_DELIMITER + "\n"
    chunks = []
    for name, entry in self._records.items():
        if not isinstance(entry, str):
            # Only record objects need to be converted into text
            try:
                entry = entry.serialize()
            except Exception:
                raise SerializationError(
                    f"Failed to serialize record '{name}'"
                )
        chunks += [entry, delimiter_line]
    return "".join(chunks)
790
+
791
@classmethod
def read(cls, file):
    """
    Read a SD file.

    Parameters
    ----------
    file : file-like object or str
        The file to be read.
        Alternatively a file path can be supplied.

    Returns
    -------
    file_object : SDFile
        The parsed file.

    Raises
    ------
    TypeError
        If a file object is given that is not opened in text mode.
    """
    if is_open_compatible(file):
        # A path was given -> open the file before reading
        with open(file, "r") as handle:
            content = handle.read()
    elif is_text(file):
        # An already opened text file was given
        content = file.read()
    else:
        raise TypeError("A file opened in 'text' mode is required")
    return SDFile.deserialize(content)
817
+
818
def write(self, file):
    """
    Write the contents of this object into a SD file.

    Parameters
    ----------
    file : file-like object or str
        The file to be written to.
        Alternatively a file path can be supplied.

    Raises
    ------
    TypeError
        If a file object is given that is not opened in text mode.
    """
    if is_open_compatible(file):
        # A path was given -> open the file before writing
        with open(file, "w") as handle:
            handle.write(self.serialize())
        return
    if not is_text(file):
        raise TypeError("A file opened in 'text' mode is required")
    file.write(self.serialize())
835
+
836
def __getitem__(self, key):
    """
    Get the record stored under the given name.

    A record that is still stored as text is deserialized first and
    the resulting object replaces the stored text.

    Raises
    ------
    DeserializationError
        If the stored record text cannot be deserialized.
    """
    entry = self._records[key]
    if not isinstance(entry, str):
        return entry
    # Stored in serialized form -> parse it now
    try:
        parsed = SDRecord.deserialize(entry)
    except Exception:
        raise DeserializationError(f"Failed to deserialize record '{key}'")
    # Replace the text with the parsed object
    self._records[key] = parsed
    return parsed
848
+
849
def __setitem__(self, key, record):
    """
    Store a record under the given name.

    The molecule name in the header of the record is overwritten
    with the given name.

    Raises
    ------
    TypeError
        If the given value is not a :class:`SDRecord`.
    """
    if isinstance(record, SDRecord):
        # The molecule name in the header is unique across the file
        record.header.mol_name = key
        self._records[key] = record
        return
    raise TypeError(f"Expected 'SDRecord', but got '{type(record).__name__}'")
855
+
856
def __delitem__(self, key):
    """
    Remove the record stored under the given name.
    """
    del self._records[key]
858
+
859
def __iter__(self):
    """
    Iterate over the names of the records in the file.
    """
    return iter(self._records)
861
+
862
def __len__(self):
    """
    The number of records in the file.
    """
    return len(self._records)
864
+
865
def __eq__(self, other):
    """
    Two files are equal, if they have the same type, contain the same
    record names and the records under each name are equal.
    The order of the records is not taken into account.
    """
    if not isinstance(other, type(self)):
        return False
    if set(self.keys()) != set(other.keys()):
        return False
    # Same names on both sides -> compare the records name by name
    return not any(self[name] != other[name] for name in self.keys())
874
+
875
def __str__(self):
    """
    The string representation is the serialized text content.
    """
    return self.serialize()
877
+
878
+
879
def _join_with_terminal_newline(text_blocks):
    """
    Join the given strings with newlines and terminate the result
    with a newline as well.

    An empty input gives an empty string without any newline.
    """
    n_blocks = len(text_blocks)
    return "\n".join(text_blocks) + "\n" if n_blocks != 0 else ""
884
+
885
+
886
def _empty_ctab():
    """
    Create the CTAB text for a structure with zero atoms and an
    empty bond list.
    """
    no_atoms = AtomArray(0)
    no_atoms.bonds = BondList(0)
    ctab_lines = write_structure_to_ctab(no_atoms)
    return _join_with_terminal_newline(ctab_lines)
890
+
891
+
892
def _to_metadata_key(key):
    """
    Convert the given key into a ``Metadata.Key``.

    A ``Metadata.Key`` is returned unchanged, a string becomes the
    name of a new ``Metadata.Key``.
    Any other type raises a :class:`TypeError`.
    """
    if isinstance(key, Metadata.Key):
        return key
    if isinstance(key, str):
        return Metadata.Key(name=key)
    raise TypeError(
        "Expected 'Metadata.Key' or str, " f"but got '{type(key).__name__}'"
    )
901
+
902
+
903
def _add_key_value_pair(metadata, key, value):
    """
    Store the value under the key in the given metadata.

    Nothing happens, if the key is ``None``.
    A key without a value raises a ``DeserializationError``.
    """
    if key is None:
        # No key -> nothing to store
        return
    if value is None:
        raise DeserializationError(f"No value found for metadata key {key}")
    metadata[key] = value
908
+
909
+
910
def _get_ctab_stop(lines):
    """
    Find the exclusive stop index of the CTAB in the given lines.

    The search starts after the header lines.
    The returned index points to the line after the first CTAB end
    marker, or is the number of lines, if there is no such marker.
    """
    n_lines = len(lines)
    # NOTE(review): the CTfile format writes this marker with two spaces
    # ("M  END"); this listing shows one space, which may be an artifact
    # of collapsed whitespace -- confirm against the real file
    stops = (
        index + 1
        for index in range(_N_HEADER, n_lines)
        if lines[index].startswith("M END")
    )
    return next(stops, n_lines)