biotite 0.41.1__cp310-cp310-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-310-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,916 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.mol"
6
+ __author__ = "Patrick Kunzmann, Benjamin Mayer"
7
+ __all__ = ["SDFile", "SDRecord", "Metadata"]
8
+
9
+ import re
10
+ import warnings
11
+ from dataclasses import dataclass
12
+ from collections.abc import MutableMapping, Mapping
13
+ import numpy as np
14
+ from ....file import File, InvalidFileError, is_open_compatible, is_text, \
15
+ DeserializationError, SerializationError
16
+ from .ctab import read_structure_from_ctab, write_structure_to_ctab
17
+ from .header import Header
18
+ from ...atoms import AtomArray
19
+ from ...bonds import BondList, BondType
20
+
21
+
22
# Number of header lines at the start of a record
_N_HEADER = 3
# Delimiter line between records
# NOTE(review): the SDRecord docstring example shows '$$$$' closing a
# record - confirm against the code that reads/writes the file
_RECORD_DELIMITER = "$$$$"
25
+
26
+
27
+ class Metadata(MutableMapping):
28
+ r"""
29
+ Additional nonstructural data in an SD record.
30
+
31
+ The metadata is stored as key-value pairs.
32
+ As SDF allows multiple different identifiers for keys,
33
+ the keys are represented by :class:`Metadata.Key`.
34
+
35
+ Parameters
36
+ ----------
37
+ metadata : dict, optional
38
+ The metadata as key-value pairs.
39
+ Keys are instances of :class:`Metadata.Key`.
40
+ Alternatively, keys can be given as strings, in which case the
41
+ string is used as the :attr:`Metadata.Key.name`.
42
+ Values are strings.
43
+ Line breaks in values are allowed.
44
+
45
+ Notes
46
+ -----
47
+ Key names may only contain alphanumeric characters, underscores and
48
+ periods.
49
+
50
+ Examples
51
+ --------
52
+
53
+ >>> metadata = Metadata({
54
+ ... "foo": "Lorem ipsum",
55
+ ... Metadata.Key(number=42, name="bar"): "dolor sit amet,\nconsectetur"
56
+ ... })
57
+ >>> print(metadata)
58
+ > <foo>
59
+ Lorem ipsum
60
+ <BLANKLINE>
61
+ > DT42 <bar>
62
+ dolor sit amet,
63
+ consectetur
64
+ <BLANKLINE>
65
+ >>> print(metadata["foo"])
66
+ Lorem ipsum
67
+ >>> # Strings can be only used for access, if the key contains only a name
68
+ >>> print("bar" in metadata)
69
+ False
70
+ >>> print(metadata[Metadata.Key(number=42, name="bar")])
71
+ dolor sit amet,
72
+ consectetur
73
+
74
+ """
75
+
76
+ @dataclass(frozen=True, kw_only=True)
77
+ class Key:
78
+ """
79
+ A metadata key.
80
+
81
+ Parameters
82
+ ----------
83
+ number : int, optional
84
+ number of the field in the database.
85
+ name : str, optional
86
+ Name of the field.
87
+ May only contain alphanumeric characters, underscores and
88
+ periods.
89
+ registry_internal : int, optional
90
+ Internal registry number.
91
+ registry_external : str, optional
92
+ External registry number.
93
+
94
+ Attributes
95
+ ----------
96
+ number, name, registry_internal, registry_external
97
+ The same as the parameters.
98
+ """
99
+ # The characters that can be given as input to `name`
100
+ # First character must be alphanumeric,
101
+ # following characters may include underscores and periods
102
+ # Although the V3000 specification forbids the use of periods,
103
+ # they are still used in practice and therefore allowed here
104
+ _NAME_INPUT_REGEX = re.compile(r"^[a-zA-Z0-9][\w.]*$")
105
+ # These regexes are used to parse the key from a line
106
+ _COMPONENT_REGEX = {
107
+ "number": re.compile(r"^DT(\d+)$"),
108
+ "name": re.compile(r"^<([a-zA-Z0-9][\w.]*)>$"),
109
+ "registry_internal": re.compile(r"^(\d+)$"),
110
+ "registry_external": re.compile(r"^\(([\w.-]*)\)$"),
111
+ }
112
+
113
+ number: ... = None
114
+ name: ... = None
115
+ registry_internal: ... = None
116
+ registry_external: ... = None
117
+
118
+ def __post_init__(self):
119
+ if self.name is None and self.number is None:
120
+ raise ValueError("At least the field number or name must be set")
121
+ if self.name is not None:
122
+ if not Metadata.Key._NAME_INPUT_REGEX.match(self.name):
123
+ raise ValueError(
124
+ f"Invalid name '{self.name}', must only contains "
125
+ "alphanumeric characters, underscores and periods"
126
+ )
127
+ if self.number is not None:
128
+ # Cannot set field directly as 'frozen=True'
129
+ object.__setattr__(self, "number", int(self.number))
130
+ if self.registry_internal is not None:
131
+ object.__setattr__(
132
+ self, "registry_internal", int(self.registry_internal)
133
+ )
134
+
135
+ @staticmethod
136
+ def deserialize(text):
137
+ """
138
+ Create an object by deserializing the given text content.
139
+
140
+ Parameters
141
+ ----------
142
+ content : str
143
+ The content to be deserialized.
144
+ """
145
+ # Omit the leading '>'
146
+ key_components = text[1:].split()
147
+ parsed_component_dict = {}
148
+ for component in key_components:
149
+ # For each component in each the key,
150
+ # try to match it with each of the regexes
151
+ for attr_name, regex in Metadata.Key._COMPONENT_REGEX.items():
152
+ pattern_match = regex.match(component)
153
+ if pattern_match is None:
154
+ # Try next pattern
155
+ continue
156
+ if attr_name in parsed_component_dict:
157
+ raise DeserializationError(
158
+ f"Duplicate key component for '{attr_name}'"
159
+ )
160
+ value = pattern_match.group(1)
161
+ parsed_component_dict[attr_name] = value
162
+ break
163
+ else:
164
+ # There is no matching pattern
165
+ raise DeserializationError(
166
+ f"Invalid key component '{component}'"
167
+ )
168
+ return Metadata.Key(**parsed_component_dict)
169
+
170
+ def serialize(self):
171
+ """
172
+ Convert this object into text content.
173
+
174
+ Returns
175
+ -------
176
+ content : str
177
+ The serialized content.
178
+ """
179
+ key_string = "> "
180
+ if self.number is not None:
181
+ key_string += f"DT{self.number} "
182
+ if self.name is not None:
183
+ key_string += f"<{self.name}> "
184
+ if self.registry_internal is not None:
185
+ key_string += f"{self.registry_internal} "
186
+ if self.registry_external is not None:
187
+ key_string += f"({self.registry_external}) "
188
+ return key_string
189
+
190
+ def __str__(self):
191
+ return self.serialize()
192
+
193
+
194
+ def __init__(self, metadata=None):
195
+ if metadata is None:
196
+ metadata = {}
197
+ self._metadata = {}
198
+ for key, value in metadata.items():
199
+ self._metadata[_to_metadata_key(key)] = value
200
+
201
+ @staticmethod
202
+ def deserialize(text):
203
+ """
204
+ Create an object by deserializing the given text content.
205
+
206
+ Parameters
207
+ ----------
208
+ content : str
209
+ The content to be deserialized.
210
+ """
211
+ metadata = {}
212
+ current_key = None
213
+ current_value = None
214
+ for line in text.splitlines():
215
+ line = line.strip()
216
+ if len(line) == 0:
217
+ # Skip empty lines
218
+ continue
219
+ if line.startswith(">"):
220
+ _add_key_value_pair(metadata, current_key, current_value)
221
+ current_key = Metadata.Key.deserialize(line)
222
+ current_value = None
223
+ else:
224
+ if current_key is None:
225
+ raise DeserializationError(
226
+ "Value found before metadata key"
227
+ )
228
+ if current_value is None:
229
+ current_value = line
230
+ else:
231
+ current_value += "\n" + line
232
+ # Add final pair
233
+ _add_key_value_pair(metadata, current_key, current_value)
234
+ return Metadata(metadata)
235
+
236
+ def serialize(self):
237
+ """
238
+ Convert this object into text content.
239
+
240
+ Returns
241
+ -------
242
+ content : str
243
+ The serialized content.
244
+ """
245
+ text_blocks = []
246
+ for key, value in self._metadata.items():
247
+ text_blocks.append(key.serialize())
248
+ # Add empty line after value
249
+ text_blocks.append(value + "\n")
250
+ return _join_with_terminal_newline(text_blocks)
251
+
252
+ def __getitem__(self, key):
253
+ return self._metadata[_to_metadata_key(key)]
254
+
255
+ def __setitem__(self, key, value):
256
+ if len(value) == 0:
257
+ raise ValueError("Metadata value must not be empty")
258
+ self._metadata[_to_metadata_key(key)] = value
259
+
260
+ def __delitem__(self, key):
261
+ del self._metadata[_to_metadata_key(key)]
262
+
263
+ def __iter__(self):
264
+ return iter(self._metadata)
265
+
266
+ def __len__(self):
267
+ return len(self._metadata)
268
+
269
+ def __eq__(self, other):
270
+ if not isinstance(other, type(self)):
271
+ return False
272
+ if set(self.keys()) != set(other.keys()):
273
+ return False
274
+ for key in self.keys():
275
+ if self[key] != other[key]:
276
+ return False
277
+ return True
278
+
279
+ def __str__(self):
280
+ return self.serialize()
281
+
282
+
283
+ class SDRecord:
284
+ """
285
+ A record in an SD file.
286
+
287
+ Parameters
288
+ ----------
289
+ header : Header, optional
290
+ The header of the record.
291
+ By default, an empty header is created.
292
+ ctab : str, optional
293
+ The connection table (atoms and bonds) in the record.
294
+ By default, an empty structure is created.
295
+ metadata : Metadata, Mapping or str, optional
296
+ The metadata of the record.
297
+ Can be given as dictionary mapping :attr:`Metadata.Key.name`
298
+ to the respective metadata value.
299
+ By default, no metadata is appended to the record.
300
+
301
+ Attributes
302
+ ----------
303
+ header, ctab, metadata
304
+ The same as the parameters.
305
+
306
+ Examples
307
+ --------
308
+
309
+ >>> atoms = residue("ALA")
310
+ >>> record = SDRecord(header=Header(mol_name="ALA", dimensions="3D"))
311
+ >>> record.set_structure(atoms)
312
+ >>> print(record.get_structure())
313
+ 0 N -0.970 0.490 1.500
314
+ 0 C 0.260 0.420 0.690
315
+ 0 C -0.090 0.020 -0.720
316
+ 0 O -1.060 -0.680 -0.920
317
+ 0 C 1.200 -0.620 1.300
318
+ 0 O 0.660 0.440 -1.740
319
+ 0 H -1.380 -0.420 1.480
320
+ 0 H -0.680 0.660 2.450
321
+ 0 H 0.750 1.390 0.680
322
+ 0 H 1.460 -0.330 2.320
323
+ 0 H 0.720 -1.590 1.310
324
+ 0 H 2.110 -0.680 0.700
325
+ 0 H 0.440 0.180 -2.650
326
+ >>> # Add the record to an SD file
327
+ >>> file = SDFile()
328
+ >>> file["ALA"] = record
329
+ >>> print(file)
330
+ ALA
331
+ 3D
332
+ <BLANKLINE>
333
+ 13 12 0 0 0 0 0 0 0 1 V2000
334
+ -0.9700 0.4900 1.5000 N 0 0 0 0 0 0 0 0 0 0 0 0
335
+ 0.2600 0.4200 0.6900 C 0 0 0 0 0 0 0 0 0 0 0 0
336
+ -0.0900 0.0200 -0.7200 C 0 0 0 0 0 0 0 0 0 0 0 0
337
+ -1.0600 -0.6800 -0.9200 O 0 0 0 0 0 0 0 0 0 0 0 0
338
+ 1.2000 -0.6200 1.3000 C 0 0 0 0 0 0 0 0 0 0 0 0
339
+ 0.6600 0.4400 -1.7400 O 0 0 0 0 0 0 0 0 0 0 0 0
340
+ -1.3800 -0.4200 1.4800 H 0 0 0 0 0 0 0 0 0 0 0 0
341
+ -0.6800 0.6600 2.4500 H 0 0 0 0 0 0 0 0 0 0 0 0
342
+ 0.7500 1.3900 0.6800 H 0 0 0 0 0 0 0 0 0 0 0 0
343
+ 1.4600 -0.3300 2.3200 H 0 0 0 0 0 0 0 0 0 0 0 0
344
+ 0.7200 -1.5900 1.3100 H 0 0 0 0 0 0 0 0 0 0 0 0
345
+ 2.1100 -0.6800 0.7000 H 0 0 0 0 0 0 0 0 0 0 0 0
346
+ 0.4400 0.1800 -2.6500 H 0 0 0 0 0 0 0 0 0 0 0 0
347
+ 1 2 1 0 0 0 0
348
+ 1 7 1 0 0 0 0
349
+ 1 8 1 0 0 0 0
350
+ 2 3 1 0 0 0 0
351
+ 2 5 1 0 0 0 0
352
+ 2 9 1 0 0 0 0
353
+ 3 4 2 0 0 0 0
354
+ 3 6 1 0 0 0 0
355
+ 5 10 1 0 0 0 0
356
+ 5 11 1 0 0 0 0
357
+ 5 12 1 0 0 0 0
358
+ 6 13 1 0 0 0 0
359
+ M END
360
+ $$$$
361
+ """
362
+
363
def __init__(self, header=None, ctab=None, metadata=None):
    # Fall back to an empty header if none is given
    self._header = Header() if header is None else header

    # The CTAB is kept as given; 'None' means that no structure is set
    self._ctab = ctab

    if metadata is None:
        metadata = Metadata()
    elif isinstance(metadata, Mapping) and not isinstance(metadata, Metadata):
        # Plain mapping -> wrap it, so that keys become 'Metadata.Key'
        metadata = Metadata(metadata)
    elif not isinstance(metadata, (Metadata, str)):
        raise TypeError(
            "Expected 'Metadata', Mapping or str, "
            f"but got '{type(metadata).__name__}'"
        )
    # A 'str' is the serialized form -> will be lazily deserialized
    self._metadata = metadata
385
+
386
+ @property
387
def header(self):
    """
    The header of this record.

    If the header is still stored in its serialized form (a string),
    it is deserialized on access and the parsed object replaces the
    string.

    Raises
    ------
    DeserializationError
        If the serialized header cannot be parsed.
        The original exception is attached as cause.
    """
    if isinstance(self._header, str):
        try:
            self._header = Header.deserialize(self._header)
        except Exception as err:
            # No bare 'except': 'KeyboardInterrupt' and 'SystemExit'
            # must not be reported as a parsing problem
            raise DeserializationError(
                "Failed to deserialize header"
            ) from err
    return self._header
394
+
395
+ @header.setter
396
def header(self, header):
    # Replace the stored header with the given object as is;
    # no validation or conversion happens here
    self._header = header
398
+
399
+ @property
400
def ctab(self):
    """
    The connection table as it is stored in this record.

    The value is returned unchanged; it is ``None`` if the record was
    created without a connection table.
    """
    # CTAB string cannot be changed directly -> no setter
    return self._ctab
403
+
404
+ @property
405
def metadata(self):
    """
    The metadata of this record.

    If the metadata is still stored in its serialized form (a string),
    it is deserialized on access and the parsed object replaces the
    string.

    Raises
    ------
    DeserializationError
        If the serialized metadata cannot be parsed.
        The original exception is attached as cause.
    """
    if isinstance(self._metadata, str):
        try:
            self._metadata = Metadata.deserialize(self._metadata)
        except Exception as err:
            # No bare 'except': 'KeyboardInterrupt' and 'SystemExit'
            # must not be reported as a parsing problem
            raise DeserializationError(
                "Failed to deserialize metadata"
            ) from err
    return self._metadata
412
+
413
+ @metadata.setter
414
def metadata(self, metadata):
    # Reject everything that is neither 'Metadata' nor a mapping;
    # in contrast to the constructor a serialized string is not accepted
    if not isinstance(metadata, (Metadata, Mapping)):
        raise TypeError(
            "Expected 'Metadata' or Mapping, "
            f"but got '{type(metadata).__name__}'"
        )
    # A 'Metadata' object is stored as is, other mappings are wrapped
    self._metadata = (
        metadata if isinstance(metadata, Metadata) else Metadata(metadata)
    )
424
+
425
+ @staticmethod
426
def deserialize(text):
    """
    Create an object by deserializing the given text content.

    The text is only split into its header, connection table and
    metadata sections here; each section is passed on as string.

    Parameters
    ----------
    text : str
        The content to be deserialized.

    Returns
    -------
    record : SDRecord
        The record holding the three sections.
    """
    record_lines = text.splitlines()
    # The metadata section begins where the connection table stops
    metadata_start = _get_ctab_stop(record_lines)
    sections = (
        record_lines[:_N_HEADER],
        record_lines[_N_HEADER:metadata_start],
        record_lines[metadata_start:],
    )
    header, ctab, metadata = (
        _join_with_terminal_newline(section) for section in sections
    )
    return SDRecord(header, ctab, metadata)
442
+
443
+ def serialize(self):
444
+ """
445
+ Convert this object into text content.
446
+
447
+ Returns
448
+ -------
449
+ content : str
450
+ The serialized content.
451
+ """
452
+ if isinstance(self._header, str):
453
+ header_string = self._header
454
+ else:
455
+ header_string = self._header.serialize()
456
+
457
+ if self._ctab is None:
458
+ ctab_string = _empty_ctab()
459
+ else:
460
+ ctab_string = self._ctab
461
+
462
+ if isinstance(self._metadata, str):
463
+ metadata_string = self._metadata
464
+ else:
465
+ metadata_string = self._metadata.serialize()
466
+
467
+ return header_string + ctab_string + metadata_string
468
+
469
def get_structure(self):
    """
    Parse the structural data in the SD record.

    Returns
    -------
    array : AtomArray
        This :class:`AtomArray` contains the optional ``charge``
        annotation and has an associated :class:`BondList`.
        All other annotation categories, except ``element`` are
        empty.

    Raises
    ------
    InvalidFileError
        If the record has no CTAB or the CTAB is empty.
    """
    # A record created without a CTAB stores 'None':
    # treat it like an empty CTAB instead of failing on 'None'
    ctab_lines = [] if self._ctab is None else self._ctab.splitlines()
    if len(ctab_lines) == 0:
        raise InvalidFileError("File does not contain structure data")
    return read_structure_from_ctab(ctab_lines)
485
+
486
def set_structure(self, atoms, default_bond_type=BondType.ANY,
                  version=None):
    """
    Set the structural data in the SD record.

    Parameters
    ----------
    atoms : AtomArray
        The array to be saved into this record.
        Must have an associated :class:`BondList`.
    default_bond_type : BondType, optional
        Bond type fallback for the *Bond block*, if a
        :class:`BondType` has no CTAB counterpart.
        By default, each such bond is treated as
        :attr:`BondType.ANY`.
    version : {"V2000", "V3000"}, optional
        The version of the CTAB format.
        ``"V2000"`` uses the *Atom* and *Bond* block, while
        ``"V3000"`` uses the *Properties* block.
        By default, ``"V2000"`` is used, unless the number of atoms
        or bonds exceeds 999, in which case ``"V3000"`` is used.
    """
    ctab_lines = write_structure_to_ctab(atoms, default_bond_type, version)
    # The CTAB is kept as a single newline-terminated string
    self._ctab = _join_with_terminal_newline(ctab_lines)
511
+
512
+ def __eq__(self, other):
513
+ if not isinstance(other, type(self)):
514
+ return False
515
+ if not self.header == other.header:
516
+ return False
517
+ if not self.ctab == other.ctab:
518
+ return False
519
+ if not self.metadata == other.metadata:
520
+ return False
521
+ return True
522
+
523
+ def __str__(self):
524
+ return self.serialize()
525
+
526
+
527
+ class SDFile(File, MutableMapping):
528
+ """
529
+ This class represents an SD file for storing small molecule
530
+ structures.
531
+
532
+ The records for each molecule in the file can be accessed and
533
+ modified like a dictionary.
534
+ The structures can be parsed and written from/to each
535
+ :class:`SDRecord` object via :func:`get_structure()` or
536
+ :func:`set_structure()`, respectively.
537
+
538
+ Attributes
539
+ ----------
540
+ record : SDRecord
541
+ The sole record of the file.
542
+ If the file contains no record or more than one record, an exception is raised.
543
+
544
+ Examples
545
+ --------
546
+ Read a SD file and parse the molecular structure:
547
+
548
+ >>> import os.path
549
+ >>> file = SDFile.read(os.path.join(path_to_structures, "molecules", "TYR.sdf"))
550
+ >>> molecule = file.record.get_structure()
551
+ >>> print(molecule)
552
+ 0 N 1.320 0.952 1.428
553
+ 0 C -0.018 0.429 1.734
554
+ 0 C -0.103 0.094 3.201
555
+ 0 O 0.886 -0.254 3.799
556
+ 0 C -0.274 -0.831 0.907
557
+ 0 C -0.189 -0.496 -0.559
558
+ 0 C 1.022 -0.589 -1.219
559
+ 0 C -1.324 -0.102 -1.244
560
+ 0 C 1.103 -0.282 -2.563
561
+ 0 C -1.247 0.210 -2.587
562
+ 0 C -0.032 0.118 -3.252
563
+ 0 O 0.044 0.420 -4.574
564
+ 0 O -1.279 0.184 3.842
565
+ 0 H 1.977 0.225 1.669
566
+ 0 H 1.365 1.063 0.426
567
+ 0 H -0.767 1.183 1.489
568
+ 0 H 0.473 -1.585 1.152
569
+ 0 H -1.268 -1.219 1.134
570
+ 0 H 1.905 -0.902 -0.683
571
+ 0 H -2.269 -0.031 -0.727
572
+ 0 H 2.049 -0.354 -3.078
573
+ 0 H -2.132 0.523 -3.121
574
+ 0 H -0.123 -0.399 -5.059
575
+ 0 H -1.333 -0.030 4.784
576
+
577
+ Note that important atom annotations may be missing.
578
+ These can be set afterwards:
579
+
580
+ >>> molecule.res_name[:] = "TYR"
581
+ >>> molecule.atom_name[:] = create_atom_names(molecule)
582
+ >>> print(molecule)
583
+ 0 TYR N1 N 1.320 0.952 1.428
584
+ 0 TYR C1 C -0.018 0.429 1.734
585
+ 0 TYR C2 C -0.103 0.094 3.201
586
+ 0 TYR O1 O 0.886 -0.254 3.799
587
+ 0 TYR C3 C -0.274 -0.831 0.907
588
+ 0 TYR C4 C -0.189 -0.496 -0.559
589
+ 0 TYR C5 C 1.022 -0.589 -1.219
590
+ 0 TYR C6 C -1.324 -0.102 -1.244
591
+ 0 TYR C7 C 1.103 -0.282 -2.563
592
+ 0 TYR C8 C -1.247 0.210 -2.587
593
+ 0 TYR C9 C -0.032 0.118 -3.252
594
+ 0 TYR O2 O 0.044 0.420 -4.574
595
+ 0 TYR O3 O -1.279 0.184 3.842
596
+ 0 TYR H1 H 1.977 0.225 1.669
597
+ 0 TYR H2 H 1.365 1.063 0.426
598
+ 0 TYR H3 H -0.767 1.183 1.489
599
+ 0 TYR H4 H 0.473 -1.585 1.152
600
+ 0 TYR H5 H -1.268 -1.219 1.134
601
+ 0 TYR H6 H 1.905 -0.902 -0.683
602
+ 0 TYR H7 H -2.269 -0.031 -0.727
603
+ 0 TYR H8 H 2.049 -0.354 -3.078
604
+ 0 TYR H9 H -2.132 0.523 -3.121
605
+ 0 TYR H10 H -0.123 -0.399 -5.059
606
+ 0 TYR H11 H -1.333 -0.030 4.784
607
+
608
+ Create a SD file and write it to disk:
609
+
610
+ >>> another_molecule = residue("ALA")
611
+ >>> file = SDFile()
612
+ >>> record = SDRecord()
613
+ >>> record.set_structure(molecule)
614
+ >>> file["TYR"] = record
615
+ >>> record = SDRecord()
616
+ >>> record.set_structure(another_molecule)
617
+ >>> file["ALA"] = record
618
+ >>> file.write(os.path.join(path_to_directory, "some_file.sdf"))
619
+ >>> print(file)
620
+ TYR
621
+ <BLANKLINE>
622
+ <BLANKLINE>
623
+ 24 24 0 0 0 0 0 0 0 1 V2000
624
+ 1.3200 0.9520 1.4280 N 0 0 0 0 0 0 0 0 0 0 0 0
625
+ -0.0180 0.4290 1.7340 C 0 0 0 0 0 0 0 0 0 0 0 0
626
+ -0.1030 0.0940 3.2010 C 0 0 0 0 0 0 0 0 0 0 0 0
627
+ 0.8860 -0.2540 3.7990 O 0 0 0 0 0 0 0 0 0 0 0 0
628
+ -0.2740 -0.8310 0.9070 C 0 0 0 0 0 0 0 0 0 0 0 0
629
+ -0.1890 -0.4960 -0.5590 C 0 0 0 0 0 0 0 0 0 0 0 0
630
+ 1.0220 -0.5890 -1.2190 C 0 0 0 0 0 0 0 0 0 0 0 0
631
+ -1.3240 -0.1020 -1.2440 C 0 0 0 0 0 0 0 0 0 0 0 0
632
+ 1.1030 -0.2820 -2.5630 C 0 0 0 0 0 0 0 0 0 0 0 0
633
+ -1.2470 0.2100 -2.5870 C 0 0 0 0 0 0 0 0 0 0 0 0
634
+ -0.0320 0.1180 -3.2520 C 0 0 0 0 0 0 0 0 0 0 0 0
635
+ 0.0440 0.4200 -4.5740 O 0 0 0 0 0 0 0 0 0 0 0 0
636
+ -1.2790 0.1840 3.8420 O 0 0 0 0 0 0 0 0 0 0 0 0
637
+ 1.9770 0.2250 1.6690 H 0 0 0 0 0 0 0 0 0 0 0 0
638
+ 1.3650 1.0630 0.4260 H 0 0 0 0 0 0 0 0 0 0 0 0
639
+ -0.7670 1.1830 1.4890 H 0 0 0 0 0 0 0 0 0 0 0 0
640
+ 0.4730 -1.5850 1.1520 H 0 0 0 0 0 0 0 0 0 0 0 0
641
+ -1.2680 -1.2190 1.1340 H 0 0 0 0 0 0 0 0 0 0 0 0
642
+ 1.9050 -0.9020 -0.6830 H 0 0 0 0 0 0 0 0 0 0 0 0
643
+ -2.2690 -0.0310 -0.7270 H 0 0 0 0 0 0 0 0 0 0 0 0
644
+ 2.0490 -0.3540 -3.0780 H 0 0 0 0 0 0 0 0 0 0 0 0
645
+ -2.1320 0.5230 -3.1210 H 0 0 0 0 0 0 0 0 0 0 0 0
646
+ -0.1230 -0.3990 -5.0590 H 0 0 0 0 0 0 0 0 0 0 0 0
647
+ -1.3330 -0.0300 4.7840 H 0 0 0 0 0 0 0 0 0 0 0 0
648
+ 1 2 1 0 0 0 0
649
+ 1 14 1 0 0 0 0
650
+ 1 15 1 0 0 0 0
651
+ 2 3 1 0 0 0 0
652
+ 2 5 1 0 0 0 0
653
+ 2 16 1 0 0 0 0
654
+ 3 4 2 0 0 0 0
655
+ 3 13 1 0 0 0 0
656
+ 5 6 1 0 0 0 0
657
+ 5 17 1 0 0 0 0
658
+ 5 18 1 0 0 0 0
659
+ 6 7 2 0 0 0 0
660
+ 6 8 1 0 0 0 0
661
+ 7 9 1 0 0 0 0
662
+ 7 19 1 0 0 0 0
663
+ 8 10 2 0 0 0 0
664
+ 8 20 1 0 0 0 0
665
+ 9 11 2 0 0 0 0
666
+ 9 21 1 0 0 0 0
667
+ 10 11 1 0 0 0 0
668
+ 10 22 1 0 0 0 0
669
+ 11 12 1 0 0 0 0
670
+ 12 23 1 0 0 0 0
671
+ 13 24 1 0 0 0 0
672
+ M END
673
+ $$$$
674
+ ALA
675
+ <BLANKLINE>
676
+ <BLANKLINE>
677
+ 13 12 0 0 0 0 0 0 0 1 V2000
678
+ -0.9700 0.4900 1.5000 N 0 0 0 0 0 0 0 0 0 0 0 0
679
+ 0.2600 0.4200 0.6900 C 0 0 0 0 0 0 0 0 0 0 0 0
680
+ -0.0900 0.0200 -0.7200 C 0 0 0 0 0 0 0 0 0 0 0 0
681
+ -1.0600 -0.6800 -0.9200 O 0 0 0 0 0 0 0 0 0 0 0 0
682
+ 1.2000 -0.6200 1.3000 C 0 0 0 0 0 0 0 0 0 0 0 0
683
+ 0.6600 0.4400 -1.7400 O 0 0 0 0 0 0 0 0 0 0 0 0
684
+ -1.3800 -0.4200 1.4800 H 0 0 0 0 0 0 0 0 0 0 0 0
685
+ -0.6800 0.6600 2.4500 H 0 0 0 0 0 0 0 0 0 0 0 0
686
+ 0.7500 1.3900 0.6800 H 0 0 0 0 0 0 0 0 0 0 0 0
687
+ 1.4600 -0.3300 2.3200 H 0 0 0 0 0 0 0 0 0 0 0 0
688
+ 0.7200 -1.5900 1.3100 H 0 0 0 0 0 0 0 0 0 0 0 0
689
+ 2.1100 -0.6800 0.7000 H 0 0 0 0 0 0 0 0 0 0 0 0
690
+ 0.4400 0.1800 -2.6500 H 0 0 0 0 0 0 0 0 0 0 0 0
691
+ 1 2 1 0 0 0 0
692
+ 1 7 1 0 0 0 0
693
+ 1 8 1 0 0 0 0
694
+ 2 3 1 0 0 0 0
695
+ 2 5 1 0 0 0 0
696
+ 2 9 1 0 0 0 0
697
+ 3 4 2 0 0 0 0
698
+ 3 6 1 0 0 0 0
699
+ 5 10 1 0 0 0 0
700
+ 5 11 1 0 0 0 0
701
+ 5 12 1 0 0 0 0
702
+ 6 13 1 0 0 0 0
703
+ M END
704
+ $$$$
705
+ <BLANKLINE>
706
+ """
707
+
708
def __init__(self, records=None):
    """
    Create the file from an optional mapping of molecule names to
    records.

    Parameters
    ----------
    records : mapping, optional
        Maps each molecule name to its record.
        :class:`SDRecord` values get their header molecule name set
        to the respective key; all other values are stored as given.
    """
    self._records = {}
    if records is None:
        return
    for mol_name, record in records.items():
        if isinstance(record, SDRecord):
            # Keep the name in the header in sync with the key
            record.header.mol_name = mol_name
        self._records[mol_name] = record
715
+
716
@property
def lines(self):
    """The serialized content of this file, split into lines."""
    content = self.serialize()
    return content.splitlines()
719
+
720
@property
def record(self):
    """
    The sole record of the file.

    Raises
    ------
    ValueError
        If the file contains no record or more than one record.
    """
    n_records = len(self)
    if n_records == 0:
        raise ValueError("There are no records in the file")
    if n_records > 1:
        raise ValueError("There are multiple records in the file")
    only_name = next(iter(self))
    return self[only_name]
727
+
728
@staticmethod
def deserialize(text):
    """
    Create an object by deserializing the given text content.

    The text is only split into records here; each record is handed
    to the file in text form under the name found in its first line.

    Parameters
    ----------
    text : str
        The content to be deserialized.

    Returns
    -------
    file : SDFile
        The file containing one entry per record.
        Empty `text` gives a file without records.
    """
    lines = text.splitlines()
    if len(lines) == 0:
        # Empty content is what a file without records serializes to
        # -> return an empty file instead of indexing into 'lines'
        return SDFile()
    record_ends = np.array([
        i for i, line in enumerate(lines)
        if line.startswith(_RECORD_DELIMITER)
    ], dtype=int)
    if len(record_ends) == 0:
        warnings.warn(
            "Final record delimiter missing, "
            "maybe this is a MOL file instead of a SD file"
        )
        record_ends = np.array([len(lines)-1], dtype=int)
    # The first record starts at the first line and the last
    # delimiter is at the end of the file
    # Records in the middle start directly after the delimiter
    record_starts = np.concatenate(([0], record_ends[:-1] + 1), dtype=int)
    record_names = [lines[start].strip() for start in record_starts]
    return SDFile({
        # Do not include the delimiter
        # -> stop at end (instead of end + 1)
        name: _join_with_terminal_newline(lines[start : end])
        for name, start, end
        in zip(record_names, record_starts, record_ends)
    })
761
+
762
def serialize(self):
    """
    Convert this object into text content.

    Each record is followed by a record delimiter line.

    Returns
    -------
    content : str
        The serialized content.

    Raises
    ------
    SerializationError
        If one of the records cannot be serialized.
    """
    text_blocks = []
    for record_name, record in self._records.items():
        if isinstance(record, str):
            # Record is already stored as text
            text_blocks.append(record)
        else:
            try:
                text_blocks.append(record.serialize())
            except Exception as e:
                # Catch 'Exception' rather than everything, so that
                # KeyboardInterrupt/SystemExit are not converted,
                # and keep the original error as explicit cause
                raise SerializationError(
                    f"Failed to serialize record '{record_name}'"
                ) from e
        text_blocks.append(_RECORD_DELIMITER + "\n")
    return "".join(text_blocks)
785
+
786
@classmethod
def read(cls, file):
    """
    Read a SD file.

    Parameters
    ----------
    file : file-like object or str
        The file to be read.
        Alternatively a file path can be supplied.

    Returns
    -------
    file_object : SDFile
        The parsed file.

    Raises
    ------
    TypeError
        If a file object is given that is not opened in text mode.
    """
    if not is_open_compatible(file):
        # File object
        if not is_text(file):
            raise TypeError("A file opened in 'text' mode is required")
        return SDFile.deserialize(file.read())

    # File name
    with open(file, "r") as f:
        content = f.read()
    return SDFile.deserialize(content)
812
+
813
def write(self, file):
    """
    Write the contents of this object into a SD file.

    Parameters
    ----------
    file : file-like object or str
        The file to be written to.
        Alternatively a file path can be supplied.

    Raises
    ------
    TypeError
        If a file object is given that is not opened in text mode.
    """
    if is_open_compatible(file):
        # Serialize before opening: opening in "w" mode truncates the
        # target, so a failing serialization must happen first to
        # leave an existing file untouched
        content = self.serialize()
        with open(file, "w") as f:
            f.write(content)
    else:
        if not is_text(file):
            raise TypeError("A file opened in 'text' mode is required")
        file.write(self.serialize())
830
+
831
def __getitem__(self, key):
    """
    Get the record stored under the given molecule name.

    A record that is still stored as text is deserialized on first
    access and the resulting object replaces the stored text.

    Raises
    ------
    KeyError
        If there is no record with the given name.
    DeserializationError
        If the stored record text cannot be deserialized.
    """
    record = self._records[key]
    if isinstance(record, str):
        # Element is stored in serialized form
        # -> must be deserialized first
        try:
            record = SDRecord.deserialize(record)
        except Exception as e:
            # Catch 'Exception' rather than everything, so that
            # KeyboardInterrupt/SystemExit are not converted, and
            # keep the original error as explicit cause
            raise DeserializationError(
                f"Failed to deserialize record '{key}'"
            ) from e
        # Update with deserialized object
        self._records[key] = record
    return record
845
+
846
def __setitem__(self, key, record):
    """
    Store a record under the given molecule name.

    The molecule name in the record header is overwritten with
    `key`.

    Raises
    ------
    TypeError
        If `record` is not a :class:`SDRecord`.
    """
    if isinstance(record, SDRecord):
        # The molecule name in the header is unique across the file
        record.header.mol_name = key
        self._records[key] = record
        return
    raise TypeError(
        f"Expected 'SDRecord', but got '{type(record).__name__}'"
    )
854
+
855
def __delitem__(self, key):
    """Remove the record stored under the given molecule name."""
    del self._records[key]
857
+
858
def __iter__(self):
    """Iterate over the molecule names of the stored records."""
    names = iter(self._records)
    return names
860
+
861
def __len__(self):
    """Return the number of records in the file."""
    n_records = len(self._records)
    return n_records
863
+
864
def __eq__(self, other):
    """
    Two files are equal, if they have the same type, the same set
    of molecule names and equal records under each name.
    The order of the records is not taken into account.
    """
    if not isinstance(other, type(self)):
        return False
    if set(self.keys()) != set(other.keys()):
        return False
    # Stops at the first pair of records that differs
    return not any(
        self[record_name] != other[record_name]
        for record_name in self.keys()
    )
873
+
874
def __str__(self):
    """Return the serialized text content of this file."""
    content = self.serialize()
    return content
876
+
877
+
878
+ def _join_with_terminal_newline(text_blocks):
879
+ if len(text_blocks) == 0:
880
+ return ""
881
+ else:
882
+ return "\n".join(text_blocks) + "\n"
883
+
884
+
885
def _empty_ctab():
    """
    Create the CTAB text for a structure with zero atoms and an
    empty bond list.
    """
    no_atoms = AtomArray(0)
    no_atoms.bonds = BondList(0)
    ctab_lines = write_structure_to_ctab(no_atoms)
    return _join_with_terminal_newline(ctab_lines)
889
+
890
+
891
def _to_metadata_key(key):
    """
    Convert the given object into a ``Metadata.Key``.

    A ``Metadata.Key`` is returned as it is, a string is used as the
    ``name`` of a new key.
    Any other type raises a :class:`TypeError`.
    """
    if isinstance(key, Metadata.Key):
        return key
    if isinstance(key, str):
        return Metadata.Key(name=key)
    raise TypeError(
        "Expected 'Metadata.Key' or str, "
        f"but got '{type(key).__name__}'"
    )
901
+
902
+
903
+ def _add_key_value_pair(metadata, key, value):
904
+ if key is not None:
905
+ if value is None:
906
+ raise DeserializationError(
907
+ f"No value found for metadata key {key}"
908
+ )
909
+ metadata[key] = value
910
+
911
+
912
def _get_ctab_stop(lines):
    """
    Find the exclusive stop index of the CTAB in the given record
    lines.

    The search starts after the header lines and returns the index
    directly after the first CTAB terminator line.
    If there is no such line, the number of lines is returned.
    """
    # NOTE(review): the literal is reproduced as displayed; confirm
    # against the CTfile specification that no whitespace inside it
    # was collapsed by the rendering of this file
    n_lines = len(lines)
    index = _N_HEADER
    while index < n_lines:
        if lines[index].startswith("M END"):
            return index + 1
        index += 1
    return n_lines