biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,940 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.mol"
6
+ __author__ = "Patrick Kunzmann, Benjamin Mayer"
7
+ __all__ = ["SDFile", "SDRecord", "Metadata"]
8
+
9
+ import re
10
+ import warnings
11
+ from collections.abc import Mapping, MutableMapping
12
+ from dataclasses import dataclass
13
+ import numpy as np
14
+ from biotite.file import (
15
+ DeserializationError,
16
+ File,
17
+ InvalidFileError,
18
+ SerializationError,
19
+ is_open_compatible,
20
+ is_text,
21
+ )
22
+ from biotite.structure.atoms import AtomArray
23
+ from biotite.structure.bonds import BondList, BondType
24
+ from biotite.structure.io.mol.ctab import (
25
+ read_structure_from_ctab,
26
+ write_structure_to_ctab,
27
+ )
28
+ from biotite.structure.io.mol.header import Header
29
+
30
+ _N_HEADER = 3
31
+ # `_N_HEADER` (above): number of header lines at the start of each record
32
+ _RECORD_DELIMITER = "$$$$"
33
+
34
+
35
+ class Metadata(MutableMapping):
36
+ r"""
37
+ Additional nonstructural data in an SD record.
38
+
39
+ The metadata is stored as key-value pairs.
40
+ As SDF allows multiple different identifiers for keys,
41
+ the keys are represented by :class:`Metadata.Key`.
42
+
43
+ Parameters
44
+ ----------
45
+ metadata : dict, optional
46
+ The metadata as key-value pairs.
47
+ Keys are instances of :class:`Metadata.Key`.
48
+ Alternatively, keys can be given as strings, in which case the
49
+ string is used as the :attr:`Metadata.Key.name`.
50
+ Values are strings.
51
+ Line breaks in values are allowed.
52
+
53
+ Notes
54
+ -----
55
+ Key names may only contain alphanumeric characters, underscores and
56
+ periods.
57
+
58
+ Examples
59
+ --------
60
+
61
+ >>> metadata = Metadata({
62
+ ... "foo": "Lorem ipsum",
63
+ ... Metadata.Key(number=42, name="bar"): "dolor sit amet,\nconsectetur"
64
+ ... })
65
+ >>> print(metadata)
66
+ > <foo>
67
+ Lorem ipsum
68
+ <BLANKLINE>
69
+ > DT42 <bar>
70
+ dolor sit amet,
71
+ consectetur
72
+ <BLANKLINE>
73
+ >>> print(metadata["foo"])
74
+ Lorem ipsum
75
+ >>> # Strings can be only used for access, if the key contains only a name
76
+ >>> print("bar" in metadata)
77
+ False
78
+ >>> print(metadata[Metadata.Key(number=42, name="bar")])
79
+ dolor sit amet,
80
+ consectetur
81
+ """
82
+
83
+ @dataclass(frozen=True, kw_only=True)
84
+ class Key:
85
+ """
86
+ A metadata key.
87
+
88
+ Parameters
89
+ ----------
90
+ number : int, optional
91
+ number of the field in the database.
92
+ name : str, optional
93
+ Name of the field.
94
+ May only contain alphanumeric characters, underscores and
95
+ periods.
96
+ registry_internal : int, optional
97
+ Internal registry number.
98
+ registry_external : str, optional
99
+ External registry number.
100
+
101
+ Attributes
102
+ ----------
103
+ number, name, registry_internal, registry_external
104
+ The same as the parameters.
105
+ """
106
+
107
+ # The characters that can be given as input to `name`
108
+ # First character must be alphanumeric,
109
+ # following characters may include underscores and periods
110
+ # Although the V3000 specification forbids the use of periods,
111
+ # they are still used in practice and therefore allowed here
112
+ _NAME_INPUT_REGEX = re.compile(r"^[a-zA-Z0-9][\w.]*$")
113
+ # These regexes are used to parse the key from a line
114
+ _COMPONENT_REGEX = {
115
+ "number": re.compile(r"^DT(\d+)$"),
116
+ "name": re.compile(r"^<([a-zA-Z0-9][\w.]*)>$"),
117
+ "registry_internal": re.compile(r"^(\d+)$"),
118
+ "registry_external": re.compile(r"^\(([\w.-]*)\)$"),
119
+ }
120
+
121
+ number: ... = None
122
+ name: ... = None
123
+ registry_internal: ... = None
124
+ registry_external: ... = None
125
+
126
+ def __post_init__(self):
127
+ if self.name is None and self.number is None:
128
+ raise ValueError("At least the field number or name must be set")
129
+ if self.name is not None:
130
+ if not Metadata.Key._NAME_INPUT_REGEX.match(self.name):
131
+ raise ValueError(
132
+ f"Invalid name '{self.name}', must only contains "
133
+ "alphanumeric characters, underscores and periods"
134
+ )
135
+ if self.number is not None:
136
+ # Cannot set field directly as 'frozen=True'
137
+ object.__setattr__(self, "number", int(self.number))
138
+ if self.registry_internal is not None:
139
+ object.__setattr__(
140
+ self, "registry_internal", int(self.registry_internal)
141
+ )
142
+
143
+ @staticmethod
144
+ def deserialize(text):
145
+ """
146
+ Create a :class:`Metadata.Key` object by deserializing the given text
147
+ content.
148
+
149
+ Parameters
150
+ ----------
151
+ text : str
152
+ The content to be deserialized.
153
+
154
+ Returns
155
+ -------
156
+ key : Metadata.Key
157
+ The parsed key.
158
+ """
159
+ # Omit the leading '>'
160
+ key_components = text[1:].split()
161
+ parsed_component_dict = {}
162
+ for component in key_components:
163
+ # For each component in each the key,
164
+ # try to match it with each of the regexes
165
+ for attr_name, regex in Metadata.Key._COMPONENT_REGEX.items():
166
+ pattern_match = regex.match(component)
167
+ if pattern_match is None:
168
+ # Try next pattern
169
+ continue
170
+ if attr_name in parsed_component_dict:
171
+ raise DeserializationError(
172
+ f"Duplicate key component for '{attr_name}'"
173
+ )
174
+ value = pattern_match.group(1)
175
+ parsed_component_dict[attr_name] = value
176
+ break
177
+ else:
178
+ # There is no matching pattern
179
+ raise DeserializationError(f"Invalid key component '{component}'")
180
+ return Metadata.Key(**parsed_component_dict)
181
+
182
+ def serialize(self):
183
+ """
184
+ Convert this object into text content.
185
+
186
+ Returns
187
+ -------
188
+ content : str
189
+ The serialized content.
190
+ """
191
+ key_string = "> "
192
+ if self.number is not None:
193
+ key_string += f"DT{self.number} "
194
+ if self.name is not None:
195
+ key_string += f"<{self.name}> "
196
+ if self.registry_internal is not None:
197
+ key_string += f"{self.registry_internal} "
198
+ if self.registry_external is not None:
199
+ key_string += f"({self.registry_external}) "
200
+ return key_string
201
+
202
+ def __str__(self):
203
+ return self.serialize()
204
+
205
+ def __init__(self, metadata=None):
206
+ if metadata is None:
207
+ metadata = {}
208
+ self._metadata = {}
209
+ for key, value in metadata.items():
210
+ self._metadata[_to_metadata_key(key)] = value
211
+
212
+ @staticmethod
213
+ def deserialize(text):
214
+ """
215
+ Create a :class:`Metadata` objtect by deserializing the given text content.
216
+
217
+ Parameters
218
+ ----------
219
+ text : str
220
+ The content to be deserialized.
221
+
222
+ Returns
223
+ -------
224
+ metadata : Metadata
225
+ The parsed metadata.
226
+ """
227
+ metadata = {}
228
+ current_key = None
229
+ current_value = None
230
+ for line in text.splitlines():
231
+ line = line.strip()
232
+ if len(line) == 0:
233
+ # Skip empty lines
234
+ continue
235
+ if line.startswith(">"):
236
+ _add_key_value_pair(metadata, current_key, current_value)
237
+ current_key = Metadata.Key.deserialize(line)
238
+ current_value = None
239
+ else:
240
+ if current_key is None:
241
+ raise DeserializationError("Value found before metadata key")
242
+ if current_value is None:
243
+ current_value = line
244
+ else:
245
+ current_value += "\n" + line
246
+ # Add final pair
247
+ _add_key_value_pair(metadata, current_key, current_value)
248
+ return Metadata(metadata)
249
+
250
+ def serialize(self):
251
+ """
252
+ Convert this object into text content.
253
+
254
+ Returns
255
+ -------
256
+ content : str
257
+ The serialized content.
258
+ """
259
+ text_blocks = []
260
+ for key, value in self._metadata.items():
261
+ text_blocks.append(key.serialize())
262
+ # Add empty line after value
263
+ text_blocks.append(value + "\n")
264
+ return _join_with_terminal_newline(text_blocks)
265
+
266
+ def __getitem__(self, key):
267
+ return self._metadata[_to_metadata_key(key)]
268
+
269
+ def __setitem__(self, key, value):
270
+ if len(value) == 0:
271
+ raise ValueError("Metadata value must not be empty")
272
+ self._metadata[_to_metadata_key(key)] = value
273
+
274
+ def __delitem__(self, key):
275
+ del self._metadata[_to_metadata_key(key)]
276
+
277
+ def __iter__(self):
278
+ return iter(self._metadata)
279
+
280
+ def __len__(self):
281
+ return len(self._metadata)
282
+
283
+ def __eq__(self, other):
284
+ if not isinstance(other, type(self)):
285
+ return False
286
+ if set(self.keys()) != set(other.keys()):
287
+ return False
288
+ for key in self.keys():
289
+ if self[key] != other[key]:
290
+ return False
291
+ return True
292
+
293
+ def __str__(self):
294
+ return self.serialize()
295
+
296
+
297
+ class SDRecord:
298
+ """
299
+ A record in a SD file.
300
+
301
+ Parameters
302
+ ----------
303
+ header : Header, optional
304
+ The header of the record.
305
+ By default, an empty header is created.
306
+ ctab : str, optional
307
+ The connection table (atoms and bonds) in the record.
308
+ By default, an empty structure is created.
309
+ metadata : Metadata, Mapping or str, optional
310
+ The metadata of the record.
311
+ Can be given as dictionary mapping :attr:`Metadata.Key.name`
312
+ to the respective metadata value.
313
+ By default, no metadata is appended to the record.
314
+
315
+ Attributes
316
+ ----------
317
+ header, ctab, metadata
318
+ The same as the parameters.
319
+
320
+ Examples
321
+ --------
322
+
323
+ >>> atoms = residue("ALA")
324
+ >>> record = SDRecord(header=Header(mol_name="ALA", dimensions="3D"))
325
+ >>> record.set_structure(atoms)
326
+ >>> print(record.get_structure())
327
+ 0 N -0.966 0.493 1.500
328
+ 0 C 0.257 0.418 0.692
329
+ 0 C -0.094 0.017 -0.716
330
+ 0 O -1.056 -0.682 -0.923
331
+ 0 C 1.204 -0.620 1.296
332
+ 0 O 0.661 0.439 -1.742
333
+ 0 H -1.383 -0.425 1.482
334
+ 0 H -0.676 0.661 2.452
335
+ 0 H 0.746 1.392 0.682
336
+ 0 H 1.459 -0.330 2.316
337
+ 0 H 0.715 -1.594 1.307
338
+ 0 H 2.113 -0.676 0.697
339
+ 0 H 0.435 0.182 -2.647
340
+ >>> # Add the record to an SD file
341
+ >>> file = SDFile()
342
+ >>> file["ALA"] = record
343
+ >>> print(file)
344
+ ALA
345
+ 3D
346
+ <BLANKLINE>
347
+ 13 12 0 0 0 0 0 0 0 1 V2000
348
+ -0.9660 0.4930 1.5000 N 0 0 0 0 0 0 0 0 0 0 0 0
349
+ 0.2570 0.4180 0.6920 C 0 0 0 0 0 0 0 0 0 0 0 0
350
+ -0.0940 0.0170 -0.7160 C 0 0 0 0 0 0 0 0 0 0 0 0
351
+ -1.0560 -0.6820 -0.9230 O 0 0 0 0 0 0 0 0 0 0 0 0
352
+ 1.2040 -0.6200 1.2960 C 0 0 0 0 0 0 0 0 0 0 0 0
353
+ 0.6610 0.4390 -1.7420 O 0 0 0 0 0 0 0 0 0 0 0 0
354
+ -1.3830 -0.4250 1.4820 H 0 0 0 0 0 0 0 0 0 0 0 0
355
+ -0.6760 0.6610 2.4520 H 0 0 0 0 0 0 0 0 0 0 0 0
356
+ 0.7460 1.3920 0.6820 H 0 0 0 0 0 0 0 0 0 0 0 0
357
+ 1.4590 -0.3300 2.3160 H 0 0 0 0 0 0 0 0 0 0 0 0
358
+ 0.7150 -1.5940 1.3070 H 0 0 0 0 0 0 0 0 0 0 0 0
359
+ 2.1130 -0.6760 0.6970 H 0 0 0 0 0 0 0 0 0 0 0 0
360
+ 0.4350 0.1820 -2.6470 H 0 0 0 0 0 0 0 0 0 0 0 0
361
+ 1 2 1 0 0 0 0
362
+ 1 7 1 0 0 0 0
363
+ 1 8 1 0 0 0 0
364
+ 2 3 1 0 0 0 0
365
+ 2 5 1 0 0 0 0
366
+ 2 9 1 0 0 0 0
367
+ 3 4 2 0 0 0 0
368
+ 3 6 1 0 0 0 0
369
+ 5 10 1 0 0 0 0
370
+ 5 11 1 0 0 0 0
371
+ 5 12 1 0 0 0 0
372
+ 6 13 1 0 0 0 0
373
+ M END
374
+ $$$$
375
+ <BLANKLINE>
376
+ """
377
+
378
+ def __init__(self, header=None, ctab=None, metadata=None):
379
+ if header is None:
380
+ self._header = Header()
381
+ else:
382
+ self._header = header
383
+
384
+ self._ctab = ctab
385
+
386
+ if metadata is None:
387
+ self._metadata = Metadata()
388
+ elif isinstance(metadata, Metadata):
389
+ self._metadata = metadata
390
+ elif isinstance(metadata, Mapping):
391
+ self._metadata = Metadata(metadata)
392
+ elif isinstance(metadata, str):
393
+ # Serialized form -> will be lazily deserialized
394
+ self._metadata = metadata
395
+ else:
396
+ raise TypeError(
397
+ "Expected 'Metadata', Mapping or str, "
398
+ f"but got '{type(metadata).__name__}'"
399
+ )
400
+
401
+ @property
402
+ def header(self):
403
+ if isinstance(self._header, str):
404
+ try:
405
+ self._header = Header.deserialize(self._header)
406
+ except Exception:
407
+ raise DeserializationError("Failed to deserialize header")
408
+ return self._header
409
+
410
+ @header.setter
411
+ def header(self, header):
412
+ self._header = header
413
+
414
+ @property
415
+ def ctab(self):
416
+ # CTAB string cannot be changed directly -> no setter
417
+ return self._ctab
418
+
419
+ @property
420
+ def metadata(self):
421
+ if isinstance(self._metadata, str):
422
+ try:
423
+ self._metadata = Metadata.deserialize(self._metadata)
424
+ except Exception:
425
+ raise DeserializationError("Failed to deserialize metadata")
426
+ return self._metadata
427
+
428
+ @metadata.setter
429
+ def metadata(self, metadata):
430
+ if isinstance(metadata, Metadata):
431
+ self._metadata = metadata
432
+ elif isinstance(metadata, Mapping):
433
+ self._metadata = Metadata(metadata)
434
+ else:
435
+ raise TypeError(
436
+ f"Expected 'Metadata' or Mapping, but got '{type(metadata).__name__}'"
437
+ )
438
+
439
@staticmethod
def deserialize(text):
    """
    Build an :class:`SDRecord` from its text representation.

    Parameters
    ----------
    text : str
        The content to be deserialized.

    Returns
    -------
    record : SDRecord
        The parsed record.
    """
    all_lines = text.splitlines()
    ctab_stop = _get_ctab_stop(all_lines)
    # Header, CTAB and metadata are handed over as text
    sections = (
        all_lines[:_N_HEADER],
        all_lines[_N_HEADER:ctab_stop],
        all_lines[ctab_stop:],
    )
    return SDRecord(*[_join_with_terminal_newline(section) for section in sections])
461
+
462
+ def serialize(self):
463
+ """
464
+ Convert this object into text content.
465
+
466
+ Returns
467
+ -------
468
+ content : str
469
+ The serialized content.
470
+ """
471
+ if isinstance(self._header, str):
472
+ header_string = self._header
473
+ else:
474
+ header_string = self._header.serialize()
475
+
476
+ if self._ctab is None:
477
+ ctab_string = _empty_ctab()
478
+ else:
479
+ ctab_string = self._ctab
480
+
481
+ if isinstance(self._metadata, str):
482
+ metadata_string = self._metadata
483
+ else:
484
+ metadata_string = self._metadata.serialize()
485
+
486
+ return header_string + ctab_string + metadata_string
487
+
488
def get_structure(self):
    """
    Parse the structural data in the SD record.

    Returns
    -------
    array : AtomArray
        This :class:`AtomArray` contains the optional ``charge``
        annotation and has an associated :class:`BondList`.
        All other annotation categories, except ``element`` are
        empty.

    Raises
    ------
    InvalidFileError
        If the record holds no structure data, i.e. no CTAB was set
        or the CTAB text is empty.
    """
    # A record created without CTAB stores None
    # -> treat it like an empty CTAB instead of failing on 'None.splitlines()'
    ctab_lines = [] if self._ctab is None else self._ctab.splitlines()
    if len(ctab_lines) == 0:
        raise InvalidFileError("File does not contain structure data")
    return read_structure_from_ctab(ctab_lines)
504
+
505
def set_structure(self, atoms, default_bond_type=BondType.ANY, version=None):
    """
    Store the given structure as CTAB text in this SD record.

    Parameters
    ----------
    atoms : AtomArray
        The array to be saved into this file.
        Must have an associated :class:`BondList`.
    default_bond_type : BondType, optional
        Bond type fallback for the *Bond block*, if a
        :class:`BondType` has no CTAB counterpart.
        By default, each such bond is treated as
        :attr:`BondType.ANY`.
    version : {"V2000", "V3000"}, optional
        The version of the CTAB format.
        ``"V2000"`` uses the *Atom* and *Bond* block, while
        ``"V3000"`` uses the *Properties* block.
        By default, ``"V2000"`` is used, unless the number of atoms
        or bonds exceeds 999, in which case ``"V3000"`` is used.
    """
    ctab_lines = write_structure_to_ctab(atoms, default_bond_type, version)
    # The CTAB is kept as a single text block
    self._ctab = _join_with_terminal_newline(ctab_lines)
529
+
530
+ def __eq__(self, other):
531
+ if not isinstance(other, type(self)):
532
+ return False
533
+ if not self.header == other.header:
534
+ return False
535
+ if not self.ctab == other.ctab:
536
+ return False
537
+ if not self.metadata == other.metadata:
538
+ return False
539
+ return True
540
+
541
+ def __str__(self):
542
+ return self.serialize()
543
+
544
+
545
+ class SDFile(File, MutableMapping):
546
+ """
547
+ This class represents an SD file for storing small molecule
548
+ structures.
549
+
550
+ The records for each molecule in the file can be accessed and
551
+ modified like a dictionary.
552
+ The structures can be parsed and written from/to each
553
+ :class:`SDRecord` object via :func:`get_structure()` or
554
+ :func:`set_structure()`, respectively.
555
+
556
+ Parameters
557
+ ----------
558
+ records : dict (str -> SDRecord), optional
559
+ The initial records of the file.
560
+ Maps the record names to the corresponding :class:`SDRecord` objects.
561
+ By default no initial records are added.
562
+
563
+ Attributes
564
+ ----------
565
+ record : SDRecord
566
+ The sole record of the file.
567
+ If the file contains multiple records, an exception is raised.
568
+
569
+ Examples
570
+ --------
571
+ Read a SD file and parse the molecular structure:
572
+
573
+ >>> import os.path
574
+ >>> file = SDFile.read(os.path.join(path_to_structures, "molecules", "TYR.sdf"))
575
+ >>> molecule = file.record.get_structure()
576
+ >>> print(molecule)
577
+ 0 N 1.320 0.952 1.428
578
+ 0 C -0.018 0.429 1.734
579
+ 0 C -0.103 0.094 3.201
580
+ 0 O 0.886 -0.254 3.799
581
+ 0 C -0.274 -0.831 0.907
582
+ 0 C -0.189 -0.496 -0.559
583
+ 0 C 1.022 -0.589 -1.219
584
+ 0 C -1.324 -0.102 -1.244
585
+ 0 C 1.103 -0.282 -2.563
586
+ 0 C -1.247 0.210 -2.587
587
+ 0 C -0.032 0.118 -3.252
588
+ 0 O 0.044 0.420 -4.574
589
+ 0 O -1.279 0.184 3.842
590
+ 0 H 1.977 0.225 1.669
591
+ 0 H 1.365 1.063 0.426
592
+ 0 H -0.767 1.183 1.489
593
+ 0 H 0.473 -1.585 1.152
594
+ 0 H -1.268 -1.219 1.134
595
+ 0 H 1.905 -0.902 -0.683
596
+ 0 H -2.269 -0.031 -0.727
597
+ 0 H 2.049 -0.354 -3.078
598
+ 0 H -2.132 0.523 -3.121
599
+ 0 H -0.123 -0.399 -5.059
600
+ 0 H -1.333 -0.030 4.784
601
+
602
+ Note that important atom annotations may be missing.
603
+ These can be set afterwards:
604
+
605
+ >>> molecule.res_name[:] = "TYR"
606
+ >>> molecule.atom_name[:] = create_atom_names(molecule)
607
+ >>> print(molecule)
608
+ 0 TYR N1 N 1.320 0.952 1.428
609
+ 0 TYR C1 C -0.018 0.429 1.734
610
+ 0 TYR C2 C -0.103 0.094 3.201
611
+ 0 TYR O1 O 0.886 -0.254 3.799
612
+ 0 TYR C3 C -0.274 -0.831 0.907
613
+ 0 TYR C4 C -0.189 -0.496 -0.559
614
+ 0 TYR C5 C 1.022 -0.589 -1.219
615
+ 0 TYR C6 C -1.324 -0.102 -1.244
616
+ 0 TYR C7 C 1.103 -0.282 -2.563
617
+ 0 TYR C8 C -1.247 0.210 -2.587
618
+ 0 TYR C9 C -0.032 0.118 -3.252
619
+ 0 TYR O2 O 0.044 0.420 -4.574
620
+ 0 TYR O3 O -1.279 0.184 3.842
621
+ 0 TYR H1 H 1.977 0.225 1.669
622
+ 0 TYR H2 H 1.365 1.063 0.426
623
+ 0 TYR H3 H -0.767 1.183 1.489
624
+ 0 TYR H4 H 0.473 -1.585 1.152
625
+ 0 TYR H5 H -1.268 -1.219 1.134
626
+ 0 TYR H6 H 1.905 -0.902 -0.683
627
+ 0 TYR H7 H -2.269 -0.031 -0.727
628
+ 0 TYR H8 H 2.049 -0.354 -3.078
629
+ 0 TYR H9 H -2.132 0.523 -3.121
630
+ 0 TYR H10 H -0.123 -0.399 -5.059
631
+ 0 TYR H11 H -1.333 -0.030 4.784
632
+
633
+ Create a SD file and write it to disk:
634
+
635
+ >>> another_molecule = residue("ALA")
636
+ >>> file = SDFile()
637
+ >>> record = SDRecord()
638
+ >>> record.set_structure(molecule)
639
+ >>> file["TYR"] = record
640
+ >>> record = SDRecord()
641
+ >>> record.set_structure(another_molecule)
642
+ >>> file["ALA"] = record
643
+ >>> file.write(os.path.join(path_to_directory, "some_file.sdf"))
644
+ >>> print(file)
645
+ TYR
646
+ <BLANKLINE>
647
+ <BLANKLINE>
648
+ 24 24 0 0 0 0 0 0 0 1 V2000
649
+ 1.3200 0.9520 1.4280 N 0 0 0 0 0 0 0 0 0 0 0 0
650
+ -0.0180 0.4290 1.7340 C 0 0 0 0 0 0 0 0 0 0 0 0
651
+ -0.1030 0.0940 3.2010 C 0 0 0 0 0 0 0 0 0 0 0 0
652
+ 0.8860 -0.2540 3.7990 O 0 0 0 0 0 0 0 0 0 0 0 0
653
+ -0.2740 -0.8310 0.9070 C 0 0 0 0 0 0 0 0 0 0 0 0
654
+ -0.1890 -0.4960 -0.5590 C 0 0 0 0 0 0 0 0 0 0 0 0
655
+ 1.0220 -0.5890 -1.2190 C 0 0 0 0 0 0 0 0 0 0 0 0
656
+ -1.3240 -0.1020 -1.2440 C 0 0 0 0 0 0 0 0 0 0 0 0
657
+ 1.1030 -0.2820 -2.5630 C 0 0 0 0 0 0 0 0 0 0 0 0
658
+ -1.2470 0.2100 -2.5870 C 0 0 0 0 0 0 0 0 0 0 0 0
659
+ -0.0320 0.1180 -3.2520 C 0 0 0 0 0 0 0 0 0 0 0 0
660
+ 0.0440 0.4200 -4.5740 O 0 0 0 0 0 0 0 0 0 0 0 0
661
+ -1.2790 0.1840 3.8420 O 0 0 0 0 0 0 0 0 0 0 0 0
662
+ 1.9770 0.2250 1.6690 H 0 0 0 0 0 0 0 0 0 0 0 0
663
+ 1.3650 1.0630 0.4260 H 0 0 0 0 0 0 0 0 0 0 0 0
664
+ -0.7670 1.1830 1.4890 H 0 0 0 0 0 0 0 0 0 0 0 0
665
+ 0.4730 -1.5850 1.1520 H 0 0 0 0 0 0 0 0 0 0 0 0
666
+ -1.2680 -1.2190 1.1340 H 0 0 0 0 0 0 0 0 0 0 0 0
667
+ 1.9050 -0.9020 -0.6830 H 0 0 0 0 0 0 0 0 0 0 0 0
668
+ -2.2690 -0.0310 -0.7270 H 0 0 0 0 0 0 0 0 0 0 0 0
669
+ 2.0490 -0.3540 -3.0780 H 0 0 0 0 0 0 0 0 0 0 0 0
670
+ -2.1320 0.5230 -3.1210 H 0 0 0 0 0 0 0 0 0 0 0 0
671
+ -0.1230 -0.3990 -5.0590 H 0 0 0 0 0 0 0 0 0 0 0 0
672
+ -1.3330 -0.0300 4.7840 H 0 0 0 0 0 0 0 0 0 0 0 0
673
+ 1 2 1 0 0 0 0
674
+ 1 14 1 0 0 0 0
675
+ 1 15 1 0 0 0 0
676
+ 2 3 1 0 0 0 0
677
+ 2 5 1 0 0 0 0
678
+ 2 16 1 0 0 0 0
679
+ 3 4 2 0 0 0 0
680
+ 3 13 1 0 0 0 0
681
+ 5 6 1 0 0 0 0
682
+ 5 17 1 0 0 0 0
683
+ 5 18 1 0 0 0 0
684
+ 6 7 2 0 0 0 0
685
+ 6 8 1 0 0 0 0
686
+ 7 9 1 0 0 0 0
687
+ 7 19 1 0 0 0 0
688
+ 8 10 2 0 0 0 0
689
+ 8 20 1 0 0 0 0
690
+ 9 11 2 0 0 0 0
691
+ 9 21 1 0 0 0 0
692
+ 10 11 1 0 0 0 0
693
+ 10 22 1 0 0 0 0
694
+ 11 12 1 0 0 0 0
695
+ 12 23 1 0 0 0 0
696
+ 13 24 1 0 0 0 0
697
+ M END
698
+ $$$$
699
+ ALA
700
+ <BLANKLINE>
701
+ <BLANKLINE>
702
+ 13 12 0 0 0 0 0 0 0 1 V2000
703
+ -0.9660 0.4930 1.5000 N 0 0 0 0 0 0 0 0 0 0 0 0
704
+ 0.2570 0.4180 0.6920 C 0 0 0 0 0 0 0 0 0 0 0 0
705
+ -0.0940 0.0170 -0.7160 C 0 0 0 0 0 0 0 0 0 0 0 0
706
+ -1.0560 -0.6820 -0.9230 O 0 0 0 0 0 0 0 0 0 0 0 0
707
+ 1.2040 -0.6200 1.2960 C 0 0 0 0 0 0 0 0 0 0 0 0
708
+ 0.6610 0.4390 -1.7420 O 0 0 0 0 0 0 0 0 0 0 0 0
709
+ -1.3830 -0.4250 1.4820 H 0 0 0 0 0 0 0 0 0 0 0 0
710
+ -0.6760 0.6610 2.4520 H 0 0 0 0 0 0 0 0 0 0 0 0
711
+ 0.7460 1.3920 0.6820 H 0 0 0 0 0 0 0 0 0 0 0 0
712
+ 1.4590 -0.3300 2.3160 H 0 0 0 0 0 0 0 0 0 0 0 0
713
+ 0.7150 -1.5940 1.3070 H 0 0 0 0 0 0 0 0 0 0 0 0
714
+ 2.1130 -0.6760 0.6970 H 0 0 0 0 0 0 0 0 0 0 0 0
715
+ 0.4350 0.1820 -2.6470 H 0 0 0 0 0 0 0 0 0 0 0 0
716
+ 1 2 1 0 0 0 0
717
+ 1 7 1 0 0 0 0
718
+ 1 8 1 0 0 0 0
719
+ 2 3 1 0 0 0 0
720
+ 2 5 1 0 0 0 0
721
+ 2 9 1 0 0 0 0
722
+ 3 4 2 0 0 0 0
723
+ 3 6 1 0 0 0 0
724
+ 5 10 1 0 0 0 0
725
+ 5 11 1 0 0 0 0
726
+ 5 12 1 0 0 0 0
727
+ 6 13 1 0 0 0 0
728
+ M END
729
+ $$$$
730
+ <BLANKLINE>
731
+ """
732
+
733
+ def __init__(self, records=None):
734
+ self._records = {}
735
+ if records is not None:
736
+ for mol_name, record in records.items():
737
+ if isinstance(record, SDRecord):
738
+ record.header.mol_name = mol_name
739
+ self._records[mol_name] = record
740
+
741
+ @property
742
+ def lines(self):
743
+ return self.serialize().splitlines()
744
+
745
+ @property
746
+ def record(self):
747
+ if len(self) == 0:
748
+ raise ValueError("There are no records in the file")
749
+ if len(self) > 1:
750
+ raise ValueError("There are multiple records in the file")
751
+ return self[next(iter(self))]
752
+
753
+ @staticmethod
754
+ def deserialize(text):
755
+ """
756
+ Create an :class:`SDFile` by deserializing the given text content.
757
+
758
+ Parameters
759
+ ----------
760
+ text : str
761
+ The content to be deserialized.
762
+
763
+ Returns
764
+ -------
765
+ file_object : SDFile
766
+ The parsed file.
767
+ """
768
+ lines = text.splitlines()
769
+ record_ends = np.array(
770
+ [i for i, line in enumerate(lines) if line.startswith(_RECORD_DELIMITER)],
771
+ dtype=int,
772
+ )
773
+ if len(record_ends) == 0:
774
+ warnings.warn(
775
+ "Final record delimiter missing, "
776
+ "maybe this is a MOL file instead of a SD file"
777
+ )
778
+ record_ends = np.array([len(lines) - 1], dtype=int)
779
+ # The first record starts at the first line and the last
780
+ # delimiter is at the end of the file
781
+ # Records in the middle start directly after the delimiter
782
+ record_starts = np.concatenate(([0], record_ends[:-1] + 1), dtype=int)
783
+ record_names = [lines[start].strip() for start in record_starts]
784
+ return SDFile(
785
+ {
786
+ # Do not include the delimiter
787
+ # -> stop at end (instead of end + 1)
788
+ name: _join_with_terminal_newline(lines[start:end])
789
+ for name, start, end in zip(record_names, record_starts, record_ends)
790
+ }
791
+ )
792
+
793
+ def serialize(self):
794
+ """
795
+ Convert this object into text content.
796
+
797
+ Returns
798
+ -------
799
+ content : str
800
+ The serialized content.
801
+ """
802
+ text_blocks = []
803
+ for record_name, record in self._records.items():
804
+ if isinstance(record, str):
805
+ # Record is already stored as text
806
+ text_blocks.append(record)
807
+ else:
808
+ try:
809
+ text_blocks.append(record.serialize())
810
+ except Exception:
811
+ raise SerializationError(
812
+ f"Failed to serialize record '{record_name}'"
813
+ )
814
+ text_blocks.append(_RECORD_DELIMITER + "\n")
815
+ return "".join(text_blocks)
816
+
817
@classmethod
def read(cls, file):
    """
    Read a SD file.

    Parameters
    ----------
    file : file-like object or str
        The file to be read.
        Alternatively a file path can be supplied.

    Returns
    -------
    file_object : SDFile
        The parsed file.

    Raises
    ------
    TypeError
        If a file object is given that is not opened in text mode.
    """
    if not is_open_compatible(file):
        # File object
        if not is_text(file):
            raise TypeError("A file opened in 'text' mode is required")
        content = file.read()
    else:
        # File name
        with open(file, "r") as handle:
            content = handle.read()
    return SDFile.deserialize(content)
843
+
844
def write(self, file):
    """
    Write the serialized content of this object into a SD file.

    Parameters
    ----------
    file : file-like object or str
        The file to be written to.
        Alternatively a file path can be supplied.

    Raises
    ------
    TypeError
        If a file object is given that is not opened in text mode.
    """
    if not is_open_compatible(file):
        # File object
        if not is_text(file):
            raise TypeError("A file opened in 'text' mode is required")
        file.write(self.serialize())
        return
    # File name
    with open(file, "w") as handle:
        handle.write(self.serialize())
861
+
862
+ def __getitem__(self, key):
863
+ record = self._records[key]
864
+ if isinstance(record, str):
865
+ # Element is stored in serialized form
866
+ # -> must be deserialized first
867
+ try:
868
+ record = SDRecord.deserialize(record)
869
+ except Exception:
870
+ raise DeserializationError(f"Failed to deserialize record '{key}'")
871
+ # Update with deserialized object
872
+ self._records[key] = record
873
+ return record
874
+
875
def __setitem__(self, key, record):
    # Only parsed records can be assigned
    if isinstance(record, SDRecord):
        # The molecule name in the header is unique across the file
        # -> it is overwritten with the key
        record.header.mol_name = key
        self._records[key] = record
        return
    raise TypeError(f"Expected 'SDRecord', but got '{type(record).__name__}'")
881
+
882
+ def __delitem__(self, key):
883
+ del self._records[key]
884
+
885
+ def __iter__(self):
886
+ return iter(self._records)
887
+
888
+ def __len__(self):
889
+ return len(self._records)
890
+
891
+ def __eq__(self, other):
892
+ if not isinstance(other, type(self)):
893
+ return False
894
+ if set(self.keys()) != set(other.keys()):
895
+ return False
896
+ for record_name in self.keys():
897
+ if self[record_name] != other[record_name]:
898
+ return False
899
+ return True
900
+
901
+ def __str__(self):
902
+ return self.serialize()
903
+
904
+
905
+ def _join_with_terminal_newline(text_blocks):
906
+ if len(text_blocks) == 0:
907
+ return ""
908
+ else:
909
+ return "\n".join(text_blocks) + "\n"
910
+
911
+
912
def _empty_ctab():
    """
    Create the CTAB text for a structure without atoms and bonds.
    """
    no_atoms = AtomArray(0)
    no_atoms.bonds = BondList(0)
    ctab_lines = write_structure_to_ctab(no_atoms)
    return _join_with_terminal_newline(ctab_lines)
916
+
917
+
918
def _to_metadata_key(key):
    """
    Convert the given key into a :class:`Metadata.Key`.

    A :class:`Metadata.Key` is returned unchanged, a string becomes
    the ``name`` of a new key and any other type gives a
    :class:`TypeError`.
    """
    if isinstance(key, Metadata.Key):
        return key
    if isinstance(key, str):
        return Metadata.Key(name=key)
    raise TypeError(
        f"Expected 'Metadata.Key' or str, but got '{type(key).__name__}'"
    )
927
+
928
+
929
+ def _add_key_value_pair(metadata, key, value):
930
+ if key is not None:
931
+ if value is None:
932
+ raise DeserializationError(f"No value found for metadata key {key}")
933
+ metadata[key] = value
934
+
935
+
936
def _get_ctab_stop(lines):
    """
    Find the exclusive stop index of the CTAB in the lines of a record.

    The search starts after the header lines.
    The CTAB ends with the first line whose first two
    whitespace-separated fields are ``M`` and ``END``; that line is
    part of the CTAB.
    If there is no such line, the number of lines is returned.
    """
    for i in range(_N_HEADER, len(lines)):
        # Compare the fields instead of a fixed string, so the match
        # does not depend on the amount of whitespace between 'M' and 'END'
        if lines[i].split()[:2] == ["M", "END"]:
            return i + 1
    return len(lines)