biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,420 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Functions for parsing and writing an :class:`AtomArray` from/to
7
+ *MDL* connection tables (Ctab).
8
+ """
9
+
10
+ __name__ = "biotite.structure.io.mol"
11
+ __author__ = "Patrick Kunzmann"
12
+ __all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]
13
+
14
+ import itertools
15
+ import shlex
16
+ import warnings
17
+ import numpy as np
18
+ from biotite.file import InvalidFileError
19
+ from biotite.structure.atoms import AtomArray, AtomArrayStack
20
+ from biotite.structure.bonds import BondList, BondType
21
+ from biotite.structure.error import BadStructureError
22
+ from biotite.structure.io.util import number_of_integer_digits
23
+
24
# Bond type codes used in CTAB bond entries and their BondType counterparts
BOND_TYPE_MAPPING = {
    1: BondType.SINGLE,
    2: BondType.DOUBLE,
    3: BondType.TRIPLE,
    4: BondType.AROMATIC,
    5: BondType.ANY,
    6: BondType.AROMATIC_SINGLE,
    7: BondType.AROMATIC_DOUBLE,
    8: BondType.ANY,
}
# Reverse lookup: if a BondType appears under several codes above,
# the code listed last is the one kept here
BOND_TYPE_MAPPING_REV = {
    bond_type: code for code, bond_type in BOND_TYPE_MAPPING.items()
}

# Charge codes of the V2000 atom block and the formal charge they stand for
CHARGE_MAPPING = {0: 0, 1: 3, 2: 2, 3: 1, 5: -1, 6: -2, 7: -3}
CHARGE_MAPPING_REV = {charge: code for code, charge in CHARGE_MAPPING.items()}

# First line of a written V3000 CTAB
# NOTE(review): runs of spaces inside this fixed-width literal may have been
# collapsed by the rendering this text was taken from - verify against upstream
V2000_COMPATIBILITY_LINE = " 0 0 0 0 0 0 0 0 0 0999 V3000"
# The number of charges per `M CHG` line
N_CHARGES_PER_LINE = 8
42
+
43
+
44
+ def read_structure_from_ctab(ctab_lines):
45
+ """
46
+ Parse a *MDL* connection table (Ctab) to obtain an
47
+ :class:`AtomArray`.
48
+ :footcite:`Dalby1992`
49
+
50
+ Parameters
51
+ ----------
52
+ ctab_lines : lines of str
53
+ The lines containing the *ctab*.
54
+ Must begin with the *counts* line and end with the `M END` line.
55
+
56
+ Returns
57
+ -------
58
+ atoms : AtomArray
59
+ This :class:`AtomArray` contains the optional ``charge``
60
+ annotation and has an associated :class:`BondList`.
61
+
62
+ References
63
+ ----------
64
+
65
+ ``V3000`` specification was taken from
66
+ `<https://discover.3ds.com/sites/default/files/2020-08/biovia_ctfileformats_2020.pdf>`_.
67
+
68
+ .. footbibliography::
69
+ """
70
+ match _get_version(ctab_lines[0]):
71
+ case "V2000":
72
+ return _read_structure_from_ctab_v2000(ctab_lines)
73
+ case "V3000":
74
+ return _read_structure_from_ctab_v3000(ctab_lines)
75
+ case "":
76
+ raise InvalidFileError("CTAB counts line misses version")
77
+ case unkown_version:
78
+ raise InvalidFileError(f"Unknown CTAB version '{unkown_version}'")
79
+
80
+
81
def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY, version=None):
    """
    Serialize an :class:`AtomArray` as *MDL* connection table (Ctab).
    :footcite:`Dalby1992`

    Parameters
    ----------
    atoms : AtomArray
        The structure to be written.
        It must have an associated :class:`BondList`.
    default_bond_type : BondType, optional
        The bond type that is written in place of each
        :class:`BondType` that has no CTAB counterpart.
        By default, :attr:`BondType.ANY` is used for such bonds.
    version : {"V2000", "V3000"}, optional
        The version of the CTAB format to write.
        If omitted, ``"V2000"`` is written as long as the atom and bond
        count are compatible with it, otherwise ``"V3000"`` is written.

    Returns
    -------
    ctab_lines : lines of str
        The lines containing the *ctab*.
        The first line is the *counts* line and the last line is the
        `M END` line.

    Raises
    ------
    TypeError
        If an :class:`AtomArrayStack` is given.
    BadStructureError
        If `atoms` has no :class:`BondList` or contains NaN coordinates.
    ValueError
        If `version` is unknown, or if ``"V2000"`` is requested but the
        number of atoms or bonds is too large for it.

    References
    ----------

    ``V3000`` specification was taken from
    `<https://discover.3ds.com/sites/default/files/2020-08/biovia_ctfileformats_2020.pdf>`_.

    .. footbibliography::
    """
    if isinstance(atoms, AtomArrayStack):
        raise TypeError(
            "An 'AtomArrayStack' was given, but only a single model can be written"
        )
    if atoms.bonds is None:
        raise BadStructureError("Input AtomArray has no associated BondList")
    if np.isnan(atoms.coord).any():
        raise BadStructureError("Input AtomArray has NaN coordinates")

    if version is None or version == "V2000":
        fits_v2000 = _is_v2000_compatible(
            atoms.array_length(), atoms.bonds.get_bond_count()
        )
        if fits_v2000:
            return _write_structure_to_ctab_v2000(atoms, default_bond_type)
        if version is None:
            # No explicit request -> silently fall back to the larger format
            return _write_structure_to_ctab_v3000(atoms, default_bond_type)
        raise ValueError(
            "The given number of atoms or bonds is too large for V2000 format"
        )
    if version == "V3000":
        return _write_structure_to_ctab_v3000(atoms, default_bond_type)
    raise ValueError(f"Unknown CTAB version '{version}'")
144
+
145
+
146
def _read_structure_from_ctab_v2000(ctab_lines):
    """
    Build an :class:`AtomArray` from the lines of a ``V2000`` CTAB.

    The atom and bond counts are taken from the first line, the
    following lines are sliced into the atom and the bond block
    accordingly and the values are read from fixed character columns.

    Parameters
    ----------
    ctab_lines : lines of str
        The lines of the CTAB, starting with the *counts* line.

    Returns
    -------
    atoms : AtomArray
        The parsed atoms including a ``charge`` annotation and a
        :class:`BondList`.
    """
    n_atoms, n_bonds = _get_counts_v2000(ctab_lines[0])
    atom_block_end = 1 + n_atoms
    bond_block_end = atom_block_end + n_bonds
    atom_lines = ctab_lines[1:atom_block_end]
    bond_lines = ctab_lines[atom_block_end:bond_block_end]
    # NOTE(review): whitespace inside the 'M CHG' literal may have been
    # collapsed by the rendering this text was taken from - verify against
    # upstream
    charge_lines = [
        line for line in ctab_lines[bond_block_end:] if line.startswith("M CHG")
    ]
    # If one 'M CHG' entry is present,
    # it supersedes all atom charges in the atom block
    read_atom_block_charges = len(charge_lines) == 0

    atoms = AtomArray(n_atoms)
    atoms.add_annotation("charge", int)
    for atom_i, atom_line in enumerate(atom_lines):
        # x, y and z occupy three consecutive fields of 10 characters each
        for axis in range(3):
            atoms.coord[atom_i, axis] = float(atom_line[10 * axis : 10 * axis + 10])
        atoms.element[atom_i] = atom_line[31:34].strip().upper()
        if not read_atom_block_charges:
            continue
        charge_code = int(atom_line[36:39])
        charge = CHARGE_MAPPING.get(charge_code)
        if charge is None:
            warnings.warn(
                f"Cannot handle MDL charge type {charge_code}, "
                f"0 is used instead"
            )
            charge = 0
        atoms.charge[atom_i] = charge

    for charge_line in charge_lines:
        # Remove 'M CHGnn8' prefix (the first 9 characters)
        entries = charge_line[9:].split()
        # The line contains atom index and charge alternatingly
        for atom_number_str, charge_str in _batched(entries, 2):
            atom_index = int(atom_number_str) - 1
            charge = int(charge_str)
            atoms.charge[atom_index] = charge

    bond_array = np.zeros((n_bonds, 3), dtype=np.uint32)
    for bond_i, bond_line in enumerate(bond_lines):
        type_code = int(bond_line[6:9])
        bond_type = BOND_TYPE_MAPPING.get(type_code)
        if bond_type is None:
            warnings.warn(
                f"Cannot handle MDL bond type {type_code}, "
                f"BondType.ANY is used instead"
            )
            bond_type = BondType.ANY
        # Atom numbers in the file count from 1
        bond_array[bond_i, 0] = int(bond_line[0:3]) - 1
        bond_array[bond_i, 1] = int(bond_line[3:6]) - 1
        bond_array[bond_i, 2] = bond_type
    atoms.bonds = BondList(n_atoms, bond_array)

    return atoms
199
+
200
+
201
def _read_structure_from_ctab_v3000(ctab_lines):
    """
    Build an :class:`AtomArray` from the lines of a ``V3000`` CTAB.

    Only the ``ATOM`` and ``BOND`` blocks are evaluated.

    Parameters
    ----------
    ctab_lines : lines of str
        The lines of the CTAB.

    Returns
    -------
    atoms : AtomArray
        The parsed atoms including a ``charge`` annotation and a
        :class:`BondList`.

    Raises
    ------
    InvalidFileError
        If the ``ATOM`` block contains no lines.
    NotImplementedError
        If an atom has the type ``R#``.
    """
    # NOTE(review): whitespace inside the 'M V30' literal may have been
    # collapsed by the rendering this text was taken from - verify against
    # upstream
    v30_lines = []
    for raw_line in ctab_lines:
        if raw_line.startswith("M V30"):
            v30_lines.append(raw_line[6:].strip())

    atom_lines = _get_block_v3000(v30_lines, "ATOM")
    if not atom_lines:
        raise InvalidFileError("ATOM block is empty")
    atoms = AtomArray(len(atom_lines))
    atoms.add_annotation("charge", int)
    # The V3000 atom index does not necessarily count from 1 to n,
    # but allows arbitrary positive integers
    # Hence, a mapping from V3000 atom index to AtomArray index is
    # needed to get the correct index for a bond
    array_index_of = {}
    for array_i, atom_line in enumerate(atom_lines):
        # Quoted values may contain whitespace -> shell-like splitting
        has_quotes = "'" in atom_line or '"' in atom_line
        fields = shlex.split(atom_line) if has_quotes else atom_line.split()
        v30_index = int(fields[0])
        atom_type = fields[1]
        if atom_type == "R#":
            raise NotImplementedError("Rgroup atoms are not supported")
        coord = np.array(fields[2:5], dtype=float)
        # The field at position 5 is skipped, the rest are 'KEY=value' items
        properties = create_property_dict_v3000(fields[6:])

        array_index_of[v30_index] = array_i
        atoms.coord[array_i] = coord
        atoms.element[array_i] = atom_type.upper()
        atoms.charge[array_i] = int(properties.get("CHG", 0))

    bond_lines = _get_block_v3000(v30_lines, "BOND")
    bond_array = np.zeros((len(bond_lines), 3), dtype=np.uint32)
    for bond_i, bond_line in enumerate(bond_lines):
        fields = bond_line.split()
        type_code = int(fields[1])
        first_v30_index = int(fields[2])
        second_v30_index = int(fields[3])

        bond_type = BOND_TYPE_MAPPING.get(type_code)
        if bond_type is None:
            warnings.warn(
                f"Cannot handle MDL bond type {type_code}, BondType.ANY is used instead"
            )
            bond_type = BondType.ANY
        bond_array[bond_i, 0] = array_index_of[first_v30_index]
        bond_array[bond_i, 1] = array_index_of[second_v30_index]
        bond_array[bond_i, 2] = bond_type
    atoms.bonds = BondList(atoms.array_length(), bond_array)

    return atoms
251
+
252
+
253
+ def _get_version(counts_line):
254
+ return counts_line[33:39].strip()
255
+
256
+
257
+ def _is_v2000_compatible(n_atoms, n_bonds):
258
+ # The format uses a maximum of 3 digits for the atom and bond count
259
+ return n_atoms < 1000 and n_bonds < 1000
260
+
261
+
262
+ def _get_counts_v2000(counts_line):
263
+ return int(counts_line[0:3]), int(counts_line[3:6])
264
+
265
+
266
+ def _get_block_v3000(v30_lines, block_name):
267
+ block_lines = []
268
+ in_block = False
269
+ for line in v30_lines:
270
+ if line.startswith(f"BEGIN {block_name}"):
271
+ in_block = True
272
+ elif line.startswith(f"END {block_name}"):
273
+ if in_block:
274
+ return block_lines
275
+ else:
276
+ raise InvalidFileError(f"Block '{block_name}' ended before it began")
277
+ elif in_block:
278
+ block_lines.append(line)
279
+ return block_lines
280
+
281
+
282
def create_property_dict_v3000(property_strings):
    """
    Convert ``KEY=value`` strings into a dictionary.

    Parameters
    ----------
    property_strings : iterable of str
        The property strings, each in the form ``KEY=value``.

    Returns
    -------
    properties : dict (str -> str)
        Maps each key to its value.
        If a key appears multiple times, its last value is kept.

    Raises
    ------
    ValueError
        If a property string contains no ``=``.
    """
    properties = {}
    for prop in property_strings:
        # Split only at the first '=', as the value itself may contain
        # this character as well
        key, value = prop.split("=", 1)
        properties[key] = value
    return properties
288
+
289
+
290
def _write_structure_to_ctab_v2000(atoms, default_bond_type):
    """
    Create the lines of a ``V2000`` CTAB from an :class:`AtomArray`.

    Parameters
    ----------
    atoms : AtomArray
        The structure to write. Its ``charge`` annotation is optional.
    default_bond_type : BondType
        The bond type written for bond types without CTAB counterpart.

    Returns
    -------
    ctab_lines : list of str
        The *counts* line, followed by the atom, bond and charge lines
        and the closing `M END` line.

    Raises
    ------
    BadStructureError
        If a coordinate requires more than 5 pre-decimal digits.
    """
    # NOTE(review): runs of spaces inside the fixed-width string literals of
    # this function may have been collapsed by the rendering this text was
    # taken from - verify against upstream
    n_atoms = atoms.array_length()
    try:
        charges = atoms.charge
    except AttributeError:
        # Without a charge annotation all atoms are treated as uncharged
        charges = np.zeros(n_atoms, dtype=int)

    n_bonds = atoms.bonds.get_bond_count()
    counts_line = f"{n_atoms:>3d}{n_bonds:>3d}" + " 0 0 0 0 0 0 0 1 V2000"

    for axis, coord_name in enumerate("xyz"):
        n_coord_digits = number_of_integer_digits(atoms.coord[:, axis])
        if n_coord_digits > 5:
            raise BadStructureError(
                f"5 pre-decimal columns for {coord_name}-coordinates are "
                f"available, but array would require {n_coord_digits}"
            )

    # More unused fields
    unused_atom_fields = f"{0:>3d}" * 10
    atom_lines = []
    for atom_i in range(n_atoms):
        # Charges without a charge code are written as code 0
        charge_code = CHARGE_MAPPING_REV.get(charges[atom_i], 0)
        atom_lines.append(
            f"{atoms.coord[atom_i, 0]:>10.4f}"
            f"{atoms.coord[atom_i, 1]:>10.4f}"
            f"{atoms.coord[atom_i, 2]:>10.4f}"
            f" {atoms.element[atom_i].capitalize():3}"
            f"{0:>2}"  # Mass difference -> unused
            f"{charge_code:>3d}"
            f"{unused_atom_fields}"
        )

    default_bond_value = BOND_TYPE_MAPPING_REV[default_bond_type]
    unused_bond_fields = f"{0:>3d}" * 4
    bond_lines = []
    for atom_i, atom_j, bond_type in atoms.bonds.as_array():
        type_code = BOND_TYPE_MAPPING_REV.get(bond_type, default_bond_value)
        bond_lines.append(
            f"{atom_i + 1:>3d}{atom_j + 1:>3d}{type_code:>3d}" + unused_bond_fields
        )

    # V2000 files introduce charge annotations in the property block
    # They define the charge literally (without mapping)
    nonzero_charges = [
        (atom_i, charge) for atom_i, charge in enumerate(charges) if charge != 0
    ]
    # Each `M CHG` line can contain up to 8 charges
    charge_lines = []
    for batch in _batched(nonzero_charges, N_CHARGES_PER_LINE):
        entries = "".join(
            f" {atom_i + 1:>3d} {charge:>3d}" for atom_i, charge in batch
        )
        charge_lines.append(f"M CHG{len(batch):>3d}" + entries)

    return [counts_line, *atom_lines, *bond_lines, *charge_lines, "M END"]
342
+
343
+
344
def _write_structure_to_ctab_v3000(atoms, default_bond_type):
    """
    Create the lines of a ``V3000`` CTAB from an :class:`AtomArray`.

    Parameters
    ----------
    atoms : AtomArray
        The structure to write. Its ``charge`` annotation is optional.
    default_bond_type : BondType
        The bond type written for bond types without CTAB counterpart.

    Returns
    -------
    ctab_lines : list of str
        The ``V2000`` compatibility line, followed by the prefixed
        ``V3000`` lines and the closing `M END` line.

    Raises
    ------
    BadStructureError
        If a coordinate requires more than 5 pre-decimal digits.
    """
    # NOTE(review): runs of spaces inside the 'M V30 ' and 'M END' literals
    # may have been collapsed by the rendering this text was taken from -
    # verify against upstream
    n_atoms = atoms.array_length()
    try:
        charges = atoms.charge
    except AttributeError:
        # Without a charge annotation all atoms are treated as uncharged
        charges = np.zeros(n_atoms, dtype=int)

    counts_line = f"COUNTS {n_atoms} {atoms.bonds.get_bond_count()} 0 0 0"

    for axis, coord_name in enumerate("xyz"):
        n_coord_digits = number_of_integer_digits(atoms.coord[:, axis])
        if n_coord_digits > 5:
            raise BadStructureError(
                f"5 pre-decimal columns for {coord_name}-coordinates are "
                f"available, but array would require {n_coord_digits}"
            )

    atom_lines = []
    for atom_i in range(n_atoms):
        element = _quote(atoms.element[atom_i].capitalize())
        # An uncharged atom gets an empty property,
        # i.e. its line ends with the separating space
        charge_property = _to_property(charges[atom_i])
        atom_lines.append(
            f"{atom_i + 1}"
            f" {element}"
            f" {atoms.coord[atom_i, 0]:.4f}"
            f" {atoms.coord[atom_i, 1]:.4f}"
            f" {atoms.coord[atom_i, 2]:.4f}"
            # 'aamap' is unused
            f" 0"
            f" {charge_property}"
        )

    default_bond_value = BOND_TYPE_MAPPING_REV[default_bond_type]
    bond_lines = []
    for bond_number, (atom_i, atom_j, bond_type) in enumerate(
        atoms.bonds.as_array(), start=1
    ):
        type_code = BOND_TYPE_MAPPING_REV.get(bond_type, default_bond_value)
        bond_lines.append(f"{bond_number} {type_code} {atom_i + 1} {atom_j + 1}")

    ctab_body = [
        "BEGIN CTAB",
        counts_line,
        "BEGIN ATOM",
        *atom_lines,
        "END ATOM",
        "BEGIN BOND",
        *bond_lines,
        "END BOND",
        "END CTAB",
    ]
    # Mark lines as V3000 CTAB
    v30_lines = [f"M V30 {line}" for line in ctab_body]
    return [V2000_COMPATIBILITY_LINE, *v30_lines, "M END"]
394
+
395
+
396
+ def _to_property(charge):
397
+ if charge == 0:
398
+ return ""
399
+ else:
400
+ return f"CHG={charge}"
401
+
402
+
403
+ def _quote(string):
404
+ if " " in string or len(string) == 0:
405
+ return f'"{string}"'
406
+ else:
407
+ return string
408
+
409
+
410
+ def _batched(iterable, n):
411
+ """
412
+ Equivalent to :func:`itertools.batched()`.
413
+
414
+ However, :func:`itertools.batched()` is available since Python 3.12.
415
+ This function can be removed when the minimum supported Python
416
+ version is 3.12.
417
+ """
418
+ iterator = iter(iterable)
419
+ while batch := tuple(itertools.islice(iterator, n)):
420
+ yield batch
@@ -0,0 +1,120 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.mol"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["Header"]
8
+
9
+ import datetime
10
+ import warnings
11
+ from dataclasses import dataclass
12
+
13
_DATE_FORMAT = "%m%d%y%H%M"


@dataclass
class Header:
    """
    The header for connection tables.

    Parameters
    ----------
    mol_name : str, optional
        The name of the molecule.
    initials : str, optional
        The author's initials. Maximum length is 2.
    program : str, optional
        The program name. Maximum length is 8.
    time : datetime or date, optional
        The time of file creation.
    dimensions : str, optional
        Dimensional codes. Maximum length is 2.
    scaling_factors : str, optional
        Scaling factors. Maximum length is 12.
    energy : str, optional
        Energy from modeling program. Maximum length is 12.
    registry_number : str, optional
        MDL registry number. Maximum length is 6.
    comments : str, optional
        Additional comments.

    Attributes
    ----------
    mol_name, initials, program, time, dimensions, scaling_factors, energy, registry_number, comments
        Same as the parameters.
    """

    mol_name: ... = ""
    initials: ... = ""
    program: ... = ""
    time: ... = None
    dimensions: ... = ""
    scaling_factors: ... = ""
    energy: ... = ""
    registry_number: ... = ""
    comments: ... = ""

    @staticmethod
    def deserialize(text):
        """
        Create a :class:`Header` from the text of a header section.

        Parameters
        ----------
        text : str
            The header text.
            The first three lines are interpreted as molecule name,
            fixed-column information line and comment line.
            Missing lines are treated as empty lines.

        Returns
        -------
        header : Header
            The parsed header.

        Warns
        -----
        UserWarning
            If the time field is not blank, but cannot be parsed.
            In this case `time` is set to ``None``.
        """
        lines = text.splitlines()
        # A truncated or empty header should give empty fields instead
        # of an IndexError -> pad the missing lines
        lines += [""] * (3 - len(lines))
        name_line, info_line, comment_line = lines[:3]

        # Slicing beyond the end of a short line gives an empty string,
        # hence short information lines need no special handling
        time_string = info_line[10:20]
        if time_string.strip() == "":
            time = None
        else:
            try:
                time = datetime.datetime.strptime(time_string, _DATE_FORMAT)
            except ValueError:
                warnings.warn(f"Invalid time format '{time_string}' in file header")
                time = None

        return Header(
            mol_name=name_line.strip(),
            initials=info_line[0:2].strip(),
            program=info_line[2:10].strip(),
            time=time,
            dimensions=info_line[20:22].strip(),
            scaling_factors=info_line[22:34].strip(),
            energy=info_line[34:46].strip(),
            registry_number=info_line[46:52].strip(),
            comments=comment_line.strip(),
        )

    def serialize(self):
        """
        Convert this header into its three-line text representation.

        Returns
        -------
        text : str
            The molecule name line, the fixed-column information line
            and the comment line, each terminated by a line break.

        Raises
        ------
        ValueError
            If the molecule name is longer than 80 characters.
        """
        if len(self.mol_name) > 80:
            raise ValueError("Molecule name must not exceed 80 characters")

        if self.time is None:
            time_str = ""
        else:
            time_str = self.time.strftime(_DATE_FORMAT)

        # Fixed columns -> minimum and maximum length is the same
        # Shorter values are padded, longer values are truncated
        info_line = (
            f"{self.initials:>2.2}"
            f"{self.program:>8.8}"
            f"{time_str:>10.10}"
            f"{self.dimensions:>2.2}"
            f"{self.scaling_factors:>12.12}"
            f"{self.energy:>12.12}"
            f"{self.registry_number:>6.6}"
        )
        return "".join(
            [
                str(self.mol_name) + "\n",
                info_line + "\n",
                str(self.comments) + "\n",
            ]
        )

    def __str__(self):
        return self.serialize()
@@ -0,0 +1,149 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.mol"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["MOLFile"]
8
+
9
+ from biotite.file import InvalidFileError, TextFile
10
+ from biotite.structure.bonds import BondType
11
+ from biotite.structure.io.mol.ctab import (
12
+ read_structure_from_ctab,
13
+ write_structure_to_ctab,
14
+ )
15
+ from biotite.structure.io.mol.header import Header
16
+
17
# Number of header lines
N_HEADER = 3


class MOLFile(TextFile):
    """
    This class represents a file in MOL format, that is used to store
    structure information for small molecules.
    :footcite:`Dalby1992`

    Since its use is intended for single small molecules, it stores
    less atom annotation information than the macromolecular structure
    formats:
    Only the atom positions, charges, elements and bonds can be read
    from the file, chain and residue information is missing.

    This class can also be used to parse the first structure from an SDF
    file, as the SDF format extends the MOL format.

    Attributes
    ----------
    header : Header
        The header of the MOL file.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> from os.path import join
    >>> mol_file = MOLFile.read(join(path_to_structures, "molecules", "TYR.sdf"))
    >>> atom_array = mol_file.get_structure()
    >>> print(atom_array)
    0 N 1.320 0.952 1.428
    0 C -0.018 0.429 1.734
    0 C -0.103 0.094 3.201
    0 O 0.886 -0.254 3.799
    0 C -0.274 -0.831 0.907
    0 C -0.189 -0.496 -0.559
    0 C 1.022 -0.589 -1.219
    0 C -1.324 -0.102 -1.244
    0 C 1.103 -0.282 -2.563
    0 C -1.247 0.210 -2.587
    0 C -0.032 0.118 -3.252
    0 O 0.044 0.420 -4.574
    0 O -1.279 0.184 3.842
    0 H 1.977 0.225 1.669
    0 H 1.365 1.063 0.426
    0 H -0.767 1.183 1.489
    0 H 0.473 -1.585 1.152
    0 H -1.268 -1.219 1.134
    0 H 1.905 -0.902 -0.683
    0 H -2.269 -0.031 -0.727
    0 H 2.049 -0.354 -3.078
    0 H -2.132 0.523 -3.121
    0 H -0.123 -0.399 -5.059
    0 H -1.333 -0.030 4.784
    """

    def __init__(self):
        super().__init__()
        # empty header lines
        self.lines = [""] * N_HEADER
        # The header is parsed lazily on first access
        self._header = None

    @classmethod
    def read(cls, file):
        """
        Read a MOL file.

        The arguments are forwarded to the ``read()`` method of the
        parent class.
        The header is not parsed here, but on first access of
        :attr:`header`.
        """
        mol_file = super().read(file)
        mol_file._header = None
        return mol_file

    @property
    def header(self):
        """
        The :class:`Header` of the file.

        It is parsed from the first `N_HEADER` lines on first access
        and cached afterwards.
        Setting it overwrites these lines with the serialized header.
        """
        if self._header is None:
            self._header = Header.deserialize(
                "\n".join(self.lines[:N_HEADER]) + "\n"
            )
        return self._header

    @header.setter
    def header(self, header):
        self._header = header
        self.lines[:N_HEADER] = self._header.serialize().splitlines()

    def get_structure(self):
        """
        Get an :class:`AtomArray` from the MOL file.

        Returns
        -------
        array : AtomArray
            This :class:`AtomArray` contains the optional ``charge``
            annotation and has an associated :class:`BondList`.
            All other annotation categories, except ``element`` are
            empty.

        Raises
        ------
        InvalidFileError
            If the file contains no lines after the header.
        """
        ctab_lines = _get_ctab_lines(self.lines)
        if len(ctab_lines) == 0:
            raise InvalidFileError("File does not contain structure data")
        return read_structure_from_ctab(ctab_lines)

    def set_structure(self, atoms, default_bond_type=BondType.ANY, version=None):
        """
        Set the :class:`AtomArray` for the file.

        Parameters
        ----------
        atoms : AtomArray
            The array to be saved into this file.
            Must have an associated :class:`BondList`.
        default_bond_type : BondType, optional
            Bond type fallback for the *Bond block*, if a
            :class:`BondType` has no CTAB counterpart.
            By default, each such bond is treated as
            :attr:`BondType.ANY`.
        version : {"V2000", "V3000"}, optional
            The version of the CTAB format.
            ``"V2000"`` uses the *Atom* and *Bond* block, while
            ``"V3000"`` uses the *Properties* block.
            By default, ``"V2000"`` is used, unless the number of atoms
            or bonds exceeds 999, in which case ``"V3000"`` is used.
        """
        # Keep the existing header lines and replace everything after
        self.lines = self.lines[:N_HEADER] + write_structure_to_ctab(
            atoms, default_bond_type, version
        )
143
+
144
+
145
def _get_ctab_lines(lines):
    """
    Extract the connection table lines from the lines of a file.

    The connection table starts after the `N_HEADER` header lines and
    ends with the first line starting with the end marker (inclusive).
    If there is no such line, all lines after the header are returned.
    """
    # NOTE(review): the MDL end marker is conventionally written with two
    # spaces ('M' + 2 spaces + 'END') - confirm that this literal has not
    # been whitespace-collapsed
    end_index = next(
        (index for index, text in enumerate(lines) if text.startswith("M END")),
        None,
    )
    if end_index is None:
        return lines[N_HEADER:]
    return lines[N_HEADER : end_index + 1]