biotite 0.41.1__cp310-cp310-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-310-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,414 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Functions for parsing and writing an :class:`AtomArray` from/to
7
+ *MDL* connection tables (Ctab).
8
+ """
9
+
10
+ __name__ = "biotite.structure.io.mol"
11
+ __author__ = "Patrick Kunzmann"
12
+ __all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]
13
+
14
+ import itertools
15
+ import warnings
16
+ import shlex
17
+ import numpy as np
18
+ from ....file import InvalidFileError
19
+ from ...error import BadStructureError
20
+ from ...atoms import AtomArray, AtomArrayStack
21
+ from ...bonds import BondList, BondType
22
+
23
+ BOND_TYPE_MAPPING = {
24
+ 1: BondType.SINGLE,
25
+ 2: BondType.DOUBLE,
26
+ 3: BondType.TRIPLE,
27
+ 5: BondType.ANY,
28
+ 6: BondType.SINGLE,
29
+ 7: BondType.DOUBLE,
30
+ 8: BondType.ANY,
31
+ }
32
+ BOND_TYPE_MAPPING_REV = {
33
+ BondType.SINGLE: 1,
34
+ BondType.DOUBLE: 2,
35
+ BondType.TRIPLE: 3,
36
+ BondType.AROMATIC_SINGLE: 1,
37
+ BondType.AROMATIC_DOUBLE: 2,
38
+ BondType.ANY: 8,
39
+ }
40
+
41
+ CHARGE_MAPPING = {0: 0, 1: 3, 2: 2, 3: 1, 5: -1, 6: -2, 7: -3}
42
+ CHARGE_MAPPING_REV = {val: key for key, val in CHARGE_MAPPING.items()}
43
+
44
+ V2000_COMPATIBILITY_LINE = " 0 0 0 0 0 0 0 0 0 0999 V3000"
45
+ # The number of charges per `M CHG` line
46
+ N_CHARGES_PER_LINE = 8
47
+
48
+
49
+ def read_structure_from_ctab(ctab_lines):
50
+ """
51
+ Parse a *MDL* connection table (Ctab) to obtain an
52
+ :class:`AtomArray`.
53
+ :footcite:`Dalby1992`
54
+
55
+ Parameters
56
+ ----------
57
+ ctab_lines : lines of str
58
+ The lines containing the *ctab*.
59
+ Must begin with the *counts* line and end with the `M END` line
60
+
61
+ Returns
62
+ -------
63
+ atoms : AtomArray
64
+ This :class:`AtomArray` contains the optional ``charge``
65
+ annotation and has an associated :class:`BondList`.
66
+
67
+ References
68
+ ----------
69
+
70
+ ``V3000`` specification was taken from
71
+ `<https://discover.3ds.com/sites/default/files/2020-08/biovia_ctfileformats_2020.pdf>`_.
72
+
73
+ .. footbibliography::
74
+
75
+ """
76
+ match _get_version(ctab_lines[0]):
77
+ case "V2000":
78
+ return _read_structure_from_ctab_v2000(ctab_lines)
79
+ case "V3000":
80
+ return _read_structure_from_ctab_v3000(ctab_lines)
81
+ case "":
82
+ raise InvalidFileError("CTAB counts line misses version")
83
+ case unkown_version:
84
+ raise InvalidFileError(f"Unknown CTAB version '{unkown_version}'")
85
+
86
+
87
+ def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY,
88
+ version=None):
89
+ """
90
+ Convert an :class:`AtomArray` into a
91
+ *MDL* connection table (Ctab).
92
+ :footcite:`Dalby1992`
93
+
94
+ Parameters
95
+ ----------
96
+ atoms : AtomArray
97
+ The array must have an associated :class:`BondList`.
98
+ default_bond_type : BondType, optional
99
+ Bond type fallback for the *Bond block*, if a :class:`BondType`
100
+ has no CTAB counterpart.
101
+ By default, each such bond is treated as :attr:`BondType.ANY`.
102
+ version : {"V2000", "V3000"}, optional
103
+ The version of the CTAB format.
104
+ ``"V2000"`` uses the *Atom* and *Bond* block, while ``"V3000"``
105
+ uses the *Properties* block.
106
+ By default, ``"V2000"`` is used, unless the number of atoms or
107
+ bonds exceeds 999, in which case ``"V3000"`` is used.
108
+
109
+ Returns
110
+ -------
111
+ ctab_lines : lines of str
112
+ The lines containing the *ctab*.
113
+ The lines begin with the *counts* line and end with the `M END`
114
+ line.
115
+
116
+ References
117
+ ----------
118
+
119
+ ``V3000`` specification was taken from
120
+ `<https://discover.3ds.com/sites/default/files/2020-08/biovia_ctfileformats_2020.pdf>`_.
121
+
122
+ .. footbibliography::
123
+
124
+ """
125
+ if isinstance(atoms, AtomArrayStack):
126
+ raise TypeError(
127
+ "An 'AtomArrayStack' was given, "
128
+ "but only a single model can be written"
129
+ )
130
+ if atoms.bonds is None:
131
+ raise BadStructureError("Input AtomArray has no associated BondList")
132
+ if np.isnan(atoms.coord).any():
133
+ raise BadStructureError("Input AtomArray has NaN coordinates")
134
+
135
+ match version:
136
+ case None:
137
+ if _is_v2000_compatible(
138
+ atoms.array_length(), atoms.bonds.get_bond_count()
139
+ ):
140
+ return _write_structure_to_ctab_v2000(atoms, default_bond_type)
141
+ else:
142
+ return _write_structure_to_ctab_v3000(atoms, default_bond_type)
143
+ case "V2000":
144
+ if not _is_v2000_compatible(
145
+ atoms.array_length(), atoms.bonds.get_bond_count()
146
+ ):
147
+ raise ValueError(
148
+ "The given number of atoms or bonds is too large "
149
+ "for V2000 format"
150
+ )
151
+ return _write_structure_to_ctab_v2000(atoms, default_bond_type)
152
+ case "V3000":
153
+ return _write_structure_to_ctab_v3000(atoms, default_bond_type)
154
+ case unkown_version:
155
+ raise ValueError(f"Unknown CTAB version '{unkown_version}'")
156
+
157
+
158
+ def _read_structure_from_ctab_v2000(ctab_lines):
159
+ n_atoms, n_bonds = _get_counts_v2000(ctab_lines[0])
160
+ atom_lines = ctab_lines[1 : 1 + n_atoms]
161
+ bond_lines = ctab_lines[1 + n_atoms : 1 + n_atoms + n_bonds]
162
+ charge_lines = [
163
+ line for line in ctab_lines[1 + n_atoms + n_bonds:]
164
+ if line.startswith("M CHG")
165
+ ]
166
+
167
+ atoms = AtomArray(n_atoms)
168
+ atoms.add_annotation("charge", int)
169
+ for i, line in enumerate(atom_lines):
170
+ atoms.coord[i, 0] = float(line[0:10])
171
+ atoms.coord[i, 1] = float(line[10:20])
172
+ atoms.coord[i, 2] = float(line[20:30])
173
+ atoms.element[i] = line[31:34].strip().upper()
174
+ # If one 'M CHG' entry is present,
175
+ # it supersedes all atom charges in the atom block
176
+ if not charge_lines:
177
+ charge = CHARGE_MAPPING.get(int(line[36:39]))
178
+ if charge is None:
179
+ warnings.warn(
180
+ f"Cannot handle MDL charge type {int(line[36 : 39])}, "
181
+ f"0 is used instead"
182
+ )
183
+ charge = 0
184
+ atoms.charge[i] = charge
185
+
186
+ for line in charge_lines:
187
+ # Remove 'M CHGnn8' prefix
188
+ line = line[9:]
189
+ # The lines contains atom index and charge alternatingly
190
+ for atom_i_str, charge_str in _batched(line.split(), 2):
191
+ atom_index = int(atom_i_str) - 1
192
+ charge = int(charge_str)
193
+ atoms.charge[atom_index] = charge
194
+
195
+ bond_array = np.zeros((n_bonds, 3), dtype=np.uint32)
196
+ for i, line in enumerate(bond_lines):
197
+ bond_type = BOND_TYPE_MAPPING.get(int(line[6:9]))
198
+ if bond_type is None:
199
+ warnings.warn(
200
+ f"Cannot handle MDL bond type {int(line[6 : 9])}, "
201
+ f"BondType.ANY is used instead"
202
+ )
203
+ bond_type = BondType.ANY
204
+ bond_array[i, 0] = int(line[0:3]) - 1
205
+ bond_array[i, 1] = int(line[3:6]) - 1
206
+ bond_array[i, 2] = bond_type
207
+ atoms.bonds = BondList(n_atoms, bond_array)
208
+
209
+ return atoms
210
+
211
+ def _read_structure_from_ctab_v3000(ctab_lines):
212
+ v30_lines = [
213
+ line[6:].strip() for line in ctab_lines if line.startswith("M V30")
214
+ ]
215
+
216
+ atom_lines = _get_block_v3000(v30_lines, "ATOM")
217
+ if len(atom_lines) == 0:
218
+ raise InvalidFileError("ATOM block is empty")
219
+ atoms = AtomArray(len(atom_lines))
220
+ atoms.add_annotation("charge", int)
221
+ # The V3000 atom index does not necessarily count from 1 to n,
222
+ # but allows arbitrary positive integers
223
+ # Hence, a mapping from V3000 atom index to AtomArray index is
224
+ # needed to get the correct index for a bond
225
+ v30_atom_indices = {}
226
+ for i, line in enumerate(atom_lines):
227
+ if "'" in line or '"' in line:
228
+ columns = shlex.split(line)
229
+ else:
230
+ columns = line.split()
231
+ v30_index = int(columns[0])
232
+ v30_type = columns[1]
233
+ if v30_type == "R#":
234
+ raise NotImplementedError("Rgroup atoms are not supported")
235
+ v30_coord = np.array(columns[2:5], dtype=float)
236
+ v30_properties = create_property_dict_v3000(columns[6:])
237
+
238
+ v30_atom_indices[v30_index] = i
239
+ atoms.coord[i] = v30_coord
240
+ atoms.element[i] = v30_type.upper()
241
+ atoms.charge[i] = int(v30_properties.get("CHG", 0))
242
+
243
+ bond_lines = _get_block_v3000(v30_lines, "BOND")
244
+ bond_array = np.zeros((len(bond_lines), 3), dtype=np.uint32)
245
+ for i, line in enumerate(bond_lines):
246
+ columns = line.split()
247
+ v30_type = int(columns[1])
248
+ v30_atom_index_1 = int(columns[2])
249
+ v30_atom_index_2 = int(columns[3])
250
+
251
+ bond_type = BOND_TYPE_MAPPING.get(v30_type)
252
+ if bond_type is None:
253
+ warnings.warn(
254
+ f"Cannot handle MDL bond type {v30_type}, "
255
+ f"BondType.ANY is used instead"
256
+ )
257
+ bond_type = BondType.ANY
258
+ bond_array[i, 0] = v30_atom_indices[v30_atom_index_1]
259
+ bond_array[i, 1] = v30_atom_indices[v30_atom_index_2]
260
+ bond_array[i, 2] = bond_type
261
+ atoms.bonds = BondList(atoms.array_length(), bond_array)
262
+
263
+ return atoms
264
+
265
+ def _get_version(counts_line):
266
+ return counts_line[33:39].strip()
267
+
268
+ def _is_v2000_compatible(n_atoms, n_bonds):
269
+ # The format uses a maximum of 3 digits for the atom and bond count
270
+ return n_atoms < 1000 and n_bonds < 1000
271
+
272
+ def _get_counts_v2000(counts_line):
273
+ return int(counts_line[0:3]), int(counts_line[3:6])
274
+
275
+ def _get_block_v3000(v30_lines, block_name):
276
+ block_lines = []
277
+ in_block = False
278
+ for line in v30_lines:
279
+ if line.startswith(f"BEGIN {block_name}"):
280
+ in_block = True
281
+ elif line.startswith(f"END {block_name}"):
282
+ if in_block:
283
+ return block_lines
284
+ else:
285
+ raise InvalidFileError(
286
+ f"Block '{block_name}' ended before it began"
287
+ )
288
+ elif in_block:
289
+ block_lines.append(line)
290
+ return block_lines
291
+
292
+ def create_property_dict_v3000(property_strings):
293
+ properties = {}
294
+ for prop in property_strings:
295
+ key, value = prop.split("=")
296
+ properties[key] = value
297
+ return properties
298
+
299
+
300
+ def _write_structure_to_ctab_v2000(atoms, default_bond_type):
301
+ try:
302
+ charge = atoms.charge
303
+ except AttributeError:
304
+ charge = np.zeros(atoms.array_length(), dtype=int)
305
+
306
+ counts_line = (
307
+ f"{atoms.array_length():>3d}{atoms.bonds.get_bond_count():>3d}"
308
+ " 0 0 0 0 0 0 0 1 V2000"
309
+ )
310
+
311
+ atom_lines = [
312
+ f"{atoms.coord[i,0]:>10.4f}"
313
+ f"{atoms.coord[i,1]:>10.4f}"
314
+ f"{atoms.coord[i,2]:>10.4f}"
315
+ f" {atoms.element[i].capitalize():3}"
316
+ f"{0:>2}" # Mass difference -> unused
317
+ f"{CHARGE_MAPPING_REV.get(charge[i], 0):>3d}"
318
+ + f"{0:>3d}" * 10 # More unused fields
319
+ for i in range(atoms.array_length())
320
+ ]
321
+
322
+ default_bond_value = BOND_TYPE_MAPPING_REV[default_bond_type]
323
+ bond_lines = [
324
+ f"{i+1:>3d}{j+1:>3d}"
325
+ f"{BOND_TYPE_MAPPING_REV.get(bond_type, default_bond_value):>3d}"
326
+ + f"{0:>3d}" * 4
327
+ for i, j, bond_type in atoms.bonds.as_array()
328
+ ]
329
+
330
+ # V2000 files introduce charge annotations in the property block
331
+ # They define the charge literally (without mapping)
332
+ charge_lines = []
333
+ # Each `M CHG` line can contain up to 8 charges
334
+ for batch in _batched(
335
+ [(atom_i, c) for atom_i, c in enumerate(charge) if c != 0],
336
+ N_CHARGES_PER_LINE
337
+ ):
338
+ charge_lines.append(
339
+ f"M CHG{len(batch):>3d}"
340
+ + "".join(f" {atom_i+1:>3d} {c:>3d}" for atom_i, c in batch)
341
+ )
342
+
343
+ return [counts_line] + atom_lines + bond_lines + charge_lines + ["M END"]
344
+
345
+
346
+ def _write_structure_to_ctab_v3000(atoms, default_bond_type):
347
+ try:
348
+ charges = atoms.charge
349
+ except AttributeError:
350
+ charges = np.zeros(atoms.array_length(), dtype=int)
351
+
352
+ counts_line = (
353
+ f"COUNTS {atoms.array_length()} {atoms.bonds.get_bond_count()} 0 0 0"
354
+ )
355
+
356
+ atom_lines = [
357
+ f"{i + 1}"
358
+ f" {_quote(atoms.element[i].capitalize())}"
359
+ f" {atoms.coord[i,0]:.4f}"
360
+ f" {atoms.coord[i,1]:.4f}"
361
+ f" {atoms.coord[i,2]:.4f}"
362
+ # 'aamap' is unused
363
+ f" 0"
364
+ f" {_to_property(charges[i])}"
365
+ for i in range(atoms.array_length())
366
+ ]
367
+
368
+ default_bond_value = BOND_TYPE_MAPPING_REV[default_bond_type]
369
+ bond_lines = [
370
+ f"{k + 1}"
371
+ f" {BOND_TYPE_MAPPING_REV.get(bond_type, default_bond_value)}"
372
+ f" {i + 1}"
373
+ f" {j + 1}"
374
+ for k, (i, j, bond_type) in enumerate(atoms.bonds.as_array())
375
+ ]
376
+
377
+ lines = (
378
+ ["BEGIN CTAB"] +
379
+ [counts_line] +
380
+ ["BEGIN ATOM"] +
381
+ atom_lines +
382
+ ["END ATOM"] +
383
+ ["BEGIN BOND"] +
384
+ bond_lines +
385
+ ["END BOND"] +
386
+ ["END CTAB"]
387
+ )
388
+ # Mark lines as V3000 CTAB
389
+ lines = ["M V30 " + line for line in lines]
390
+ return [V2000_COMPATIBILITY_LINE] + lines + ["M END"]
391
+
392
+ def _to_property(charge):
393
+ if charge == 0:
394
+ return ""
395
+ else:
396
+ return f"CHG={charge}"
397
+
398
+ def _quote(string):
399
+ if " " in string or len(string) == 0:
400
+ return f'"{string}"'
401
+ else:
402
+ return string
403
+
404
+ def _batched(iterable, n):
405
+ """
406
+ Equivalent to :func:`itertools.batched()`.
407
+
408
+ However, :func:`itertools.batched()` is available since Python 3.12.
409
+ This function can be removed when the minimum supported Python
410
+ version is 3.12.
411
+ """
412
+ iterator = iter(iterable)
413
+ while batch := tuple(itertools.islice(iterator, n)):
414
+ yield batch
@@ -0,0 +1,116 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.io.mol"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["Header"]
8
+
9
+ import warnings
10
+ import datetime
11
+ from dataclasses import dataclass
12
+
13
+
14
+ _DATE_FORMAT = "%m%d%y%H%M"
15
+
16
+
17
+ @dataclass
18
+ class Header():
19
+ """
20
+ The header for connection tables.
21
+
22
+ Parameters
23
+ ----------
24
+ mol_name : str, optional
25
+ The name of the molecule.
26
+ initials : str, optional
27
+ The author's initials. Maximum length is 2.
28
+ program : str, optional
29
+ The program name. Maximum length is 8.
30
+ time : datetime or date, optional
31
+ The time of file creation.
32
+ dimensions : str, optional
33
+ Dimensional codes. Maximum length is 2.
34
+ scaling_factors : str, optional
35
+ Scaling factors. Maximum length is 12.
36
+ energy : str, optional
37
+ Energy from modeling program. Maximum length is 12.
38
+ registry_number : str, optional
39
+ MDL registry number. Maximum length is 6.
40
+ comments : str, optional
41
+ Additional comments.
42
+
43
+ Attributes
44
+ ----------
45
+ mol_name, initials, program, time, dimensions, scaling_factors, energy, registry_number, comments
46
+ Same as the parameters.
47
+ """
48
+
49
+ mol_name: ... = ""
50
+ initials: ... = ""
51
+ program: ... = ""
52
+ time: ... = None
53
+ dimensions: ... = ""
54
+ scaling_factors: ... = ""
55
+ energy: ... = ""
56
+ registry_number: ... = ""
57
+ comments: ... = ""
58
+
59
+ @staticmethod
60
+ def deserialize(text):
61
+ lines = text.splitlines()
62
+
63
+ mol_name = lines[0].strip()
64
+ initials = lines[1][0:2].strip()
65
+ program = lines[1][2:10].strip()
66
+ time_string = lines[1][10:20]
67
+ if time_string.strip() == "":
68
+ time = None
69
+ else:
70
+ try:
71
+ time = datetime.datetime.strptime(time_string, _DATE_FORMAT)
72
+ except ValueError:
73
+ warnings.warn(
74
+ f"Invalid time format '{time_string}' in file header"
75
+ )
76
+ time = None
77
+ dimensions = lines[1][20:22].strip()
78
+ scaling_factors = lines[1][22:34].strip()
79
+ energy = lines[1][34:46].strip()
80
+ registry_number = lines[1][46:52].strip()
81
+
82
+ comments = lines[2].strip()
83
+
84
+ return Header(
85
+ mol_name, initials, program, time, dimensions,
86
+ scaling_factors, energy, registry_number, comments
87
+ )
88
+
89
+ def serialize(self):
90
+ text = ""
91
+
92
+ if self.time is None:
93
+ time_str = ""
94
+ else:
95
+ time_str = self.time.strftime(_DATE_FORMAT)
96
+
97
+ if len(self.mol_name) > 80:
98
+ raise ValueError("Molecule name must not exceed 80 characters")
99
+ text += str(self.mol_name) + "\n"
100
+ # Fixed columns -> minimum and maximum length is the same
101
+ # Shorter values are padded, longer values are truncated
102
+ text += (
103
+ f"{self.initials:>2.2}"
104
+ f"{self.program:>8.8}"
105
+ f"{time_str:>10.10}"
106
+ f"{self.dimensions:>2.2}"
107
+ f"{self.scaling_factors:>12.12}"
108
+ f"{self.energy:>12.12}"
109
+ f"{self.registry_number:>6.6}"
110
+ "\n"
111
+ )
112
+ text += str(self.comments) + "\n"
113
+ return text
114
+
115
+ def __str__(self):
116
+ return self.serialize()