molSimplify 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (651) hide show
  1. docs/source/conf.py +224 -0
  2. molSimplify/Classes/__init__.py +6 -0
  3. molSimplify/Classes/atom3D.py +235 -0
  4. molSimplify/Classes/dft_obs.py +130 -0
  5. molSimplify/Classes/globalvars.py +827 -0
  6. molSimplify/Classes/helpers.py +161 -0
  7. molSimplify/Classes/ligand.py +2330 -0
  8. molSimplify/Classes/mGUI.py +2493 -0
  9. molSimplify/Classes/mWidgets.py +438 -0
  10. molSimplify/Classes/miniGUI.py +41 -0
  11. molSimplify/Classes/mol2D.py +260 -0
  12. molSimplify/Classes/mol3D.py +5846 -0
  13. molSimplify/Classes/monomer3D.py +253 -0
  14. molSimplify/Classes/partialcharges.py +226 -0
  15. molSimplify/Classes/protein3D.py +1178 -0
  16. molSimplify/Classes/rundiag.py +151 -0
  17. molSimplify/Data/ML.dat +212 -0
  18. molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
  19. molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
  20. molSimplify/Data/MLS_angle_for_click.dat +8 -0
  21. molSimplify/Data/MLS_angle_for_inter.dat +23 -0
  22. molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
  23. molSimplify/Data/MLS_angle_for_intra.dat +10 -0
  24. molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
  25. molSimplify/Data/MLS_angle_for_oa.dat +18 -0
  26. molSimplify/Data/ML_FSR_for_inter.dat +112 -0
  27. molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
  28. molSimplify/Data/ML_bond_for_cat.dat +8 -0
  29. molSimplify/Data/ML_bond_for_click.dat +8 -0
  30. molSimplify/Data/ML_bond_for_inter.dat +48 -0
  31. molSimplify/Data/ML_bond_for_inter2.dat +48 -0
  32. molSimplify/Data/ML_bond_for_intra.dat +10 -0
  33. molSimplify/Data/ML_bond_for_intra2.dat +6 -0
  34. molSimplify/Data/ML_bond_for_oa.dat +18 -0
  35. molSimplify/Data/bp1.dat +21 -0
  36. molSimplify/Data/li.dat +3 -0
  37. molSimplify/Data/no.dat +2 -0
  38. molSimplify/Data/oct.dat +7 -0
  39. molSimplify/Data/pbp.dat +8 -0
  40. molSimplify/Data/spy.dat +6 -0
  41. molSimplify/Data/sqap.dat +9 -0
  42. molSimplify/Data/sqp.dat +5 -0
  43. molSimplify/Data/tbp.dat +6 -0
  44. molSimplify/Data/tdhd.dat +9 -0
  45. molSimplify/Data/thd.dat +5 -0
  46. molSimplify/Data/tpl.dat +4 -0
  47. molSimplify/Data/tpr.dat +7 -0
  48. molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
  49. molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
  50. molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
  51. molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
  52. molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
  53. molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
  54. molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
  55. molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
  56. molSimplify/Informatics/MOF/__init__.py +0 -0
  57. molSimplify/Informatics/MOF/atomic.py +267 -0
  58. molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
  59. molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
  60. molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
  61. molSimplify/Informatics/Mol2Parser.py +46 -0
  62. molSimplify/Informatics/RACassemble.py +408 -0
  63. molSimplify/Informatics/__init__.py +0 -0
  64. molSimplify/Informatics/active_learning/__init__.py +0 -0
  65. molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
  66. molSimplify/Informatics/autocorrelation.py +1930 -0
  67. molSimplify/Informatics/clean_autocorrelation.py +778 -0
  68. molSimplify/Informatics/coulomb_analyze.py +67 -0
  69. molSimplify/Informatics/decoration_manager.py +193 -0
  70. molSimplify/Informatics/geo_analyze.py +88 -0
  71. molSimplify/Informatics/geometrics.py +56 -0
  72. molSimplify/Informatics/graph_analyze.py +163 -0
  73. molSimplify/Informatics/graph_racs.py +288 -0
  74. molSimplify/Informatics/jupyter_vis.py +172 -0
  75. molSimplify/Informatics/lacRACAssemble.py +2192 -0
  76. molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
  77. molSimplify/Informatics/misc_descriptors.py +198 -0
  78. molSimplify/Informatics/organic_fingerprints.py +61 -0
  79. molSimplify/Informatics/partialcharges.py +345 -0
  80. molSimplify/Informatics/protein/activesite.py +53 -0
  81. molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
  82. molSimplify/Informatics/rac155_geo.py +48 -0
  83. molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
  84. molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
  85. molSimplify/Ligands/12crown4.mol +62 -0
  86. molSimplify/Ligands/Antipyrine.mol +58 -0
  87. molSimplify/Ligands/BPAbipy.mol +106 -0
  88. molSimplify/Ligands/Hpyrrole.mol +26 -0
  89. molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
  90. molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
  91. molSimplify/Ligands/NMe2_-1.xyz +11 -0
  92. molSimplify/Ligands/PCy3.mol +111 -0
  93. molSimplify/Ligands/PMe3.xyz +15 -0
  94. molSimplify/Ligands/PPh3.mol +76 -0
  95. molSimplify/Ligands/Propyphenazone.mol +77 -0
  96. molSimplify/Ligands/acac.mol +33 -0
  97. molSimplify/Ligands/acacen.mol +76 -0
  98. molSimplify/Ligands/acetate.smi +1 -0
  99. molSimplify/Ligands/acetate.xyz +9 -0
  100. molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
  101. molSimplify/Ligands/acetonitrile.mol +17 -0
  102. molSimplify/Ligands/alanine.mol +30 -0
  103. molSimplify/Ligands/alphabetizer.py +21 -0
  104. molSimplify/Ligands/amine.mol +11 -0
  105. molSimplify/Ligands/ammonia.mol +12 -0
  106. molSimplify/Ligands/arginine.mol +58 -0
  107. molSimplify/Ligands/asparagine.mol +38 -0
  108. molSimplify/Ligands/aspartic_acid.mol +35 -0
  109. molSimplify/Ligands/azide.mol +11 -0
  110. molSimplify/Ligands/benzene.mol +28 -0
  111. molSimplify/Ligands/benzene_pi.mol +30 -0
  112. molSimplify/Ligands/benzenedithiol.mol +30 -0
  113. molSimplify/Ligands/benzenethiol.mol +30 -0
  114. molSimplify/Ligands/benzylisocy.mol +38 -0
  115. molSimplify/Ligands/bidiazine.mol +42 -0
  116. molSimplify/Ligands/bidiazole.mol +38 -0
  117. molSimplify/Ligands/bifuran.mol +38 -0
  118. molSimplify/Ligands/bihydrodiazine.mol +58 -0
  119. molSimplify/Ligands/bihydrodiazole.mol +46 -0
  120. molSimplify/Ligands/bihydrooxazine.mol +54 -0
  121. molSimplify/Ligands/bihydrooxazole.mol +42 -0
  122. molSimplify/Ligands/bihydrothiazine.mol +54 -0
  123. molSimplify/Ligands/bihydrothiazole.mol +42 -0
  124. molSimplify/Ligands/biimidazole.mol +38 -0
  125. molSimplify/Ligands/bioxazole.mol +34 -0
  126. molSimplify/Ligands/bipy.mol +46 -0
  127. molSimplify/Ligands/bipyrazine.xyz +20 -0
  128. molSimplify/Ligands/bipyrimidine.mol +42 -0
  129. molSimplify/Ligands/bipyrrole.mol +42 -0
  130. molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
  131. molSimplify/Ligands/bithiazole.mol +34 -0
  132. molSimplify/Ligands/bromide.mol +7 -0
  133. molSimplify/Ligands/bromide.smi +1 -0
  134. molSimplify/Ligands/c2.mol +9 -0
  135. molSimplify/Ligands/caprolactone.mol +41 -0
  136. molSimplify/Ligands/carbonyl.mol +8 -0
  137. molSimplify/Ligands/carboxyl.mol +13 -0
  138. molSimplify/Ligands/cat.mol +30 -0
  139. molSimplify/Ligands/chloride.mol +7 -0
  140. molSimplify/Ligands/chloride.smi +1 -0
  141. molSimplify/Ligands/chloropyridine.mol +27 -0
  142. molSimplify/Ligands/co2.mol +10 -0
  143. molSimplify/Ligands/corrolazine.mol +72 -0
  144. molSimplify/Ligands/cs.mol +8 -0
  145. molSimplify/Ligands/cyanate.xyz +5 -0
  146. molSimplify/Ligands/cyanide.mol +9 -0
  147. molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
  148. molSimplify/Ligands/cyanopyridine.mol +29 -0
  149. molSimplify/Ligands/cyclam.mol +81 -0
  150. molSimplify/Ligands/cyclen.mol +69 -0
  151. molSimplify/Ligands/cyclopentadienyl.mol +26 -0
  152. molSimplify/Ligands/cysteine.mol +32 -0
  153. molSimplify/Ligands/diaminomethyl.mol +19 -0
  154. molSimplify/Ligands/diazine.mol +25 -0
  155. molSimplify/Ligands/diazole.mol +23 -0
  156. molSimplify/Ligands/dicyanamide.mol +15 -0
  157. molSimplify/Ligands/dihydrofuran.mol +27 -0
  158. molSimplify/Ligands/dmap.xyz +35 -0
  159. molSimplify/Ligands/dmf.mol +28 -0
  160. molSimplify/Ligands/dmi.mol +41 -0
  161. molSimplify/Ligands/dmpe.mol +52 -0
  162. molSimplify/Ligands/dpmu.mol +47 -0
  163. molSimplify/Ligands/dppe.mol +112 -0
  164. molSimplify/Ligands/edta.mol +69 -0
  165. molSimplify/Ligands/en.mol +28 -0
  166. molSimplify/Ligands/ethanethiol.mol +21 -0
  167. molSimplify/Ligands/ethanolamine.mol +26 -0
  168. molSimplify/Ligands/ethbipy.mol +70 -0
  169. molSimplify/Ligands/ethyl.mol +19 -0
  170. molSimplify/Ligands/ethylamine.mol +24 -0
  171. molSimplify/Ligands/ethylene.mol +16 -0
  172. molSimplify/Ligands/ethylesteracac.mol +57 -0
  173. molSimplify/Ligands/fluoride.mol +7 -0
  174. molSimplify/Ligands/fluoride.smi +1 -0
  175. molSimplify/Ligands/formaldehyde.mol +12 -0
  176. molSimplify/Ligands/formamidate.xyz +8 -0
  177. molSimplify/Ligands/formate.xyz +6 -0
  178. molSimplify/Ligands/furan.mol +23 -0
  179. molSimplify/Ligands/glutamic_acid.mol +42 -0
  180. molSimplify/Ligands/glutamine.mol +44 -0
  181. molSimplify/Ligands/glycinate.mol +23 -0
  182. molSimplify/Ligands/glycine.mol +24 -0
  183. molSimplify/Ligands/h2s.mol +10 -0
  184. molSimplify/Ligands/helium.mol +6 -0
  185. molSimplify/Ligands/histidine.mol +45 -0
  186. molSimplify/Ligands/hmpa.mol +62 -0
  187. molSimplify/Ligands/hs-.mol +9 -0
  188. molSimplify/Ligands/hydride.mol +7 -0
  189. molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
  190. molSimplify/Ligands/hydrocyanide.mol +10 -0
  191. molSimplify/Ligands/hydrodiazine.mol +33 -0
  192. molSimplify/Ligands/hydrodiazole.mol +27 -0
  193. molSimplify/Ligands/hydrogensulfide.mol +10 -0
  194. molSimplify/Ligands/hydroisocyanide.mol +11 -0
  195. molSimplify/Ligands/hydrooxazine.mol +31 -0
  196. molSimplify/Ligands/hydrooxazole.mol +25 -0
  197. molSimplify/Ligands/hydrothiazine.mol +31 -0
  198. molSimplify/Ligands/hydrothiazole.mol +25 -0
  199. molSimplify/Ligands/hydroxyl.mol +9 -0
  200. molSimplify/Ligands/imidazole.mol +23 -0
  201. molSimplify/Ligands/imidazolidinone.mol +29 -0
  202. molSimplify/Ligands/imine.mol +13 -0
  203. molSimplify/Ligands/iminodiacetic.mol +33 -0
  204. molSimplify/Ligands/iodide.mol +7 -0
  205. molSimplify/Ligands/iodobenzene.xyz +14 -0
  206. molSimplify/Ligands/isoleucine.mol +48 -0
  207. molSimplify/Ligands/isothiocyanate.mol +11 -0
  208. molSimplify/Ligands/leucine.mol +48 -0
  209. molSimplify/Ligands/ligands.dict +257 -0
  210. molSimplify/Ligands/lysine.mol +54 -0
  211. molSimplify/Ligands/mebenzenedithiol.mol +36 -0
  212. molSimplify/Ligands/mebim_py.xyz +29 -0
  213. molSimplify/Ligands/mebim_pz.xyz +28 -0
  214. molSimplify/Ligands/mebipy.mol +58 -0
  215. molSimplify/Ligands/mecat.mol +36 -0
  216. molSimplify/Ligands/methanal.mol +11 -0
  217. molSimplify/Ligands/methanethiol.mol +15 -0
  218. molSimplify/Ligands/methanol.mol +16 -0
  219. molSimplify/Ligands/methionine.mol +44 -0
  220. molSimplify/Ligands/methyl.mol +13 -0
  221. molSimplify/Ligands/methylacetylide.xyz +8 -0
  222. molSimplify/Ligands/methylamine.mol +19 -0
  223. molSimplify/Ligands/methylazide.xyz +9 -0
  224. molSimplify/Ligands/methylisocy.mol +17 -0
  225. molSimplify/Ligands/methylpyridine.mol +33 -0
  226. molSimplify/Ligands/n2.mol +8 -0
  227. molSimplify/Ligands/n4py.xyz +51 -0
  228. molSimplify/Ligands/nch.mol +10 -0
  229. molSimplify/Ligands/nco-.mol +11 -0
  230. molSimplify/Ligands/nethanolamine.mol +26 -0
  231. molSimplify/Ligands/nitrate.mol +14 -0
  232. molSimplify/Ligands/nitrite.mol +11 -0
  233. molSimplify/Ligands/nitro.mol +11 -0
  234. molSimplify/Ligands/nitrobipy.mol +54 -0
  235. molSimplify/Ligands/nitroso.mol +8 -0
  236. molSimplify/Ligands/nme3.mol +30 -0
  237. molSimplify/Ligands/no-.mol +10 -0
  238. molSimplify/Ligands/no2-.mol +11 -0
  239. molSimplify/Ligands/noxygen.mol +8 -0
  240. molSimplify/Ligands/ns-.mol +10 -0
  241. molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
  242. molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
  243. molSimplify/Ligands/o2-.mol +9 -0
  244. molSimplify/Ligands/o2.xyz +4 -0
  245. molSimplify/Ligands/och2.mol +12 -0
  246. molSimplify/Ligands/oethanolamine.mol +26 -0
  247. molSimplify/Ligands/ome2.mol +22 -0
  248. molSimplify/Ligands/ooh.xyz +5 -0
  249. molSimplify/Ligands/oxalate.mol +17 -0
  250. molSimplify/Ligands/oxalate.smi +1 -0
  251. molSimplify/Ligands/oxygen.mol +7 -0
  252. molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
  253. molSimplify/Ligands/ph2-.mol +11 -0
  254. molSimplify/Ligands/ph3.mol +12 -0
  255. molSimplify/Ligands/phen.mol +51 -0
  256. molSimplify/Ligands/phenacac.mol +63 -0
  257. molSimplify/Ligands/phenalalanine.mol +51 -0
  258. molSimplify/Ligands/phendione.mol +51 -0
  259. molSimplify/Ligands/phenphen.mol +75 -0
  260. molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
  261. molSimplify/Ligands/phenylcyc.mol +99 -0
  262. molSimplify/Ligands/phenylenediamine.mol +37 -0
  263. molSimplify/Ligands/phenylisocy.mol +32 -0
  264. molSimplify/Ligands/phosacidbipy.mol +66 -0
  265. molSimplify/Ligands/phosphine.mol +13 -0
  266. molSimplify/Ligands/phosphorine.mol +27 -0
  267. molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
  268. molSimplify/Ligands/phthalocyanine.mol +126 -0
  269. molSimplify/Ligands/pme3o.mol +32 -0
  270. molSimplify/Ligands/porphyrin.mol +82 -0
  271. molSimplify/Ligands/pph3o.mol +77 -0
  272. molSimplify/Ligands/proline.mol +39 -0
  273. molSimplify/Ligands/propdiol.mol +21 -0
  274. molSimplify/Ligands/propylene.mol +23 -0
  275. molSimplify/Ligands/pyridine.mol +27 -0
  276. molSimplify/Ligands/pyrimidone.mol +27 -0
  277. molSimplify/Ligands/pyrrole.mol +24 -0
  278. molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
  279. molSimplify/Ligands/s2-.mol +9 -0
  280. molSimplify/Ligands/salen.mol +75 -0
  281. molSimplify/Ligands/salphen.mol +84 -0
  282. molSimplify/Ligands/serine.mol +32 -0
  283. molSimplify/Ligands/simple_ligands.dict +14 -0
  284. molSimplify/Ligands/sulfacidbipy.mol +63 -0
  285. molSimplify/Ligands/tbucat.mol +54 -0
  286. molSimplify/Ligands/tbuphisocy.mol +56 -0
  287. molSimplify/Ligands/tbutylcyclen.mol +166 -0
  288. molSimplify/Ligands/tbutylisocy.mol +35 -0
  289. molSimplify/Ligands/tbutylthiol.mol +33 -0
  290. molSimplify/Ligands/tcnoet.mol +43 -0
  291. molSimplify/Ligands/tcnoetOH.mol +45 -0
  292. molSimplify/Ligands/terpy.mol +65 -0
  293. molSimplify/Ligands/tetrahydrofuran.mol +31 -0
  294. molSimplify/Ligands/thiane.mol +37 -0
  295. molSimplify/Ligands/thiazole.mol +21 -0
  296. molSimplify/Ligands/thiocyanate.mol +11 -0
  297. molSimplify/Ligands/thiol.mol +9 -0
  298. molSimplify/Ligands/thiophene.mol +23 -0
  299. molSimplify/Ligands/thiopyridine.mol +29 -0
  300. molSimplify/Ligands/threonine.mol +38 -0
  301. molSimplify/Ligands/tpp.mol +165 -0
  302. molSimplify/Ligands/tricyanomethyl.mol +19 -0
  303. molSimplify/Ligands/trifluoromethyl.mol +13 -0
  304. molSimplify/Ligands/tryptophan.mol +60 -0
  305. molSimplify/Ligands/tyrosine.mol +53 -0
  306. molSimplify/Ligands/uthiol.mol +11 -0
  307. molSimplify/Ligands/uthiolme2.mol +23 -0
  308. molSimplify/Ligands/valine.mol +42 -0
  309. molSimplify/Ligands/water.mol +10 -0
  310. molSimplify/Ligands/x.mol +6 -0
  311. molSimplify/Scripts/__init__.py +0 -0
  312. molSimplify/Scripts/addtodb.py +308 -0
  313. molSimplify/Scripts/cellbuilder.py +1592 -0
  314. molSimplify/Scripts/cellbuilder_tools.py +701 -0
  315. molSimplify/Scripts/chains.py +342 -0
  316. molSimplify/Scripts/convert_2to3.py +23 -0
  317. molSimplify/Scripts/dbinteract.py +631 -0
  318. molSimplify/Scripts/distgeom.py +617 -0
  319. molSimplify/Scripts/findcorrelations.py +287 -0
  320. molSimplify/Scripts/generator.py +267 -0
  321. molSimplify/Scripts/geometry.py +1224 -0
  322. molSimplify/Scripts/grabguivars.py +845 -0
  323. molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
  324. molSimplify/Scripts/inparse.py +1673 -0
  325. molSimplify/Scripts/io.py +1149 -0
  326. molSimplify/Scripts/isomers.py +415 -0
  327. molSimplify/Scripts/jobgen.py +247 -0
  328. molSimplify/Scripts/krr_prep.py +1262 -0
  329. molSimplify/Scripts/molSimplify_io.py +18 -0
  330. molSimplify/Scripts/molden2psi4wfn.py +166 -0
  331. molSimplify/Scripts/namegen.py +32 -0
  332. molSimplify/Scripts/nn_prep.py +561 -0
  333. molSimplify/Scripts/oct_check_mols.py +782 -0
  334. molSimplify/Scripts/periodic_QE.py +97 -0
  335. molSimplify/Scripts/postmold.py +304 -0
  336. molSimplify/Scripts/postmwfn.py +709 -0
  337. molSimplify/Scripts/postparse.py +488 -0
  338. molSimplify/Scripts/postproc.py +139 -0
  339. molSimplify/Scripts/qcgen.py +1450 -0
  340. molSimplify/Scripts/rmsd.py +489 -0
  341. molSimplify/Scripts/rungen.py +670 -0
  342. molSimplify/Scripts/structgen.py +3040 -0
  343. molSimplify/Scripts/tf_nn_prep.py +894 -0
  344. molSimplify/Scripts/tsgen.py +295 -0
  345. molSimplify/Scripts/uq_calibration.py +69 -0
  346. molSimplify/__init__.py +0 -0
  347. molSimplify/__main__.py +197 -0
  348. molSimplify/icons/chemdb.png +0 -0
  349. molSimplify/icons/hjklogo.png +0 -0
  350. molSimplify/icons/icon.png +0 -0
  351. molSimplify/icons/logo.png +0 -0
  352. molSimplify/icons/logo_old.png +0 -0
  353. molSimplify/icons/petachem.png +0 -0
  354. molSimplify/icons/petachem2.png +0 -0
  355. molSimplify/icons/petachem_full.png +0 -0
  356. molSimplify/icons/pythonlogo.png +0 -0
  357. molSimplify/icons/sge copy.png +0 -0
  358. molSimplify/icons/sge.png +0 -0
  359. molSimplify/icons/slurm.png +0 -0
  360. molSimplify/icons/wft1.png +0 -0
  361. molSimplify/icons/wft2.png +0 -0
  362. molSimplify/icons/wft3.png +0 -0
  363. molSimplify/ml/__init__.py +0 -0
  364. molSimplify/ml/kernels.py +36 -0
  365. molSimplify/ml/layers.py +29 -0
  366. molSimplify/molscontrol/__init__.py +14 -0
  367. molSimplify/molscontrol/_version.py +521 -0
  368. molSimplify/molscontrol/clf_tools.py +144 -0
  369. molSimplify/molscontrol/data/README.md +21 -0
  370. molSimplify/molscontrol/data/look_and_say.dat +15 -0
  371. molSimplify/molscontrol/dynamic_classifier.py +514 -0
  372. molSimplify/molscontrol/io_tools.py +363 -0
  373. molSimplify/molscontrol/molscontrol.py +49 -0
  374. molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
  375. molSimplify/molscontrol/terachem/terachem_input +22 -0
  376. molSimplify/python_krr/X_train_TS.csv +535 -0
  377. molSimplify/python_krr/__init__.py +0 -0
  378. molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
  379. molSimplify/python_krr/hat2_feature_names.csv +1 -0
  380. molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
  381. molSimplify/python_krr/hat_X_mean_std.csv +6 -0
  382. molSimplify/python_krr/hat_feature_names.csv +1 -0
  383. molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
  384. molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
  385. molSimplify/python_krr/hat_y_mean_std.csv +2 -0
  386. molSimplify/python_krr/sklearn_models.py +34 -0
  387. molSimplify/python_krr/y_train_TS.csv +535 -0
  388. molSimplify/python_nn/ANN.py +198 -0
  389. molSimplify/python_nn/__init__.py +0 -0
  390. molSimplify/python_nn/clf_analysis_tool.py +125 -0
  391. molSimplify/python_nn/dictionary_toolbox.py +49 -0
  392. molSimplify/python_nn/ensemble_test.py +309 -0
  393. molSimplify/python_nn/hs_center.csv +26 -0
  394. molSimplify/python_nn/hs_scale.csv +26 -0
  395. molSimplify/python_nn/ls_center.csv +26 -0
  396. molSimplify/python_nn/ls_scale.csv +26 -0
  397. molSimplify/python_nn/ms_hs_b1.csv +50 -0
  398. molSimplify/python_nn/ms_hs_b2.csv +50 -0
  399. molSimplify/python_nn/ms_hs_b3.csv +1 -0
  400. molSimplify/python_nn/ms_hs_w1.csv +50 -0
  401. molSimplify/python_nn/ms_hs_w2.csv +50 -0
  402. molSimplify/python_nn/ms_hs_w3.csv +1 -0
  403. molSimplify/python_nn/ms_ls_b1.csv +50 -0
  404. molSimplify/python_nn/ms_ls_b2.csv +50 -0
  405. molSimplify/python_nn/ms_ls_b3.csv +1 -0
  406. molSimplify/python_nn/ms_ls_w1.csv +50 -0
  407. molSimplify/python_nn/ms_ls_w2.csv +50 -0
  408. molSimplify/python_nn/ms_ls_w3.csv +1 -0
  409. molSimplify/python_nn/ms_slope_b1.csv +50 -0
  410. molSimplify/python_nn/ms_slope_b2.csv +50 -0
  411. molSimplify/python_nn/ms_slope_b3.csv +1 -0
  412. molSimplify/python_nn/ms_slope_w1.csv +50 -0
  413. molSimplify/python_nn/ms_slope_w2.csv +50 -0
  414. molSimplify/python_nn/ms_slope_w3.csv +1 -0
  415. molSimplify/python_nn/ms_split_b1.csv +50 -0
  416. molSimplify/python_nn/ms_split_b2.csv +50 -0
  417. molSimplify/python_nn/ms_split_b3.csv +1 -0
  418. molSimplify/python_nn/ms_split_w1.csv +50 -0
  419. molSimplify/python_nn/ms_split_w2.csv +50 -0
  420. molSimplify/python_nn/ms_split_w3.csv +1 -0
  421. molSimplify/python_nn/slope_center.csv +25 -0
  422. molSimplify/python_nn/slope_scale.csv +25 -0
  423. molSimplify/python_nn/split_center.csv +26 -0
  424. molSimplify/python_nn/split_scale.csv +26 -0
  425. molSimplify/python_nn/tf_ANN.py +762 -0
  426. molSimplify/python_nn/train_data.csv +1211 -0
  427. molSimplify/tf_nn/__init__.py +0 -0
  428. molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
  429. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
  430. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
  431. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
  432. molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
  433. molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
  434. molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
  435. molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
  436. molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
  437. molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
  438. molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
  439. molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
  440. molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
  441. molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
  442. molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
  443. molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
  444. molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
  445. molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
  446. molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
  447. molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
  448. molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
  449. molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
  450. molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
  451. molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
  452. molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
  453. molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
  454. molSimplify/tf_nn/homolumo/gap_model.json +1 -0
  455. molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
  456. molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
  457. molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
  458. molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
  459. molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
  460. molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
  461. molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
  462. molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
  463. molSimplify/tf_nn/homolumo/homo_model.json +126 -0
  464. molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
  465. molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
  466. molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
  467. molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
  468. molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
  469. molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
  470. molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
  471. molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
  472. molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
  473. molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
  474. molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
  475. molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
  476. molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
  477. molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
  478. molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
  479. molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
  480. molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
  481. molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
  482. molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
  483. molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
  484. molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
  485. molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
  486. molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
  487. molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
  488. molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
  489. molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
  490. molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
  491. molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
  492. molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
  493. molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
  494. molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
  495. molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
  496. molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
  497. molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
  498. molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
  499. molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
  500. molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
  501. molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
  502. molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
  503. molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
  504. molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
  505. molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
  506. molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
  507. molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
  508. molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
  509. molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
  510. molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
  511. molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
  512. molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
  513. molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
  514. molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
  515. molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
  516. molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
  517. molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
  518. molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
  519. molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
  520. molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
  521. molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
  522. molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
  523. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
  524. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
  525. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
  526. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
  527. molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
  528. molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
  529. molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
  530. molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
  531. molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
  532. molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
  533. molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
  534. molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
  535. molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
  536. molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
  537. molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
  538. molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
  539. molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
  540. molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
  541. molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
  542. molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
  543. molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
  544. molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
  545. molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
  546. molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
  547. molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
  548. molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
  549. molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
  550. molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
  551. molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
  552. molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
  553. molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
  554. molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
  555. molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
  556. molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
  557. molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
  558. molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
  559. molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
  560. molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
  561. molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
  562. molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
  563. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
  564. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
  565. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
  566. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
  567. molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
  568. molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
  569. molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
  570. molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
  571. molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
  572. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
  573. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
  574. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
  575. molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
  576. molSimplify/tf_nn/split/split_model.h5 +0 -0
  577. molSimplify/tf_nn/split/split_model.json +1 -0
  578. molSimplify/tf_nn/split/split_vars.csv +155 -0
  579. molSimplify/tf_nn/split/split_x.csv +1902 -0
  580. molSimplify/tf_nn/split/split_y.csv +1902 -0
  581. molSimplify/tf_nn/split/train_names.csv +1901 -0
  582. molSimplify/utils/__init__.py +0 -0
  583. molSimplify/utils/decorators.py +16 -0
  584. molSimplify/utils/metaclasses.py +12 -0
  585. molSimplify/utils/tensorflow.py +23 -0
  586. molSimplify/utils/timer.py +16 -0
  587. molSimplify-1.7.4.dist-info/LICENSE +674 -0
  588. molSimplify-1.7.4.dist-info/METADATA +821 -0
  589. molSimplify-1.7.4.dist-info/RECORD +651 -0
  590. molSimplify-1.7.4.dist-info/WHEEL +5 -0
  591. molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
  592. molSimplify-1.7.4.dist-info/top_level.txt +4 -0
  593. tests/generateTests.py +122 -0
  594. tests/helperFuncs.py +658 -0
  595. tests/informatics/test_MOF_descriptors.py +128 -0
  596. tests/informatics/test_active_learning.py +113 -0
  597. tests/informatics/test_coulomb_analyze.py +24 -0
  598. tests/informatics/test_graph_racs.py +193 -0
  599. tests/ml/test_kernels.py +20 -0
  600. tests/ml/test_layers.py +47 -0
  601. tests/runtest.py +10 -0
  602. tests/test_Mol2D.py +128 -0
  603. tests/test_basic_imports.py +62 -0
  604. tests/test_bidentate.py +25 -0
  605. tests/test_cli.py +20 -0
  606. tests/test_distgeom.py +106 -0
  607. tests/test_example_1.py +29 -0
  608. tests/test_example_3.py +31 -0
  609. tests/test_example_5.py +43 -0
  610. tests/test_example_7.py +28 -0
  611. tests/test_example_8.py +15 -0
  612. tests/test_example_tbp.py +15 -0
  613. tests/test_ff_xtb.py +111 -0
  614. tests/test_geocheck_oct.py +26 -0
  615. tests/test_geocheck_one_empty.py +15 -0
  616. tests/test_geometry.py +44 -0
  617. tests/test_inparse.py +76 -0
  618. tests/test_io.py +84 -0
  619. tests/test_jobgen.py +84 -0
  620. tests/test_joption_pythonic.py +27 -0
  621. tests/test_ligand_assign.py +58 -0
  622. tests/test_ligand_assign_consistent.py +60 -0
  623. tests/test_ligand_class.py +26 -0
  624. tests/test_ligand_from_mol_file.py +35 -0
  625. tests/test_ligands.py +86 -0
  626. tests/test_mol3D.py +337 -0
  627. tests/test_molcas_caspt2.py +15 -0
  628. tests/test_molcas_casscf.py +15 -0
  629. tests/test_old_ANNs.py +68 -0
  630. tests/test_orca_ccsdt.py +15 -0
  631. tests/test_orca_dft.py +15 -0
  632. tests/test_qcgen.py +50 -0
  633. tests/test_racs.py +124 -0
  634. tests/test_rmsd.py +68 -0
  635. tests/test_structgen_functions.py +198 -0
  636. tests/test_tetrahedral.py +29 -0
  637. tests/test_tutorial_10_part_one.py +16 -0
  638. tests/test_tutorial_10_part_two.py +15 -0
  639. tests/test_tutorial_2.py +11 -0
  640. tests/test_tutorial_3.py +15 -0
  641. tests/test_tutorial_4.py +57 -0
  642. tests/test_tutorial_6.py +10 -0
  643. tests/test_tutorial_8.py +29 -0
  644. tests/test_tutorial_9_part_one.py +15 -0
  645. tests/test_tutorial_9_part_two.py +15 -0
  646. tests/test_tutorial_qm9_part_one.py +6 -0
  647. tests/testresources/refs/racs/generate_references.py +85 -0
  648. workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
  649. workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
  650. workflows/NandyJACSAu2022/fragment_classes.py +586 -0
  651. workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
@@ -0,0 +1,1178 @@
1
+ # @file protein3D.py
2
+ # Defines protein3D class and contains useful manipulation/retrieval routines.
3
+ #
4
+ # Written by HJK Group
5
+ #
6
+ # Dpt of Chemical Engineering, MIT
7
+
8
+ # imports
9
+ from molSimplify.Classes.monomer3D import monomer3D
10
+ from molSimplify.Classes.mol3D import mol3D
11
+ from molSimplify.Classes.atom3D import atom3D
12
+ from molSimplify.Classes.helpers import read_atom, makeMol
13
+ from molSimplify.Classes.globalvars import globalvars
14
+ import urllib.request
15
+ import urllib.error
16
+ import requests
17
+ from bs4 import BeautifulSoup
18
+ import pandas as pd
19
+ import subprocess
20
+ import shlex
21
+ import ast
22
+ import time
23
+ from scipy.spatial import ConvexHull
24
+ # from pymol import cmd, stored
25
+ # no GUI support for now
26
+
27
+
28
+ class protein3D:
29
+ """Holds information about a protein, used to do manipulations. Reads
30
+ information from structure file (pdb, cif) or is directly built from
31
+ molsimplify.
32
+
33
+ """
34
+
35
+ def __init__(self, pdbCode='undef'):
36
+ # Number of monomers
37
+ self.naas = 0
38
+ # Number of heteromolecules
39
+ self.nhetmols = 0
40
+ # Number of chains
41
+ self.nchains = 0
42
+ # Dictionary of monomers
43
+ self.aas = {}
44
+ # Dictionary of all atoms
45
+ self.atoms = {}
46
+ # Dictionary of all atom indices
47
+ self.a_ids = {}
48
+ # Dictionary of heteromolecules
49
+ self.hetmols = {}
50
+ # Dictionary of chains
51
+ self.chains = {}
52
+ # Dictionary of missing atoms
53
+ self.missing_atoms = {}
54
+ # List of missing monomers
55
+ self.missing_aas = []
56
+ # List of chain locations with more than one conformation
57
+ self.conf = []
58
+ # R value
59
+ self.R = -1
60
+ # Rfree value
61
+ self.Rfree = -1
62
+ # PDB code
63
+ self.pdbCode = pdbCode
64
+ # Holder for metals
65
+ self.metals = False
66
+ # Bonds
67
+ self.bonds = {}
68
+ # Data completeness
69
+ self.DataCompleteness = 0
70
+ # RSRZ value
71
+ self.RSRZ = 100
72
+ # TwinL score
73
+ self.TwinL = 0
74
+ # TwinL^2 score
75
+ self.TwinL2 = 0
76
+ # center of mass
77
+ self.com = []
78
+ # centroid
79
+ self.centroid = []
80
+ # convex hull
81
+ self.hull = []
82
+
83
def setAAs(self, aas):
    """
    Replace the monomers held by this protein3D and refresh the count.

    Parameters
    ----------
        aas : dictionary
            Keyed by chain and location.
            Valued by monomer3D monomers (amino acids or nucleotides).
    """
    self.aas = aas
    # keep the cached monomer count in step with the new dictionary
    self.naas = len(self.aas)
95
+
96
def setAtoms(self, atoms):
    """
    Replace the index -> atom mapping of this protein3D.

    Parameters
    ----------
        atoms : dictionary
            Keyed by atom index.
            Valued by the atom3D atom that has that index.
    """
    self.atoms = atoms
107
+
108
def setIndices(self, a_ids):
    """
    Replace the atom -> index mapping of this protein3D.

    Parameters
    ----------
        a_ids : dictionary
            Keyed by atom3D atom.
            Valued by its index.
    """
    self.a_ids = a_ids
118
+
119
def setHetmols(self, hetmols):
    """
    Replace the heteromolecules of this protein3D and refresh the count.

    Parameters
    ----------
        hetmols : dictionary
            Keyed by chain and location.
            Valued by mol3D heteromolecules.
    """
    n_locations = len(hetmols.keys())
    self.hetmols = hetmols
    self.nhetmols = n_locations
131
+
132
def setChains(self, chains):
    """
    Replace the chains of this protein3D and refresh the chain count.

    Parameters
    ----------
        chains : dictionary
            Keyed by desired chain IDs.
            Valued by the list of molecules in the chain.
    """
    n_chains = len(chains.keys())
    self.chains = chains
    self.nchains = n_chains
144
+
145
def setMissingAtoms(self, missing_atoms):
    """
    Replace the record of missing atoms of this protein3D.

    Parameters
    ----------
        missing_atoms : dictionary
            Keyed by amino acid residues / nucleotides of origin.
            Valued by missing atoms.
    """
    self.missing_atoms = missing_atoms
156
+
157
def setMissingAAs(self, missing_aas):
    """
    Replace the list of missing amino acids of this protein3D.

    Parameters
    ----------
        missing_aas : list
            List of missing amino acids.
    """
    self.missing_aas = missing_aas
167
+
168
def setConf(self, conf):
    """
    Replace the list of locations with alternative conformations.

    Parameters
    ----------
        conf : list
            List of possible conformations for applicable amino acids.
    """
    self.conf = conf
178
+
179
def autoChooseConf(self):
    """
    Automatically choose the conformation of a protein3D class
    instance based first on what the greatest occupancy level is and then
    the first conformation in the alphabet with all else equal.

    For every location listed in ``self.conf`` the conformers that are not
    already part of the corresponding chain are discarded: their atoms that
    are not shared with another conformer are stripped and the conformer is
    removed from ``self.aas`` / ``self.hetmols``. ``self.conf`` is emptied
    at the end.
    """
    for c in self.conf:
        # indices of atoms to strip; accumulated across the conformers of c
        c_ids = []
        # c is a key of either self.aas or self.hetmols
        if c in self.aas.keys():
            lst = self.aas[c]
        else:
            lst = self.hetmols[c]
        if len(lst) == 1:
            # single conformer: place it in its chain at position c[1]-1
            self.chains[c[0]].insert(c[1]-1, lst[0])
        else:
            # NOTE(review): lst is the live list stored in self.aas /
            # self.hetmols and elements are removed from it below while it
            # is being iterated -- confirm this is intended.
            for li in lst:
                if li not in self.chains[c[0]]:
                    for j in li.atoms:
                        # an atom also present in another conformer is kept
                        in_more_confs = False
                        for m in lst:
                            if m != li and j in m.atoms:
                                in_more_confs = True
                        # entries that are not atom3D are indexed with [0]
                        # to obtain the atom index
                        if type(j) != atom3D and not in_more_confs:
                            c_ids.append(j[0])
                        elif not in_more_confs:
                            c_ids.append(self.getIndex(j))
                    # print(c_ids)
                    self.stripAtoms(c_ids)
                    if type(li) == monomer3D and li in self.aas[c]:
                        self.aas[c].remove(li)
                    elif type(li) == mol3D and li in self.hetmols[c]:
                        self.hetmols[c].remove(li)
    self.setConf([])
213
+
214
def setR(self, R):
    """
    Store a new R value on this protein3D.

    Parameters
    ----------
        R : float
            The desired new R value.
    """
    self.R = R
224
+
225
def setRfree(self, Rfree):
    """
    Store a new Rfree value on this protein3D.

    Parameters
    ----------
        Rfree : float
            The desired new Rfree value.
    """
    self.Rfree = Rfree
235
+
236
def setRSRZ(self, RSRZ):
    """
    Store a new RSRZ score on this protein3D.

    Parameters
    ----------
        RSRZ : float
            The desired new RSRZ score.
    """
    self.RSRZ = RSRZ
246
+
247
def getMissingAtoms(self):
    """
    Return the values view of the missing-atoms dictionary.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1MH1') # Fetch a PDB
    fetched: 1MH1
    >>> missing_atoms = pdb_system.getMissingAtoms()

    List atoms in the first set of missing_atoms
    >>> [atom.sym for atom in list(missing_atoms)[0]]
    ['C', 'C', 'C', 'C', 'C', 'C', 'O']
    """
    missing = self.missing_atoms
    return missing.values()
263
+
264
def getMissingAAs(self):
    """
    Return the list of missing amino acid residues.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1MH1') # Fetch a PDB
    fetched: 1MH1
    >>> pdb_system.getMissingAAs() # This gives a list of monomer3D objects
    [monomer3D(VAL, id=182), monomer3D(LYS, id=183), monomer3D(LYS, id=184)]
    """
    missing = self.missing_aas
    return missing
277
+
278
def countAAs(self):
    """
    Return the stored number of amino acid residues (``self.naas``).

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB
    fetched: 1os7
    >>> pdb_system.countAAs() # This return the number of AAs in the PDB for all the chains.
    1121
    """
    count = self.naas
    return count
291
+
292
def findAtom(self, sym="X", aa=True):
    """
    Find atoms with a specific symbol that are contained in amino acids
    or heteromolecules.

    Parameters
    ----------
        sym : str
            element symbol, default as X.
        aa : boolean
            True if we want atoms contained in amino acids
            False if we want atoms contained in heteromolecules

    Returns
    -------
        inds: list
            a list of atom indices with the specified symbol.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB
    fetched: 1os7
    >>> pdb_system.findAtom(sym="S", aa=True) # Returns indices of sulphur atoms present in amino acids
    [2166, 4442, 6733, 9041]
    >>> pdb_system.findAtom(sym="S", aa=False) # Returns indices of sulphur atoms present in heteromolecules
    [9164, 9182, 9200]
    """
    groups = self.aas.values() if aa else self.hetmols.values()
    inds = []
    for conformers in groups:
        for mol in conformers:
            for entry in mol.atoms:
                if isinstance(entry, tuple):
                    # (index, atom) pair: the index is already known
                    idx, atom = entry[0], entry[1]
                    if atom.symbol() == sym:
                        inds.append(idx)
                elif entry.symbol() == sym:
                    # bare atom: only resolve the index for actual matches,
                    # since getIndex may fall back to a linear search
                    inds.append(self.getIndex(entry))
    return inds
336
+
337
def findAA(self, three_lc="XAA"):
    """
    Find amino acids with a specific three-letter code.

    Only the first entry stored at each location is inspected.

    Parameters
    ----------
        three_lc: str
            three-letter code, default as XAA.

    Returns
    -------
        inds: set
            a set of (chain, id) pairs of the matching amino acids.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB
    fetched: 1os7

    Return a set of pairs where each pair is a combination of the chain name and
    the index of the amino acid specified (in this case, 'MET')
    >>> aa_set = pdb_system.findAA(three_lc = 'MET')
    >>> sorted(aa_set) # Sorting for reproducible order in doctest
    [('A', 268), ('B', 268), ('C', 268), ('D', 268)]
    """
    return {
        (conformers[0].chain, conformers[0].id)
        for conformers in self.aas.values()
        if conformers[0].three_lc == three_lc
    }
368
+
369
def getChain(self, chain_id):
    """
    Takes a chain of interest and turns it into its own protein3D class instance.

    Parameters
    ----------
        chain_id : string
            The letter name of the chain of interest

    Returns
    -------
        p : protein3D
            A protein3D instance consisting of just the chain of interest

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB
    fetched: 1os7
    >>> pdb_system.getChain('A') # doctest: +SKIP
    """
    p = protein3D()
    p.setPDBCode(self.pdbCode)
    p.setChains({chain_id: self.chains[chain_id]})
    p.setR(self.R)
    p.setRfree(self.Rfree)

    p.setMissingAAs([aa for aa in self.missing_aas if aa.chain == chain_id])
    # keys of aas / missing_atoms start with the chain identifier
    p.setAAs({key: mols for key, mols in self.aas.items()
              if key[0] == chain_id})
    p.setMissingAtoms({key: gone for key, gone in self.missing_atoms.items()
                       if key[0] == chain_id})

    # Reverse lookup heteromolecule -> key. Every conformer is indexed so
    # that atoms of alternative conformers can be assigned as well; first
    # conformers are written last so they keep their original mapping.
    het_key_of = {mol: key for key, mols in self.hetmols.items()
                  for mol in mols}
    het_key_of.update({mols[0]: key for key, mols in self.hetmols.items()
                       if mols})

    atoms = {}
    a_ids = {}
    hets = {}
    for a_id, atom in self.atoms.items():
        owner = self.getMolecule(a_id)
        if isinstance(owner, monomer3D):
            if owner.chain != chain_id:
                continue
        else:
            het = het_key_of.get(owner)
            if het is None:
                # atom could not be attributed to any heteromolecule
                # (previously: debug print followed by a KeyError)
                continue
            if het[0] != chain_id:
                continue
            hets[het] = self.hetmols[het]
        atoms[a_id] = atom
        a_ids[atom] = a_id

    p.setHetmols(hets)
    p.setAtoms(atoms)

    # a_ids is keyed by exactly the atoms kept for this chain, so it gives
    # constant-time membership tests instead of scanning atoms.values()
    bonds = {}
    for a, partners in self.bonds.items():
        if a in a_ids:
            bonds[a] = {b for b in partners if b in a_ids}
    p.setBonds(bonds)

    p.setIndices(a_ids)
    p.setConf([conf for conf in self.conf if conf[0] == chain_id])

    return p
451
+
452
def getMolecule(self, a_id, aas_only=False):
    """
    Finds the molecule that the atom is contained in.

    Parameters
    ----------
        a_id : int
            The index of the desired atom whose molecule we want to find
        aas_only : boolean
            True if we want to find atoms contained in amino acids only.
            False if we want atoms contained in all molecules. Default is False.

    Returns
    -------
        mol : monomer3D or mol3D
            The amino acid residue, nucleotide, or heteromolecule containing
            the atom, or None when no container is found.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB
    fetched: 1os7

    This returns an molSimplify.Classes.monomer3D object indicating that the atom is part of an amino acid or nucleotide:
    >>> pdb_system.getMolecule(a_id=2166)
    monomer3D(MET, id=268)

    This returns a mol3D object indicating that the atom is part of a molecule that is not an amino acid or nucleotide
    >>> pdb_system.getMolecule(a_id=9164)
    mol3D(S1O3N1C2)
    >>> pdb_system.getMolecule(a_id=9164).name # This prints the name of the molecule, in this case, it is 'TAU'
    'TAU'
    """
    # monomers are searched for the (index, atom) pair
    for conformers in self.aas.values():
        for monomer in conformers:
            if (a_id, self.atoms[a_id]) in monomer.atoms:
                return monomer
    # entries of the missing-atoms record are searched the same way
    for incomplete, gone in self.missing_atoms.items():
        if (a_id, self.atoms[a_id]) in gone:
            return incomplete
    if aas_only:
        return None
    # heteromolecules are searched for the bare atom
    for conformers in self.hetmols.values():
        for het in conformers:
            if self.atoms[a_id] in het.atoms:
                return het
    return None  # something is wrong
498
+
499
def stripAtoms(self, atoms_stripped):
    """
    Removes certain atoms from the protein3D class instance.

    The atoms are removed from the molecules holding them, from the bond
    graph, and from the index <-> atom dictionaries. Molecules left without
    atoms are dropped from ``self.aas`` / ``self.hetmols``.

    Parameters
    ----------
        atoms_stripped : list
            List of atom3D indices that should be removed. The list is
            consumed: it is empty when the method returns.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB
    fetched: 1os7
    >>> pdb_system.stripAtoms([2166, 4442, 6733, 2165]) # This removes the list of atoms with
    >>>                                                 # indices listed in the code
    """
    atoms = self.atoms
    a_ids = self.a_ids
    # snapshot of the keys: entries may be deleted from both dicts below
    keys = list(self.aas.keys()) + list(self.hetmols.keys())
    for tup in keys:
        # iterate over a copy of the conformer list, the original may shrink
        if tup in self.aas.keys():
            mol_set = self.aas[tup].copy()
        else:
            mol_set = self.hetmols[tup].copy()
        for elt in mol_set:
            # NOTE(review): elt.atoms is modified inside this loop while it
            # is being iterated -- confirm no entries are skipped.
            for a in elt.atoms:
                # entries are either atom3D or indexable with the atom at [1]
                if type(a) != atom3D:
                    atom = a[1]
                else:
                    atom = a
                # already removed from the index map
                if atom not in self.a_ids.keys():
                    continue
                a_id = self.getIndex(atom)
                if a_id in atoms_stripped:
                    if (a_id, atom) in elt.atoms:
                        elt.atoms.remove((a_id, atom))
                        # also drop the atom from the c / n lists of elt
                        if atom in elt.c:
                            elt.c.remove(atom)
                        elif atom in elt.n:
                            elt.n.remove(atom)
                    elif atom in elt.atoms:
                        elt.atoms.remove(atom)
                    atoms_stripped.remove(a_id)
                    # detach the atom from the bond graph in both directions
                    if atom in self.bonds.keys():
                        for at in self.bonds[atom]:
                            if at in self.bonds.keys():
                                temp = self.bonds[at].copy()
                                if atom in temp:
                                    temp.remove(atom)
                                self.bonds[at] = temp
                        del self.bonds[atom]
                    del atoms[a_id]
                    del a_ids[atom]
            # drop molecules (and then locations) that became empty
            if len(elt.atoms) == 0:
                if tup in self.aas.keys():
                    self.aas[tup].remove(elt)
                    if len(self.aas[tup]) == 0:
                        del self.aas[tup]
                else:
                    self.hetmols[tup].remove(elt)
                    if len(self.hetmols[tup]) == 0:
                        del self.hetmols[tup]
    # remaining indices were not found in any molecule above: remove them
    # from the bond graph and the index dictionaries only
    while len(atoms_stripped) != 0:
        a_id = atoms_stripped[0]
        atoms_stripped.pop(0)
        if a_id not in atoms.keys():
            continue
        atom = atoms[a_id]
        if atom in self.bonds.keys():
            for at in self.bonds[atom]:
                temp = self.bonds[at].copy()
                if atom in temp:
                    temp.remove(atom)
                self.bonds[at] = temp
            del self.bonds[atom]
        del atoms[a_id]
        del a_ids[atom]
    self.setAtoms(atoms)
    self.setIndices(a_ids)
579
+
580
def stripHetMol(self, hetmol):
    """
    Removes all heteroatoms part of the specified heteromolecule from
    the protein3D class instance.

    Parameters
    ----------
        hetmol : str
            String representing the name of a heteromolecule whose
            heteroatoms should be stripped from the protein3D class instance

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('3I40') # Fetch a PDB
    fetched: 3I40
    >>> pdb_system.stripHetMol('HOH')
    """
    # snapshot of the keys; stripAtoms deletes emptied locations
    for k in list(self.hetmols.keys()):
        if k not in self.hetmols:
            continue
        matched = False
        # Iterate over a snapshot of the conformer list: stripAtoms removes
        # emptied molecules from the live list, which would otherwise make
        # this loop skip the following conformer and orphan its atoms.
        for m in list(self.hetmols[k]):
            if m.name != hetmol:
                continue
            matched = True
            # atoms shared with an already stripped conformer are gone
            # from a_ids, so only look up the ones still registered
            ids = [self.a_ids[a] for a in m.atoms if a in self.a_ids]
            self.stripAtoms(ids)
        if matched:
            # stripAtoms normally deletes the emptied location already
            self.hetmols.pop(k, None)
612
+
613
def findMetal(self, transition_metals_only=True):
    """
    Find metal(s) in a protein3D class.

    The result is cached in ``self.metals``. The cache is rebuilt when the
    method is called with a different ``transition_metals_only`` value than
    the one the cached list was computed with.

    Parameters
    ----------
        transition_metals_only : bool, optional
            Only find transition metals. Default is true.

    Returns
    -------
        metal_list : list
            List of indices of metal atoms in protein3D.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7')
    fetched: 1os7
    >>> pdb_system.findMetal()
    [9160, 9178, 9196, 9214]
    """
    # flag the cached list was built with; None when the cache was filled
    # by other means (kept as-is, matching the previous behaviour)
    cached_flag = getattr(self, '_metals_tm_only', None)
    if self.metals and cached_flag in (None, transition_metals_only):
        return self.metals

    metal_list = []
    for conformers in self.hetmols.values():  # no metals in AAs
        for mol in conformers:
            for a in mol.atoms:
                if not a.ismetal(transition_metals_only=transition_metals_only):
                    continue
                # keep fully occupied metals, or ones present in the bond graph
                if a.occup == 1 or a in self.bonds.keys():
                    metal_list.append(self.getIndex(a))
    self.metals = metal_list
    self._metals_tm_only = transition_metals_only
    return self.metals
645
+
646
def freezeatom(self, atomIdx):
    """
    Mark the atom with the given index as frozen.

    Parameters
    ----------
        atomIdx : int
            Index for atom to be frozen.
    """
    target = self.atoms[atomIdx]
    target.frozen = True
657
+
658
def freezeatoms(self, Alist):
    """
    Mark every atom whose index is in Alist as frozen, processing the
    indices from largest to smallest.

    Parameters
    ----------
        Alist : list
            List of indices for atom3D instances to freeze.
    """
    descending = sorted(Alist, reverse=True)
    for atom_index in descending:
        self.freezeatom(atom_index)
671
+
672
def getAtom(self, idx):
    """
    Look up the atom stored under a given index.

    Parameters
    ----------
        idx : int
            Index of desired atom.

    Returns
    -------
        atom : atom3D
            atom3D class for element at given index.

    """
    atom = self.atoms[idx]
    return atom
688
+
689
def getIndex(self, atom):
    """
    Look up the index under which a given atom is stored.

    Parameters
    ----------
        atom : atom3D
            atom3D class for element at given index.

    Returns
    -------
        idx : int
            Index of desired atom.

    """
    # fast path: direct lookup in the atom -> index dictionary
    if hasattr(self, 'a_ids') and atom in self.a_ids.keys():
        return self.a_ids[atom]
    # fallback: locate the atom among the values of the index -> atom
    # dictionary and return the key at the same position
    indices = list(self.atoms.keys())
    position = list(self.atoms.values()).index(atom)
    return indices[position]
709
+
710
def getBoundMols(self, h_id, aas_only=False):
    """
    Get a list of molecules bound to a heteroatom, usually a metal.

    Parameters
    ----------
        h_id : int
            The index of the desired (hetero)atom origin
        aas_only : boolean
            Whether or not to only consider amino acids, defaults False

    Returns
    -------
        bound_mols : list
            List of monomer3D and/or mol3D instances of molecules bound to
            hetatm. None is returned when the origin atom has no entry in
            the bond dictionary.
    """
    if not self.atoms:
        # nothing to scan; h_id is not looked up in this case
        return []
    origin = self.atoms[h_id]
    # the origin check does not depend on the scanned atom: do it once
    if origin not in self.bonds.keys():
        return None
    partners = self.bonds[origin]
    bound_mols = []
    for b_id, b in self.atoms.items():
        if b in partners:
            # resolve the owning molecule once per bonded atom
            mol = self.getMolecule(b_id, aas_only)
            if mol is not None:
                bound_mols.append(mol)
    return bound_mols
735
+
736
def readfrompdb(self, text):
    """
    Read PDB into a protein3D class instance.

    Parameters
    ----------
        text : str
            String of path to PDB file. Path may be local or global.
            May also be the text of a PDB file from the internet.
    """

    # read in PDB file
    if '.pdb' in text:  # means this is a filename
        self.pdbfile = text
        fname = text.split('.pdb')[0]
        with open(fname + '.pdb', 'r') as f:
            text = f.read()
        enter = '\n'
    else:
        # text is not a file name; line breaks are expected as the two
        # characters backslash + "n" (fetch_pdb passes str(bytes))
        enter = "\\n"

    # class attributes
    aas = {}
    hetmols = {}
    atoms = {}
    a_ids = {}
    chains = {}
    missing_atoms = {}
    missing_aas = []
    conf = []
    bonds = {}

    # get R and Rfree values (text is full file)
    if "R VALUE (WORKING SET)" in text:
        temp = text.split("R VALUE (WORKING SET)")
        temp2 = temp[-1].split()
        if temp2[1] != 'NULL':
            R = float(temp2[1])
        else:
            R = -100
        if temp2[8] != 'NULL':
            Rfree = float(temp2[8])
        else:
            Rfree = 100
    elif "R VALUE (WORKING SET, NO CUTOFF)" in text:
        temp = text.split("R VALUE (WORKING SET, NO CUTOFF)")
        temp2 = temp[-1].split()
        if temp2[1] != 'NULL':
            R = float(temp2[1])
        else:
            R = -100
        if temp2[10] != 'NULL':
            Rfree = float(temp2[10])
        else:
            Rfree = 100
    else:
        R = -100
        Rfree = 100

    # start getting missing amino acids
    if "M RES C SSSEQI" in text:
        text = text.split("M RES C SSSEQI")
        want = text[-1]
        text = text[0].split(enter)
        # prefix of the header line; used to cut the section into lines
        split = text[-1]
        want = want.split(split)
        for line in want:
            if line == want[-1]:
                # last piece: first line is the final record, the rest is
                # the remainder of the file and becomes the new text
                text = line
                line = line.split(enter)
                line = line[0]
                text = text.replace(line, '')
            sp = line.split()
            if len(sp) > 2:
                res_num = int(sp[2])
                # Ignoring expression tags which are negative residues
                if res_num > 0:
                    a = monomer3D(sp[0], sp[1], sp[2])
                    missing_aas.append(a)

    # start getting missing atoms
    if "M RES CSSEQI ATOMS" in text:
        text = text.split("M RES CSSEQI ATOMS")
        want = text[-1]
        text = text[0].split(enter)
        split = text[-1]
        want = want.split(split)
        for line in want:
            if line == want[-1]:
                text = line
                line = line.split(enter)
                line = line[0]
                text = text.replace(line, '')
            sp = line.split()
            if len(sp) > 2:
                missing_atoms[(sp[1], sp[2])] = []
                for atom in sp[3:]:
                    if atom != enter and atom[0] in ['C', 'N', 'O', 'H']:
                        missing_atoms[(sp[1], sp[2])].append(
                            atom3D(Sym=atom[0], greek=atom))
    # start getting amino acids, nucleotides and heteroatoms
    pa_dict = {'AltLoc': ""}
    if "ENDMDL" in text:
        # Keep the last model plus whatever follows the final ENDMDL.
        # (The split result used to be discarded, which reduced text to
        # its last two characters.)
        models = text.split("ENDMDL")
        text = models[-2] + models[-1]
    text = text.split(enter)
    text = text[1:]
    for line in text:
        if line == text[-1]:
            text = line
            line = line.split(enter)
            line = line[0]
            text = text.replace(line, '')
        l_type = line[:6]
        if "ATOM" in l_type or "HETATM" in l_type:
            line = line.replace("\\'", "\'")
            a_dict = read_atom(line)
            if a_dict['ResName'] in globalvars().getAllAAs() or "ATOM" in l_type:
                # have an amino acid or biomolecule monomer
                a, aas, conf, chains, pa_dict, bonds = makeMol(a_dict, aas, conf, chains, pa_dict, bonds)
            else:  # have a normal heteromolecule
                a, hetmols, conf, chains, pa_dict, bonds = makeMol(a_dict, hetmols, conf, chains, pa_dict, bonds, False)
            atoms[a_dict['SerialNum']] = a
            a_ids[a] = a_dict['SerialNum']

        elif "CONECT" in l_type:  # get extra connections
            line = line[6:]  # remove type
            # serial numbers are read as consecutive 5-character fields
            li = [line[i:i+5] for i in range(0, len(line), 5)]
            if int(li[0]) in atoms.keys() and atoms[int(li[0])] not in bonds.keys():
                bonds[atoms[int(li[0])]] = set()
            for i in li[1:]:
                try:
                    bonds[atoms[int(li[0])]].add(atoms[int(i)])
                    if atoms[int(li[0])].loc != '':
                        # mirror the bond onto the neighbouring serial
                        # number when it carries the same atom label
                        for j in {1, -1}:
                            if atoms[int(li[0]) + j].greek == atoms[int(li[0])].greek:
                                if atoms[int(li[0]) + j] not in bonds.keys():
                                    bonds[atoms[int(li[0]) + j]] = {atoms[int(i)]}
                                else:
                                    bonds[atoms[int(li[0]) + j]].add(atoms[int(i)])
                                if atoms[int(i)] not in bonds.keys():
                                    bonds[atoms[int(i)]] = {atoms[int(li[0]) + j]}
                                else:
                                    bonds[atoms[int(i)]].add(atoms[int(li[0]) + j])
                except ValueError:
                    # field is not an integer (e.g. blank padding)
                    # if "  " not in i and i != " ":
                    #     print("likely OXT")
                    continue
    # deal with conformations in chains
    for i in conf:
        if i in aas.keys():
            c = aas[i]
        else:
            c = hetmols[i]
        for j in range(len(c)):
            # pick chain with higher occupancy or the A chain if tie
            if type(c[j]) == mol3D:
                # NOTE(review): full is reset on every iteration, so only
                # the last atom decides -- confirm this is intended.
                for a in c[j].atoms:
                    full = True
                    if a.occup <= 1/len(c):
                        full = False
                if full:
                    chains[i[0]].append(c[j])
                elif c[j].atoms[0].occup*100 == 100//len(c) and j == 0:
                    chains[i[0]].append(c[j])
            elif c[j].occup > 1/len(c):
                chains[i[0]].append(c[j])
            elif c[j].occup*100 == 100//len(c) and j == 0:
                chains[i[0]].append(c[j])
    self.setChains(chains)
    self.setAAs(aas)
    self.setAtoms(atoms)
    self.setIndices(a_ids)
    self.setHetmols(hetmols)
    self.setMissingAtoms(missing_atoms)
    self.setMissingAAs(missing_aas)
    self.setConf(conf)
    self.setR(R)
    self.setRfree(Rfree)
    self.setBonds(bonds)
916
+
917
def fetch_pdb(self, pdbCode):
    """
    API query to fetch a pdb and write it as a protein3D class instance

    Outcomes are reported with print; nothing is returned.

    Parameters
    ----------
        pdbCode : str
            Code for protein, e.g. 1os7
    """
    remoteCode = pdbCode.upper()
    url = 'https://files.rcsb.org/view/' + remoteCode + '.pdb'
    try:
        # the context manager closes the HTTP response after reading
        with urllib.request.urlopen(url) as response:
            data = response.read()
    except urllib.error.URLError:
        print("warning: %s not found.\n" % pdbCode)
        return

    try:
        # readfrompdb receives the str() form of the downloaded bytes
        self.readfrompdb(str(data))
        self.setPDBCode(pdbCode)
        print("fetched: %s" % (pdbCode))
    except IOError:
        print('aborted')
    else:
        if len(data) == 0:
            print("warning: %s not valid.\n" % pdbCode)
943
+
944
def setBonds(self, bonds):
    """
    Replace the bond dictionary (the molecular graph) of the protein.

    Parameters
    ----------
        bonds : dictionary
            Keyed by atom3D atoms in the protein.
            Valued by a set consisting of bonded atoms.
    """
    self.bonds = bonds
957
+
958
def readMetaData(self):
    """
    API query to fetch the XML validation report of the stored PDB code
    (``self.pdbCode``) and add its useful attributes to a protein3D class.

    Sets DataCompleteness, RSRZ, TwinL and TwinL2; an attribute missing
    from the report is set to its fallback value and a warning is printed.
    """
    pdbCode = self.pdbCode
    start = 'https://files.rcsb.org/pub/pdb/validation_reports/' + pdbCode[1] + pdbCode[2]
    link = start + '/' + pdbCode + '/' + pdbCode + '_validation.xml'
    try:
        xml_doc = requests.get(link)
    except (requests.exceptions.RequestException, urllib.error.URLError):
        # requests reports failures through its own exception hierarchy
        print("warning: %s not found.\n" % pdbCode)
        return

    try:
        # We then use beautiful soup to read the XML doc. LXML is an XML reader.
        # The soup object is what we then use to parse!
        soup = BeautifulSoup(xml_doc.content, 'lxml-xml')

        # We can then use methods of the soup object to find "tags" within the XML file.
        body = soup.find_all('wwPDB-validation-information')
        entry = body[0].find_all("Entry") if body else []
        if not entry:
            # e.g. an HTTP error page instead of a validation report
            print("warning: %s not found.\n" % pdbCode)
            return
        attrs = entry[0].attrs

        # (report attribute, setter, fallback value, warning text)
        fields = (
            ("DataCompleteness", self.setDataCompleteness, 0,
             "warning: %s has no DataCompleteness."),
            ("percent-RSRZ-outliers", self.setRSRZ, 100,
             "warning: %s has no RSRZ.\n"),
            ("TwinL", self.setTwinL, 0,
             "warning: %s has no TwinL."),
            ("TwinL2", self.setTwinL2, 0,
             "warning: %s has no TwinL2."),
        )
        for key, setter, fallback, message in fields:
            if key in attrs:
                setter(float(attrs[key]))
            else:
                setter(fallback)
                print(message % pdbCode)
    except IOError:
        print('aborted')
1011
+
1012
def setDataCompleteness(self, DataCompleteness):
    """
    Store a new DataCompleteness value on this protein3D.

    Parameters
    ----------
        DataCompleteness : float
            The desired new DataCompleteness value.
    """
    self.DataCompleteness = DataCompleteness
1022
+
1023
def setTwinL(self, TwinL):
    """
    Store a new TwinL score on this protein3D.

    Parameters
    ----------
        TwinL : float
            The desired new TwinL score.
    """
    self.TwinL = TwinL
1033
+
1034
def setTwinL2(self, TwinL2):
    """
    Store a new TwinL squared score on this protein3D.

    Parameters
    ----------
        TwinL2 : float
            The desired new TwinL squared score.
    """
    self.TwinL2 = TwinL2
1044
+
1045
def setEDIAScores(self):
    """
    Sets the EDIA score of the atoms of a protein3D class.

    Submits the stored PDB code (``self.pdbCode``) to the proteins.plus
    EDIA REST service through curl, polls the returned location until the
    job is no longer reported with status code 202, then reads the atom
    score table and stores each score on the matching atom.
    """
    code = self.pdbCode
    # argument list passed directly to curl (no shell involved)
    post_args = ['curl', '-d', '{"edia":{ "pdbCode":"' + code + '"}}',
                 '-H', 'Accept: application/json',
                 '-H', 'Content-Type: application/json',
                 '-X', 'POST', 'https://proteins.plus/api/edia_rest', '-k']
    result = subprocess.Popen(post_args, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    # communicate() both drains the pipes and waits for the process;
    # calling wait() first can block when a pipe buffer fills up
    out, err = result.communicate()
    int_dict = ast.literal_eval(out.decode("UTF-8"))

    poll_args = ['curl', '-k', int_dict['location']]
    res2 = subprocess.Popen(poll_args, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out2, err2 = res2.communicate()
    dictionary = ast.literal_eval(out2.decode("UTF-8"))
    t = 5  # can change depending on how frequently to loop
    while dictionary["status_code"] == 202:
        # print('sleeping', t)
        time.sleep(t)
        res2 = subprocess.Popen(poll_args, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out2, err2 = res2.communicate()
        dictionary = ast.literal_eval(out2.decode("UTF-8"))
    link = dictionary["atom_scores"]
    df = pd.read_csv(link, on_bad_lines='skip')

    for i, row in df.iterrows():
        EDIA = row["EDIA"]
        index = row["Infile id"]
        if index in self.atoms.keys():
            a = self.atoms[index]
            a.setEDIA(EDIA)
            if a.occup < 1:  # more than one conformation
                # when the table has no row for index+1, reuse this score
                # for the neighbouring atom index
                subdf = df[df["Infile id"] == index+1]
                if subdf.shape[0] == 0 and index+1 in self.atoms.keys():
                    self.atoms[index+1].setEDIA(EDIA)
                elif subdf.shape[0] == 0 and index-1 in self.atoms.keys():
                    self.atoms[index-1].setEDIA(EDIA)
        else:
            print("OXT is missing")
1097
+
1098
def setPDBCode(self, pdbCode):
    """
    Store a new PDB code on this protein3D instance.

    Parameters
    ----------
        pdbCode : string
            Desired 4-letter PDB code
    """
    self.pdbCode = pdbCode
1108
+
1109
def centermass(self):
    """Computes coordinates of center of mass of protein.

    Every atom in ``self.atoms`` contributes its coordinates weighted by
    ``atom.mass``. The result is stored in ``self.com`` as a list
    ``[x, y, z]``; nothing is returned.

    If there are no atoms, or the masses sum to zero (so the weighted
    average is undefined), ``self.com`` is set to ``False`` and an error
    message is printed.
    """
    weighted_sum = [0, 0, 0]  # mass-weighted coordinate sums (X, Y, Z)
    total_mass = 0
    # loop over atoms in molecule
    for atom in self.atoms.values():
        xyz = atom.coords()
        mass = atom.mass
        weighted_sum[0] += xyz[0] * mass
        weighted_sum[1] += xyz[1] * mass
        weighted_sum[2] += xyz[2] * mass
        total_mass += mass

    # An empty structure leaves total_mass at 0, so this single check covers
    # both "no atoms" and "atoms whose masses sum to zero" and avoids a
    # ZeroDivisionError during normalization.
    if total_mass != 0:
        center_of_mass = [component / total_mass for component in weighted_sum]
    else:
        center_of_mass = False
        print(
            'ERROR: Center of mass calculation failed. Structure will be inaccurate.\n')
    self.com = center_of_mass
1134
+
1135
def setCentroid(self):
    """Computes coordinates of the centroid of the protein.

    The centroid is the plain (unweighted) average of the coordinates of
    all atoms in ``self.atoms``. The result is stored in ``self.centroid``
    as a list ``[x, y, z]``; nothing is returned.

    If there are no atoms, ``self.centroid`` is set to ``False`` and an
    error message is printed.
    """
    n_atoms = len(self.atoms.keys())
    if n_atoms == 0:
        # nothing to average over
        print(
            'ERROR: Centroid calculation failed. Structure will be inaccurate.\n')
        self.centroid = False
        return

    totals = [0, 0, 0]  # running coordinate sums (X, Y, Z)
    for atom in self.atoms.values():
        position = atom.coords()
        for axis in range(3):
            totals[axis] += position[axis]

    # normalize by the number of atoms (every atom counts equally)
    self.centroid = [total / n_atoms for total in totals]
1158
+
1159
def convexhull(self):
    """
    Computes convex hull of protein.

    The coordinates of every atom in ``self.atoms`` are collected and
    passed to ``ConvexHull``; the resulting object is stored in
    ``self.hull``. Nothing is returned.

    If there are no atoms, ``self.hull`` is set to ``False`` and an error
    message is printed.
    """
    if len(self.atoms.keys()) > 0:
        # one coordinate triple per atom in the protein
        coordinates = [atom.coords() for atom in self.atoms.values()]
        result = ConvexHull(coordinates)
    else:
        result = False
        print(
            'ERROR: Convex hull calculation failed. Structure will be inaccurate.\n')
    self.hull = result