molSimplify 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (651) hide show
  1. docs/source/conf.py +224 -0
  2. molSimplify/Classes/__init__.py +6 -0
  3. molSimplify/Classes/atom3D.py +235 -0
  4. molSimplify/Classes/dft_obs.py +130 -0
  5. molSimplify/Classes/globalvars.py +827 -0
  6. molSimplify/Classes/helpers.py +161 -0
  7. molSimplify/Classes/ligand.py +2330 -0
  8. molSimplify/Classes/mGUI.py +2493 -0
  9. molSimplify/Classes/mWidgets.py +438 -0
  10. molSimplify/Classes/miniGUI.py +41 -0
  11. molSimplify/Classes/mol2D.py +260 -0
  12. molSimplify/Classes/mol3D.py +5846 -0
  13. molSimplify/Classes/monomer3D.py +253 -0
  14. molSimplify/Classes/partialcharges.py +226 -0
  15. molSimplify/Classes/protein3D.py +1178 -0
  16. molSimplify/Classes/rundiag.py +151 -0
  17. molSimplify/Data/ML.dat +212 -0
  18. molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
  19. molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
  20. molSimplify/Data/MLS_angle_for_click.dat +8 -0
  21. molSimplify/Data/MLS_angle_for_inter.dat +23 -0
  22. molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
  23. molSimplify/Data/MLS_angle_for_intra.dat +10 -0
  24. molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
  25. molSimplify/Data/MLS_angle_for_oa.dat +18 -0
  26. molSimplify/Data/ML_FSR_for_inter.dat +112 -0
  27. molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
  28. molSimplify/Data/ML_bond_for_cat.dat +8 -0
  29. molSimplify/Data/ML_bond_for_click.dat +8 -0
  30. molSimplify/Data/ML_bond_for_inter.dat +48 -0
  31. molSimplify/Data/ML_bond_for_inter2.dat +48 -0
  32. molSimplify/Data/ML_bond_for_intra.dat +10 -0
  33. molSimplify/Data/ML_bond_for_intra2.dat +6 -0
  34. molSimplify/Data/ML_bond_for_oa.dat +18 -0
  35. molSimplify/Data/bp1.dat +21 -0
  36. molSimplify/Data/li.dat +3 -0
  37. molSimplify/Data/no.dat +2 -0
  38. molSimplify/Data/oct.dat +7 -0
  39. molSimplify/Data/pbp.dat +8 -0
  40. molSimplify/Data/spy.dat +6 -0
  41. molSimplify/Data/sqap.dat +9 -0
  42. molSimplify/Data/sqp.dat +5 -0
  43. molSimplify/Data/tbp.dat +6 -0
  44. molSimplify/Data/tdhd.dat +9 -0
  45. molSimplify/Data/thd.dat +5 -0
  46. molSimplify/Data/tpl.dat +4 -0
  47. molSimplify/Data/tpr.dat +7 -0
  48. molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
  49. molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
  50. molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
  51. molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
  52. molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
  53. molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
  54. molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
  55. molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
  56. molSimplify/Informatics/MOF/__init__.py +0 -0
  57. molSimplify/Informatics/MOF/atomic.py +267 -0
  58. molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
  59. molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
  60. molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
  61. molSimplify/Informatics/Mol2Parser.py +46 -0
  62. molSimplify/Informatics/RACassemble.py +408 -0
  63. molSimplify/Informatics/__init__.py +0 -0
  64. molSimplify/Informatics/active_learning/__init__.py +0 -0
  65. molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
  66. molSimplify/Informatics/autocorrelation.py +1930 -0
  67. molSimplify/Informatics/clean_autocorrelation.py +778 -0
  68. molSimplify/Informatics/coulomb_analyze.py +67 -0
  69. molSimplify/Informatics/decoration_manager.py +193 -0
  70. molSimplify/Informatics/geo_analyze.py +88 -0
  71. molSimplify/Informatics/geometrics.py +56 -0
  72. molSimplify/Informatics/graph_analyze.py +163 -0
  73. molSimplify/Informatics/graph_racs.py +288 -0
  74. molSimplify/Informatics/jupyter_vis.py +172 -0
  75. molSimplify/Informatics/lacRACAssemble.py +2192 -0
  76. molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
  77. molSimplify/Informatics/misc_descriptors.py +198 -0
  78. molSimplify/Informatics/organic_fingerprints.py +61 -0
  79. molSimplify/Informatics/partialcharges.py +345 -0
  80. molSimplify/Informatics/protein/activesite.py +53 -0
  81. molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
  82. molSimplify/Informatics/rac155_geo.py +48 -0
  83. molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
  84. molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
  85. molSimplify/Ligands/12crown4.mol +62 -0
  86. molSimplify/Ligands/Antipyrine.mol +58 -0
  87. molSimplify/Ligands/BPAbipy.mol +106 -0
  88. molSimplify/Ligands/Hpyrrole.mol +26 -0
  89. molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
  90. molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
  91. molSimplify/Ligands/NMe2_-1.xyz +11 -0
  92. molSimplify/Ligands/PCy3.mol +111 -0
  93. molSimplify/Ligands/PMe3.xyz +15 -0
  94. molSimplify/Ligands/PPh3.mol +76 -0
  95. molSimplify/Ligands/Propyphenazone.mol +77 -0
  96. molSimplify/Ligands/acac.mol +33 -0
  97. molSimplify/Ligands/acacen.mol +76 -0
  98. molSimplify/Ligands/acetate.smi +1 -0
  99. molSimplify/Ligands/acetate.xyz +9 -0
  100. molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
  101. molSimplify/Ligands/acetonitrile.mol +17 -0
  102. molSimplify/Ligands/alanine.mol +30 -0
  103. molSimplify/Ligands/alphabetizer.py +21 -0
  104. molSimplify/Ligands/amine.mol +11 -0
  105. molSimplify/Ligands/ammonia.mol +12 -0
  106. molSimplify/Ligands/arginine.mol +58 -0
  107. molSimplify/Ligands/asparagine.mol +38 -0
  108. molSimplify/Ligands/aspartic_acid.mol +35 -0
  109. molSimplify/Ligands/azide.mol +11 -0
  110. molSimplify/Ligands/benzene.mol +28 -0
  111. molSimplify/Ligands/benzene_pi.mol +30 -0
  112. molSimplify/Ligands/benzenedithiol.mol +30 -0
  113. molSimplify/Ligands/benzenethiol.mol +30 -0
  114. molSimplify/Ligands/benzylisocy.mol +38 -0
  115. molSimplify/Ligands/bidiazine.mol +42 -0
  116. molSimplify/Ligands/bidiazole.mol +38 -0
  117. molSimplify/Ligands/bifuran.mol +38 -0
  118. molSimplify/Ligands/bihydrodiazine.mol +58 -0
  119. molSimplify/Ligands/bihydrodiazole.mol +46 -0
  120. molSimplify/Ligands/bihydrooxazine.mol +54 -0
  121. molSimplify/Ligands/bihydrooxazole.mol +42 -0
  122. molSimplify/Ligands/bihydrothiazine.mol +54 -0
  123. molSimplify/Ligands/bihydrothiazole.mol +42 -0
  124. molSimplify/Ligands/biimidazole.mol +38 -0
  125. molSimplify/Ligands/bioxazole.mol +34 -0
  126. molSimplify/Ligands/bipy.mol +46 -0
  127. molSimplify/Ligands/bipyrazine.xyz +20 -0
  128. molSimplify/Ligands/bipyrimidine.mol +42 -0
  129. molSimplify/Ligands/bipyrrole.mol +42 -0
  130. molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
  131. molSimplify/Ligands/bithiazole.mol +34 -0
  132. molSimplify/Ligands/bromide.mol +7 -0
  133. molSimplify/Ligands/bromide.smi +1 -0
  134. molSimplify/Ligands/c2.mol +9 -0
  135. molSimplify/Ligands/caprolactone.mol +41 -0
  136. molSimplify/Ligands/carbonyl.mol +8 -0
  137. molSimplify/Ligands/carboxyl.mol +13 -0
  138. molSimplify/Ligands/cat.mol +30 -0
  139. molSimplify/Ligands/chloride.mol +7 -0
  140. molSimplify/Ligands/chloride.smi +1 -0
  141. molSimplify/Ligands/chloropyridine.mol +27 -0
  142. molSimplify/Ligands/co2.mol +10 -0
  143. molSimplify/Ligands/corrolazine.mol +72 -0
  144. molSimplify/Ligands/cs.mol +8 -0
  145. molSimplify/Ligands/cyanate.xyz +5 -0
  146. molSimplify/Ligands/cyanide.mol +9 -0
  147. molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
  148. molSimplify/Ligands/cyanopyridine.mol +29 -0
  149. molSimplify/Ligands/cyclam.mol +81 -0
  150. molSimplify/Ligands/cyclen.mol +69 -0
  151. molSimplify/Ligands/cyclopentadienyl.mol +26 -0
  152. molSimplify/Ligands/cysteine.mol +32 -0
  153. molSimplify/Ligands/diaminomethyl.mol +19 -0
  154. molSimplify/Ligands/diazine.mol +25 -0
  155. molSimplify/Ligands/diazole.mol +23 -0
  156. molSimplify/Ligands/dicyanamide.mol +15 -0
  157. molSimplify/Ligands/dihydrofuran.mol +27 -0
  158. molSimplify/Ligands/dmap.xyz +35 -0
  159. molSimplify/Ligands/dmf.mol +28 -0
  160. molSimplify/Ligands/dmi.mol +41 -0
  161. molSimplify/Ligands/dmpe.mol +52 -0
  162. molSimplify/Ligands/dpmu.mol +47 -0
  163. molSimplify/Ligands/dppe.mol +112 -0
  164. molSimplify/Ligands/edta.mol +69 -0
  165. molSimplify/Ligands/en.mol +28 -0
  166. molSimplify/Ligands/ethanethiol.mol +21 -0
  167. molSimplify/Ligands/ethanolamine.mol +26 -0
  168. molSimplify/Ligands/ethbipy.mol +70 -0
  169. molSimplify/Ligands/ethyl.mol +19 -0
  170. molSimplify/Ligands/ethylamine.mol +24 -0
  171. molSimplify/Ligands/ethylene.mol +16 -0
  172. molSimplify/Ligands/ethylesteracac.mol +57 -0
  173. molSimplify/Ligands/fluoride.mol +7 -0
  174. molSimplify/Ligands/fluoride.smi +1 -0
  175. molSimplify/Ligands/formaldehyde.mol +12 -0
  176. molSimplify/Ligands/formamidate.xyz +8 -0
  177. molSimplify/Ligands/formate.xyz +6 -0
  178. molSimplify/Ligands/furan.mol +23 -0
  179. molSimplify/Ligands/glutamic_acid.mol +42 -0
  180. molSimplify/Ligands/glutamine.mol +44 -0
  181. molSimplify/Ligands/glycinate.mol +23 -0
  182. molSimplify/Ligands/glycine.mol +24 -0
  183. molSimplify/Ligands/h2s.mol +10 -0
  184. molSimplify/Ligands/helium.mol +6 -0
  185. molSimplify/Ligands/histidine.mol +45 -0
  186. molSimplify/Ligands/hmpa.mol +62 -0
  187. molSimplify/Ligands/hs-.mol +9 -0
  188. molSimplify/Ligands/hydride.mol +7 -0
  189. molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
  190. molSimplify/Ligands/hydrocyanide.mol +10 -0
  191. molSimplify/Ligands/hydrodiazine.mol +33 -0
  192. molSimplify/Ligands/hydrodiazole.mol +27 -0
  193. molSimplify/Ligands/hydrogensulfide.mol +10 -0
  194. molSimplify/Ligands/hydroisocyanide.mol +11 -0
  195. molSimplify/Ligands/hydrooxazine.mol +31 -0
  196. molSimplify/Ligands/hydrooxazole.mol +25 -0
  197. molSimplify/Ligands/hydrothiazine.mol +31 -0
  198. molSimplify/Ligands/hydrothiazole.mol +25 -0
  199. molSimplify/Ligands/hydroxyl.mol +9 -0
  200. molSimplify/Ligands/imidazole.mol +23 -0
  201. molSimplify/Ligands/imidazolidinone.mol +29 -0
  202. molSimplify/Ligands/imine.mol +13 -0
  203. molSimplify/Ligands/iminodiacetic.mol +33 -0
  204. molSimplify/Ligands/iodide.mol +7 -0
  205. molSimplify/Ligands/iodobenzene.xyz +14 -0
  206. molSimplify/Ligands/isoleucine.mol +48 -0
  207. molSimplify/Ligands/isothiocyanate.mol +11 -0
  208. molSimplify/Ligands/leucine.mol +48 -0
  209. molSimplify/Ligands/ligands.dict +257 -0
  210. molSimplify/Ligands/lysine.mol +54 -0
  211. molSimplify/Ligands/mebenzenedithiol.mol +36 -0
  212. molSimplify/Ligands/mebim_py.xyz +29 -0
  213. molSimplify/Ligands/mebim_pz.xyz +28 -0
  214. molSimplify/Ligands/mebipy.mol +58 -0
  215. molSimplify/Ligands/mecat.mol +36 -0
  216. molSimplify/Ligands/methanal.mol +11 -0
  217. molSimplify/Ligands/methanethiol.mol +15 -0
  218. molSimplify/Ligands/methanol.mol +16 -0
  219. molSimplify/Ligands/methionine.mol +44 -0
  220. molSimplify/Ligands/methyl.mol +13 -0
  221. molSimplify/Ligands/methylacetylide.xyz +8 -0
  222. molSimplify/Ligands/methylamine.mol +19 -0
  223. molSimplify/Ligands/methylazide.xyz +9 -0
  224. molSimplify/Ligands/methylisocy.mol +17 -0
  225. molSimplify/Ligands/methylpyridine.mol +33 -0
  226. molSimplify/Ligands/n2.mol +8 -0
  227. molSimplify/Ligands/n4py.xyz +51 -0
  228. molSimplify/Ligands/nch.mol +10 -0
  229. molSimplify/Ligands/nco-.mol +11 -0
  230. molSimplify/Ligands/nethanolamine.mol +26 -0
  231. molSimplify/Ligands/nitrate.mol +14 -0
  232. molSimplify/Ligands/nitrite.mol +11 -0
  233. molSimplify/Ligands/nitro.mol +11 -0
  234. molSimplify/Ligands/nitrobipy.mol +54 -0
  235. molSimplify/Ligands/nitroso.mol +8 -0
  236. molSimplify/Ligands/nme3.mol +30 -0
  237. molSimplify/Ligands/no-.mol +10 -0
  238. molSimplify/Ligands/no2-.mol +11 -0
  239. molSimplify/Ligands/noxygen.mol +8 -0
  240. molSimplify/Ligands/ns-.mol +10 -0
  241. molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
  242. molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
  243. molSimplify/Ligands/o2-.mol +9 -0
  244. molSimplify/Ligands/o2.xyz +4 -0
  245. molSimplify/Ligands/och2.mol +12 -0
  246. molSimplify/Ligands/oethanolamine.mol +26 -0
  247. molSimplify/Ligands/ome2.mol +22 -0
  248. molSimplify/Ligands/ooh.xyz +5 -0
  249. molSimplify/Ligands/oxalate.mol +17 -0
  250. molSimplify/Ligands/oxalate.smi +1 -0
  251. molSimplify/Ligands/oxygen.mol +7 -0
  252. molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
  253. molSimplify/Ligands/ph2-.mol +11 -0
  254. molSimplify/Ligands/ph3.mol +12 -0
  255. molSimplify/Ligands/phen.mol +51 -0
  256. molSimplify/Ligands/phenacac.mol +63 -0
  257. molSimplify/Ligands/phenalalanine.mol +51 -0
  258. molSimplify/Ligands/phendione.mol +51 -0
  259. molSimplify/Ligands/phenphen.mol +75 -0
  260. molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
  261. molSimplify/Ligands/phenylcyc.mol +99 -0
  262. molSimplify/Ligands/phenylenediamine.mol +37 -0
  263. molSimplify/Ligands/phenylisocy.mol +32 -0
  264. molSimplify/Ligands/phosacidbipy.mol +66 -0
  265. molSimplify/Ligands/phosphine.mol +13 -0
  266. molSimplify/Ligands/phosphorine.mol +27 -0
  267. molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
  268. molSimplify/Ligands/phthalocyanine.mol +126 -0
  269. molSimplify/Ligands/pme3o.mol +32 -0
  270. molSimplify/Ligands/porphyrin.mol +82 -0
  271. molSimplify/Ligands/pph3o.mol +77 -0
  272. molSimplify/Ligands/proline.mol +39 -0
  273. molSimplify/Ligands/propdiol.mol +21 -0
  274. molSimplify/Ligands/propylene.mol +23 -0
  275. molSimplify/Ligands/pyridine.mol +27 -0
  276. molSimplify/Ligands/pyrimidone.mol +27 -0
  277. molSimplify/Ligands/pyrrole.mol +24 -0
  278. molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
  279. molSimplify/Ligands/s2-.mol +9 -0
  280. molSimplify/Ligands/salen.mol +75 -0
  281. molSimplify/Ligands/salphen.mol +84 -0
  282. molSimplify/Ligands/serine.mol +32 -0
  283. molSimplify/Ligands/simple_ligands.dict +14 -0
  284. molSimplify/Ligands/sulfacidbipy.mol +63 -0
  285. molSimplify/Ligands/tbucat.mol +54 -0
  286. molSimplify/Ligands/tbuphisocy.mol +56 -0
  287. molSimplify/Ligands/tbutylcyclen.mol +166 -0
  288. molSimplify/Ligands/tbutylisocy.mol +35 -0
  289. molSimplify/Ligands/tbutylthiol.mol +33 -0
  290. molSimplify/Ligands/tcnoet.mol +43 -0
  291. molSimplify/Ligands/tcnoetOH.mol +45 -0
  292. molSimplify/Ligands/terpy.mol +65 -0
  293. molSimplify/Ligands/tetrahydrofuran.mol +31 -0
  294. molSimplify/Ligands/thiane.mol +37 -0
  295. molSimplify/Ligands/thiazole.mol +21 -0
  296. molSimplify/Ligands/thiocyanate.mol +11 -0
  297. molSimplify/Ligands/thiol.mol +9 -0
  298. molSimplify/Ligands/thiophene.mol +23 -0
  299. molSimplify/Ligands/thiopyridine.mol +29 -0
  300. molSimplify/Ligands/threonine.mol +38 -0
  301. molSimplify/Ligands/tpp.mol +165 -0
  302. molSimplify/Ligands/tricyanomethyl.mol +19 -0
  303. molSimplify/Ligands/trifluoromethyl.mol +13 -0
  304. molSimplify/Ligands/tryptophan.mol +60 -0
  305. molSimplify/Ligands/tyrosine.mol +53 -0
  306. molSimplify/Ligands/uthiol.mol +11 -0
  307. molSimplify/Ligands/uthiolme2.mol +23 -0
  308. molSimplify/Ligands/valine.mol +42 -0
  309. molSimplify/Ligands/water.mol +10 -0
  310. molSimplify/Ligands/x.mol +6 -0
  311. molSimplify/Scripts/__init__.py +0 -0
  312. molSimplify/Scripts/addtodb.py +308 -0
  313. molSimplify/Scripts/cellbuilder.py +1592 -0
  314. molSimplify/Scripts/cellbuilder_tools.py +701 -0
  315. molSimplify/Scripts/chains.py +342 -0
  316. molSimplify/Scripts/convert_2to3.py +23 -0
  317. molSimplify/Scripts/dbinteract.py +631 -0
  318. molSimplify/Scripts/distgeom.py +617 -0
  319. molSimplify/Scripts/findcorrelations.py +287 -0
  320. molSimplify/Scripts/generator.py +267 -0
  321. molSimplify/Scripts/geometry.py +1224 -0
  322. molSimplify/Scripts/grabguivars.py +845 -0
  323. molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
  324. molSimplify/Scripts/inparse.py +1673 -0
  325. molSimplify/Scripts/io.py +1149 -0
  326. molSimplify/Scripts/isomers.py +415 -0
  327. molSimplify/Scripts/jobgen.py +247 -0
  328. molSimplify/Scripts/krr_prep.py +1262 -0
  329. molSimplify/Scripts/molSimplify_io.py +18 -0
  330. molSimplify/Scripts/molden2psi4wfn.py +166 -0
  331. molSimplify/Scripts/namegen.py +32 -0
  332. molSimplify/Scripts/nn_prep.py +561 -0
  333. molSimplify/Scripts/oct_check_mols.py +782 -0
  334. molSimplify/Scripts/periodic_QE.py +97 -0
  335. molSimplify/Scripts/postmold.py +304 -0
  336. molSimplify/Scripts/postmwfn.py +709 -0
  337. molSimplify/Scripts/postparse.py +488 -0
  338. molSimplify/Scripts/postproc.py +139 -0
  339. molSimplify/Scripts/qcgen.py +1450 -0
  340. molSimplify/Scripts/rmsd.py +489 -0
  341. molSimplify/Scripts/rungen.py +670 -0
  342. molSimplify/Scripts/structgen.py +3040 -0
  343. molSimplify/Scripts/tf_nn_prep.py +894 -0
  344. molSimplify/Scripts/tsgen.py +295 -0
  345. molSimplify/Scripts/uq_calibration.py +69 -0
  346. molSimplify/__init__.py +0 -0
  347. molSimplify/__main__.py +197 -0
  348. molSimplify/icons/chemdb.png +0 -0
  349. molSimplify/icons/hjklogo.png +0 -0
  350. molSimplify/icons/icon.png +0 -0
  351. molSimplify/icons/logo.png +0 -0
  352. molSimplify/icons/logo_old.png +0 -0
  353. molSimplify/icons/petachem.png +0 -0
  354. molSimplify/icons/petachem2.png +0 -0
  355. molSimplify/icons/petachem_full.png +0 -0
  356. molSimplify/icons/pythonlogo.png +0 -0
  357. molSimplify/icons/sge copy.png +0 -0
  358. molSimplify/icons/sge.png +0 -0
  359. molSimplify/icons/slurm.png +0 -0
  360. molSimplify/icons/wft1.png +0 -0
  361. molSimplify/icons/wft2.png +0 -0
  362. molSimplify/icons/wft3.png +0 -0
  363. molSimplify/ml/__init__.py +0 -0
  364. molSimplify/ml/kernels.py +36 -0
  365. molSimplify/ml/layers.py +29 -0
  366. molSimplify/molscontrol/__init__.py +14 -0
  367. molSimplify/molscontrol/_version.py +521 -0
  368. molSimplify/molscontrol/clf_tools.py +144 -0
  369. molSimplify/molscontrol/data/README.md +21 -0
  370. molSimplify/molscontrol/data/look_and_say.dat +15 -0
  371. molSimplify/molscontrol/dynamic_classifier.py +514 -0
  372. molSimplify/molscontrol/io_tools.py +363 -0
  373. molSimplify/molscontrol/molscontrol.py +49 -0
  374. molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
  375. molSimplify/molscontrol/terachem/terachem_input +22 -0
  376. molSimplify/python_krr/X_train_TS.csv +535 -0
  377. molSimplify/python_krr/__init__.py +0 -0
  378. molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
  379. molSimplify/python_krr/hat2_feature_names.csv +1 -0
  380. molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
  381. molSimplify/python_krr/hat_X_mean_std.csv +6 -0
  382. molSimplify/python_krr/hat_feature_names.csv +1 -0
  383. molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
  384. molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
  385. molSimplify/python_krr/hat_y_mean_std.csv +2 -0
  386. molSimplify/python_krr/sklearn_models.py +34 -0
  387. molSimplify/python_krr/y_train_TS.csv +535 -0
  388. molSimplify/python_nn/ANN.py +198 -0
  389. molSimplify/python_nn/__init__.py +0 -0
  390. molSimplify/python_nn/clf_analysis_tool.py +125 -0
  391. molSimplify/python_nn/dictionary_toolbox.py +49 -0
  392. molSimplify/python_nn/ensemble_test.py +309 -0
  393. molSimplify/python_nn/hs_center.csv +26 -0
  394. molSimplify/python_nn/hs_scale.csv +26 -0
  395. molSimplify/python_nn/ls_center.csv +26 -0
  396. molSimplify/python_nn/ls_scale.csv +26 -0
  397. molSimplify/python_nn/ms_hs_b1.csv +50 -0
  398. molSimplify/python_nn/ms_hs_b2.csv +50 -0
  399. molSimplify/python_nn/ms_hs_b3.csv +1 -0
  400. molSimplify/python_nn/ms_hs_w1.csv +50 -0
  401. molSimplify/python_nn/ms_hs_w2.csv +50 -0
  402. molSimplify/python_nn/ms_hs_w3.csv +1 -0
  403. molSimplify/python_nn/ms_ls_b1.csv +50 -0
  404. molSimplify/python_nn/ms_ls_b2.csv +50 -0
  405. molSimplify/python_nn/ms_ls_b3.csv +1 -0
  406. molSimplify/python_nn/ms_ls_w1.csv +50 -0
  407. molSimplify/python_nn/ms_ls_w2.csv +50 -0
  408. molSimplify/python_nn/ms_ls_w3.csv +1 -0
  409. molSimplify/python_nn/ms_slope_b1.csv +50 -0
  410. molSimplify/python_nn/ms_slope_b2.csv +50 -0
  411. molSimplify/python_nn/ms_slope_b3.csv +1 -0
  412. molSimplify/python_nn/ms_slope_w1.csv +50 -0
  413. molSimplify/python_nn/ms_slope_w2.csv +50 -0
  414. molSimplify/python_nn/ms_slope_w3.csv +1 -0
  415. molSimplify/python_nn/ms_split_b1.csv +50 -0
  416. molSimplify/python_nn/ms_split_b2.csv +50 -0
  417. molSimplify/python_nn/ms_split_b3.csv +1 -0
  418. molSimplify/python_nn/ms_split_w1.csv +50 -0
  419. molSimplify/python_nn/ms_split_w2.csv +50 -0
  420. molSimplify/python_nn/ms_split_w3.csv +1 -0
  421. molSimplify/python_nn/slope_center.csv +25 -0
  422. molSimplify/python_nn/slope_scale.csv +25 -0
  423. molSimplify/python_nn/split_center.csv +26 -0
  424. molSimplify/python_nn/split_scale.csv +26 -0
  425. molSimplify/python_nn/tf_ANN.py +762 -0
  426. molSimplify/python_nn/train_data.csv +1211 -0
  427. molSimplify/tf_nn/__init__.py +0 -0
  428. molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
  429. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
  430. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
  431. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
  432. molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
  433. molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
  434. molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
  435. molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
  436. molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
  437. molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
  438. molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
  439. molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
  440. molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
  441. molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
  442. molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
  443. molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
  444. molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
  445. molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
  446. molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
  447. molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
  448. molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
  449. molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
  450. molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
  451. molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
  452. molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
  453. molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
  454. molSimplify/tf_nn/homolumo/gap_model.json +1 -0
  455. molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
  456. molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
  457. molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
  458. molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
  459. molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
  460. molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
  461. molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
  462. molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
  463. molSimplify/tf_nn/homolumo/homo_model.json +126 -0
  464. molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
  465. molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
  466. molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
  467. molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
  468. molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
  469. molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
  470. molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
  471. molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
  472. molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
  473. molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
  474. molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
  475. molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
  476. molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
  477. molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
  478. molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
  479. molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
  480. molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
  481. molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
  482. molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
  483. molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
  484. molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
  485. molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
  486. molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
  487. molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
  488. molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
  489. molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
  490. molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
  491. molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
  492. molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
  493. molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
  494. molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
  495. molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
  496. molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
  497. molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
  498. molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
  499. molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
  500. molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
  501. molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
  502. molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
  503. molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
  504. molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
  505. molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
  506. molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
  507. molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
  508. molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
  509. molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
  510. molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
  511. molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
  512. molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
  513. molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
  514. molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
  515. molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
  516. molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
  517. molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
  518. molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
  519. molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
  520. molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
  521. molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
  522. molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
  523. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
  524. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
  525. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
  526. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
  527. molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
  528. molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
  529. molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
  530. molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
  531. molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
  532. molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
  533. molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
  534. molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
  535. molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
  536. molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
  537. molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
  538. molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
  539. molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
  540. molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
  541. molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
  542. molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
  543. molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
  544. molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
  545. molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
  546. molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
  547. molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
  548. molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
  549. molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
  550. molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
  551. molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
  552. molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
  553. molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
  554. molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
  555. molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
  556. molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
  557. molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
  558. molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
  559. molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
  560. molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
  561. molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
  562. molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
  563. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
  564. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
  565. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
  566. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
  567. molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
  568. molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
  569. molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
  570. molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
  571. molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
  572. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
  573. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
  574. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
  575. molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
  576. molSimplify/tf_nn/split/split_model.h5 +0 -0
  577. molSimplify/tf_nn/split/split_model.json +1 -0
  578. molSimplify/tf_nn/split/split_vars.csv +155 -0
  579. molSimplify/tf_nn/split/split_x.csv +1902 -0
  580. molSimplify/tf_nn/split/split_y.csv +1902 -0
  581. molSimplify/tf_nn/split/train_names.csv +1901 -0
  582. molSimplify/utils/__init__.py +0 -0
  583. molSimplify/utils/decorators.py +16 -0
  584. molSimplify/utils/metaclasses.py +12 -0
  585. molSimplify/utils/tensorflow.py +23 -0
  586. molSimplify/utils/timer.py +16 -0
  587. molSimplify-1.7.4.dist-info/LICENSE +674 -0
  588. molSimplify-1.7.4.dist-info/METADATA +821 -0
  589. molSimplify-1.7.4.dist-info/RECORD +651 -0
  590. molSimplify-1.7.4.dist-info/WHEEL +5 -0
  591. molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
  592. molSimplify-1.7.4.dist-info/top_level.txt +4 -0
  593. tests/generateTests.py +122 -0
  594. tests/helperFuncs.py +658 -0
  595. tests/informatics/test_MOF_descriptors.py +128 -0
  596. tests/informatics/test_active_learning.py +113 -0
  597. tests/informatics/test_coulomb_analyze.py +24 -0
  598. tests/informatics/test_graph_racs.py +193 -0
  599. tests/ml/test_kernels.py +20 -0
  600. tests/ml/test_layers.py +47 -0
  601. tests/runtest.py +10 -0
  602. tests/test_Mol2D.py +128 -0
  603. tests/test_basic_imports.py +62 -0
  604. tests/test_bidentate.py +25 -0
  605. tests/test_cli.py +20 -0
  606. tests/test_distgeom.py +106 -0
  607. tests/test_example_1.py +29 -0
  608. tests/test_example_3.py +31 -0
  609. tests/test_example_5.py +43 -0
  610. tests/test_example_7.py +28 -0
  611. tests/test_example_8.py +15 -0
  612. tests/test_example_tbp.py +15 -0
  613. tests/test_ff_xtb.py +111 -0
  614. tests/test_geocheck_oct.py +26 -0
  615. tests/test_geocheck_one_empty.py +15 -0
  616. tests/test_geometry.py +44 -0
  617. tests/test_inparse.py +76 -0
  618. tests/test_io.py +84 -0
  619. tests/test_jobgen.py +84 -0
  620. tests/test_joption_pythonic.py +27 -0
  621. tests/test_ligand_assign.py +58 -0
  622. tests/test_ligand_assign_consistent.py +60 -0
  623. tests/test_ligand_class.py +26 -0
  624. tests/test_ligand_from_mol_file.py +35 -0
  625. tests/test_ligands.py +86 -0
  626. tests/test_mol3D.py +337 -0
  627. tests/test_molcas_caspt2.py +15 -0
  628. tests/test_molcas_casscf.py +15 -0
  629. tests/test_old_ANNs.py +68 -0
  630. tests/test_orca_ccsdt.py +15 -0
  631. tests/test_orca_dft.py +15 -0
  632. tests/test_qcgen.py +50 -0
  633. tests/test_racs.py +124 -0
  634. tests/test_rmsd.py +68 -0
  635. tests/test_structgen_functions.py +198 -0
  636. tests/test_tetrahedral.py +29 -0
  637. tests/test_tutorial_10_part_one.py +16 -0
  638. tests/test_tutorial_10_part_two.py +15 -0
  639. tests/test_tutorial_2.py +11 -0
  640. tests/test_tutorial_3.py +15 -0
  641. tests/test_tutorial_4.py +57 -0
  642. tests/test_tutorial_6.py +10 -0
  643. tests/test_tutorial_8.py +29 -0
  644. tests/test_tutorial_9_part_one.py +15 -0
  645. tests/test_tutorial_9_part_two.py +15 -0
  646. tests/test_tutorial_qm9_part_one.py +6 -0
  647. tests/testresources/refs/racs/generate_references.py +85 -0
  648. workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
  649. workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
  650. workflows/NandyJACSAu2022/fragment_classes.py +586 -0
  651. workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
@@ -0,0 +1,1262 @@
1
+ # @file nn_prep.py
2
+ # Helper routines for ANN integration
3
+ #
4
+ # Written by Nick Yang for HJK Group
5
+ #
6
+ # Dpt of Chemical Engineering, MIT
7
+
8
+ import copy
9
+ import csv
10
+ import os
11
+ import pickle
12
+ from math import exp
13
+ import matplotlib.pyplot as plt
14
+ import matplotlib.ticker as ticker
15
+ import pandas as pd
16
+ from importlib_resources import files as resource_files
17
+
18
+
19
+ import numpy as np
20
+ from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
21
+ from sklearn.kernel_ridge import KernelRidge
22
+ from sklearn.metrics import mean_absolute_error
23
+ from sklearn.model_selection import train_test_split, GridSearchCV, LeaveOneOut
24
+ from sklearn.multioutput import MultiOutputRegressor
25
+
26
+ from molSimplify.Classes.globalvars import (globalvars)
27
+ from molSimplify.Informatics.autocorrelation import (atom_only_autocorrelation,
28
+ atom_only_deltametric,
29
+ atom_only_ratiometric,
30
+ atom_only_summetric,
31
+ generate_atomonly_autocorrelations,
32
+ generate_atomonly_deltametrics)
33
+ from molSimplify.Informatics.partialcharges import (ffeatures)
34
+
35
+ # import matplotlib.pyplot as plt
36
+ # import matplotlib.ticker as ticker
37
+ from molSimplify.Scripts.geometry import (vecdiff,
38
+ distance,
39
+ vecangle)
40
+
41
# Tell NumPy to ignore floating-point divide-by-zero conditions instead of
# warning about them.
np.seterr(divide='ignore')

# Module-level defaults for a training CSV and its column layout.
# NOTE(review): csvf is an absolute path on the original author's machine and
# will not exist elsewhere. The functions visible in this part of the file
# take same-named parameters rather than reading these globals -- confirm
# whether anything still depends on them.
csvf = '/Users/tzuhsiungyang/Dropbox (MIT)/Work at the Kulik group/ts_build/Data/xyzf_optts/selected_xyzfs/label_1distance_descs_atRACs.csv'
colnum_i_label = 1
colnum_j_label = 2
colnum_desc = 2
47
+
48
+
49
def feature_prep(mol, idx):
    """Rank the atoms bonded to the metal and order them around a reference.

    Each atom bonded to the metal gets a priority ``float("<key>.<subst>")``.
    ``<key>`` is the atom's atomic number repeated ``bond order`` times, and
    ``<subst>`` is a digit string built the same way from atoms found further
    out along the bond graph. The six atoms are then ordered as
    ``[idx0, idx1, idx2, idx3, idx4, idx5]`` where ``idx0`` is ``idx`` and
    ``idx5`` is the atom at the largest angle to it.

    @param mol molecule object providing convert2OBMol, populateBOMatrix,
               findMetal, getBondedAtoms, getBondedAtomsByCoordNo and getAtom
               (presumably a mol3D -- confirm against callers)
    @param idx position (0-5) of the reference atom within the list returned
               by ``mol.getBondedAtomsByCoordNo(metal, 6)``
    @return fpriority_list, fd_list, idx_list: the priorities and the
            metal-atom distances, both reordered by idx_list, and idx_list
    """
    # getting bond-order matrix
    mol.convert2OBMol()
    BOMatrix = mol.populateBOMatrix()

    # first shell: the metal indices; second shell: one single-atom list per
    # atom bonded to a metal
    fidx_list = [mol.findMetal()]
    sidx_list = []
    for fidxes in fidx_list:
        for fidx in fidxes:
            for sidx in mol.getBondedAtoms(fidx):
                sidx_list.append([sidx])

    # key of every second-shell atom: atomic number repeated "bond order"
    # times (a bond order of 0 is treated as 1), read back as an int
    satno_list = []
    for fidxes in fidx_list:
        for fidx in fidxes:
            for sidxes in sidx_list:
                for sidx in sidxes:
                    BO = int(BOMatrix[fidx][sidx])
                    if BO == 0:
                        BO = 1
                    satno_str = str(mol.getAtom(sidx).atno)
                    satno_list.append(int(BO * satno_str))

    # Atoms sharing a key need their substituents inspected to break the tie.
    # Compare with == rather than "is": equal ints are only guaranteed to be
    # the same object inside CPython's small-int cache.
    # NOTE(review): when several different keys are duplicated only the
    # indices of the last one visited are kept (unchanged from the original).
    exit_signal = True
    s_sel_list = []
    for satno in set(satno_list):
        if satno_list.count(satno) > 1:
            s_sel_list = [i for i, atno in enumerate(satno_list)
                          if atno == satno]
            exit_signal = False

    # atoms already visited: start with the metal(s)
    ref_list = [fidx for fidxes in fidx_list for fidx in fidxes]

    # starting the loop
    tidx_list = [[] for _ in sidx_list]
    tatno_list = [[] for _ in sidx_list]

    # Priorities when no tie has to be broken. The original left this name
    # undefined in that case and failed with a NameError further down.
    # float(key) equals float("<key>."), which is what the loop below
    # produces for an empty substituent string.
    fpriority_list = [float(satno) for satno in satno_list]

    while not exit_signal:
        fpriority_list = []
        # next shell of every tied atom, skipping atoms already visited
        for i in s_sel_list:
            t_list = []
            for sidx in sidx_list[i]:
                for tidx in mol.getBondedAtoms(sidx):
                    if tidx not in ref_list:
                        t_list.append(tidx)
            tidx_list[i] = t_list
        # extend the substituent digit string of every tied atom
        for i in s_sel_list:
            for sidx in sidx_list[i]:
                ls = []
                for j in s_sel_list:
                    for tidx in tidx_list[j]:
                        BO = int(BOMatrix[sidx][tidx])
                        tatno_str = str(mol.getAtom(tidx).atno)
                        ls.append(BO * tatno_str)
                # NOTE(review): the original called sorted(ls, reverse=True)
                # and discarded the result, so ls was never reordered. The
                # unsorted order is kept because sorting would change the
                # priorities this function returns.
                # Updating per atom gives the same string as joining once
                # after the loop, and leaves the entry untouched when atom i
                # has no atoms in its current shell.
                tatno_list[i] = [''.join(tatno_list[i] + ls)]
        # the shell just explored becomes the current shell
        sidx_list = list(tidx_list)
        for i in s_sel_list:
            for sidx in sidx_list[i]:
                ref_list.append(sidx)
        test_list = [[] for _ in sidx_list]
        # get priorities
        for i, satno in enumerate(satno_list):
            subst = tatno_list[i][0] if tatno_list[i] else ''
            fpriority_list.append(float('.'.join([str(satno), subst])))
        # stop when nothing is left to explore or all six priorities differ
        if tidx_list == test_list or len(set(fpriority_list)) == 6:
            exit_signal = True

    # get distance
    sidx_list = mol.getBondedAtomsByCoordNo(fidx_list[0][0], 6)
    mcoord = mol.getAtom(fidx_list[0][0]).coords()
    vMLs = [vecdiff(mcoord, mol.getAtom(i).coords()) for i in sidx_list]
    rMLs = [distance(mcoord, mol.getAtom(i).coords()) for i in sidx_list]
    idx0 = idx
    vangs = [vecangle(vML, vMLs[idx0]) for vML in vMLs]
    # idx5: the atom at the largest angle to the reference atom
    idx5 = np.argsort(np.array(vangs))[-1]
    idx1_4 = list(range(6))
    idx1_4.remove(idx0)
    idx1_4.remove(idx5)
    fprio1_4 = copy.deepcopy(fpriority_list)
    vMLs1_4 = copy.deepcopy(vMLs)
    # pop the larger position first so the smaller one is still valid
    if idx0 > idx5:
        pop_order = (idx0, idx5)
    else:
        pop_order = (idx5, idx0)
    for position in pop_order:
        fprio1_4.pop(position)
        vMLs1_4.pop(position)

    # get ax, eq, ax idxes (positions below index the four remaining atoms)
    idx1_ = np.argsort(np.array(fprio1_4))[-1]
    vangs1_4 = [vecangle(vML, vMLs1_4[idx1_]) for vML in vMLs1_4]
    order1_4 = np.argsort(np.array(vangs1_4))
    idx2_ = order1_4[-1]
    idx3_ = order1_4[1]
    idx4_ = order1_4[2]
    fprio3 = fprio1_4[idx3_]
    fprio4 = fprio1_4[idx4_]
    fprio1_2 = sorted([fprio1_4[idx1_], fprio1_4[idx2_]])
    fprio3_4 = sorted([fprio3, fprio4])
    if fprio3_4 > fprio1_2:
        # NOTE(review): idx2_ is not recomputed after this swap (unchanged
        # from the original) -- confirm that is intended.
        if fprio3 > fprio4:
            idx1_ = idx3_
        else:
            idx1_ = idx4_
    idx1 = idx1_4[idx1_]
    idx2 = idx1_4[idx2_]
    idx3_4 = copy.deepcopy(idx1_4)
    fprio3_4 = copy.deepcopy(fprio1_4)
    idx3_4.remove(idx1)
    idx3_4.remove(idx2)
    if idx1_ > idx2_:
        fprio3_4.pop(idx1_)
        fprio3_4.pop(idx2_)
    else:
        fprio3_4.pop(idx2_)
        fprio3_4.pop(idx1_)
    idx3 = idx3_4[np.argsort(np.array(fprio3_4))[-1]]
    idx3_4.remove(idx3)
    idx4 = idx3_4[0]
    idx_list = [idx0, idx1, idx2, idx3, idx4, idx5]
    fpriority_list = np.array(fpriority_list)[idx_list].tolist()
    fd_list = np.array(rMLs)[idx_list].tolist()

    return fpriority_list, fd_list, idx_list
210
+
211
+
212
def normalize(data, mean, std):
    """Return ``(data - mean) / std`` element-wise, with 0 where ``std`` is 0.

    @param data array-like of values to normalize
    @param mean array-like (or scalar) broadcastable against data
    @param std array-like (or scalar) broadcastable against data
    @return numpy array of normalized values; entries whose std is 0 are 0
    """
    centered = np.asarray(data) - np.asarray(mean)
    std = np.asarray(std)
    # Integer inputs would give an integer output buffer, and np.divide
    # refuses to write its floating-point result into one.
    if not np.issubdtype(centered.dtype, np.inexact):
        centered = centered.astype(float)
    # The zero-filled buffer supplies the result wherever std == 0; it is
    # sized to the broadcast shape so a scalar data value also works.
    out = np.zeros(np.broadcast(centered, std).shape, dtype=centered.dtype)
    data_norm = np.divide(centered, std, out=out, where=std != 0)

    return data_norm
221
+
222
+ # predict labels using krr with a given csv file
223
+ # @param csvf the csv file containing headers (first row), data, and label
224
+ # @param colnum_label the column number for the label column
225
+ # @param colnum_desc the starting column number for the descriptor columns
226
+ # @return stat_dict, impt_dict, train_dict, test_dict, perm_dict, regr
227
+
228
+
229
def krr_model_training(csvf, colnum_label, colnum_desc, alpha=1, gamma=1, threshold=0.01):
    """Train an RBF kernel ridge regression model from a CSV file.

    Descriptors and label are z-scored and split 80/20 into train and test.
    Descriptors are kept when their random-forest importance exceeds
    ``threshold``. ``alpha`` and ``gamma`` are tuned by repeated grid search,
    then a final KernelRidge model is fitted on the training part.

    @param csvf path of the CSV file; the first row holds the column names
    @param colnum_label column number of the label
    @param colnum_desc first column number of the descriptors
    @param alpha starting value for the KRR regularisation strength
    @param gamma starting value for the RBF kernel width
    @param threshold minimum random-forest importance for a descriptor
    @return stat_dict, impt_dict, train_dict, test_dict, perm_dict, regr
    """
    # read in desc and label
    with open(csvf, 'r') as f:
        fcsv = csv.reader(f)
        # the header row is taken straight from the file object; the csv
        # reader then continues with the data rows
        headers = np.array(next(f, None).rstrip('\r\n').split(','))[colnum_desc:]
        lines = [line for line in fcsv]
    # keep only rows having the most common number of columns
    lnums = [len(line) for line in lines]
    count = max(set(lnums), key=lnums.count)
    X = []
    y = []
    for line in lines:
        if len(line) == count:
            X.append([float(desc) for desc in line[colnum_desc:]])
            y.append(float(line[colnum_label]))
    X = np.array(X)
    y = np.array(y)
    # process desc and label
    mean_X = np.mean(X, axis=0)
    std_X = np.std(X, axis=0)
    mean_y = np.mean(y, axis=0)
    std_y = np.std(y, axis=0)
    X_norm = normalize(X, mean_X, std_X)
    y_norm = normalize(y, mean_y, std_y)
    # stats
    stat_dict = {
        'mean_X_dict': dict(zip(headers, mean_X)),
        'std_X_dict': dict(zip(headers, std_X)),
        'mean_y': mean_y,
        'std_y': std_y,
    }
    # split to train and test
    X_norm_train, X_norm_test, y_norm_train, y_norm_test = train_test_split(
        X_norm, y_norm, test_size=0.2, random_state=0)
    # feature selection
    selector = RandomForestRegressor(random_state=0, n_estimators=100)
    selector.fit(X_norm_train, y_norm_train)
    X_norm_train_impts = selector.feature_importances_
    idxes = np.where(X_norm_train_impts > threshold)[0]
    print((len(idxes)))
    importances = X_norm_train_impts[idxes]
    features_sel = headers[idxes]
    # importance
    impt_dict = dict(zip(features_sel, importances))
    X_norm_train_sel = X_norm_train.T[idxes].T
    X_norm_test_sel = X_norm_test.T[idxes].T
    # training with krr
    signal = True
    # krr parameters
    kernel = 'rbf'
    factor_lower = -4
    factor_higher = 4
    gamma_lower = gamma * exp(factor_lower)
    gamma_higher = gamma * exp(factor_higher)
    alpha_lower = alpha * exp(factor_lower)
    alpha_higher = alpha * exp(factor_higher)
    lin = 7
    # Integer division: the original "lin / 2" is a float under Python 3 and
    # cannot be used as an array index.
    mid = lin // 2
    # optimize hyperparameters: repeat while a parameter still has its
    # default of 1 or the last optimum lay outside the two central grid points
    cycle_i = 0
    while gamma == 1 or alpha == 1 or not signal:
        gammas = np.linspace(gamma_lower, gamma_higher, lin)
        alphas = np.linspace(alpha_lower, alpha_higher, lin)
        tuned_parameters = [
            {'kernel': [kernel], 'gamma': gammas, 'alpha': alphas}]
        regr = GridSearchCV(KernelRidge(), tuned_parameters,
                            cv=5, scoring='neg_mean_absolute_error')
        regr.fit(X_norm_train_sel, y_norm_train)
        gamma = regr.best_params_['gamma']
        alpha = regr.best_params_['alpha']
        if (gamma < gammas[mid - 1] or gamma > gammas[mid]) or \
                (alpha < alphas[mid - 1] or alpha > alphas[mid]):
            signal = False
            # shrink the window, then re-centre it on the current optimum
            factor_lower *= 0.8
            factor_higher *= 0.8
            if cycle_i > 10:
                # too many refinements: start over with the full window
                factor_lower = -4
                factor_higher = 4
                cycle_i = 0
            gamma_lower = gamma * exp(factor_lower)
            gamma_higher = gamma * exp(factor_higher)
            alpha_lower = alpha * exp(factor_lower)
            alpha_higher = alpha * exp(factor_higher)
        else:
            signal = True
        cycle_i += 1
        print(('gamma is: ', gamma, '. alpha is: ', alpha))
    # final model
    regr = KernelRidge(kernel=kernel, alpha=alpha, gamma=gamma)
    regr.fit(X_norm_train_sel, y_norm_train)
    # predictions, mapped back to the original label scale
    y_norm_train_pred = regr.predict(X_norm_train_sel)
    y_train_pred = y_norm_train_pred * std_y + mean_y
    y_train_data = y_norm_train * std_y + mean_y
    y_norm_test_pred = regr.predict(X_norm_test_sel)
    y_test_pred = y_norm_test_pred * std_y + mean_y
    y_test_data = y_norm_test * std_y + mean_y
    # data
    train_dict = {
        'X_norm_sel_dict': dict(zip(features_sel, X_norm_train_sel.T)),
        'y_data': y_train_data,
        'y_pred': y_train_pred,
    }
    test_dict = {
        'X_norm_sel_dict': dict(zip(features_sel, X_norm_test_sel.T)),
        'y_data': y_test_data,
        'y_pred': y_test_pred,
    }
    # performance
    perm_dict = {
        'score_train': regr.score(X_norm_train_sel, y_norm_train),
        'score_test': regr.score(X_norm_test_sel, y_norm_test),
        'MAE_train': mean_absolute_error(y_train_data, y_train_pred),
        'MAE_test': mean_absolute_error(y_test_data, y_test_pred),
    }

    return stat_dict, impt_dict, train_dict, test_dict, perm_dict, regr
350
+
351
+ # predict labels using krr with a given csv file
352
+ # @param csvf the csv file containing headers (first row), data, and label
353
+ # @param colnum_label the column number for the label column
354
+ # @param colnum_desc the starting column number for the descriptor columns
355
+ # @return impt_dict, perm_dict, ys
356
+
357
+
358
def krr_model_training_loo(csvf, colnum_label, colnum_desc, feature_names=False, alpha=1, gamma=1, threshold=0.01):
    """Leave-one-out evaluation of an RBF kernel ridge regression model.

    For every leave-one-out split of the z-scored data, descriptors are
    selected, ``alpha``/``gamma`` are tuned by grid search when needed, a
    KernelRidge model is fitted and the held-out sample is predicted.
    Descriptor selection uses random-forest importance above ``threshold``,
    or the explicit ``feature_names`` when given. The tuned ``alpha`` and
    ``gamma`` carry over from one split to the next.

    @param csvf path of the CSV file; the first row holds the column names
    @param colnum_label column number of the label
    @param colnum_desc first column number of the descriptors
    @param feature_names list of descriptor names to use, or False to select
                         them by random-forest importance
    @param alpha starting value for the KRR regularisation strength
    @param gamma starting value for the RBF kernel width
    @param threshold minimum random-forest importance for a descriptor
    @return impt_dict (importances of the last split, None when
            feature_names is given), perm_dict (split index -> test MAE),
            ys (one dict of train/test data and predictions per split)
    """
    # read in desc and label
    with open(csvf, 'r') as f:
        fcsv = csv.reader(f)
        # the header row is taken straight from the file object; the csv
        # reader then continues with the data rows
        headers = np.array(next(f, None).rstrip('\r\n').split(','))[colnum_desc:]
        lines = [line for line in fcsv]
    # keep only rows having the most common number of columns
    lnums = [len(line) for line in lines]
    count = max(set(lnums), key=lnums.count)
    X = []
    y = []
    for line in lines:
        if len(line) == count:
            X.append([float(desc) for desc in line[colnum_desc:]])
            y.append(float(line[colnum_label]))
    X = np.array(X)
    y = np.array(y)
    # process desc and label
    mean_X = np.mean(X, axis=0)
    std_X = np.std(X, axis=0)
    mean_y = np.mean(y, axis=0)
    std_y = np.std(y, axis=0)
    X_norm = normalize(X, mean_X, std_X)
    y_norm = normalize(y, mean_y, std_y)
    # split to train and test
    loo = LeaveOneOut()
    total_i = len(X_norm)
    i = 0
    ys = []
    MAEs_test = []
    MAEs_test_i = []
    for train_idx, test_idx in loo.split(X_norm):
        X_norm_train, X_norm_test = X_norm[train_idx], X_norm[test_idx]
        y_norm_train, y_norm_test = y_norm[train_idx], y_norm[test_idx]
        # feature selection
        if not feature_names:
            selector = RandomForestRegressor(random_state=0, n_estimators=100)
            selector.fit(X_norm_train, y_norm_train)
            X_norm_train_impts = selector.feature_importances_
            idxes = np.where(X_norm_train_impts > threshold)[0]
            print((len(idxes)))
            importances = X_norm_train_impts[idxes]
            features_sel = headers[idxes]
            # importance
            impt_dict = dict(zip(features_sel, importances))
            X_norm_train_sel = X_norm_train.T[idxes].T
            X_norm_test_sel = X_norm_test.T[idxes].T
            print((sorted(impt_dict, key=impt_dict.get)))
            print(impt_dict)
        else:
            header_names = headers.tolist()
            idxes = [header_names.index(feature_name)
                     for feature_name in feature_names]
            X_norm_train_sel = X_norm_train.T[idxes].T
            X_norm_test_sel = X_norm_test.T[idxes].T
            features_sel = feature_names
            impt_dict = None
        # training with krr: after the first split, force one grid search
        # while either parameter still has its default value of 1
        signal = i == 0 or (alpha != 1 and gamma != 1)
        # krr parameters
        kernel = 'rbf'
        factor_lower = -4
        factor_higher = 4
        gamma_lower = gamma * exp(factor_lower)
        gamma_higher = gamma * exp(factor_higher)
        alpha_lower = alpha * exp(factor_lower)
        alpha_higher = alpha * exp(factor_higher)
        lin = 7
        # Integer division: the original "lin / 2" is a float under Python 3
        # and cannot be used as an array index.
        mid = lin // 2
        # optimize hyperparameters
        cycle_i = 0
        while gamma == 1 or alpha == 1 or not signal:
            gammas = np.linspace(gamma_lower, gamma_higher, lin)
            alphas = np.linspace(alpha_lower, alpha_higher, lin)
            tuned_parameters = [
                {'kernel': [kernel], 'gamma': gammas, 'alpha': alphas}]
            regr = GridSearchCV(KernelRidge(), tuned_parameters,
                                cv=5, scoring='neg_mean_absolute_error')
            regr.fit(X_norm_train_sel, y_norm_train)
            gamma = regr.best_params_['gamma']
            alpha = regr.best_params_['alpha']
            if (gamma < gammas[mid - 1] or gamma > gammas[mid]) or \
                    (alpha < alphas[mid - 1] or alpha > alphas[mid]):
                signal = False
                # shrink the window, then re-centre it on the current optimum
                factor_lower *= 0.8
                factor_higher *= 0.8
                if cycle_i > 10:
                    # too many refinements: start over with the full window
                    factor_lower = -4
                    factor_higher = 4
                    cycle_i = 0
                gamma_lower = gamma * exp(factor_lower)
                gamma_higher = gamma * exp(factor_higher)
                alpha_lower = alpha * exp(factor_lower)
                alpha_higher = alpha * exp(factor_higher)
            else:
                signal = True
            cycle_i += 1
            print(('gamma is: ', gamma, '. alpha is: ', alpha))
        # final model
        regr = KernelRidge(kernel=kernel, alpha=alpha, gamma=gamma)
        regr.fit(X_norm_train_sel, y_norm_train)
        # predictions, mapped back to the original label scale
        y_norm_train_pred = regr.predict(X_norm_train_sel)
        y_train_pred = y_norm_train_pred * std_y + mean_y
        y_train_data = y_norm_train * std_y + mean_y
        y_norm_test_pred = regr.predict(X_norm_test_sel)
        y_test_pred = y_norm_test_pred * std_y + mean_y
        y_test_data = y_norm_test * std_y + mean_y
        # data (kept in a new name so the label array y is not rebound)
        ys.append({
            'y_train_data': y_train_data,
            'y_train_pred': y_train_pred,
            'y_test_data': y_test_data,
            'y_test_pred': y_test_pred,
        })
        # performance
        MAEs_test.append(mean_absolute_error(y_test_data, y_test_pred))
        MAEs_test_i.append(i)
        print((str(i) + '/' + str(total_i)))
        i += 1
    perm_dict = dict(zip(MAEs_test_i, MAEs_test))

    return impt_dict, perm_dict, ys
505
+
506
+ # predict labels using gradient boosting regressor (GBR) with a given csv file
507
+ # @param csvf the csv file containing headers (first row), data, and label
508
+ # @param colnum_i_label the starting column number for the label column
509
+ # @param colnum_j_label the ending column number for the label column + 1
510
+ # @param colnum_desc the starting column number for the descriptor columns
511
+ # @return stat_dict, impt_dict, train_dict, test_dict, perm_dict, regr
512
+
513
+
514
def gbr_model_training(csvf, colnum_i_label, colnum_j_label, colnum_desc):
    """Train a multi-output gradient boosting regressor from a CSV file.

    Descriptors and labels are z-scored and split 80/20. Descriptors are
    selected by random-forest importance (fitted on the first label column)
    against each candidate threshold, and one
    MultiOutputRegressor(GradientBoostingRegressor) is fitted per threshold.
    The results of the model with the best test score are returned.

    @param csvf path of the CSV file; the first row holds the column names
    @param colnum_i_label first column number of the labels
    @param colnum_j_label last column number of the labels + 1
    @param colnum_desc first column number of the descriptors
    @return stat_dict, impt_dict, train_dict, test_dict, perm_dict, regr
    """
    # load the table: header names of the descriptor columns, then the rows
    with open(csvf, 'r') as handle:
        reader = csv.reader(handle)
        header_line = next(handle, None)
        headers = np.array(header_line.rstrip('\r\n').split(','))[colnum_desc:]
        rows = list(reader)
        # rows whose width differs from the most common width are dropped
        widths = [len(row) for row in rows]
        modal_width = max(set(widths), key=widths.count)
        feature_rows = []
        label_rows = []
        for row in rows:
            if len(row) != modal_width:
                continue
            feature_rows.append([float(cell) for cell in row[colnum_desc:]])
            label_rows.append(
                [float(cell) for cell in row[colnum_i_label:colnum_j_label]])
    X = np.array(feature_rows)
    y = np.array(label_rows)

    # column statistics used for z-scoring
    mean_X, std_X = np.mean(X, axis=0), np.std(X, axis=0)
    mean_y, std_y = np.mean(y, axis=0), np.std(y, axis=0)
    stat_dict = {
        'mean_X_dict': dict(zip(headers, mean_X)),
        'std_X_dict': dict(zip(headers, std_X)),
        'mean_y': mean_y,
        'std_y': std_y,
    }
    X_norm = normalize(X, mean_X, std_X)
    y_norm = normalize(y, mean_y, std_y)

    # hold out 20% of the samples for testing
    X_norm_train, X_norm_test, y_norm_train, y_norm_test = train_test_split(
        X_norm, y_norm, test_size=0.2, random_state=0)

    # descriptor importances from a random forest on the first label column
    forest = RandomForestRegressor(random_state=0, n_estimators=100)
    forest.fit(X_norm_train, y_norm_train.T[0])
    all_importances = forest.feature_importances_

    scores = []
    results = []
    for threshold in np.logspace(-2, -2, 1):
        keep = np.where(all_importances > threshold)[0]
        features_sel = headers[keep]
        impt_dict = dict(zip(features_sel, all_importances[keep]))
        X_norm_train_sel = X_norm_train.T[keep].T
        X_norm_test_sel = X_norm_test.T[keep].T

        # fit the boosted model on the selected descriptors
        regr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
        regr.fit(X_norm_train_sel, y_norm_train)

        # predictions and data mapped back to the original label scale
        y_train_pred = regr.predict(X_norm_train_sel) * std_y + mean_y
        y_train_data = y_norm_train * std_y + mean_y
        y_test_pred = regr.predict(X_norm_test_sel) * std_y + mean_y
        y_test_data = y_norm_test * std_y + mean_y

        train_dict = {
            'X_norm_sel_dict': dict(zip(features_sel, X_norm_train_sel.T)),
            'y_data': y_train_data,
            'y_pred': y_train_pred,
        }
        test_dict = {
            'X_norm_sel_dict': dict(zip(features_sel, X_norm_test_sel.T)),
            'y_data': y_test_data,
            'y_pred': y_test_pred,
        }

        # performance
        score_train = regr.score(X_norm_train_sel, y_norm_train)
        score_test = regr.score(X_norm_test_sel, y_norm_test)
        perm_dict = {
            'score_train': score_train,
            'score_test': score_test,
            'MAE_train': mean_absolute_error(y_train_data, y_train_pred),
            'MAE_test': mean_absolute_error(y_test_data, y_test_pred),
        }
        scores.append(score_test)
        results.append([stat_dict, impt_dict, train_dict,
                        test_dict, perm_dict, regr])

    # hand back the run with the highest test score
    best = np.argsort(np.array(scores))[-1]
    stat_dict, impt_dict, train_dict, test_dict, perm_dict, regr = results[best]

    return stat_dict, impt_dict, train_dict, test_dict, perm_dict, regr
613
+
614
+ # predict labels using a given regr
615
+ # @param core3D mol3D class of a molecule
616
+ # @param spin the spin multiplicity of the core3D
617
+ # @param train_dict the dictionary that contains the training data
618
+ # @param stat_dict the dictionary that contains the statistics of the training data (e.g. mean, std)
619
+ # @param impt_dict the dictionary that contains the important features
620
+ # @param regr the regression model
621
+ # @return bondl_dict, ds (a list of Euclidean distances)
622
+
623
+
624
def ML_model_predict(core3D, spin, train_dict, stat_dict, impt_dict, regr):
    """Predict a label for every atom bonded to each metal of ``core3D``.

    For each bonded atom a descriptor vector is assembled from the metal's
    atomic number, a one-hot spin encoding and the per-atom descriptors of
    the six atoms in the order given by feature_prep. It is z-scored with the
    training statistics, restricted to the keys of ``impt_dict`` and passed
    to ``regr``.

    @param core3D mol3D class of a molecule
    @param spin the spin multiplicity of the core3D (1-6; sets the one-hot bit)
    @param train_dict the dictionary that contains the training data
                      (key 'X_norm_sel_dict' is read)
    @param stat_dict the dictionary that contains the statistics of the
                     training data ('mean_y', 'std_y', 'mean_X_dict',
                     'std_X_dict')
    @param impt_dict the dictionary whose keys are the selected features
    @param regr the fitted regression model
    @return bondl_dict (bonded atom index -> prediction), ds (Euclidean
            distances from the last processed atom's descriptor vector to
            every training sample; empty when no atom was processed)
    """
    bondl_keys = []
    bondls = []
    spin_ohe = [0] * 6
    spin_ohe[spin - 1] = 1
    mean_y = stat_dict['mean_y']
    std_y = stat_dict['std_y']
    mean_X_dict = stat_dict['mean_X_dict']
    std_X_dict = stat_dict['std_X_dict']
    midxes = core3D.findMetal()
    Xs_train = train_dict['X_norm_sel_dict']
    # Defined up front so the return works when there is no metal or no
    # bonded atom (the original raised a NameError in that case).
    ds = []
    for midx in midxes:
        matno = core3D.getAtom(midx).atno
        fidxes = core3D.getBondedAtoms(midx)
        for fidx_i, fidx in enumerate(fidxes):
            _, _, idx_list = feature_prep(core3D, fidx_i)
            # raw descriptors and their names
            descs = [matno]
            desc_names = ['matno_0']
            descs += spin_ohe
            for i in range(len(spin_ohe)):
                desc_names.append('spin' + str(i) + '_ohe')
            for idx_i, idx in enumerate(idx_list):
                fidx_ = fidxes[idx]
                descriptor_names, descriptors = get_descriptor_vector_for_atidx(
                    core3D, fidx_)
                for descriptor_name in descriptor_names:
                    desc_names.append(descriptor_name + '_' + str(idx_i))
                descs += descriptors
            desc_dict = dict(zip(desc_names, descs))
            # z-score the selected descriptors; a zero std maps to 0
            descs = []
            Xs_train_sel = []
            for key in list(impt_dict.keys()):
                centered = desc_dict[key] - mean_X_dict[key]
                desc = np.divide(centered, std_X_dict[key],
                                 out=np.zeros_like(centered),
                                 where=std_X_dict[key] != 0)
                descs.append(desc)
                Xs_train_sel.append(Xs_train[key].tolist())
            # distance of this descriptor vector to every training sample
            # (Xs_train_sel is features x samples, hence the transpose)
            desc_vector = np.array(descs)
            train_matrix = np.array(Xs_train_sel).T
            ds = [np.linalg.norm(desc_vector - train_matrix[i])
                  for i in range(len(Xs_train_sel[0]))]
            bondl = regr.predict([descs]) * std_y + mean_y
            bondl_keys.append(fidx)
            bondls.append(bondl)
    bondl_dict = dict(zip(bondl_keys, bondls))

    return bondl_dict, ds
678
+
679
+ # predict labels using a given regr
680
+ # @param core3D mol3D class of a molecule
681
+ # @param spin the spin multiplicity of the core3D
682
+ # @param mligcatom the external atom index of the mlig
682
+ # @return bondl_dict, bondl2, ds1, ds2 (ds1 and ds2 hold Euclidean distances)
684
+
685
+
686
def krr_model_predict(core3D, spin, mligcatom):
    """Predict labels with the two pickled KRR models in ``python_krr``.

    The first model ('hat') is applied to every atom bonded to each metal.
    The second model ('hat2') is applied only to the bonded atom whose index
    equals ``mligcatom``. Both use the mean/std read from
    'hat_X_mean_std.csv' for the descriptors.

    @param core3D mol3D class of a molecule
    @param spin the spin multiplicity of the core3D (1-6; sets the one-hot bit)
    @param mligcatom the external atom index of the mlig
    @return bondl_dict (bonded atom index -> first-model prediction),
            bondl2 (second-model prediction, None when mligcatom is not a
            bonded atom), ds1 (per bonded atom, the distances to the first
            model's training samples), ds2 (distances to the second model's
            training samples, empty when bondl2 is None)
    """
    bondl_keys = []
    bondls = []
    spin_ohe = [0] * 6
    spin_ohe[spin - 1] = 1
    globs = globalvars()
    if globs.custom_path:  # test if a custom path is used:
        fpath = str(globs.custom_path).rstrip('/') + "/python_krr"
    else:
        fpath = str(resource_files("molSimplify").joinpath("python_krr"))
    # load model
    # NOTE: pickle.load can execute arbitrary code; a custom_path must only
    # point at trusted model files.
    f_model = fpath + '/hat_krr_model.pkl'
    with open(f_model, 'rb') as f:
        regr = pickle.load(f)
    Xs_train = regr.X_fit_
    # load stats
    # y stats (second row: mean, std)
    f_stats = fpath + '/hat_y_mean_std.csv'
    with open(f_stats, 'r') as f:
        fcsv = csv.reader(f)
        for i, line in enumerate(fcsv):
            if i == 1:
                mean_y = float(line[0])
                std_y = float(line[1])
    # x stats (rows: feature names, means, stds)
    f_stats = fpath + '/hat_X_mean_std.csv'
    with open(f_stats, 'r') as f:
        fcsv = csv.reader(f)
        for i, line in enumerate(fcsv):
            if i == 0:
                feature_names = line
            if i == 1:
                mean_X = [float(ele) for ele in line]
            if i == 2:
                std_X = [float(ele) for ele in line]
    mean_X_dict = dict(zip(feature_names, mean_X))
    std_X_dict = dict(zip(feature_names, std_X))
    # load feature names (the last row of the file is used)
    f_stats = fpath + '/hat_feature_names.csv'
    with open(f_stats, 'r') as f:
        fcsv = csv.reader(f)
        for i, line in enumerate(fcsv):
            keys = line
    # rOH
    # load model2
    f_model = fpath + '/hat2_krr_model.pkl'
    with open(f_model, 'rb') as f:
        regr2 = pickle.load(f)
    X2s_train = regr2.X_fit_
    # y2 stats (second row: mean, std)
    f_stats = fpath + '/hat2_y_mean_std.csv'
    with open(f_stats, 'r') as f:
        fcsv = csv.reader(f)
        for i, line in enumerate(fcsv):
            if i == 1:
                mean_y2 = float(line[0])
                std_y2 = float(line[1])
    # x2 stats: the second model reuses the first model's descriptor
    # statistics (reading hat2_X_mean_std.csv was disabled in the original)
    mean_X2_dict = dict(zip(feature_names, mean_X))
    std_X2_dict = dict(zip(feature_names, std_X))
    # load feature2 names (the last row of the file is used)
    f_stats = fpath + '/hat2_feature_names.csv'
    with open(f_stats, 'r') as f:
        fcsv = csv.reader(f)
        for i, line in enumerate(fcsv):
            keys2 = line
    # Defaults so the return works when there is no metal or mligcatom is
    # not a coordinating atom (the original raised a NameError then).
    bondl2 = None
    ds1 = []
    ds2 = []
    train_matrix = np.array(Xs_train)
    train_matrix2 = np.array(X2s_train)
    # get features
    midxes = core3D.findMetal()
    for midx in midxes:
        matno = core3D.getAtom(midx).atno
        fidxes = core3D.getBondedAtoms(midx)
        ds1 = []
        for fidx_i, fidx in enumerate(fidxes):
            _, _, idx_list = feature_prep(core3D, fidx_i)
            # raw descriptors and their names
            descs = [matno]
            desc_names = ['matno_0']
            descs += spin_ohe
            for i in range(len(spin_ohe)):
                desc_names.append('spin' + str(i) + '_ohe')
            for idx_i, idx in enumerate(idx_list):
                fidx_ = fidxes[idx]
                descriptor_names, descriptors = get_descriptor_vector_for_atidx(
                    core3D, fidx_)
                for descriptor_name in descriptor_names:
                    desc_names.append(descriptor_name + '_' + str(idx_i))
                descs += descriptors
            desc_dict = dict(zip(desc_names, descs))
            # z-score the first model's descriptors; a zero std maps to 0
            descs = []
            for key in keys:
                centered = desc_dict[key] - mean_X_dict[key]
                desc = np.divide(centered, std_X_dict[key],
                                 out=np.zeros_like(centered),
                                 where=std_X_dict[key] != 0)
                descs.append(desc)
            # Distance to every training sample. X_fit_ is samples x
            # features; the original looped over len(Xs_train[0]), the
            # feature count, so it only covered the first few samples.
            desc_vector = np.array(descs)
            ds = [np.linalg.norm(desc_vector - x_train)
                  for x_train in train_matrix]
            ds1.append(ds)
            bondl = regr.predict([descs]) * std_y + mean_y
            bondl_keys.append(fidx)
            bondls.append(bondl)
            if fidx == mligcatom:
                # same procedure with the second model's descriptors
                descs = []
                for key in keys2:
                    centered = desc_dict[key] - mean_X2_dict[key]
                    desc = np.divide(centered, std_X2_dict[key],
                                     out=np.zeros_like(centered),
                                     where=std_X2_dict[key] != 0)
                    descs.append(desc)
                desc_vector = np.array(descs)
                ds2 = [np.linalg.norm(desc_vector - x_train)
                       for x_train in train_matrix2]
                bondl2 = regr2.predict([descs]) * std_y2 + mean_y2

    bondl_dict = dict(zip(bondl_keys, bondls))

    return bondl_dict, bondl2, ds1, ds2
838
+
839
+ # ## predict labels using gradient boosting regressor (GBR) with a given csv file
840
+ # # @param csvf the csv file containing headers (first row), data, and label
841
+ # # @param colnum_i_label the starting column number for the label column
842
+ # # @param colnum_j_label the ending column number for the label column + 1
843
+ # # @param colnum_desc the starting column number for the descriptor columns
844
+ # # @return y_train_data, y_train_pred, y_test_data, y_test_pred, score
845
+ # def krr_model_predict(core3D, spin, stat_dict, impt_dict, regr):
846
+ # bondl_keys = []
847
+ # bondls = []
848
+ # spin_ohe = [0] * 6
849
+ # spin_ohe[spin - 1] = 1
850
+ # mean_y = stat_dict['mean_y']
851
+ # std_y = stat_dict['std_y']
852
+ # mean_X = stat_dict['mean_X']
853
+ # std_X = stat_dict['std_X']
854
+ # midxes = core3D.findMetal()
855
+ # for midx in midxes:
856
+ # matno = core3D.getAtom(midx).atno
857
+ # fidxes = core3D.getBondedAtoms(midx)
858
+ # for fidx_i, fidx in enumerate(fidxes):
859
+ # fprio_list, fd_list, idx_list = feature_prep(core3D, fidx_i)
860
+ # descs = []
861
+ # desc_names = []
862
+ # descs.append(matno)
863
+ # desc_names.append('matno_0')
864
+ # descs += spin_ohe
865
+ # for i in range(len(spin_ohe)):
866
+ # desc_names.append('spin' + str(i) + '_ohe')
867
+ # for idx_i, idx in enumerate(idx_list):
868
+ # fidx_ = fidxes[idx]
869
+ # descriptor_names, descriptors = get_descriptor_vector_for_atidx(core3D, fidx_)
870
+ # for descriptor_name in descriptor_names:
871
+ # desc_names.append(descriptor_name + '_' + str(idx_i))
872
+ # descs += descriptors
873
+ # normalize(descs, mean_X, std_X)
874
+ # desc_dict = dict(zip(desc_names, descs))
875
+ # descs = []
876
+ # for key in impt_dict.keys():
877
+ # desc = desc_dict[key]
878
+ # descs.append(desc)
879
+ # regr.fit()
880
+ # bondl = regr.predict([descs]) * std_y + mean_y
881
+ # bondl_keys.append(fidx)
882
+ # bondls.append(bondl)
883
+ # bondl_dict = dict(zip(bondl_keys, bondls))
884
+ #
885
+ # return bondl_dict
886
+
887
+ # wrapper to get KRR predictions for bondl_core3D, bondl_m3D, bondl_m3Dsub from a known mol3D using partial charges
888
+ # @param mol mol3D of the molecule
889
+ # @param charge charge of the molecule
890
+ # @return KRR-predicted bondl_core3D
891
+ # KRR accuracies for bondl_core3D: 98.2% (training score) and 47.6 (test score)
892
+ # KRR accuracies for bondl_m3D: 99.5% (training score) and 51.1 (test score)
893
+
894
+
895
def invoke_KRR_from_mol3d_dQ(mol, charge):
    """Predict 'bondl_core3D' and 'bondl_m3D' for a molecule with kernel ridge regression.

    The feature vector comes from ``ffeatures(mol, charge)``; only entries
    7..12 of it are fed to the models. For each target a fresh RBF
    ``KernelRidge`` model is fitted on the normalized training set stored in
    ``python_krr/X_norm_train_TS.csv`` / ``python_krr/y_norm_train_TS.csv``
    and the prediction is rescaled with that target's mean/std constants.

    NOTE(review): both targets are fitted on the same two training files;
    only the hyperparameters and normalization constants differ. Confirm
    that this is intended.

    :param mol: mol3D of the molecule, forwarded to ``ffeatures``.
    :param charge: charge of the molecule, forwarded to ``ffeatures``.
    :return: dict mapping 'bondl_core3D' and 'bondl_m3D' to the de-normalized
        KRR prediction (element ``[0][0]`` of the model output).
    """
    # Partial-charge based features; the accompanying names are not needed.
    _, features = ffeatures(mol, charge)

    # Locate the training data: a user-supplied custom path takes precedence
    # over the data shipped inside the package.
    globs = globalvars()
    if globs.custom_path:
        data_root = str(globs.custom_path).rstrip('/')
        X_norm_train_csv = data_root + "/python_krr/X_norm_train_TS.csv"
        y_norm_train_csv = data_root + "/python_krr/y_norm_train_TS.csv"
    else:
        X_norm_train_csv = resource_files("molSimplify.python_krr").joinpath("X_norm_train_TS.csv")
        y_norm_train_csv = resource_files("molSimplify.python_krr").joinpath("y_norm_train_TS.csv")

    # Header-less CSV files that contain nothing but floats.
    with open(X_norm_train_csv, 'r') as handle:
        X_norm_train = np.array(
            [[float(cell) for cell in row] for row in csv.reader(handle)])
    with open(y_norm_train_csv, 'r') as handle:
        y_norm_train = np.array(
            [[float(cell) for cell in row] for row in csv.reader(handle)])

    # Per-target KRR hyperparameters and normalization constants.
    settings = {
        'bondl_core3D': {
            'alpha': 0.1,
            'gamma': 4.6415888336127775,
            'mean_y': 1.8556069976566096,
            'std_y': 0.08511267085380758,
            'mean_X': np.array([1.1886128903870394, 1.0746595698697274, 1.0089390403652372,
                                1.0051636435711488, 0.9639844597149281, 1.5924309727104378]),
            'std_X': np.array([1.4887238067607071, 1.4391120341824508, 1.351343230273359,
                               1.302911028297482, 1.1511093513567663, 0.7366350688359029]),
        },
        'bondl_m3D': {
            'alpha': 0.015848931924611134,
            'gamma': 8.531678524172808,
            'mean_y': 1.1429284052746633,
            'std_y': 0.04763054722349127,
            'mean_X': np.array(
                [-1.17136495, -1.09058534, -1.04062806, -1.01379334, -0.92612448, -1.30558513]),
            'std_X': np.array(
                [1.36359461, 1.32785945, 1.26392399, 1.21494676, 1.0253893, 0.5940198]),
        },
    }

    selected = np.array(features[7:13])
    bondl_dict = {}
    for target, cfg in settings.items():
        # Normalize the query point with this target's statistics.
        query = (selected - cfg['mean_X']) / cfg['std_X']
        model = KernelRidge(kernel='rbf', alpha=cfg['alpha'], gamma=cfg['gamma'])
        model.fit(X_norm_train, y_norm_train)
        # Undo the label normalization to obtain the actual prediction.
        prediction = model.predict([query]) * cfg['std_y'] + cfg['mean_y']
        bondl_dict[target] = prediction[0][0]

    return bondl_dict
969
+
970
+ # wrapper to get KRR predictions for bondl_core3D from a known mol3D using RAC-190
971
+ # @param mol mol3D of the molecule
972
+ # @param charge charge of the molecule
973
+ # @return KRR-predicted bondl_core3D
974
+ # KRR accuracies: 98.2% (training score) and 47.6 (test score)
975
+
976
+
977
def invoke_KRR_from_mol3d_RACs(mol, charge):
    """Predict 'bondl_core3D' for a molecule with kernel ridge regression.

    The feature vector comes from ``ffeatures(mol, charge)``; only entries
    7..12 of it are fed to an RBF ``KernelRidge`` model fitted on
    ``python_krr/X_norm_train_TS.csv`` / ``python_krr/y_norm_train_TS.csv``.

    The training files used to be read from an absolute path on one
    developer's machine, which cannot exist anywhere else. They are now
    located and loaded exactly as in ``invoke_KRR_from_mol3d_dQ``: from
    ``globalvars().custom_path`` when one is configured, otherwise from the
    ``molSimplify.python_krr`` package data.

    :param mol: mol3D of the molecule, forwarded to ``ffeatures``.
    :param charge: charge of the molecule, forwarded to ``ffeatures``.
    :return: the de-normalized KRR prediction (element ``[0][0]`` of the
        model output).
    """
    # Partial-charge based features; the accompanying names are not needed.
    _, features = ffeatures(mol, charge)

    # Resolve the training data location (custom path first, then package data).
    globs = globalvars()
    if globs.custom_path:
        data_root = str(globs.custom_path).rstrip('/')
        X_norm_train_csv = data_root + "/python_krr/X_norm_train_TS.csv"
        y_norm_train_csv = data_root + "/python_krr/y_norm_train_TS.csv"
    else:
        krr_data = resource_files("molSimplify.python_krr")
        X_norm_train_csv = krr_data.joinpath("X_norm_train_TS.csv")
        y_norm_train_csv = krr_data.joinpath("y_norm_train_TS.csv")

    # Header-less CSV files that contain nothing but floats.
    with open(X_norm_train_csv, 'r') as handle:
        X_norm_train = np.array(
            [[float(cell) for cell in row] for row in csv.reader(handle)])
    with open(y_norm_train_csv, 'r') as handle:
        y_norm_train = np.array(
            [[float(cell) for cell in row] for row in csv.reader(handle)])

    # KRR hyperparameters and normalization constants for bondl_core3D.
    kernel = 'rbf'
    alpha = 0.1
    gamma = 4.6415888336127775
    mean_y_norm_train = 1.8556069976566096
    std_y_norm_train = 0.08511267085380758
    mean_X_norm_train = np.array([1.1886128903870394, 1.0746595698697274, 1.0089390403652372,
                                  1.0051636435711488, 0.9639844597149281, 1.5924309727104378])
    std_X_norm_train = np.array([1.4887238067607071, 1.4391120341824508, 1.351343230273359,
                                 1.302911028297482, 1.1511093513567663, 0.7366350688359029])

    # Normalize the query point, fit, predict, and undo the label normalization.
    X_norm_test = np.array(features[7:13])
    X_norm_test = (X_norm_test - mean_X_norm_train) / std_X_norm_train
    model = KernelRidge(kernel=kernel, alpha=alpha, gamma=gamma)
    model.fit(X_norm_train, y_norm_train)
    y_norm_test = model.predict([X_norm_test])
    y_norm_test = y_norm_test * std_y_norm_train + mean_y_norm_train
    bondl_core3D = y_norm_test[0][0]

    return bondl_core3D
1013
+
1014
+ # Gets the RACs of a given atidx
1015
+ # @param mol mol3D of this molecule
1016
+ # @param atidx the index of the atom of concern
1017
+ # @return descriptor_names updated names
1018
+ # @return descriptors updated RACs
1019
+
1020
+
1021
def get_descriptor_vector_for_atidx(mol, atidx, depth=4, oct=False):
    """Collect atom-centred autocorrelation and deltametric descriptors for one atom.

    Autocorrelation column names are kept as returned by
    ``generate_atomonly_autocorrelations``; deltametric column names are
    prefixed with ``'D_'``. Every entry of a generator's ``'results'`` must
    provide ``tolist()``.

    :param mol: mol3D of the molecule.
    :param atidx: index of the atom the descriptors start from.
    :param depth: depth forwarded to both generators.
    :param oct: flag forwarded to both generators.
    :return: tuple ``(descriptor_names, descriptors)`` of two flat lists.
    """
    autocorrelations = generate_atomonly_autocorrelations(
        mol, atidx, False, depth, oct)
    descriptor_names = [
        name for group in autocorrelations['colnames'] for name in group]
    descriptors = [
        value for block in autocorrelations['results'] for value in block.tolist()]

    deltametrics = generate_atomonly_deltametrics(
        mol, atidx, False, depth, oct)
    descriptor_names.extend(
        'D_' + name for group in deltametrics['colnames'] for name in group)
    descriptors.extend(
        value for block in deltametrics['results'] for value in block.tolist())

    return descriptor_names, descriptors
1039
+
1040
+
1041
def generate_revised_atomonly_autocorrelations(mol, atomIdx, loud, depth=4, oct=True):
    """Compute atom-only autocorrelations for five atomic properties.

    ``atom_only_autocorrelation`` is evaluated once per property, in the
    order electronegativity (chi), nuclear_charge (Z), ident (I),
    topology (T), size (S).

    :param mol: mol3D class instance.
    :param atomIdx: int, index of the atom the autocorrelation starts from.
    :param loud: accepted for interface compatibility; not used here.
    :param depth: maximum depth; names are generated for 0..depth inclusive.
    :param oct: flag forwarded to ``atom_only_autocorrelation``.
    :return: dict with ``'colnames'`` (one list of ``'<label>-<d>'`` names per
        property) and ``'results'`` (one ``atom_only_autocorrelation`` result
        per property).
    """
    property_labels = (
        ('electronegativity', 'chi'),
        ('nuclear_charge', 'Z'),
        ('ident', 'I'),
        ('topology', 'T'),
        ('size', 'S'),
    )
    colnames = []
    result = []
    for prop, label in property_labels:
        values = atom_only_autocorrelation(mol, prop, depth, atomIdx, oct=oct)
        colnames.append(['-'.join((label, str(d))) for d in range(depth + 1)])
        result.append(values)
    return {'colnames': colnames, 'results': result}
1066
+
1067
+
1068
def generate_atomonly_ratiometrics(mol, atomIdx, loud, depth=4, oct=True):
    """Compute atom-only ratiometrics for every numerator/denominator property pair.

    ``atom_only_ratiometric`` is evaluated for each of the five numerator
    properties (chi, Z, I, T, S) combined with each of the three denominator
    properties (chi, Z, S); the denominator varies fastest.

    :param mol: mol3D class instance.
    :param atomIdx: int, index of the atom the metric starts from.
    :param loud: accepted for interface compatibility; not used here.
    :param depth: maximum depth; names are generated for 0..depth inclusive.
    :param oct: flag forwarded to ``atom_only_ratiometric``.
    :return: dict with ``'colnames'`` (one list of ``'<num>-<den>-<d>'`` names
        per pair) and ``'results'`` (one ``atom_only_ratiometric`` result per
        pair).
    """
    numerators = (
        ('electronegativity', 'chi'),
        ('nuclear_charge', 'Z'),
        ('ident', 'I'),
        ('topology', 'T'),
        ('size', 'S'),
    )
    denominators = (
        ('electronegativity', 'chi'),
        ('nuclear_charge', 'Z'),
        ('size', 'S'),
    )
    colnames = []
    result = []
    for num_prop, num_label in numerators:
        for den_prop, den_label in denominators:
            values = atom_only_ratiometric(
                mol, num_prop, den_prop, depth, atomIdx, oct=oct)
            colnames.append(
                ['-'.join((num_label, den_label, str(d))) for d in range(depth + 1)])
            result.append(values)
    return {'colnames': colnames, 'results': result}
1099
+
1100
+
1101
def generate_atomonly_summetrics(mol, atomIdx, loud, depth=4, oct=True):
    """Compute atom-only summetrics for five atomic properties.

    ``atom_only_summetric`` is evaluated once per property, in the order
    electronegativity (chi), nuclear_charge (Z), ident (I), topology (T),
    size (S).

    :param mol: mol3D class instance.
    :param atomIdx: int, index of the atom the metric starts from.
    :param loud: accepted for interface compatibility; not used here.
    :param depth: maximum depth; names are generated for 0..depth inclusive.
    :param oct: flag forwarded to ``atom_only_summetric``.
    :return: dict with ``'colnames'`` (one list of ``'<label>-<d>'`` names per
        property) and ``'results'`` (one ``atom_only_summetric`` result per
        property).
    """
    property_labels = (
        ('electronegativity', 'chi'),
        ('nuclear_charge', 'Z'),
        ('ident', 'I'),
        ('topology', 'T'),
        ('size', 'S'),
    )
    colnames = []
    result = []
    for prop, label in property_labels:
        values = atom_only_summetric(mol, prop, depth, atomIdx, oct=oct)
        colnames.append(['-'.join((label, str(d))) for d in range(depth + 1)])
        result.append(values)
    return {'colnames': colnames, 'results': result}
1126
+
1127
+
1128
def generate_revised_atomonly_deltametrics(mol, atomIdx, loud, depth=4, oct=True):
    """Compute atom-only deltametrics for five atomic properties.

    ``atom_only_deltametric`` is evaluated once per property, in the order
    electronegativity (chi), nuclear_charge (Z), ident (I), topology (T),
    size (S).

    :param mol: mol3D class instance.
    :param atomIdx: int, index of the atom the metric starts from.
    :param loud: accepted for interface compatibility; not used here.
    :param depth: maximum depth; names are generated for 0..depth inclusive.
    :param oct: flag forwarded to ``atom_only_deltametric``.
    :return: dict with ``'colnames'`` (one list of ``'<label>-<d>'`` names per
        property) and ``'results'`` (one ``atom_only_deltametric`` result per
        property).
    """
    property_labels = (
        ('electronegativity', 'chi'),
        ('nuclear_charge', 'Z'),
        ('ident', 'I'),
        ('topology', 'T'),
        ('size', 'S'),
    )
    colnames = []
    result = []
    for prop, label in property_labels:
        values = atom_only_deltametric(mol, prop, depth, atomIdx, oct=oct)
        colnames.append(['-'.join((label, str(d))) for d in range(depth + 1)])
        result.append(values)
    return {'colnames': colnames, 'results': result}
1153
+
1154
+ # Gets the rRACs of a given atidx
1155
+ # @param mol mol3D of this molecule
1156
+ # @param atidx the index of the atom of concern
1157
+ # @return descriptor_names updated names
1158
+ # @return descriptors updated RACs
1159
+
1160
+
1161
def get_revised_descriptor_vector_for_atidx(mol, atidx, depth=4, oct=False):
    """Collect the revised atom-centred descriptors (A, R, S and D families) for one atom.

    Four generators are run in sequence and their outputs concatenated:
    autocorrelations (names prefixed ``'A_'``), ratiometrics (``'R_'``),
    summetrics (``'S_'``) and deltametrics (``'D_'``). Every entry of a
    generator's ``'results'`` must provide ``tolist()``.

    :param mol: mol3D of the molecule.
    :param atidx: index of the atom the descriptors start from.
    :param depth: depth forwarded to every generator.
    :param oct: flag forwarded to every generator.
    :return: tuple ``(descriptor_names, descriptors)`` of two flat lists.
    """
    families = (
        ('A_', generate_revised_atomonly_autocorrelations),
        ('R_', generate_atomonly_ratiometrics),
        ('S_', generate_atomonly_summetrics),
        ('D_', generate_revised_atomonly_deltametrics),
    )
    descriptor_names = []
    descriptors = []
    for prefix, generator in families:
        family = generator(mol, atidx, False, depth, oct)
        descriptor_names.extend(
            prefix + name for group in family['colnames'] for name in group)
        descriptors.extend(
            value for block in family['results'] for value in block.tolist())

    return descriptor_names, descriptors
1194
+
1195
+ # commented out default_plot() as conda repo does not automatically conda install matplotlib
1196
+
1197
+
1198
def default_plot(x, y, name=False):
    """Draw a square parity-style scatter plot of ``y`` against ``x``.

    Both axes share the same range, derived from ``x`` only (min/max rounded
    to one decimal and padded by 0.1), and a dashed diagonal is drawn across
    that range. The global Matplotlib rcParams (font, axes and tick
    sizes/widths) are modified and stay modified after the call.

    Changes relative to the previous implementation:
      * the Axes is obtained with ``plt.gca()``; ``plt.axes()`` adds a *new*
        Axes in current Matplotlib, which hid the limits and labels that had
        just been set on the first one;
      * rcParams and the font are configured before the figure is created so
        that they style this figure and not only the next one.

    :param x: sequence of values for the horizontal axis.
    :param y: sequence of values for the vertical axis.
    :param name: if truthy, the figure is saved as ``<cwd>/<name>.eps``
        (dpi=400); otherwise nothing is written.
    """
    xlabel = r'distance / ${\rm \AA}$'
    ylabel = r'distance / ${\rm \AA}$'
    font = {'family': 'sans-serif',
            'size': 22}

    # Style settings have to be in place before any artist is created.
    plt.rc('font', **font)
    plt.rcParams['axes.linewidth'] = 3
    plt.rcParams['xtick.major.size'] = 10
    plt.rcParams['xtick.major.width'] = 3
    plt.rcParams['ytick.major.size'] = 10
    plt.rcParams['ytick.major.width'] = 3
    plt.rcParams['xtick.minor.size'] = 5
    plt.rcParams['xtick.minor.width'] = 3
    plt.rcParams['ytick.minor.size'] = 5
    plt.rcParams['ytick.minor.width'] = 3

    plt.figure(figsize=(7, 6))
    ax = plt.gca()

    # Axis range: taken from x alone and applied to both axes, so the dashed
    # diagonal below is the y == x line of a square plot.
    x = np.array(x)
    y = np.array(y)
    x_min = float(format(np.amin(x), '.1f')) - 0.1
    x_max = float(format(np.amax(x), '.1f')) + 0.1
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(x_min, x_max)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    # Ticks: major every 0.4, minor every 0.2, drawn inward on all four sides.
    ax.xaxis.set_major_locator(ticker.MultipleLocator(0.4))
    ax.xaxis.set_minor_locator(ticker.MultipleLocator(0.2))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(0.4))
    ax.yaxis.set_minor_locator(ticker.MultipleLocator(0.2))
    ax.tick_params(which='both', axis='both', direction='in',
                   bottom=True, top=True, right=True, left=True)

    ax.plot(x, y, 'o', markeredgecolor='k')
    ax.plot([x_min, x_max], [x_min, x_max], linestyle='dashed', color='k')
    # NOTE: an annotation block (mean line and error text box) used to live
    # here; it was disabled on 2022/02/17 because it relied on undefined
    # variables `a` and `e`.

    plt.tight_layout()

    if name:
        plt.savefig(os.path.join(os.getcwd(), name + '.eps'), dpi=400)
1259
+
1260
+ # # plt.imshow(data,interpolation='none')
1261
+ # # # plt.imshow(data,interpolation='nearest')
1262
+ # # plt.savefig('relative_energies_for_Fe-py4.eps',dpi=400)