molSimplify 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (651) hide show
  1. docs/source/conf.py +224 -0
  2. molSimplify/Classes/__init__.py +6 -0
  3. molSimplify/Classes/atom3D.py +235 -0
  4. molSimplify/Classes/dft_obs.py +130 -0
  5. molSimplify/Classes/globalvars.py +827 -0
  6. molSimplify/Classes/helpers.py +161 -0
  7. molSimplify/Classes/ligand.py +2330 -0
  8. molSimplify/Classes/mGUI.py +2493 -0
  9. molSimplify/Classes/mWidgets.py +438 -0
  10. molSimplify/Classes/miniGUI.py +41 -0
  11. molSimplify/Classes/mol2D.py +260 -0
  12. molSimplify/Classes/mol3D.py +5846 -0
  13. molSimplify/Classes/monomer3D.py +253 -0
  14. molSimplify/Classes/partialcharges.py +226 -0
  15. molSimplify/Classes/protein3D.py +1178 -0
  16. molSimplify/Classes/rundiag.py +151 -0
  17. molSimplify/Data/ML.dat +212 -0
  18. molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
  19. molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
  20. molSimplify/Data/MLS_angle_for_click.dat +8 -0
  21. molSimplify/Data/MLS_angle_for_inter.dat +23 -0
  22. molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
  23. molSimplify/Data/MLS_angle_for_intra.dat +10 -0
  24. molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
  25. molSimplify/Data/MLS_angle_for_oa.dat +18 -0
  26. molSimplify/Data/ML_FSR_for_inter.dat +112 -0
  27. molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
  28. molSimplify/Data/ML_bond_for_cat.dat +8 -0
  29. molSimplify/Data/ML_bond_for_click.dat +8 -0
  30. molSimplify/Data/ML_bond_for_inter.dat +48 -0
  31. molSimplify/Data/ML_bond_for_inter2.dat +48 -0
  32. molSimplify/Data/ML_bond_for_intra.dat +10 -0
  33. molSimplify/Data/ML_bond_for_intra2.dat +6 -0
  34. molSimplify/Data/ML_bond_for_oa.dat +18 -0
  35. molSimplify/Data/bp1.dat +21 -0
  36. molSimplify/Data/li.dat +3 -0
  37. molSimplify/Data/no.dat +2 -0
  38. molSimplify/Data/oct.dat +7 -0
  39. molSimplify/Data/pbp.dat +8 -0
  40. molSimplify/Data/spy.dat +6 -0
  41. molSimplify/Data/sqap.dat +9 -0
  42. molSimplify/Data/sqp.dat +5 -0
  43. molSimplify/Data/tbp.dat +6 -0
  44. molSimplify/Data/tdhd.dat +9 -0
  45. molSimplify/Data/thd.dat +5 -0
  46. molSimplify/Data/tpl.dat +4 -0
  47. molSimplify/Data/tpr.dat +7 -0
  48. molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
  49. molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
  50. molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
  51. molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
  52. molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
  53. molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
  54. molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
  55. molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
  56. molSimplify/Informatics/MOF/__init__.py +0 -0
  57. molSimplify/Informatics/MOF/atomic.py +267 -0
  58. molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
  59. molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
  60. molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
  61. molSimplify/Informatics/Mol2Parser.py +46 -0
  62. molSimplify/Informatics/RACassemble.py +408 -0
  63. molSimplify/Informatics/__init__.py +0 -0
  64. molSimplify/Informatics/active_learning/__init__.py +0 -0
  65. molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
  66. molSimplify/Informatics/autocorrelation.py +1930 -0
  67. molSimplify/Informatics/clean_autocorrelation.py +778 -0
  68. molSimplify/Informatics/coulomb_analyze.py +67 -0
  69. molSimplify/Informatics/decoration_manager.py +193 -0
  70. molSimplify/Informatics/geo_analyze.py +88 -0
  71. molSimplify/Informatics/geometrics.py +56 -0
  72. molSimplify/Informatics/graph_analyze.py +163 -0
  73. molSimplify/Informatics/graph_racs.py +288 -0
  74. molSimplify/Informatics/jupyter_vis.py +172 -0
  75. molSimplify/Informatics/lacRACAssemble.py +2192 -0
  76. molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
  77. molSimplify/Informatics/misc_descriptors.py +198 -0
  78. molSimplify/Informatics/organic_fingerprints.py +61 -0
  79. molSimplify/Informatics/partialcharges.py +345 -0
  80. molSimplify/Informatics/protein/activesite.py +53 -0
  81. molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
  82. molSimplify/Informatics/rac155_geo.py +48 -0
  83. molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
  84. molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
  85. molSimplify/Ligands/12crown4.mol +62 -0
  86. molSimplify/Ligands/Antipyrine.mol +58 -0
  87. molSimplify/Ligands/BPAbipy.mol +106 -0
  88. molSimplify/Ligands/Hpyrrole.mol +26 -0
  89. molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
  90. molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
  91. molSimplify/Ligands/NMe2_-1.xyz +11 -0
  92. molSimplify/Ligands/PCy3.mol +111 -0
  93. molSimplify/Ligands/PMe3.xyz +15 -0
  94. molSimplify/Ligands/PPh3.mol +76 -0
  95. molSimplify/Ligands/Propyphenazone.mol +77 -0
  96. molSimplify/Ligands/acac.mol +33 -0
  97. molSimplify/Ligands/acacen.mol +76 -0
  98. molSimplify/Ligands/acetate.smi +1 -0
  99. molSimplify/Ligands/acetate.xyz +9 -0
  100. molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
  101. molSimplify/Ligands/acetonitrile.mol +17 -0
  102. molSimplify/Ligands/alanine.mol +30 -0
  103. molSimplify/Ligands/alphabetizer.py +21 -0
  104. molSimplify/Ligands/amine.mol +11 -0
  105. molSimplify/Ligands/ammonia.mol +12 -0
  106. molSimplify/Ligands/arginine.mol +58 -0
  107. molSimplify/Ligands/asparagine.mol +38 -0
  108. molSimplify/Ligands/aspartic_acid.mol +35 -0
  109. molSimplify/Ligands/azide.mol +11 -0
  110. molSimplify/Ligands/benzene.mol +28 -0
  111. molSimplify/Ligands/benzene_pi.mol +30 -0
  112. molSimplify/Ligands/benzenedithiol.mol +30 -0
  113. molSimplify/Ligands/benzenethiol.mol +30 -0
  114. molSimplify/Ligands/benzylisocy.mol +38 -0
  115. molSimplify/Ligands/bidiazine.mol +42 -0
  116. molSimplify/Ligands/bidiazole.mol +38 -0
  117. molSimplify/Ligands/bifuran.mol +38 -0
  118. molSimplify/Ligands/bihydrodiazine.mol +58 -0
  119. molSimplify/Ligands/bihydrodiazole.mol +46 -0
  120. molSimplify/Ligands/bihydrooxazine.mol +54 -0
  121. molSimplify/Ligands/bihydrooxazole.mol +42 -0
  122. molSimplify/Ligands/bihydrothiazine.mol +54 -0
  123. molSimplify/Ligands/bihydrothiazole.mol +42 -0
  124. molSimplify/Ligands/biimidazole.mol +38 -0
  125. molSimplify/Ligands/bioxazole.mol +34 -0
  126. molSimplify/Ligands/bipy.mol +46 -0
  127. molSimplify/Ligands/bipyrazine.xyz +20 -0
  128. molSimplify/Ligands/bipyrimidine.mol +42 -0
  129. molSimplify/Ligands/bipyrrole.mol +42 -0
  130. molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
  131. molSimplify/Ligands/bithiazole.mol +34 -0
  132. molSimplify/Ligands/bromide.mol +7 -0
  133. molSimplify/Ligands/bromide.smi +1 -0
  134. molSimplify/Ligands/c2.mol +9 -0
  135. molSimplify/Ligands/caprolactone.mol +41 -0
  136. molSimplify/Ligands/carbonyl.mol +8 -0
  137. molSimplify/Ligands/carboxyl.mol +13 -0
  138. molSimplify/Ligands/cat.mol +30 -0
  139. molSimplify/Ligands/chloride.mol +7 -0
  140. molSimplify/Ligands/chloride.smi +1 -0
  141. molSimplify/Ligands/chloropyridine.mol +27 -0
  142. molSimplify/Ligands/co2.mol +10 -0
  143. molSimplify/Ligands/corrolazine.mol +72 -0
  144. molSimplify/Ligands/cs.mol +8 -0
  145. molSimplify/Ligands/cyanate.xyz +5 -0
  146. molSimplify/Ligands/cyanide.mol +9 -0
  147. molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
  148. molSimplify/Ligands/cyanopyridine.mol +29 -0
  149. molSimplify/Ligands/cyclam.mol +81 -0
  150. molSimplify/Ligands/cyclen.mol +69 -0
  151. molSimplify/Ligands/cyclopentadienyl.mol +26 -0
  152. molSimplify/Ligands/cysteine.mol +32 -0
  153. molSimplify/Ligands/diaminomethyl.mol +19 -0
  154. molSimplify/Ligands/diazine.mol +25 -0
  155. molSimplify/Ligands/diazole.mol +23 -0
  156. molSimplify/Ligands/dicyanamide.mol +15 -0
  157. molSimplify/Ligands/dihydrofuran.mol +27 -0
  158. molSimplify/Ligands/dmap.xyz +35 -0
  159. molSimplify/Ligands/dmf.mol +28 -0
  160. molSimplify/Ligands/dmi.mol +41 -0
  161. molSimplify/Ligands/dmpe.mol +52 -0
  162. molSimplify/Ligands/dpmu.mol +47 -0
  163. molSimplify/Ligands/dppe.mol +112 -0
  164. molSimplify/Ligands/edta.mol +69 -0
  165. molSimplify/Ligands/en.mol +28 -0
  166. molSimplify/Ligands/ethanethiol.mol +21 -0
  167. molSimplify/Ligands/ethanolamine.mol +26 -0
  168. molSimplify/Ligands/ethbipy.mol +70 -0
  169. molSimplify/Ligands/ethyl.mol +19 -0
  170. molSimplify/Ligands/ethylamine.mol +24 -0
  171. molSimplify/Ligands/ethylene.mol +16 -0
  172. molSimplify/Ligands/ethylesteracac.mol +57 -0
  173. molSimplify/Ligands/fluoride.mol +7 -0
  174. molSimplify/Ligands/fluoride.smi +1 -0
  175. molSimplify/Ligands/formaldehyde.mol +12 -0
  176. molSimplify/Ligands/formamidate.xyz +8 -0
  177. molSimplify/Ligands/formate.xyz +6 -0
  178. molSimplify/Ligands/furan.mol +23 -0
  179. molSimplify/Ligands/glutamic_acid.mol +42 -0
  180. molSimplify/Ligands/glutamine.mol +44 -0
  181. molSimplify/Ligands/glycinate.mol +23 -0
  182. molSimplify/Ligands/glycine.mol +24 -0
  183. molSimplify/Ligands/h2s.mol +10 -0
  184. molSimplify/Ligands/helium.mol +6 -0
  185. molSimplify/Ligands/histidine.mol +45 -0
  186. molSimplify/Ligands/hmpa.mol +62 -0
  187. molSimplify/Ligands/hs-.mol +9 -0
  188. molSimplify/Ligands/hydride.mol +7 -0
  189. molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
  190. molSimplify/Ligands/hydrocyanide.mol +10 -0
  191. molSimplify/Ligands/hydrodiazine.mol +33 -0
  192. molSimplify/Ligands/hydrodiazole.mol +27 -0
  193. molSimplify/Ligands/hydrogensulfide.mol +10 -0
  194. molSimplify/Ligands/hydroisocyanide.mol +11 -0
  195. molSimplify/Ligands/hydrooxazine.mol +31 -0
  196. molSimplify/Ligands/hydrooxazole.mol +25 -0
  197. molSimplify/Ligands/hydrothiazine.mol +31 -0
  198. molSimplify/Ligands/hydrothiazole.mol +25 -0
  199. molSimplify/Ligands/hydroxyl.mol +9 -0
  200. molSimplify/Ligands/imidazole.mol +23 -0
  201. molSimplify/Ligands/imidazolidinone.mol +29 -0
  202. molSimplify/Ligands/imine.mol +13 -0
  203. molSimplify/Ligands/iminodiacetic.mol +33 -0
  204. molSimplify/Ligands/iodide.mol +7 -0
  205. molSimplify/Ligands/iodobenzene.xyz +14 -0
  206. molSimplify/Ligands/isoleucine.mol +48 -0
  207. molSimplify/Ligands/isothiocyanate.mol +11 -0
  208. molSimplify/Ligands/leucine.mol +48 -0
  209. molSimplify/Ligands/ligands.dict +257 -0
  210. molSimplify/Ligands/lysine.mol +54 -0
  211. molSimplify/Ligands/mebenzenedithiol.mol +36 -0
  212. molSimplify/Ligands/mebim_py.xyz +29 -0
  213. molSimplify/Ligands/mebim_pz.xyz +28 -0
  214. molSimplify/Ligands/mebipy.mol +58 -0
  215. molSimplify/Ligands/mecat.mol +36 -0
  216. molSimplify/Ligands/methanal.mol +11 -0
  217. molSimplify/Ligands/methanethiol.mol +15 -0
  218. molSimplify/Ligands/methanol.mol +16 -0
  219. molSimplify/Ligands/methionine.mol +44 -0
  220. molSimplify/Ligands/methyl.mol +13 -0
  221. molSimplify/Ligands/methylacetylide.xyz +8 -0
  222. molSimplify/Ligands/methylamine.mol +19 -0
  223. molSimplify/Ligands/methylazide.xyz +9 -0
  224. molSimplify/Ligands/methylisocy.mol +17 -0
  225. molSimplify/Ligands/methylpyridine.mol +33 -0
  226. molSimplify/Ligands/n2.mol +8 -0
  227. molSimplify/Ligands/n4py.xyz +51 -0
  228. molSimplify/Ligands/nch.mol +10 -0
  229. molSimplify/Ligands/nco-.mol +11 -0
  230. molSimplify/Ligands/nethanolamine.mol +26 -0
  231. molSimplify/Ligands/nitrate.mol +14 -0
  232. molSimplify/Ligands/nitrite.mol +11 -0
  233. molSimplify/Ligands/nitro.mol +11 -0
  234. molSimplify/Ligands/nitrobipy.mol +54 -0
  235. molSimplify/Ligands/nitroso.mol +8 -0
  236. molSimplify/Ligands/nme3.mol +30 -0
  237. molSimplify/Ligands/no-.mol +10 -0
  238. molSimplify/Ligands/no2-.mol +11 -0
  239. molSimplify/Ligands/noxygen.mol +8 -0
  240. molSimplify/Ligands/ns-.mol +10 -0
  241. molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
  242. molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
  243. molSimplify/Ligands/o2-.mol +9 -0
  244. molSimplify/Ligands/o2.xyz +4 -0
  245. molSimplify/Ligands/och2.mol +12 -0
  246. molSimplify/Ligands/oethanolamine.mol +26 -0
  247. molSimplify/Ligands/ome2.mol +22 -0
  248. molSimplify/Ligands/ooh.xyz +5 -0
  249. molSimplify/Ligands/oxalate.mol +17 -0
  250. molSimplify/Ligands/oxalate.smi +1 -0
  251. molSimplify/Ligands/oxygen.mol +7 -0
  252. molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
  253. molSimplify/Ligands/ph2-.mol +11 -0
  254. molSimplify/Ligands/ph3.mol +12 -0
  255. molSimplify/Ligands/phen.mol +51 -0
  256. molSimplify/Ligands/phenacac.mol +63 -0
  257. molSimplify/Ligands/phenalalanine.mol +51 -0
  258. molSimplify/Ligands/phendione.mol +51 -0
  259. molSimplify/Ligands/phenphen.mol +75 -0
  260. molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
  261. molSimplify/Ligands/phenylcyc.mol +99 -0
  262. molSimplify/Ligands/phenylenediamine.mol +37 -0
  263. molSimplify/Ligands/phenylisocy.mol +32 -0
  264. molSimplify/Ligands/phosacidbipy.mol +66 -0
  265. molSimplify/Ligands/phosphine.mol +13 -0
  266. molSimplify/Ligands/phosphorine.mol +27 -0
  267. molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
  268. molSimplify/Ligands/phthalocyanine.mol +126 -0
  269. molSimplify/Ligands/pme3o.mol +32 -0
  270. molSimplify/Ligands/porphyrin.mol +82 -0
  271. molSimplify/Ligands/pph3o.mol +77 -0
  272. molSimplify/Ligands/proline.mol +39 -0
  273. molSimplify/Ligands/propdiol.mol +21 -0
  274. molSimplify/Ligands/propylene.mol +23 -0
  275. molSimplify/Ligands/pyridine.mol +27 -0
  276. molSimplify/Ligands/pyrimidone.mol +27 -0
  277. molSimplify/Ligands/pyrrole.mol +24 -0
  278. molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
  279. molSimplify/Ligands/s2-.mol +9 -0
  280. molSimplify/Ligands/salen.mol +75 -0
  281. molSimplify/Ligands/salphen.mol +84 -0
  282. molSimplify/Ligands/serine.mol +32 -0
  283. molSimplify/Ligands/simple_ligands.dict +14 -0
  284. molSimplify/Ligands/sulfacidbipy.mol +63 -0
  285. molSimplify/Ligands/tbucat.mol +54 -0
  286. molSimplify/Ligands/tbuphisocy.mol +56 -0
  287. molSimplify/Ligands/tbutylcyclen.mol +166 -0
  288. molSimplify/Ligands/tbutylisocy.mol +35 -0
  289. molSimplify/Ligands/tbutylthiol.mol +33 -0
  290. molSimplify/Ligands/tcnoet.mol +43 -0
  291. molSimplify/Ligands/tcnoetOH.mol +45 -0
  292. molSimplify/Ligands/terpy.mol +65 -0
  293. molSimplify/Ligands/tetrahydrofuran.mol +31 -0
  294. molSimplify/Ligands/thiane.mol +37 -0
  295. molSimplify/Ligands/thiazole.mol +21 -0
  296. molSimplify/Ligands/thiocyanate.mol +11 -0
  297. molSimplify/Ligands/thiol.mol +9 -0
  298. molSimplify/Ligands/thiophene.mol +23 -0
  299. molSimplify/Ligands/thiopyridine.mol +29 -0
  300. molSimplify/Ligands/threonine.mol +38 -0
  301. molSimplify/Ligands/tpp.mol +165 -0
  302. molSimplify/Ligands/tricyanomethyl.mol +19 -0
  303. molSimplify/Ligands/trifluoromethyl.mol +13 -0
  304. molSimplify/Ligands/tryptophan.mol +60 -0
  305. molSimplify/Ligands/tyrosine.mol +53 -0
  306. molSimplify/Ligands/uthiol.mol +11 -0
  307. molSimplify/Ligands/uthiolme2.mol +23 -0
  308. molSimplify/Ligands/valine.mol +42 -0
  309. molSimplify/Ligands/water.mol +10 -0
  310. molSimplify/Ligands/x.mol +6 -0
  311. molSimplify/Scripts/__init__.py +0 -0
  312. molSimplify/Scripts/addtodb.py +308 -0
  313. molSimplify/Scripts/cellbuilder.py +1592 -0
  314. molSimplify/Scripts/cellbuilder_tools.py +701 -0
  315. molSimplify/Scripts/chains.py +342 -0
  316. molSimplify/Scripts/convert_2to3.py +23 -0
  317. molSimplify/Scripts/dbinteract.py +631 -0
  318. molSimplify/Scripts/distgeom.py +617 -0
  319. molSimplify/Scripts/findcorrelations.py +287 -0
  320. molSimplify/Scripts/generator.py +267 -0
  321. molSimplify/Scripts/geometry.py +1224 -0
  322. molSimplify/Scripts/grabguivars.py +845 -0
  323. molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
  324. molSimplify/Scripts/inparse.py +1673 -0
  325. molSimplify/Scripts/io.py +1149 -0
  326. molSimplify/Scripts/isomers.py +415 -0
  327. molSimplify/Scripts/jobgen.py +247 -0
  328. molSimplify/Scripts/krr_prep.py +1262 -0
  329. molSimplify/Scripts/molSimplify_io.py +18 -0
  330. molSimplify/Scripts/molden2psi4wfn.py +166 -0
  331. molSimplify/Scripts/namegen.py +32 -0
  332. molSimplify/Scripts/nn_prep.py +561 -0
  333. molSimplify/Scripts/oct_check_mols.py +782 -0
  334. molSimplify/Scripts/periodic_QE.py +97 -0
  335. molSimplify/Scripts/postmold.py +304 -0
  336. molSimplify/Scripts/postmwfn.py +709 -0
  337. molSimplify/Scripts/postparse.py +488 -0
  338. molSimplify/Scripts/postproc.py +139 -0
  339. molSimplify/Scripts/qcgen.py +1450 -0
  340. molSimplify/Scripts/rmsd.py +489 -0
  341. molSimplify/Scripts/rungen.py +670 -0
  342. molSimplify/Scripts/structgen.py +3040 -0
  343. molSimplify/Scripts/tf_nn_prep.py +894 -0
  344. molSimplify/Scripts/tsgen.py +295 -0
  345. molSimplify/Scripts/uq_calibration.py +69 -0
  346. molSimplify/__init__.py +0 -0
  347. molSimplify/__main__.py +197 -0
  348. molSimplify/icons/chemdb.png +0 -0
  349. molSimplify/icons/hjklogo.png +0 -0
  350. molSimplify/icons/icon.png +0 -0
  351. molSimplify/icons/logo.png +0 -0
  352. molSimplify/icons/logo_old.png +0 -0
  353. molSimplify/icons/petachem.png +0 -0
  354. molSimplify/icons/petachem2.png +0 -0
  355. molSimplify/icons/petachem_full.png +0 -0
  356. molSimplify/icons/pythonlogo.png +0 -0
  357. molSimplify/icons/sge copy.png +0 -0
  358. molSimplify/icons/sge.png +0 -0
  359. molSimplify/icons/slurm.png +0 -0
  360. molSimplify/icons/wft1.png +0 -0
  361. molSimplify/icons/wft2.png +0 -0
  362. molSimplify/icons/wft3.png +0 -0
  363. molSimplify/ml/__init__.py +0 -0
  364. molSimplify/ml/kernels.py +36 -0
  365. molSimplify/ml/layers.py +29 -0
  366. molSimplify/molscontrol/__init__.py +14 -0
  367. molSimplify/molscontrol/_version.py +521 -0
  368. molSimplify/molscontrol/clf_tools.py +144 -0
  369. molSimplify/molscontrol/data/README.md +21 -0
  370. molSimplify/molscontrol/data/look_and_say.dat +15 -0
  371. molSimplify/molscontrol/dynamic_classifier.py +514 -0
  372. molSimplify/molscontrol/io_tools.py +363 -0
  373. molSimplify/molscontrol/molscontrol.py +49 -0
  374. molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
  375. molSimplify/molscontrol/terachem/terachem_input +22 -0
  376. molSimplify/python_krr/X_train_TS.csv +535 -0
  377. molSimplify/python_krr/__init__.py +0 -0
  378. molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
  379. molSimplify/python_krr/hat2_feature_names.csv +1 -0
  380. molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
  381. molSimplify/python_krr/hat_X_mean_std.csv +6 -0
  382. molSimplify/python_krr/hat_feature_names.csv +1 -0
  383. molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
  384. molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
  385. molSimplify/python_krr/hat_y_mean_std.csv +2 -0
  386. molSimplify/python_krr/sklearn_models.py +34 -0
  387. molSimplify/python_krr/y_train_TS.csv +535 -0
  388. molSimplify/python_nn/ANN.py +198 -0
  389. molSimplify/python_nn/__init__.py +0 -0
  390. molSimplify/python_nn/clf_analysis_tool.py +125 -0
  391. molSimplify/python_nn/dictionary_toolbox.py +49 -0
  392. molSimplify/python_nn/ensemble_test.py +309 -0
  393. molSimplify/python_nn/hs_center.csv +26 -0
  394. molSimplify/python_nn/hs_scale.csv +26 -0
  395. molSimplify/python_nn/ls_center.csv +26 -0
  396. molSimplify/python_nn/ls_scale.csv +26 -0
  397. molSimplify/python_nn/ms_hs_b1.csv +50 -0
  398. molSimplify/python_nn/ms_hs_b2.csv +50 -0
  399. molSimplify/python_nn/ms_hs_b3.csv +1 -0
  400. molSimplify/python_nn/ms_hs_w1.csv +50 -0
  401. molSimplify/python_nn/ms_hs_w2.csv +50 -0
  402. molSimplify/python_nn/ms_hs_w3.csv +1 -0
  403. molSimplify/python_nn/ms_ls_b1.csv +50 -0
  404. molSimplify/python_nn/ms_ls_b2.csv +50 -0
  405. molSimplify/python_nn/ms_ls_b3.csv +1 -0
  406. molSimplify/python_nn/ms_ls_w1.csv +50 -0
  407. molSimplify/python_nn/ms_ls_w2.csv +50 -0
  408. molSimplify/python_nn/ms_ls_w3.csv +1 -0
  409. molSimplify/python_nn/ms_slope_b1.csv +50 -0
  410. molSimplify/python_nn/ms_slope_b2.csv +50 -0
  411. molSimplify/python_nn/ms_slope_b3.csv +1 -0
  412. molSimplify/python_nn/ms_slope_w1.csv +50 -0
  413. molSimplify/python_nn/ms_slope_w2.csv +50 -0
  414. molSimplify/python_nn/ms_slope_w3.csv +1 -0
  415. molSimplify/python_nn/ms_split_b1.csv +50 -0
  416. molSimplify/python_nn/ms_split_b2.csv +50 -0
  417. molSimplify/python_nn/ms_split_b3.csv +1 -0
  418. molSimplify/python_nn/ms_split_w1.csv +50 -0
  419. molSimplify/python_nn/ms_split_w2.csv +50 -0
  420. molSimplify/python_nn/ms_split_w3.csv +1 -0
  421. molSimplify/python_nn/slope_center.csv +25 -0
  422. molSimplify/python_nn/slope_scale.csv +25 -0
  423. molSimplify/python_nn/split_center.csv +26 -0
  424. molSimplify/python_nn/split_scale.csv +26 -0
  425. molSimplify/python_nn/tf_ANN.py +762 -0
  426. molSimplify/python_nn/train_data.csv +1211 -0
  427. molSimplify/tf_nn/__init__.py +0 -0
  428. molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
  429. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
  430. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
  431. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
  432. molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
  433. molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
  434. molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
  435. molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
  436. molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
  437. molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
  438. molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
  439. molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
  440. molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
  441. molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
  442. molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
  443. molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
  444. molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
  445. molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
  446. molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
  447. molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
  448. molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
  449. molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
  450. molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
  451. molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
  452. molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
  453. molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
  454. molSimplify/tf_nn/homolumo/gap_model.json +1 -0
  455. molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
  456. molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
  457. molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
  458. molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
  459. molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
  460. molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
  461. molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
  462. molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
  463. molSimplify/tf_nn/homolumo/homo_model.json +126 -0
  464. molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
  465. molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
  466. molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
  467. molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
  468. molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
  469. molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
  470. molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
  471. molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
  472. molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
  473. molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
  474. molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
  475. molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
  476. molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
  477. molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
  478. molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
  479. molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
  480. molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
  481. molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
  482. molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
  483. molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
  484. molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
  485. molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
  486. molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
  487. molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
  488. molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
  489. molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
  490. molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
  491. molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
  492. molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
  493. molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
  494. molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
  495. molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
  496. molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
  497. molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
  498. molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
  499. molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
  500. molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
  501. molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
  502. molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
  503. molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
  504. molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
  505. molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
  506. molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
  507. molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
  508. molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
  509. molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
  510. molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
  511. molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
  512. molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
  513. molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
  514. molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
  515. molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
  516. molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
  517. molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
  518. molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
  519. molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
  520. molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
  521. molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
  522. molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
  523. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
  524. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
  525. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
  526. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
  527. molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
  528. molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
  529. molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
  530. molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
  531. molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
  532. molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
  533. molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
  534. molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
  535. molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
  536. molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
  537. molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
  538. molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
  539. molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
  540. molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
  541. molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
  542. molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
  543. molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
  544. molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
  545. molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
  546. molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
  547. molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
  548. molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
  549. molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
  550. molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
  551. molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
  552. molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
  553. molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
  554. molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
  555. molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
  556. molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
  557. molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
  558. molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
  559. molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
  560. molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
  561. molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
  562. molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
  563. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
  564. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
  565. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
  566. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
  567. molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
  568. molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
  569. molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
  570. molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
  571. molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
  572. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
  573. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
  574. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
  575. molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
  576. molSimplify/tf_nn/split/split_model.h5 +0 -0
  577. molSimplify/tf_nn/split/split_model.json +1 -0
  578. molSimplify/tf_nn/split/split_vars.csv +155 -0
  579. molSimplify/tf_nn/split/split_x.csv +1902 -0
  580. molSimplify/tf_nn/split/split_y.csv +1902 -0
  581. molSimplify/tf_nn/split/train_names.csv +1901 -0
  582. molSimplify/utils/__init__.py +0 -0
  583. molSimplify/utils/decorators.py +16 -0
  584. molSimplify/utils/metaclasses.py +12 -0
  585. molSimplify/utils/tensorflow.py +23 -0
  586. molSimplify/utils/timer.py +16 -0
  587. molSimplify-1.7.4.dist-info/LICENSE +674 -0
  588. molSimplify-1.7.4.dist-info/METADATA +821 -0
  589. molSimplify-1.7.4.dist-info/RECORD +651 -0
  590. molSimplify-1.7.4.dist-info/WHEEL +5 -0
  591. molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
  592. molSimplify-1.7.4.dist-info/top_level.txt +4 -0
  593. tests/generateTests.py +122 -0
  594. tests/helperFuncs.py +658 -0
  595. tests/informatics/test_MOF_descriptors.py +128 -0
  596. tests/informatics/test_active_learning.py +113 -0
  597. tests/informatics/test_coulomb_analyze.py +24 -0
  598. tests/informatics/test_graph_racs.py +193 -0
  599. tests/ml/test_kernels.py +20 -0
  600. tests/ml/test_layers.py +47 -0
  601. tests/runtest.py +10 -0
  602. tests/test_Mol2D.py +128 -0
  603. tests/test_basic_imports.py +62 -0
  604. tests/test_bidentate.py +25 -0
  605. tests/test_cli.py +20 -0
  606. tests/test_distgeom.py +106 -0
  607. tests/test_example_1.py +29 -0
  608. tests/test_example_3.py +31 -0
  609. tests/test_example_5.py +43 -0
  610. tests/test_example_7.py +28 -0
  611. tests/test_example_8.py +15 -0
  612. tests/test_example_tbp.py +15 -0
  613. tests/test_ff_xtb.py +111 -0
  614. tests/test_geocheck_oct.py +26 -0
  615. tests/test_geocheck_one_empty.py +15 -0
  616. tests/test_geometry.py +44 -0
  617. tests/test_inparse.py +76 -0
  618. tests/test_io.py +84 -0
  619. tests/test_jobgen.py +84 -0
  620. tests/test_joption_pythonic.py +27 -0
  621. tests/test_ligand_assign.py +58 -0
  622. tests/test_ligand_assign_consistent.py +60 -0
  623. tests/test_ligand_class.py +26 -0
  624. tests/test_ligand_from_mol_file.py +35 -0
  625. tests/test_ligands.py +86 -0
  626. tests/test_mol3D.py +337 -0
  627. tests/test_molcas_caspt2.py +15 -0
  628. tests/test_molcas_casscf.py +15 -0
  629. tests/test_old_ANNs.py +68 -0
  630. tests/test_orca_ccsdt.py +15 -0
  631. tests/test_orca_dft.py +15 -0
  632. tests/test_qcgen.py +50 -0
  633. tests/test_racs.py +124 -0
  634. tests/test_rmsd.py +68 -0
  635. tests/test_structgen_functions.py +198 -0
  636. tests/test_tetrahedral.py +29 -0
  637. tests/test_tutorial_10_part_one.py +16 -0
  638. tests/test_tutorial_10_part_two.py +15 -0
  639. tests/test_tutorial_2.py +11 -0
  640. tests/test_tutorial_3.py +15 -0
  641. tests/test_tutorial_4.py +57 -0
  642. tests/test_tutorial_6.py +10 -0
  643. tests/test_tutorial_8.py +29 -0
  644. tests/test_tutorial_9_part_one.py +15 -0
  645. tests/test_tutorial_9_part_two.py +15 -0
  646. tests/test_tutorial_qm9_part_one.py +6 -0
  647. tests/testresources/refs/racs/generate_references.py +85 -0
  648. workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
  649. workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
  650. workflows/NandyJACSAu2022/fragment_classes.py +586 -0
  651. workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
@@ -0,0 +1,762 @@
1
+ # Written by JP Janet for HJK Group
2
+ # Dpt of Chemical Engineering, MIT
3
+
4
+ ##########################################################
5
+ ######## This script contains a neural network ##########
6
+ ##### trained on octahedral metal-ligand #######
7
+ ######## bond distances and spin propensity ###########
8
+ ##########################################################
9
+
10
+
11
+ import csv
12
+ import glob
13
+ import json
14
+ import os
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+ import scipy
19
+ from typing import List, Tuple, Union, Optional
20
+ from tensorflow.keras import backend as K
21
+ from tensorflow.keras.models import model_from_json, load_model
22
+ from importlib_resources import files as resource_files
23
+ from packaging import version
24
+ import tensorflow as tf
25
+
26
+ from molSimplify.python_nn.clf_analysis_tool import array_stack, get_layer_outputs, dist_neighbor, get_entropy
27
+
28
+
29
def perform_ANN_prediction(RAC_dataframe: pd.DataFrame, predictor_name: str,
                           RAC_column: str = 'RACs') -> pd.DataFrame:
    """Run a normalized ANN prediction and attach the results to a dataframe.

    The features listed by ``load_ANN_variables(predictor_name)`` must be
    present either as individual columns or as dictionaries stored in the
    single column named by ``RAC_column``. In the latter case the dictionaries
    are expanded into float columns and joined onto the dataframe.

    Parameters
    ----------
    RAC_dataframe : pd.DataFrame
        Input rows; may contain any additional columns.
    predictor_name : str
        Name of the predictor, e.g. ls_ii, hs_iii, homo, oxo, hat.
    RAC_column : str
        Column holding serialized feature dictionaries, used only when
        features are missing as individual columns.

    Returns
    -------
    pd.DataFrame
        The (possibly expanded) input dataframe joined with three new columns:
        ``<predictor_name>_latent_vector``,
        ``<predictor_name>_min_latent_distance`` and
        ``<predictor_name>_prediction``.

    Raises
    ------
    ValueError
        If required features are missing (also after expanding ``RAC_column``)
        or if any ``alpha`` value exceeds 1.
    """
    assert type(RAC_dataframe) is pd.DataFrame
    train_vars = load_ANN_variables(predictor_name)

    # Validate the features before loading any model data so that bad input fails fast.
    missing_labels = [i for i in train_vars if i not in RAC_dataframe.columns]
    if missing_labels:
        if RAC_column not in RAC_dataframe.columns:
            raise ValueError('Please supply missing variables in your RAC dataframe: %s' % missing_labels)
        # Expand the serialized feature dictionaries into one float column per feature.
        deserialized_RACs = pd.DataFrame.from_records(RAC_dataframe[RAC_column].values,
                                                      index=RAC_dataframe.index.values)
        deserialized_RACs = deserialized_RACs.astype(float)
        RAC_dataframe = RAC_dataframe.join(deserialized_RACs)
        # Re-check once instead of recursing: a second expansion could never add anything new.
        missing_labels = [i for i in train_vars if i not in RAC_dataframe.columns]
        if missing_labels:
            raise ValueError('Please supply missing variables in your RAC dataframe: %s' % missing_labels)

    if 'alpha' in train_vars:
        if any(RAC_dataframe.alpha > 1):
            raise ValueError('Alpha is too large - should be between 0 and 1.')

    train_mean_x, train_mean_y, train_var_x, train_var_y = load_normalization_data(predictor_name)
    my_ANN = load_keras_ann(predictor_name)

    RAC_subset_for_ANN = RAC_dataframe.loc[:, train_vars].astype(float)
    normalized_input = data_normalize(RAC_subset_for_ANN, train_mean_x, train_var_x)
    ANN_prediction = my_ANN.predict(normalized_input, verbose=0)
    rescaled_output = data_rescale(ANN_prediction, train_mean_y, train_var_y)

    # Get latent vectors (output of the second-to-last layer) for training data and queried data
    train_x = pd.DataFrame(load_training_data(predictor_name), columns=train_vars).astype(float)
    get_outputs = K.function([my_ANN.layers[0].input, K.learning_phase()],
                             [my_ANN.layers[len(my_ANN.layers) - 2].output])
    normalized_train = data_normalize(train_x, train_mean_x, train_var_x)
    training_latent = get_outputs([normalized_train, 0])[0]
    query_latent = get_outputs([normalized_input, 0])[0]

    # Append all results to dataframe
    latent_key = '%s_latent_vector' % predictor_name
    distance_key = '%s_min_latent_distance' % predictor_name
    prediction_key = '%s_prediction' % predictor_name
    results_list = []
    for latent_row, output_value in zip(query_latent, rescaled_output):
        min_latent_distance = min(np.linalg.norm(training_latent - latent_row[:], axis=1))
        if len(output_value) == 1:  # squash array of length 1 to the value it contains
            output_value = output_value[0]
        results_list.append({latent_key: latent_row,
                             distance_key: min_latent_distance,
                             prediction_key: output_value})
    results_df = pd.DataFrame(results_list, index=RAC_dataframe.index)
    return RAC_dataframe.join(results_df)
92
+
93
+
94
def get_error_params(latent_distances, errors):
    '''
    Get the maximum-likelihood parameters for an error model N(a+b*(latent_distance)).

    The errors are modelled as zero-mean normal variables whose standard
    deviation is a + b * latent_distance; NaN log-densities are ignored.

    Inputs: latent_distances (vector), errors (vector); any array-like is accepted.
    Output: [a, b] (numpy array), both bounded below by 1e-9.
    '''
    # Import the submodules explicitly: a bare `import scipy` does not
    # guarantee that scipy.stats / scipy.optimize are loaded.
    from scipy import optimize, stats

    latent_distances = np.asarray(latent_distances, dtype=float)
    errors = np.asarray(errors, dtype=float)

    def log_likelihood(params):
        # Negative log-likelihood, because the optimizer minimizes.
        a, b = params[0], params[1]
        return -np.nansum(stats.norm.logpdf(errors, loc=0, scale=a + latent_distances * b))

    results = optimize.minimize(log_likelihood, np.array([0.2, 0.01]),
                                bounds=[(1e-9, None), (1e-9, None)])
    return results.x
106
+
107
+
108
def matrix_loader(path: str, rownames: bool = False) -> Union[Tuple[List[List[str]], List[str]], List[List[str]]]:
    """Load a comma-separated matrix shipped inside ``molSimplify.python_nn``.

    Leading and trailing slashes are stripped from ``path`` before it is
    resolved against the package. With ``rownames=True`` the first field of
    every row is split off and returned separately as ``(mat, row_names)``;
    otherwise the list of rows is returned as-is. All entries are strings.
    """
    path_to_file = resource_files("molSimplify.python_nn").joinpath(path.strip("/"))
    with open(path_to_file, 'r') as csvfile:
        all_rows = list(csv.reader(csvfile, delimiter=','))
    if not rownames:
        return all_rows
    row_names = [fields[0] for fields in all_rows]
    mat = [fields[1:] for fields in all_rows]
    return mat, row_names
122
+
123
+
124
def get_key(predictor: str, suffix: Optional[str] = None) -> str:
    """Build the resource key (relative path stem) for a predictor.

    With a non-empty ``suffix`` the key is ``<folder>/<predictor>_<suffix>``,
    where ``<folder>`` is the shared folder of the predictor's family or the
    predictor name itself. With an empty or missing ``suffix`` the family
    folder with a trailing slash is returned; the static classifiers yield
    ``<predictor>/<predictor>_<suffix>`` with the falsy suffix formatted in
    (so ``''`` produces a bare prefix), and any other predictor yields just
    its name.
    """
    family_folder = {
        'ls_ii': 'geos', 'hs_ii': 'geos', 'ls_iii': 'geos', 'hs_iii': 'geos',
        'homo': 'homolumo', 'gap': 'homolumo',
        'oxo': 'oxocatalysis', 'hat': 'oxocatalysis',
        'oxo20': 'oxoandhomo', 'homo_empty': 'oxoandhomo',
    }
    folder = family_folder.get(predictor)

    if suffix:
        base = predictor if folder is None else folder
        return base + '/' + predictor + '_%s' % suffix

    if folder is not None:
        return folder + '/'
    if predictor in ('geo_static_clf', 'sc_static_clf'):
        # The falsy suffix is still formatted in; callers pass '' to get a path prefix.
        return predictor + '/' + predictor + '_%s' % suffix
    return predictor
152
+
153
+
154
def data_rescale(scaled_dat, train_mean, train_var, debug=False) -> np.ndarray:
    """Undo normalization: multiply by sqrt(train_var) and add train_mean.

    The arithmetic is done on the transpose of ``scaled_dat`` and the result
    is transposed back, so ``train_mean`` / ``train_var`` must broadcast
    against ``scaled_dat.T``.
    """
    n_dims = np.shape(train_mean)[0]
    if debug:
        print(('unnormalizing with number of dimensions = ' + str(n_dims)))
    std_dev = np.sqrt(train_var)
    unscaled_t = np.multiply(scaled_dat.T, std_dev) + train_mean
    return unscaled_t.T
160
+
161
+
162
def data_normalize(data, train_mean, train_var, debug=False) -> np.ndarray:
    """Standardize ``data`` as (data - train_mean) / sqrt(train_var).

    The arithmetic is done on ``data.T`` and transposed back, so the
    statistics must broadcast against ``data.T``. Two layouts are used in this
    file: 2-D data with column-vector statistics of shape (n_features, 1), and
    a single 1-D row with transposed statistics of shape (1, n_features).

    Features whose training variance is below 1e-16 are removed from the data
    and from both statistics before scaling (a note is printed), so the output
    then has fewer features than the input.

    Parameters
    ----------
    data : array-like with ``astype`` and ``T`` (numpy array or DataFrame)
    train_mean, train_var : numpy arrays of per-feature statistics
    debug : bool
        Print the shapes involved.
    """
    data = data.astype(float)  # Make sure the data is always in float form
    d = np.shape(train_mean)[0]

    if debug:
        print(('normalizing with number of dimensions = ' + str(d)))
        print('shape of things in normalize:')
        print(('data.shape ' + str(data.shape)))
        print(('train_mean.shape ' + str(train_mean.shape)))
        print(('train_var.shape ' + str(train_var.shape)))

    # ravel (not squeeze) keeps a single-feature variance iterable as a length-1 vector.
    variances = np.ravel(train_var)
    delete_ind = [idx for idx, var in enumerate(variances) if var < 1e-16]
    if len(delete_ind) > 0:
        print(('Note: There are %d features with a variance smaller than 1e-16.' % len(delete_ind)))
        print('Please double check your input data if this number is not what you expect...')
        # Features sit on the last axis of the data; for the statistics they sit
        # on axis 0 for column vectors and on the last axis for transposed rows.
        stat_axis = 0 if np.shape(train_var)[0] == variances.size else -1
        data = np.delete(data, delete_ind, axis=-1)
        train_mean = np.delete(train_mean, delete_ind, axis=stat_axis)
        train_var = np.delete(train_var, delete_ind, axis=stat_axis)

    scaled_dat = np.divide((data.T - train_mean), np.sqrt(train_var), ).T
    return scaled_dat
186
+
187
+
188
def load_normalization_data(name: str):
    """Return (mean_x, mean_y, var_x, var_y) for a predictor as numpy arrays.

    If ``rescaling_data/<name>_mean_x.csv`` exists in ``molSimplify.tf_nn``,
    the four statistics are read from the ``rescaling_data`` CSV files (one
    value per line, optionally wrapped in square brackets). Otherwise they are
    computed from the training data via ``get_data_mean_std``.
    """
    def stat_file(stat):
        return resource_files("molSimplify.tf_nn").joinpath(f'rescaling_data/{name}_{stat}.csv')

    def read_stat(stat):
        # One single-element row per line keeps each statistic a column vector.
        with open(stat_file(stat), 'r') as f:
            return [[float(line.strip().strip('[]'))] for line in f.readlines()]

    # Only the mean_x file is probed; the other three are assumed to accompany it.
    if os.path.isfile(stat_file('mean_x')):
        train_mean_x = read_stat('mean_x')
        train_var_x = read_stat('var_x')
        train_mean_y = read_stat('mean_y')
        train_var_y = read_stat('var_y')
    else:
        print('---Mean and Variance information do not exist. Calculate from training data...---')
        train_mean_x, train_mean_y, train_var_x, train_var_y = get_data_mean_std(predictor=name)

    return np.array(train_mean_x), np.array(train_mean_y), np.array(train_var_x), np.array(train_var_y)
217
+
218
+
219
def get_data_mean_std(predictor: str):
    """Compute per-column mean and variance of a predictor's training CSVs.

    Reads the feature (``..._x.csv``) and label (``..._y.csv``) training files
    from ``molSimplify.tf_nn`` and returns
    ``(train_mean_x, train_mean_y, train_var_x, train_var_y)``, each a list of
    single-element lists (one entry per CSV column).
    """
    if predictor in ('ls_ii', 'hs_ii', 'ls_iii', 'hs_iii'):
        stem = 'geos/' + predictor + '_bl_'
    elif predictor in ('homo', 'gap'):
        stem = 'homolumo/' + predictor + '_train_'
    elif predictor in ('oxo', 'hat'):
        stem = 'oxocatalysis/' + predictor + '_train_'
    elif predictor in ('oxo20', 'homo_empty'):
        stem = 'oxoandhomo/' + predictor + '_train_'
    elif predictor in ('geo_static_clf', 'sc_static_clf'):
        stem = f'{predictor}/{predictor}_train_'
    else:
        # Includes "split": files live in a folder named after the predictor.
        stem = predictor + '/' + predictor + '_'

    def column_stats(which):
        frame = pd.read_csv(resource_files("molSimplify.tf_nn").joinpath(f'{stem}{which}.csv'))
        means, variances = [], []
        for col in frame:
            values = np.array(frame[col])
            means.append([np.mean(values)])
            variances.append([np.var(values)])
        return means, variances

    train_mean_x, train_var_x = column_stats('x')
    # labels
    train_mean_y, train_var_y = column_stats('y')
    return train_mean_x, train_mean_y, train_var_x, train_var_y
262
+
263
+
264
def load_ANN_variables(predictor: str, suffix: str = 'vars') -> List[str]:
    """Return the feature names a predictor expects, one per line of its ``<key>.csv`` file."""
    key = get_key(predictor, suffix)
    path_to_file = resource_files("molSimplify.tf_nn").joinpath(f'{key}.csv')
    with open(path_to_file, 'r') as f:
        return [entry.strip() for entry in f.readlines()]
272
+
273
+
274
def load_training_data(predictor: str) -> List[List[str]]:
    """Read a predictor's training feature CSV and return its rows as lists of strings.

    The first CSV line (header) is dropped.
    """
    shared_folders = {
        'ls_ii': ('geos', '_bl_x'), 'hs_ii': ('geos', '_bl_x'),
        'ls_iii': ('geos', '_bl_x'), 'hs_iii': ('geos', '_bl_x'),
        'homo': ('homolumo', '_train_x'), 'gap': ('homolumo', '_train_x'),
        'oxo': ('oxocatalysis', '_train_x'), 'hat': ('oxocatalysis', '_train_x'),
        'oxo20': ('oxoandhomo', '_train_x'), 'homo_empty': ('oxoandhomo', '_train_x'),
        'geo_static_clf': ('geo_static_clf', '_train_x'),
        'sc_static_clf': ('sc_static_clf', '_train_x'),
    }
    # Anything else (including "split") lives in a folder named after the predictor.
    folder, tail = shared_folders.get(predictor, (predictor, '_x'))
    key = folder + '/' + predictor + tail
    path_to_file = resource_files("molSimplify.tf_nn").joinpath(f'{key}.csv')
    with open(path_to_file, "r") as f:
        csv_lines = list(csv.reader(f))
    return list(csv_lines[1:])
295
+
296
+
297
def load_latent_training_data(predictor):
    """Read a predictor's latent-space training CSV and return its rows without the header.

    NOTE: the original source marks the latent training data as currently not available.
    """
    folder_and_tail = {
        'ls_ii': ('geos', '_latent_bl_x'), 'hs_ii': ('geos', '_latent_bl_x'),
        'ls_iii': ('geos', '_latent_bl_x'), 'hs_iii': ('geos', '_latent_bl_x'),
        'homo': ('homolumo', '_latent_train_x'), 'gap': ('homolumo', '_latent_train_x'),
        'oxo': ('oxocatalysis', '_latent_train_x'), 'hat': ('oxocatalysis', '_latent_train_x'),
        'oxo20': ('oxoandhomo', '_latent_train_x'), 'homo_empty': ('oxoandhomo', '_latent_train_x'),
        'split': ('split', '_latent_x_41_OHE'),
        'geo_static_clf': ('geo_static_clf', '_latent_train_x'),
        'sc_static_clf': ('sc_static_clf', '_latent_train_x'),
    }
    folder, tail = folder_and_tail.get(predictor, (predictor, '_latent_x_OHE'))
    key = folder + '/' + predictor + tail
    path_to_file = resource_files("molSimplify.tf_nn").joinpath(f'{key}.csv')
    with open(path_to_file, "r") as f:
        csv_lines = list(csv.reader(f))
    return list(csv_lines[1:])
319
+
320
+
321
def load_test_data(predictor):
    """Read a predictor's test feature CSV and return its rows without the header.

    For the geometry predictors and for "split" no separate test set is
    shipped, so the training features are returned instead. The
    ``oxo20`` / ``homo_empty`` predictors are resolved in the ``oxoandhomo``
    folder, matching ``load_test_labels`` and the training-data loaders.
    """
    if predictor in ['ls_ii', 'hs_ii', 'ls_iii', 'hs_iii']:
        key = 'geos/' + predictor + '_bl_x'  # Note, this test data is not available, will return train.
    elif predictor in ['homo', 'gap']:
        key = 'homolumo/' + predictor + '_test_x'
    elif predictor in ['oxo', 'hat']:
        key = 'oxocatalysis/' + predictor + '_test_x'
    elif predictor in ['oxo20', 'homo_empty']:
        # Previously fell through to '<predictor>/<predictor>_x', unlike every sibling loader.
        key = 'oxoandhomo/' + predictor + '_test_x'
    elif predictor == "split":
        key = predictor + '/' + predictor + '_x'  # Note, this test data is not available, will return train
    elif predictor in ['geo_static_clf', 'sc_static_clf']:
        key = predictor + '/' + predictor + '_test_x'
    else:
        key = predictor + '/' + predictor + '_x'
    path_to_file = resource_files("molSimplify.tf_nn").joinpath(f'{key}.csv')
    with open(path_to_file, "r") as f:
        csv_lines = list(csv.reader(f))
    mat = csv_lines[1:]
    return mat
340
+
341
+
342
def load_training_labels(predictor: str) -> List[List[str]]:
    """Read a predictor's training label CSV and return its rows (header dropped) as strings."""
    geometry = ('ls_ii', 'hs_ii', 'ls_iii', 'hs_iii')
    train_folders = {'homo': 'homolumo', 'gap': 'homolumo',
                     'oxo': 'oxocatalysis', 'hat': 'oxocatalysis',
                     'oxo20': 'oxoandhomo', 'homo_empty': 'oxoandhomo',
                     'geo_static_clf': 'geo_static_clf', 'sc_static_clf': 'sc_static_clf'}
    if predictor in geometry:
        key = 'geos/' + predictor + '_bl_y'
    elif predictor in train_folders:
        key = train_folders[predictor] + '/' + predictor + '_train_y'
    else:
        # Covers "split" and any predictor stored in its own folder.
        key = predictor + '/' + predictor + '_y'
    path_to_file = resource_files("molSimplify.tf_nn").joinpath(f'{key}.csv')
    with open(path_to_file, "r") as f:
        csv_lines = list(csv.reader(f))
    return list(csv_lines[1:])
363
+
364
+
365
def load_test_labels(predictor: str) -> List[List[str]]:
    """Read a predictor's test label CSV and return its rows (header dropped) as strings.

    For the geometry predictors and for "split" the key points at the same
    file name the training-label loader uses.
    """
    if predictor in ['ls_ii', 'hs_ii', 'ls_iii', 'hs_iii']:
        key = 'geos/' + predictor + '_bl_y'
    elif predictor in ['homo', 'gap']:
        key = 'homolumo/' + predictor + '_test_y'
    elif predictor in ['oxo', 'hat']:
        key = 'oxocatalysis/' + predictor + '_test_y'
    elif predictor in ['oxo20', 'homo_empty']:
        key = 'oxoandhomo/' + predictor + '_test_y'
    elif predictor == "split":
        key = predictor + '/' + predictor + '_y'
    elif predictor in ['geo_static_clf', 'sc_static_clf']:
        key = predictor + '/' + predictor + '_test_y'
    else:
        key = predictor + '/' + predictor + '_y'
    path_to_file = resource_files("molSimplify.tf_nn").joinpath(f'{key}.csv')
    # Plain "r": the "U" mode flag was removed in Python 3.11 and raises ValueError there;
    # universal newlines are already the default in text mode.
    with open(path_to_file, "r") as f:
        csv_lines = list(csv.reader(f))
    mat = csv_lines[1:]
    return mat
386
+
387
+
388
def load_train_info(predictor: str, suffix: str = 'info') -> dict:
    """Load and return the parsed JSON content of a predictor's ``<key>.json`` file."""
    info_path = resource_files("molSimplify.tf_nn").joinpath(get_key(predictor, suffix) + '.json')
    with open(info_path, 'r') as json_file:
        return json.load(json_file)
394
+
395
+
396
def load_keras_ann(predictor: str, suffix: str = 'model', compile: bool = False) -> tf.keras.Model:
    """Load the Keras model for the property ``predictor``.

    Predictors without "clf" in their name are rebuilt from a JSON
    architecture file plus an ``.h5`` weights file; classifier models are
    loaded directly from their ``.h5`` file. With ``compile=True`` the model
    is additionally compiled with predictor-specific optimizer settings.
    """
    # disable TF output text to reduce console spam
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    key = get_key(predictor, suffix)
    h5_path = resource_files("molSimplify.tf_nn").joinpath(f'{key}.h5')
    if "clf" in predictor:
        loaded_model = load_model(h5_path)
    else:
        json_path = resource_files("molSimplify.tf_nn").joinpath(f'{key}.json')
        with open(json_path, 'r') as json_file:
            architecture = json_file.read()
        loaded_model = model_from_json(architecture)
        # load weights into model
        loaded_model.load_weights(h5_path)

    if not compile:
        return loaded_model

    from tensorflow.keras.optimizers.legacy import Adam
    # oxo and hat reuse the oxo20 hyperparameters; their own earlier set
    # was decommissioned on 06/20/2019 by Aditya.
    oxo20_settings = dict(lr=0.0012838133056087084, beta_1=0.9811686522122317,
                          beta_2=0.8264616523572279, decay=0.0005114008091318582)
    regression_adam = {
        'homo': dict(beta_2=1 - 0.0016204733101599046, beta_1=0.8718839135783554,
                     decay=7.770243145972892e-05, lr=0.0004961686075897741),
        'gap': dict(beta_2=1 - 0.00010929248596488832, beta_1=0.8406735969305784,
                    decay=0.00011224350434148253, lr=0.0006759924688701965),
        'oxo': oxo20_settings,
        'hat': oxo20_settings,
        'oxo20': oxo20_settings,
        'homo_empty': dict(lr=0.006677578283098809, beta_1=0.8556594887870226,
                           beta_2=0.9463468021275508, decay=0.0006621877134674607),
    }
    if predictor in regression_adam:
        loaded_model.compile(loss="mse", optimizer=Adam(**regression_adam[predictor]),
                             metrics=['mse', 'mae', 'mape'])
    elif predictor in ('geo_static_clf', 'sc_static_clf'):
        loaded_model.compile(loss='binary_crossentropy',
                             optimizer=Adam(lr=0.00005, beta_1=0.95, decay=0.0001, amsgrad=True),
                             metrics=['accuracy'])
    else:
        loaded_model.compile(loss="mse", optimizer='adam',
                             metrics=['mse', 'mae', 'mape'])
    return loaded_model
448
+
449
+
450
def tf_ANN_excitation_prepare(predictor: str, descriptors: List[float], descriptor_names: List[str]) -> np.ndarray:
    """Reorder the supplied descriptors to match what the target ANN expects.

    The variable names of the model are loaded with ``load_ANN_variables`` and
    each one is looked up in ``descriptor_names``; the matching values from
    ``descriptors`` are returned as an array of shape (1, n_variables).
    This function does NOT perform standardization.

    Raises
    ------
    ValueError
        If a variable required by the model is absent from ``descriptor_names``.
    """
    # get variable names
    target_names = load_ANN_variables(predictor)
    if len(target_names) > len(descriptors):
        print('Error: preparing features for ' + str(predictor) + ', recieved '
              + str(len(descriptors)) + ' descriptors')
        print(('model requires ' + str(len(target_names)) + ' descriptors, attempting match'))
    excitation = []
    for var_name in target_names:
        try:
            excitation.append(descriptors[descriptor_names.index(var_name)])
        except ValueError as err:
            print(('looking for ' + str(var_name)))
            print(('Error! variable ' + str(var_name) + ' not found!'))
            # Fail here with a clear message; previously the loop stopped and the
            # reshape below raised an unrelated-looking ValueError about array sizes.
            raise ValueError('Descriptor ' + str(var_name) + ' required by predictor '
                             + str(predictor) + ' was not supplied') from err
    output = np.array(excitation)
    output = np.reshape(output, (1, len(target_names)))
    return output
472
+
473
+
474
def ANN_supervisor(predictor: str,
                   descriptors: List[float],
                   descriptor_names: List[str],
                   debug: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    """Run the ANN for ``predictor`` on one set of descriptors.

    The descriptors are ordered for the model, normalized with the stored
    training statistics, passed through the network and the output is
    rescaled. Returns ``(result, latent_space_vector)``: for regression models
    the second item is the output of the second-to-last layer, for classifier
    ("clf") models it is whatever ``find_clf_lse`` returns.
    """
    if debug:
        print(('ANN activated for ' + str(predictor)))

    # form the excitation in the correct order/variables
    excitation = tf_ANN_excitation_prepare(predictor, descriptors, descriptor_names)
    if debug:
        print(('excitation is ' + str(excitation.shape)))
        print('fetching non-dimensionalization data... ')
    train_mean_x, train_mean_y, train_var_x, train_var_y = load_normalization_data(predictor)
    if debug:
        print('rescaling input excitation...')
    excitation = data_normalize(excitation, train_mean_x, train_var_x, debug=debug)

    # fetch ANN
    loaded_model = load_keras_ann(predictor)
    raw_prediction = loaded_model.predict(excitation, verbose=0)
    result = data_rescale(raw_prediction, train_mean_y, train_var_y, debug=debug)

    if "clf" in predictor:
        latent_space_vector = find_clf_lse(predictor, excitation, loaded_model=loaded_model, ensemble=False,
                                           modelname=None, debug=debug)
        return result, latent_space_vector

    n_layers = len(loaded_model.layers)
    if debug:
        print(('LOADED MODEL HAS ' + str(
            n_layers) + ' layers, so latent space measure will be from first ' + str(
            n_layers - 1) + ' layers'))
    if version.parse(tf.__version__) < version.parse('2.0.0'):
        get_outputs = K.function([loaded_model.layers[0].input, K.learning_phase()],
                                 [loaded_model.layers[n_layers - 2].output])
        latent_space_vector = get_outputs([excitation, 0])  # Using test phase.
    else:
        latent_space_vector = get_layer_outputs(loaded_model, n_layers - 2,
                                                excitation, training_flag=False)
    if debug:
        print('calling ANN model...')
    return result, latent_space_vector
514
+
515
+
516
def find_true_min_eu_dist(predictor: str,
                          descriptors: List[float],
                          descriptor_names: List[str],
                          debug: bool = False) -> float:
    """Return the scaled Euclidean distance to the nearest training vector in descriptor space.

    Both the query and every training row are normalized with the stored
    training statistics before distances are measured (changed by Aditya on
    08/13/2018; the nearest neighbor used to be searched in unnormalized
    space, which gave bad candidate structures). For predictors that have a
    ``*_train_names`` file, the name entry at the index of the closest row is
    printed.
    """
    train_mean_x, train_mean_y, train_var_x, train_var_y = load_normalization_data(predictor)

    # form the excitation in the correct order/variables, as floats rather than strings
    excitation = tf_ANN_excitation_prepare(predictor, descriptors, descriptor_names)
    excitation = excitation.astype(float)
    scaled_excitation = data_normalize(excitation, train_mean_x, train_var_x, debug=debug)
    query = np.array(scaled_excitation)

    # getting train matrix info
    train_mat = np.array(load_training_data(predictor), dtype='float64')

    min_dist = np.inf
    min_ind = 0
    for row_index, raw_row in enumerate(train_mat):
        # Normalize the row before finding the distance
        scaled_row = np.squeeze(data_normalize(raw_row, train_mean_x.T, train_var_x.T, debug=debug))
        this_dist = float(np.linalg.norm(np.subtract(scaled_row, query)))  # Cast to float for mypy typing
        if this_dist < min_dist:
            min_dist, min_ind = this_dist, row_index

    if debug:
        print(('min dist EU is ' + str(min_dist)))
    folder_dict = {'homo': 'homolumo', 'gap': 'homolumo',
                   'oxo': 'oxocatalysis', 'hat': 'oxocatalysis',
                   'oxo20': 'oxoandhomo', 'homo_empty': 'oxoandhomo'}
    if predictor in folder_dict:
        key = f'{folder_dict[predictor]}/{predictor}_train_names'
        path_to_file = resource_files("molSimplify.tf_nn").joinpath(f'{key}.csv')
        with open(path_to_file, "r") as f:
            name_rows = list(csv.reader(f))
        closest_name = str(name_rows[min_ind]).strip('[]')
        print(('Closest Euc Dist Structure: ' + closest_name + ' for predictor ' + str(
            predictor)))
    return (min_dist)
572
+
573
+
574
def find_ANN_10_NN_normalized_latent_dist(predictor, latent_space_vector, debug=False):
    """Average latent-space distance to the 10 nearest training points, normalized.

    The training data are normalized and mapped to the output of the model's
    second-to-last layer. The mean of the 10 smallest distances between
    ``latent_space_vector`` and the training latent vectors is divided by the
    mean of the 10 smallest entries of every row of the train-train distance
    matrix (each row includes the zero distance of a point to itself).

    Returns
    -------
    tuple
        ``(norm_avg_10_NN_dist, avg_10_NN_dist, avg_traintrain)``.
    """
    from scipy.spatial import distance_matrix

    train_mean_x, train_mean_y, train_var_x, train_var_y = load_normalization_data(predictor)

    # getting train matrix info
    mat = load_training_data(predictor)
    train_mat = np.array(mat, dtype='float64')

    # Load the model once; it used to be loaded a second time further down.
    loaded_model = load_keras_ann(predictor)
    latent_layer = len(loaded_model.layers) - 2
    if debug:
        print('measuring latent distances:')
        print(('loaded model has ' + str(
            len(loaded_model.layers)) + ' layers, so latent space measure will be from first ' + str(
            len(loaded_model.layers) - 1) + ' layers'))

    # Rows are normalized one at a time against the transposed statistics.
    norm_train_mat = np.squeeze(np.array(
        [data_normalize(np.array(row), train_mean_x.T, train_var_x.T) for row in train_mat]))

    if not version.parse(tf.__version__) >= version.parse('2.0.0'):
        get_outputs = K.function([loaded_model.layers[0].input, K.learning_phase()],
                                 [loaded_model.layers[latent_layer].output])
        latent_space_train = get_outputs([norm_train_mat, 0])
    else:
        latent_space_train = get_layer_outputs(loaded_model, latent_layer,
                                               norm_train_mat, training_flag=False)
    latent_space_train = np.squeeze(np.array(latent_space_train))

    dist_array = np.linalg.norm(np.subtract(latent_space_train, np.squeeze(latent_space_vector)), axis=1)

    train_dist_array = distance_matrix(latent_space_train, latent_space_train)
    nearest_10_NN_train = np.array([np.sort(np.squeeze(train_row))[0:10] for train_row in train_dist_array])
    avg_traintrain = np.mean(nearest_10_NN_train)

    sorted_dist = np.sort(np.squeeze(dist_array))
    avg_10_NN_dist = np.mean(sorted_dist[0:10])
    norm_avg_10_NN_dist = avg_10_NN_dist/avg_traintrain
    return norm_avg_10_NN_dist, avg_10_NN_dist, avg_traintrain
619
+
620
+
621
def find_ANN_latent_dist(predictor, latent_space_vector, debug=False):
    """Return the smallest latent-space distance between a query and the training data.

    Every training row is normalized, mapped to the output of the model's
    second-to-last layer and compared with ``latent_space_vector``. For
    predictors that have a ``*_train_names`` file, the name entry at the index
    of the closest row is printed.
    """
    train_mean_x, train_mean_y, train_var_x, train_var_y = load_normalization_data(predictor)

    # getting train matrix info
    train_mat = np.array(load_training_data(predictor), dtype='float64')

    loaded_model = load_keras_ann(predictor)
    n_layers = len(loaded_model.layers)
    if debug:
        print('measuring latent distances:')
        print(('loaded model has ' + str(
            n_layers) + ' layers, so latent space measure will be from first ' + str(
            n_layers - 1) + ' layers'))

    legacy_tf = version.parse(tf.__version__) < version.parse('2.0.0')
    if legacy_tf:
        get_outputs = K.function([loaded_model.layers[0].input, K.learning_phase()],
                                 [loaded_model.layers[n_layers - 2].output])

    min_dist = 100000000
    min_ind = 0
    for row_index, raw_row in enumerate(train_mat):
        # Normalize the row before finding the distance
        scaled_row = np.squeeze(data_normalize(raw_row, train_mean_x.T, train_var_x.T, debug=debug))
        model_input = np.array([scaled_row])
        if legacy_tf:
            latent_train_row = get_outputs([model_input, 0])
        else:
            latent_train_row = get_layer_outputs(loaded_model, n_layers - 2,
                                                 [model_input], training_flag=False)
        this_dist = np.linalg.norm(np.subtract(np.squeeze(latent_train_row), np.squeeze(latent_space_vector)))
        if this_dist < min_dist:
            min_dist, min_ind = this_dist, row_index

    if debug:
        print(('min dist is ' + str(min_dist) + ' at ' + str(min_ind)))
    folder_dict = {'homo': 'homolumo', 'gap': 'homolumo',
                   'oxo': 'oxocatalysis', 'hat': 'oxocatalysis',
                   'oxo20': 'oxoandhomo', 'homo_empty': 'oxoandhomo'}
    if predictor in folder_dict:
        key = f'{folder_dict[predictor]}/{predictor}_train_names'
        path_to_file = resource_files("molSimplify.tf_nn").joinpath(f'{key}.csv')
        with open(path_to_file, "r") as f:
            name_rows = list(csv.reader(f))
        print(('Closest Latent Dist Structure: ' + str(name_rows[min_ind]) + ' for predictor ' + str(predictor)))
    return (min_dist)
669
+
670
+
671
def find_clf_lse(predictor: str,
                 excitation,
                 loaded_model,
                 ensemble: bool = False,
                 modelname: Optional[str] = None,
                 debug: bool = False) -> np.ndarray:
    """Compute the latent space entropy (LSE) for a static classifier.

    The training features of ``predictor`` are loaded and normalized, both the
    training set and ``excitation`` are pushed through the model up to layer
    index ``-4`` (via ``get_layer_outputs``), and the entropy is computed from
    the neighbor distances/labels returned by ``dist_neighbor``.

    Parameters
    ----------
        predictor : str
            Either "geo_static_clf" or "sc_static_clf". Any other value prints
            a message and yields an all-zero result.
        excitation :
            Input fed to the model to obtain the test latent vectors.
            NOTE(review): presumably already normalized by the caller - confirm.
        loaded_model :
            Model used when ``ensemble`` is False. It is ignored in ensemble
            mode, where each ensemble member is loaded from disk instead.
        ensemble : bool, optional
            If True, average the entropies over the 10 ``*.h5`` models found
            in the ``ensemble_<modelname>`` directory. Default is False.
        modelname : str, optional
            Name of the ensemble directory suffix. Defaults to "spectro".
        debug : bool, optional
            Forwarded to ``data_normalize``. Default is False.

    Returns
    -------
        lse : np.ndarray
            The entropy values, or ``np.zeros_like(excitation)`` when the
            predictor is unknown or the ensemble does not contain exactly
            10 models.

    """
    if modelname is None:
        modelname = "spectro"
    # Reference latent distances for the single-model case, one per classifier.
    if predictor == "geo_static_clf":
        avrg_latent_dist = 33.21736244173539
    elif predictor == "sc_static_clf":
        avrg_latent_dist = 38.276809428032685
    else:
        print("Unknown model type")
        return np.zeros_like(excitation)

    key = get_key(predictor, suffix='')
    # Only the feature normalization statistics are needed here.
    train_mean_x, _, train_var_x, _ = load_normalization_data(predictor)
    labels_train = np.array(load_training_labels(predictor), dtype='int')
    fmat_train = np.array(load_training_data(predictor), dtype='float64')
    fmat_train = np.array(data_normalize(fmat_train, train_mean_x, train_var_x, debug=debug))

    if not ensemble:
        train_latent = get_layer_outputs(loaded_model, -4, fmat_train, training_flag=False)
        test_latent = get_layer_outputs(loaded_model, -4, excitation, training_flag=False)
        _, nn_dists, nn_labels = dist_neighbor(test_latent, train_latent, labels_train,
                                               l=5, dist_ref=avrg_latent_dist)
        return get_entropy(nn_dists, nn_labels)

    print("Using ensemble averaged LSE.")
    n_models = 10
    # ``joinpath`` yields a path object, which cannot be concatenated with a
    # str using ``+`` (TypeError). Build the directory and glob pattern with
    # os.path.join on the string form instead.
    # NOTE(review): this treats the ensemble folder as a sub-directory of the
    # predictor key - confirm against the packaged tf_nn layout.
    base_path = resource_files("molSimplify.tf_nn").joinpath(key)
    ensemble_dir = os.path.join(str(base_path), 'ensemble_%s' % modelname)
    model_list = sorted(glob.glob(os.path.join(ensemble_dir, '*.h5')))
    if len(model_list) != n_models:
        print(key)
        print(ensemble_dir)
        print(model_list)
        print(("Error: LSE cannot be calculated with modelname %s--The number of models is wrong." % modelname))
        return np.zeros_like(excitation)

    fmat_train_split = np.array_split(fmat_train, n_models, axis=0)
    labels_train_split = np.array_split(labels_train, n_models, axis=0)
    entropies_list = []
    for model in model_list:
        print(model)
        member_model = load_model(model)
        # The model index is the trailing "_<n>" of the file name; use
        # basename so this also works with non-"/" path separators.
        model_idx = int(os.path.basename(model).split(".")[0].split("_")[-1])
        _fmat_train = array_stack(fmat_train_split, model_idx)
        _labels_train = array_stack(labels_train_split, model_idx)
        train_latent = get_layer_outputs(member_model, -4, _fmat_train, training_flag=False)
        test_latent = get_layer_outputs(member_model, -4, excitation, training_flag=False)
        # The reference distance is re-derived per ensemble member from the
        # train-vs-train neighbor distances.
        nn_latent_dist_train, _, __ = dist_neighbor(train_latent, train_latent, _labels_train,
                                                    l=5, dist_ref=1)
        avrg_latent_dist = np.mean(nn_latent_dist_train)
        _, nn_dists, nn_labels = dist_neighbor(test_latent, train_latent, _labels_train,
                                               l=5, dist_ref=avrg_latent_dist)
        entropies_list.append(get_entropy(nn_dists, nn_labels))
    lse = np.mean(np.array(entropies_list), axis=0)
    return lse
731
+
732
+
733
def save_model(model: tf.keras.Model, predictor: str,
               num: Optional[int] = None, suffix: Optional[str] = None):
    """Write a model's architecture (JSON) and weights (HDF5) to disk.

    Files are written into an ``ensemble_models`` directory located under the
    packaged ``molSimplify.tf_nn`` resource path for ``predictor``; the
    directory is created if it does not exist.

    Parameters
    ----------
        model : tf.keras.Model
            Model to serialize.
        predictor : str
            Predictor name; used to resolve the resource key and as the
            output file stem.
        num : int, optional
            If given, appended to the file stem as ``<predictor>_<num>``.
        suffix : str, optional
            Forwarded to ``get_key`` when resolving the resource key.

    """
    key = get_key(predictor, suffix)
    base_path = resource_files("molSimplify.tf_nn").joinpath(key)
    # ``joinpath`` returns a path object; ``base_path + 'ensemble_models'``
    # raises TypeError, so join on the string form instead.
    # NOTE(review): this places ``ensemble_models`` as a sub-directory of the
    # resolved key - confirm this matches the expected tf_nn layout.
    ensemble_dir = os.path.join(str(base_path), 'ensemble_models')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(ensemble_dir, exist_ok=True)
    if num is not None:
        name = '%s/%s_%d' % (ensemble_dir, predictor, num)
    else:
        name = '%s/%s' % (ensemble_dir, predictor)
    # serialize model to JSON
    model_json = model.to_json()
    with open("%s.json" % name, "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights("%s.h5" % name)
    print(("Saved model !%s! to disk" % os.path.basename(name)))
751
+
752
+
753
def initialize_model_weights(model: tf.keras.Model) -> tf.keras.Model:
    """Re-run the initializer of every layer attribute that exposes one.

    Each layer's instance attributes are inspected; any attribute value that
    has an ``initializer`` member gets that initializer run in the current
    Keras backend session.

    Parameters
    ----------
        model : tf.keras.Model
            Model whose layers are re-initialized.

    Returns
    -------
        model : tf.keras.Model
            The same model object that was passed in.

    """
    backend_session = K.get_session()
    for current_layer in model.layers:
        for attr_name in current_layer.__dict__:
            attr_value = getattr(current_layer, attr_name)
            # Skip anything that does not carry an initializer.
            if not hasattr(attr_value, 'initializer'):
                continue
            attr_value.initializer.run(session=backend_session)
    return model