molSimplify 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (651) hide show
  1. docs/source/conf.py +224 -0
  2. molSimplify/Classes/__init__.py +6 -0
  3. molSimplify/Classes/atom3D.py +235 -0
  4. molSimplify/Classes/dft_obs.py +130 -0
  5. molSimplify/Classes/globalvars.py +827 -0
  6. molSimplify/Classes/helpers.py +161 -0
  7. molSimplify/Classes/ligand.py +2330 -0
  8. molSimplify/Classes/mGUI.py +2493 -0
  9. molSimplify/Classes/mWidgets.py +438 -0
  10. molSimplify/Classes/miniGUI.py +41 -0
  11. molSimplify/Classes/mol2D.py +260 -0
  12. molSimplify/Classes/mol3D.py +5846 -0
  13. molSimplify/Classes/monomer3D.py +253 -0
  14. molSimplify/Classes/partialcharges.py +226 -0
  15. molSimplify/Classes/protein3D.py +1178 -0
  16. molSimplify/Classes/rundiag.py +151 -0
  17. molSimplify/Data/ML.dat +212 -0
  18. molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
  19. molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
  20. molSimplify/Data/MLS_angle_for_click.dat +8 -0
  21. molSimplify/Data/MLS_angle_for_inter.dat +23 -0
  22. molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
  23. molSimplify/Data/MLS_angle_for_intra.dat +10 -0
  24. molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
  25. molSimplify/Data/MLS_angle_for_oa.dat +18 -0
  26. molSimplify/Data/ML_FSR_for_inter.dat +112 -0
  27. molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
  28. molSimplify/Data/ML_bond_for_cat.dat +8 -0
  29. molSimplify/Data/ML_bond_for_click.dat +8 -0
  30. molSimplify/Data/ML_bond_for_inter.dat +48 -0
  31. molSimplify/Data/ML_bond_for_inter2.dat +48 -0
  32. molSimplify/Data/ML_bond_for_intra.dat +10 -0
  33. molSimplify/Data/ML_bond_for_intra2.dat +6 -0
  34. molSimplify/Data/ML_bond_for_oa.dat +18 -0
  35. molSimplify/Data/bp1.dat +21 -0
  36. molSimplify/Data/li.dat +3 -0
  37. molSimplify/Data/no.dat +2 -0
  38. molSimplify/Data/oct.dat +7 -0
  39. molSimplify/Data/pbp.dat +8 -0
  40. molSimplify/Data/spy.dat +6 -0
  41. molSimplify/Data/sqap.dat +9 -0
  42. molSimplify/Data/sqp.dat +5 -0
  43. molSimplify/Data/tbp.dat +6 -0
  44. molSimplify/Data/tdhd.dat +9 -0
  45. molSimplify/Data/thd.dat +5 -0
  46. molSimplify/Data/tpl.dat +4 -0
  47. molSimplify/Data/tpr.dat +7 -0
  48. molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
  49. molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
  50. molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
  51. molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
  52. molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
  53. molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
  54. molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
  55. molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
  56. molSimplify/Informatics/MOF/__init__.py +0 -0
  57. molSimplify/Informatics/MOF/atomic.py +267 -0
  58. molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
  59. molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
  60. molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
  61. molSimplify/Informatics/Mol2Parser.py +46 -0
  62. molSimplify/Informatics/RACassemble.py +408 -0
  63. molSimplify/Informatics/__init__.py +0 -0
  64. molSimplify/Informatics/active_learning/__init__.py +0 -0
  65. molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
  66. molSimplify/Informatics/autocorrelation.py +1930 -0
  67. molSimplify/Informatics/clean_autocorrelation.py +778 -0
  68. molSimplify/Informatics/coulomb_analyze.py +67 -0
  69. molSimplify/Informatics/decoration_manager.py +193 -0
  70. molSimplify/Informatics/geo_analyze.py +88 -0
  71. molSimplify/Informatics/geometrics.py +56 -0
  72. molSimplify/Informatics/graph_analyze.py +163 -0
  73. molSimplify/Informatics/graph_racs.py +288 -0
  74. molSimplify/Informatics/jupyter_vis.py +172 -0
  75. molSimplify/Informatics/lacRACAssemble.py +2192 -0
  76. molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
  77. molSimplify/Informatics/misc_descriptors.py +198 -0
  78. molSimplify/Informatics/organic_fingerprints.py +61 -0
  79. molSimplify/Informatics/partialcharges.py +345 -0
  80. molSimplify/Informatics/protein/activesite.py +53 -0
  81. molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
  82. molSimplify/Informatics/rac155_geo.py +48 -0
  83. molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
  84. molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
  85. molSimplify/Ligands/12crown4.mol +62 -0
  86. molSimplify/Ligands/Antipyrine.mol +58 -0
  87. molSimplify/Ligands/BPAbipy.mol +106 -0
  88. molSimplify/Ligands/Hpyrrole.mol +26 -0
  89. molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
  90. molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
  91. molSimplify/Ligands/NMe2_-1.xyz +11 -0
  92. molSimplify/Ligands/PCy3.mol +111 -0
  93. molSimplify/Ligands/PMe3.xyz +15 -0
  94. molSimplify/Ligands/PPh3.mol +76 -0
  95. molSimplify/Ligands/Propyphenazone.mol +77 -0
  96. molSimplify/Ligands/acac.mol +33 -0
  97. molSimplify/Ligands/acacen.mol +76 -0
  98. molSimplify/Ligands/acetate.smi +1 -0
  99. molSimplify/Ligands/acetate.xyz +9 -0
  100. molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
  101. molSimplify/Ligands/acetonitrile.mol +17 -0
  102. molSimplify/Ligands/alanine.mol +30 -0
  103. molSimplify/Ligands/alphabetizer.py +21 -0
  104. molSimplify/Ligands/amine.mol +11 -0
  105. molSimplify/Ligands/ammonia.mol +12 -0
  106. molSimplify/Ligands/arginine.mol +58 -0
  107. molSimplify/Ligands/asparagine.mol +38 -0
  108. molSimplify/Ligands/aspartic_acid.mol +35 -0
  109. molSimplify/Ligands/azide.mol +11 -0
  110. molSimplify/Ligands/benzene.mol +28 -0
  111. molSimplify/Ligands/benzene_pi.mol +30 -0
  112. molSimplify/Ligands/benzenedithiol.mol +30 -0
  113. molSimplify/Ligands/benzenethiol.mol +30 -0
  114. molSimplify/Ligands/benzylisocy.mol +38 -0
  115. molSimplify/Ligands/bidiazine.mol +42 -0
  116. molSimplify/Ligands/bidiazole.mol +38 -0
  117. molSimplify/Ligands/bifuran.mol +38 -0
  118. molSimplify/Ligands/bihydrodiazine.mol +58 -0
  119. molSimplify/Ligands/bihydrodiazole.mol +46 -0
  120. molSimplify/Ligands/bihydrooxazine.mol +54 -0
  121. molSimplify/Ligands/bihydrooxazole.mol +42 -0
  122. molSimplify/Ligands/bihydrothiazine.mol +54 -0
  123. molSimplify/Ligands/bihydrothiazole.mol +42 -0
  124. molSimplify/Ligands/biimidazole.mol +38 -0
  125. molSimplify/Ligands/bioxazole.mol +34 -0
  126. molSimplify/Ligands/bipy.mol +46 -0
  127. molSimplify/Ligands/bipyrazine.xyz +20 -0
  128. molSimplify/Ligands/bipyrimidine.mol +42 -0
  129. molSimplify/Ligands/bipyrrole.mol +42 -0
  130. molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
  131. molSimplify/Ligands/bithiazole.mol +34 -0
  132. molSimplify/Ligands/bromide.mol +7 -0
  133. molSimplify/Ligands/bromide.smi +1 -0
  134. molSimplify/Ligands/c2.mol +9 -0
  135. molSimplify/Ligands/caprolactone.mol +41 -0
  136. molSimplify/Ligands/carbonyl.mol +8 -0
  137. molSimplify/Ligands/carboxyl.mol +13 -0
  138. molSimplify/Ligands/cat.mol +30 -0
  139. molSimplify/Ligands/chloride.mol +7 -0
  140. molSimplify/Ligands/chloride.smi +1 -0
  141. molSimplify/Ligands/chloropyridine.mol +27 -0
  142. molSimplify/Ligands/co2.mol +10 -0
  143. molSimplify/Ligands/corrolazine.mol +72 -0
  144. molSimplify/Ligands/cs.mol +8 -0
  145. molSimplify/Ligands/cyanate.xyz +5 -0
  146. molSimplify/Ligands/cyanide.mol +9 -0
  147. molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
  148. molSimplify/Ligands/cyanopyridine.mol +29 -0
  149. molSimplify/Ligands/cyclam.mol +81 -0
  150. molSimplify/Ligands/cyclen.mol +69 -0
  151. molSimplify/Ligands/cyclopentadienyl.mol +26 -0
  152. molSimplify/Ligands/cysteine.mol +32 -0
  153. molSimplify/Ligands/diaminomethyl.mol +19 -0
  154. molSimplify/Ligands/diazine.mol +25 -0
  155. molSimplify/Ligands/diazole.mol +23 -0
  156. molSimplify/Ligands/dicyanamide.mol +15 -0
  157. molSimplify/Ligands/dihydrofuran.mol +27 -0
  158. molSimplify/Ligands/dmap.xyz +35 -0
  159. molSimplify/Ligands/dmf.mol +28 -0
  160. molSimplify/Ligands/dmi.mol +41 -0
  161. molSimplify/Ligands/dmpe.mol +52 -0
  162. molSimplify/Ligands/dpmu.mol +47 -0
  163. molSimplify/Ligands/dppe.mol +112 -0
  164. molSimplify/Ligands/edta.mol +69 -0
  165. molSimplify/Ligands/en.mol +28 -0
  166. molSimplify/Ligands/ethanethiol.mol +21 -0
  167. molSimplify/Ligands/ethanolamine.mol +26 -0
  168. molSimplify/Ligands/ethbipy.mol +70 -0
  169. molSimplify/Ligands/ethyl.mol +19 -0
  170. molSimplify/Ligands/ethylamine.mol +24 -0
  171. molSimplify/Ligands/ethylene.mol +16 -0
  172. molSimplify/Ligands/ethylesteracac.mol +57 -0
  173. molSimplify/Ligands/fluoride.mol +7 -0
  174. molSimplify/Ligands/fluoride.smi +1 -0
  175. molSimplify/Ligands/formaldehyde.mol +12 -0
  176. molSimplify/Ligands/formamidate.xyz +8 -0
  177. molSimplify/Ligands/formate.xyz +6 -0
  178. molSimplify/Ligands/furan.mol +23 -0
  179. molSimplify/Ligands/glutamic_acid.mol +42 -0
  180. molSimplify/Ligands/glutamine.mol +44 -0
  181. molSimplify/Ligands/glycinate.mol +23 -0
  182. molSimplify/Ligands/glycine.mol +24 -0
  183. molSimplify/Ligands/h2s.mol +10 -0
  184. molSimplify/Ligands/helium.mol +6 -0
  185. molSimplify/Ligands/histidine.mol +45 -0
  186. molSimplify/Ligands/hmpa.mol +62 -0
  187. molSimplify/Ligands/hs-.mol +9 -0
  188. molSimplify/Ligands/hydride.mol +7 -0
  189. molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
  190. molSimplify/Ligands/hydrocyanide.mol +10 -0
  191. molSimplify/Ligands/hydrodiazine.mol +33 -0
  192. molSimplify/Ligands/hydrodiazole.mol +27 -0
  193. molSimplify/Ligands/hydrogensulfide.mol +10 -0
  194. molSimplify/Ligands/hydroisocyanide.mol +11 -0
  195. molSimplify/Ligands/hydrooxazine.mol +31 -0
  196. molSimplify/Ligands/hydrooxazole.mol +25 -0
  197. molSimplify/Ligands/hydrothiazine.mol +31 -0
  198. molSimplify/Ligands/hydrothiazole.mol +25 -0
  199. molSimplify/Ligands/hydroxyl.mol +9 -0
  200. molSimplify/Ligands/imidazole.mol +23 -0
  201. molSimplify/Ligands/imidazolidinone.mol +29 -0
  202. molSimplify/Ligands/imine.mol +13 -0
  203. molSimplify/Ligands/iminodiacetic.mol +33 -0
  204. molSimplify/Ligands/iodide.mol +7 -0
  205. molSimplify/Ligands/iodobenzene.xyz +14 -0
  206. molSimplify/Ligands/isoleucine.mol +48 -0
  207. molSimplify/Ligands/isothiocyanate.mol +11 -0
  208. molSimplify/Ligands/leucine.mol +48 -0
  209. molSimplify/Ligands/ligands.dict +257 -0
  210. molSimplify/Ligands/lysine.mol +54 -0
  211. molSimplify/Ligands/mebenzenedithiol.mol +36 -0
  212. molSimplify/Ligands/mebim_py.xyz +29 -0
  213. molSimplify/Ligands/mebim_pz.xyz +28 -0
  214. molSimplify/Ligands/mebipy.mol +58 -0
  215. molSimplify/Ligands/mecat.mol +36 -0
  216. molSimplify/Ligands/methanal.mol +11 -0
  217. molSimplify/Ligands/methanethiol.mol +15 -0
  218. molSimplify/Ligands/methanol.mol +16 -0
  219. molSimplify/Ligands/methionine.mol +44 -0
  220. molSimplify/Ligands/methyl.mol +13 -0
  221. molSimplify/Ligands/methylacetylide.xyz +8 -0
  222. molSimplify/Ligands/methylamine.mol +19 -0
  223. molSimplify/Ligands/methylazide.xyz +9 -0
  224. molSimplify/Ligands/methylisocy.mol +17 -0
  225. molSimplify/Ligands/methylpyridine.mol +33 -0
  226. molSimplify/Ligands/n2.mol +8 -0
  227. molSimplify/Ligands/n4py.xyz +51 -0
  228. molSimplify/Ligands/nch.mol +10 -0
  229. molSimplify/Ligands/nco-.mol +11 -0
  230. molSimplify/Ligands/nethanolamine.mol +26 -0
  231. molSimplify/Ligands/nitrate.mol +14 -0
  232. molSimplify/Ligands/nitrite.mol +11 -0
  233. molSimplify/Ligands/nitro.mol +11 -0
  234. molSimplify/Ligands/nitrobipy.mol +54 -0
  235. molSimplify/Ligands/nitroso.mol +8 -0
  236. molSimplify/Ligands/nme3.mol +30 -0
  237. molSimplify/Ligands/no-.mol +10 -0
  238. molSimplify/Ligands/no2-.mol +11 -0
  239. molSimplify/Ligands/noxygen.mol +8 -0
  240. molSimplify/Ligands/ns-.mol +10 -0
  241. molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
  242. molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
  243. molSimplify/Ligands/o2-.mol +9 -0
  244. molSimplify/Ligands/o2.xyz +4 -0
  245. molSimplify/Ligands/och2.mol +12 -0
  246. molSimplify/Ligands/oethanolamine.mol +26 -0
  247. molSimplify/Ligands/ome2.mol +22 -0
  248. molSimplify/Ligands/ooh.xyz +5 -0
  249. molSimplify/Ligands/oxalate.mol +17 -0
  250. molSimplify/Ligands/oxalate.smi +1 -0
  251. molSimplify/Ligands/oxygen.mol +7 -0
  252. molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
  253. molSimplify/Ligands/ph2-.mol +11 -0
  254. molSimplify/Ligands/ph3.mol +12 -0
  255. molSimplify/Ligands/phen.mol +51 -0
  256. molSimplify/Ligands/phenacac.mol +63 -0
  257. molSimplify/Ligands/phenalalanine.mol +51 -0
  258. molSimplify/Ligands/phendione.mol +51 -0
  259. molSimplify/Ligands/phenphen.mol +75 -0
  260. molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
  261. molSimplify/Ligands/phenylcyc.mol +99 -0
  262. molSimplify/Ligands/phenylenediamine.mol +37 -0
  263. molSimplify/Ligands/phenylisocy.mol +32 -0
  264. molSimplify/Ligands/phosacidbipy.mol +66 -0
  265. molSimplify/Ligands/phosphine.mol +13 -0
  266. molSimplify/Ligands/phosphorine.mol +27 -0
  267. molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
  268. molSimplify/Ligands/phthalocyanine.mol +126 -0
  269. molSimplify/Ligands/pme3o.mol +32 -0
  270. molSimplify/Ligands/porphyrin.mol +82 -0
  271. molSimplify/Ligands/pph3o.mol +77 -0
  272. molSimplify/Ligands/proline.mol +39 -0
  273. molSimplify/Ligands/propdiol.mol +21 -0
  274. molSimplify/Ligands/propylene.mol +23 -0
  275. molSimplify/Ligands/pyridine.mol +27 -0
  276. molSimplify/Ligands/pyrimidone.mol +27 -0
  277. molSimplify/Ligands/pyrrole.mol +24 -0
  278. molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
  279. molSimplify/Ligands/s2-.mol +9 -0
  280. molSimplify/Ligands/salen.mol +75 -0
  281. molSimplify/Ligands/salphen.mol +84 -0
  282. molSimplify/Ligands/serine.mol +32 -0
  283. molSimplify/Ligands/simple_ligands.dict +14 -0
  284. molSimplify/Ligands/sulfacidbipy.mol +63 -0
  285. molSimplify/Ligands/tbucat.mol +54 -0
  286. molSimplify/Ligands/tbuphisocy.mol +56 -0
  287. molSimplify/Ligands/tbutylcyclen.mol +166 -0
  288. molSimplify/Ligands/tbutylisocy.mol +35 -0
  289. molSimplify/Ligands/tbutylthiol.mol +33 -0
  290. molSimplify/Ligands/tcnoet.mol +43 -0
  291. molSimplify/Ligands/tcnoetOH.mol +45 -0
  292. molSimplify/Ligands/terpy.mol +65 -0
  293. molSimplify/Ligands/tetrahydrofuran.mol +31 -0
  294. molSimplify/Ligands/thiane.mol +37 -0
  295. molSimplify/Ligands/thiazole.mol +21 -0
  296. molSimplify/Ligands/thiocyanate.mol +11 -0
  297. molSimplify/Ligands/thiol.mol +9 -0
  298. molSimplify/Ligands/thiophene.mol +23 -0
  299. molSimplify/Ligands/thiopyridine.mol +29 -0
  300. molSimplify/Ligands/threonine.mol +38 -0
  301. molSimplify/Ligands/tpp.mol +165 -0
  302. molSimplify/Ligands/tricyanomethyl.mol +19 -0
  303. molSimplify/Ligands/trifluoromethyl.mol +13 -0
  304. molSimplify/Ligands/tryptophan.mol +60 -0
  305. molSimplify/Ligands/tyrosine.mol +53 -0
  306. molSimplify/Ligands/uthiol.mol +11 -0
  307. molSimplify/Ligands/uthiolme2.mol +23 -0
  308. molSimplify/Ligands/valine.mol +42 -0
  309. molSimplify/Ligands/water.mol +10 -0
  310. molSimplify/Ligands/x.mol +6 -0
  311. molSimplify/Scripts/__init__.py +0 -0
  312. molSimplify/Scripts/addtodb.py +308 -0
  313. molSimplify/Scripts/cellbuilder.py +1592 -0
  314. molSimplify/Scripts/cellbuilder_tools.py +701 -0
  315. molSimplify/Scripts/chains.py +342 -0
  316. molSimplify/Scripts/convert_2to3.py +23 -0
  317. molSimplify/Scripts/dbinteract.py +631 -0
  318. molSimplify/Scripts/distgeom.py +617 -0
  319. molSimplify/Scripts/findcorrelations.py +287 -0
  320. molSimplify/Scripts/generator.py +267 -0
  321. molSimplify/Scripts/geometry.py +1224 -0
  322. molSimplify/Scripts/grabguivars.py +845 -0
  323. molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
  324. molSimplify/Scripts/inparse.py +1673 -0
  325. molSimplify/Scripts/io.py +1149 -0
  326. molSimplify/Scripts/isomers.py +415 -0
  327. molSimplify/Scripts/jobgen.py +247 -0
  328. molSimplify/Scripts/krr_prep.py +1262 -0
  329. molSimplify/Scripts/molSimplify_io.py +18 -0
  330. molSimplify/Scripts/molden2psi4wfn.py +166 -0
  331. molSimplify/Scripts/namegen.py +32 -0
  332. molSimplify/Scripts/nn_prep.py +561 -0
  333. molSimplify/Scripts/oct_check_mols.py +782 -0
  334. molSimplify/Scripts/periodic_QE.py +97 -0
  335. molSimplify/Scripts/postmold.py +304 -0
  336. molSimplify/Scripts/postmwfn.py +709 -0
  337. molSimplify/Scripts/postparse.py +488 -0
  338. molSimplify/Scripts/postproc.py +139 -0
  339. molSimplify/Scripts/qcgen.py +1450 -0
  340. molSimplify/Scripts/rmsd.py +489 -0
  341. molSimplify/Scripts/rungen.py +670 -0
  342. molSimplify/Scripts/structgen.py +3040 -0
  343. molSimplify/Scripts/tf_nn_prep.py +894 -0
  344. molSimplify/Scripts/tsgen.py +295 -0
  345. molSimplify/Scripts/uq_calibration.py +69 -0
  346. molSimplify/__init__.py +0 -0
  347. molSimplify/__main__.py +197 -0
  348. molSimplify/icons/chemdb.png +0 -0
  349. molSimplify/icons/hjklogo.png +0 -0
  350. molSimplify/icons/icon.png +0 -0
  351. molSimplify/icons/logo.png +0 -0
  352. molSimplify/icons/logo_old.png +0 -0
  353. molSimplify/icons/petachem.png +0 -0
  354. molSimplify/icons/petachem2.png +0 -0
  355. molSimplify/icons/petachem_full.png +0 -0
  356. molSimplify/icons/pythonlogo.png +0 -0
  357. molSimplify/icons/sge copy.png +0 -0
  358. molSimplify/icons/sge.png +0 -0
  359. molSimplify/icons/slurm.png +0 -0
  360. molSimplify/icons/wft1.png +0 -0
  361. molSimplify/icons/wft2.png +0 -0
  362. molSimplify/icons/wft3.png +0 -0
  363. molSimplify/ml/__init__.py +0 -0
  364. molSimplify/ml/kernels.py +36 -0
  365. molSimplify/ml/layers.py +29 -0
  366. molSimplify/molscontrol/__init__.py +14 -0
  367. molSimplify/molscontrol/_version.py +521 -0
  368. molSimplify/molscontrol/clf_tools.py +144 -0
  369. molSimplify/molscontrol/data/README.md +21 -0
  370. molSimplify/molscontrol/data/look_and_say.dat +15 -0
  371. molSimplify/molscontrol/dynamic_classifier.py +514 -0
  372. molSimplify/molscontrol/io_tools.py +363 -0
  373. molSimplify/molscontrol/molscontrol.py +49 -0
  374. molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
  375. molSimplify/molscontrol/terachem/terachem_input +22 -0
  376. molSimplify/python_krr/X_train_TS.csv +535 -0
  377. molSimplify/python_krr/__init__.py +0 -0
  378. molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
  379. molSimplify/python_krr/hat2_feature_names.csv +1 -0
  380. molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
  381. molSimplify/python_krr/hat_X_mean_std.csv +6 -0
  382. molSimplify/python_krr/hat_feature_names.csv +1 -0
  383. molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
  384. molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
  385. molSimplify/python_krr/hat_y_mean_std.csv +2 -0
  386. molSimplify/python_krr/sklearn_models.py +34 -0
  387. molSimplify/python_krr/y_train_TS.csv +535 -0
  388. molSimplify/python_nn/ANN.py +198 -0
  389. molSimplify/python_nn/__init__.py +0 -0
  390. molSimplify/python_nn/clf_analysis_tool.py +125 -0
  391. molSimplify/python_nn/dictionary_toolbox.py +49 -0
  392. molSimplify/python_nn/ensemble_test.py +309 -0
  393. molSimplify/python_nn/hs_center.csv +26 -0
  394. molSimplify/python_nn/hs_scale.csv +26 -0
  395. molSimplify/python_nn/ls_center.csv +26 -0
  396. molSimplify/python_nn/ls_scale.csv +26 -0
  397. molSimplify/python_nn/ms_hs_b1.csv +50 -0
  398. molSimplify/python_nn/ms_hs_b2.csv +50 -0
  399. molSimplify/python_nn/ms_hs_b3.csv +1 -0
  400. molSimplify/python_nn/ms_hs_w1.csv +50 -0
  401. molSimplify/python_nn/ms_hs_w2.csv +50 -0
  402. molSimplify/python_nn/ms_hs_w3.csv +1 -0
  403. molSimplify/python_nn/ms_ls_b1.csv +50 -0
  404. molSimplify/python_nn/ms_ls_b2.csv +50 -0
  405. molSimplify/python_nn/ms_ls_b3.csv +1 -0
  406. molSimplify/python_nn/ms_ls_w1.csv +50 -0
  407. molSimplify/python_nn/ms_ls_w2.csv +50 -0
  408. molSimplify/python_nn/ms_ls_w3.csv +1 -0
  409. molSimplify/python_nn/ms_slope_b1.csv +50 -0
  410. molSimplify/python_nn/ms_slope_b2.csv +50 -0
  411. molSimplify/python_nn/ms_slope_b3.csv +1 -0
  412. molSimplify/python_nn/ms_slope_w1.csv +50 -0
  413. molSimplify/python_nn/ms_slope_w2.csv +50 -0
  414. molSimplify/python_nn/ms_slope_w3.csv +1 -0
  415. molSimplify/python_nn/ms_split_b1.csv +50 -0
  416. molSimplify/python_nn/ms_split_b2.csv +50 -0
  417. molSimplify/python_nn/ms_split_b3.csv +1 -0
  418. molSimplify/python_nn/ms_split_w1.csv +50 -0
  419. molSimplify/python_nn/ms_split_w2.csv +50 -0
  420. molSimplify/python_nn/ms_split_w3.csv +1 -0
  421. molSimplify/python_nn/slope_center.csv +25 -0
  422. molSimplify/python_nn/slope_scale.csv +25 -0
  423. molSimplify/python_nn/split_center.csv +26 -0
  424. molSimplify/python_nn/split_scale.csv +26 -0
  425. molSimplify/python_nn/tf_ANN.py +762 -0
  426. molSimplify/python_nn/train_data.csv +1211 -0
  427. molSimplify/tf_nn/__init__.py +0 -0
  428. molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
  429. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
  430. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
  431. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
  432. molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
  433. molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
  434. molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
  435. molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
  436. molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
  437. molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
  438. molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
  439. molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
  440. molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
  441. molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
  442. molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
  443. molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
  444. molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
  445. molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
  446. molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
  447. molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
  448. molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
  449. molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
  450. molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
  451. molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
  452. molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
  453. molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
  454. molSimplify/tf_nn/homolumo/gap_model.json +1 -0
  455. molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
  456. molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
  457. molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
  458. molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
  459. molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
  460. molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
  461. molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
  462. molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
  463. molSimplify/tf_nn/homolumo/homo_model.json +126 -0
  464. molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
  465. molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
  466. molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
  467. molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
  468. molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
  469. molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
  470. molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
  471. molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
  472. molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
  473. molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
  474. molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
  475. molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
  476. molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
  477. molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
  478. molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
  479. molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
  480. molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
  481. molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
  482. molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
  483. molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
  484. molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
  485. molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
  486. molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
  487. molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
  488. molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
  489. molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
  490. molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
  491. molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
  492. molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
  493. molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
  494. molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
  495. molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
  496. molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
  497. molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
  498. molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
  499. molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
  500. molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
  501. molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
  502. molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
  503. molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
  504. molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
  505. molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
  506. molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
  507. molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
  508. molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
  509. molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
  510. molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
  511. molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
  512. molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
  513. molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
  514. molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
  515. molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
  516. molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
  517. molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
  518. molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
  519. molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
  520. molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
  521. molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
  522. molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
  523. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
  524. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
  525. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
  526. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
  527. molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
  528. molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
  529. molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
  530. molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
  531. molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
  532. molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
  533. molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
  534. molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
  535. molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
  536. molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
  537. molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
  538. molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
  539. molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
  540. molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
  541. molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
  542. molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
  543. molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
  544. molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
  545. molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
  546. molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
  547. molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
  548. molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
  549. molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
  550. molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
  551. molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
  552. molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
  553. molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
  554. molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
  555. molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
  556. molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
  557. molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
  558. molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
  559. molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
  560. molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
  561. molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
  562. molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
  563. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
  564. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
  565. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
  566. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
  567. molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
  568. molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
  569. molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
  570. molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
  571. molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
  572. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
  573. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
  574. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
  575. molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
  576. molSimplify/tf_nn/split/split_model.h5 +0 -0
  577. molSimplify/tf_nn/split/split_model.json +1 -0
  578. molSimplify/tf_nn/split/split_vars.csv +155 -0
  579. molSimplify/tf_nn/split/split_x.csv +1902 -0
  580. molSimplify/tf_nn/split/split_y.csv +1902 -0
  581. molSimplify/tf_nn/split/train_names.csv +1901 -0
  582. molSimplify/utils/__init__.py +0 -0
  583. molSimplify/utils/decorators.py +16 -0
  584. molSimplify/utils/metaclasses.py +12 -0
  585. molSimplify/utils/tensorflow.py +23 -0
  586. molSimplify/utils/timer.py +16 -0
  587. molSimplify-1.7.4.dist-info/LICENSE +674 -0
  588. molSimplify-1.7.4.dist-info/METADATA +821 -0
  589. molSimplify-1.7.4.dist-info/RECORD +651 -0
  590. molSimplify-1.7.4.dist-info/WHEEL +5 -0
  591. molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
  592. molSimplify-1.7.4.dist-info/top_level.txt +4 -0
  593. tests/generateTests.py +122 -0
  594. tests/helperFuncs.py +658 -0
  595. tests/informatics/test_MOF_descriptors.py +128 -0
  596. tests/informatics/test_active_learning.py +113 -0
  597. tests/informatics/test_coulomb_analyze.py +24 -0
  598. tests/informatics/test_graph_racs.py +193 -0
  599. tests/ml/test_kernels.py +20 -0
  600. tests/ml/test_layers.py +47 -0
  601. tests/runtest.py +10 -0
  602. tests/test_Mol2D.py +128 -0
  603. tests/test_basic_imports.py +62 -0
  604. tests/test_bidentate.py +25 -0
  605. tests/test_cli.py +20 -0
  606. tests/test_distgeom.py +106 -0
  607. tests/test_example_1.py +29 -0
  608. tests/test_example_3.py +31 -0
  609. tests/test_example_5.py +43 -0
  610. tests/test_example_7.py +28 -0
  611. tests/test_example_8.py +15 -0
  612. tests/test_example_tbp.py +15 -0
  613. tests/test_ff_xtb.py +111 -0
  614. tests/test_geocheck_oct.py +26 -0
  615. tests/test_geocheck_one_empty.py +15 -0
  616. tests/test_geometry.py +44 -0
  617. tests/test_inparse.py +76 -0
  618. tests/test_io.py +84 -0
  619. tests/test_jobgen.py +84 -0
  620. tests/test_joption_pythonic.py +27 -0
  621. tests/test_ligand_assign.py +58 -0
  622. tests/test_ligand_assign_consistent.py +60 -0
  623. tests/test_ligand_class.py +26 -0
  624. tests/test_ligand_from_mol_file.py +35 -0
  625. tests/test_ligands.py +86 -0
  626. tests/test_mol3D.py +337 -0
  627. tests/test_molcas_caspt2.py +15 -0
  628. tests/test_molcas_casscf.py +15 -0
  629. tests/test_old_ANNs.py +68 -0
  630. tests/test_orca_ccsdt.py +15 -0
  631. tests/test_orca_dft.py +15 -0
  632. tests/test_qcgen.py +50 -0
  633. tests/test_racs.py +124 -0
  634. tests/test_rmsd.py +68 -0
  635. tests/test_structgen_functions.py +198 -0
  636. tests/test_tetrahedral.py +29 -0
  637. tests/test_tutorial_10_part_one.py +16 -0
  638. tests/test_tutorial_10_part_two.py +15 -0
  639. tests/test_tutorial_2.py +11 -0
  640. tests/test_tutorial_3.py +15 -0
  641. tests/test_tutorial_4.py +57 -0
  642. tests/test_tutorial_6.py +10 -0
  643. tests/test_tutorial_8.py +29 -0
  644. tests/test_tutorial_9_part_one.py +15 -0
  645. tests/test_tutorial_9_part_two.py +15 -0
  646. tests/test_tutorial_qm9_part_one.py +6 -0
  647. tests/testresources/refs/racs/generate_references.py +85 -0
  648. workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
  649. workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
  650. workflows/NandyJACSAu2022/fragment_classes.py +586 -0
  651. workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
@@ -0,0 +1,287 @@
1
+ # @file findcorrelations.py
2
+ # Automated correlation analysis module
3
+ #
4
+ # Written by JP Janet for HJK Group
5
+ #
6
+ # Dpt of Chemical Engineering, MIT
7
+
8
+ import os
9
+ import sys
10
+ import numpy as np
11
+ from molSimplify.Classes.dft_obs import (dft_observation)
12
+ from sklearn import linear_model, preprocessing, metrics, feature_selection, model_selection
13
+
14
+ # def test_skl():
15
+ # valid = 'ok'
16
+ # try:
17
+ # from sklearn import linear_model, preprocessing, metrics, feature_selection, model_selection
18
+ # except:
19
+ # valid = False
20
+ # return valid
21
+
22
+
23
def analysis_supervisor(args, rootdir):
    """Validate the correlation-analysis arguments and start the analysis.

    Parameters
    ----------
    args : object
        Parsed arguments. The attributes read here are ``correlate`` (path
        of the correlation input file), ``lig_only``, ``simple`` and
        ``max_descriptors``.
    rootdir : str
        Passed unchanged to ``correlation_supervisor``.

    Notes
    -----
    Calls ``sys.exit()`` when no correlation path was given or when the
    given path does not exist.
    """
    status = True
    if not args.correlate:
        print("Error, correlation path not given")
        status = False
    else:
        print(('looking for file at '+str(args.correlate)))
        # Only touch the filesystem once a path is known to be present:
        # os.path.exists(None) raises TypeError instead of returning False.
        if not os.path.exists(args.correlate):
            print(("Error, correlation file not found at" + str(args.correlate)))
            status = False
    if not status:
        print('correlation cannot begin! Exiting...')
        sys.exit()
    if args.lig_only:
        print('using ligand-only descriptors (assuming all metals are the same)')
    if args.simple:
        print('using simple autocorrelation descriptors only')
    if args.max_descriptors:
        print(('using a max of '+str(args.max_descriptors)+' only'))
    correlation_supervisor(
        args.correlate, rootdir, args.simple, args.lig_only, args.max_descriptors)
52
+
53
+
54
def accquire_file(path):
    """Read the correlation input file and build one observation per data row.

    The file is comma separated. The first line is a header. Every later,
    non-blank line is ``name,y,folder[,extra columns]``; the geometry for a
    row is looked up at ``/<folder>/<name>.xyz``.

    Parameters
    ----------
    path : str
        Path of the input file. When it does not exist nothing is read and
        two empty dicts are returned.

    Returns
    -------
    file_dict : dict
        Observations whose geometry was found and reported healthy, keyed by
        a 1-based running count of successful imports.
    fail_dict : dict
        Observations whose geometry was missing or unhealthy, keyed by the
        0-based index of the offending line in the file.
    """
    # set display options
    np.set_printoptions(precision=3)
    file_dict = dict()
    fail_dict = dict()
    counter = 0  # number of added paths
    # Stays empty when the header carries no custom descriptor columns.
    custom_names = []
    if os.path.exists(path):
        print('found file, opening...')
        with open(path, 'r') as f:
            for i, lines in enumerate(f):
                print(('read line: ' + str(i)))
                ll = lines.strip('\n\r').split(",")
                if i == 0:
                    # header line: only the custom descriptor names are used
                    if len(ll) > 3:
                        print('custom descriptors found!')
                        # NOTE(review): the test is len(ll) > 3 but reading
                        # starts at column index 4, so column 3 is skipped -
                        # confirm against the expected file layout.
                        custom_names = ll[4:]
                    continue
                if not lines.strip():
                    # tolerate blank (e.g. trailing) lines instead of failing
                    # on the missing columns
                    continue
                name = ll[0]
                y_value = ll[1]
                this_path = '/'+ll[2].strip('/') + '/'+name+'.xyz'
                this_obs = dft_observation(name, this_path)
                this_obs.sety(y_value)
                if not os.path.isfile(this_path):  # no geo file found
                    this_obs.comments.append(
                        ' geo could not be found: ' + str(this_path))
                    # key by line index so failures never overwrite each other
                    fail_dict[i] = this_obs
                    continue
                this_obs.obtain_mol3d()
                if not this_obs.health:  # bad geo
                    this_obs.comments.append(
                        ' geo is not healthy, culling ' + str(this_path))
                    fail_dict[i] = this_obs
                    continue
                counter += 1
                file_dict[counter] = this_obs
                if len(ll) > 3:
                    print(
                        ('custom descriptors found for job ' + str(name)))
                    # convert each column on its own; float() of a list raises
                    custom_descriptors = [float(value) for value in ll[4:]]
                    this_obs.append_descriptors(
                        custom_names, custom_descriptors, '', '')
    if counter > 0:
        print(('file import successful, ' + str(counter) + ' geos loaded'))
    len_fail = len(fail_dict)
    if len_fail > 0:
        print((str(len_fail) + ' unsuccessful imports :'))
        for keys in fail_dict:
            print(('failed at line ' + str(keys) +
                   ' for job ' + str(fail_dict[keys].name)))
    return (file_dict, fail_dict)
120
+
121
+
122
def correlation_supervisor(path, rootdir, simple=False, lig_only=False, max_descriptors=False):
    """Regress the target values on molecular descriptors and rank them.

    Loads the observations listed in ``path`` through ``accquire_file``,
    standardizes the descriptor matrix, fits a linear regression on all
    descriptors, ranks the descriptors with recursive feature elimination
    (RFE) and picks the number of descriptors with leave-one-out
    cross-validated RFE (RFECV). A summary is printed and these files are
    written, each named ``rootdir`` + file name: ``RFECV_rankings.csv``,
    ``y_data.csv``, ``y_pred_r.csv``, ``optimal_decriptor_space.csv``,
    ``full_descriptor_space.csv``, ``scaling.csv``, ``coeficients.csv`` and
    ``rfe_mse.csv``.

    Parameters
    ----------
    path : str
        Input file handed to ``accquire_file``.
    rootdir : str
        String prefix of every output file; no path separator is inserted.
    simple : bool, optional
        Forwarded to ``get_descriptor_vector``. Default is False.
    lig_only : bool, optional
        Forwarded to ``get_descriptor_vector``. Default is False.
    max_descriptors : int or bool, optional
        Requested upper bound on the number of descriptors. Default is False.

    Returns
    -------
    None
    """
    # load the files from the given input file
    file_dict, fail_dict = accquire_file(path)
    if not file_dict:
        # the matrix slicing below cannot work on an empty array
        print('no geometries were loaded, correlation analysis cannot proceed')
        return
    # loop over successful imports to get descriptors
    big_mat = list()
    col_names = list()
    for i, obs in enumerate(file_dict.values()):
        obs.get_descriptor_vector(lig_only, simple, name=False, loud=False)
        if i == 0:
            col_names = obs.descriptor_names
        # one row per observation: target value first, then the descriptors
        this_row = [float(obs.yvalue)]
        this_row.extend(obs.descriptors)
        big_mat.append(this_row)
    big_mat = np.array(big_mat)
    # standardize the descriptors
    col_array = np.array(col_names)
    print(('length of col array is ' + str(len(col_array))))
    n_tot = len(col_array)
    X = big_mat[:, 1:]
    print(('dimension of data matrix is ' + str(big_mat.shape)))
    # row count; indexing a fixed column would fail with a single descriptor
    n_obs = X.shape[0]
    Scaler = preprocessing.StandardScaler().fit(X)
    Xs = Scaler.transform(X)
    Y = big_mat[:, 0]
    # find baseline model (all descriptors)
    Reg = linear_model.LinearRegression()
    Reg.fit(Xs, Y)
    Ypred_all_all = Reg.predict(Xs)
    rs_all_all = metrics.r2_score(Y, Ypred_all_all)
    loo = model_selection.LeaveOneOut()
    mse_reduce = list()
    # stepwise reduce the feature set until only one is left, recording the
    # training MSE at every size
    for n in range(0, n_tot):
        reductor = feature_selection.RFE(Reg, n_features_to_select=n_tot-n,
                                         step=1, verbose=0)
        reductor.fit(Xs, Y)
        Ypred_all = reductor.predict(Xs)
        mse_reduce.append(metrics.mean_squared_error(Y, Ypred_all))
    # the last reductor kept a single feature, so its ranking orders them all
    reductor_features = [[col_array[i], ranks]
                         for i, ranks in enumerate(reductor.ranking_)]
    reductor_features = sorted(reductor_features, key=lambda x: x[1])
    print('****************************************')
    # select best number using cv
    selector = feature_selection.RFECV(
        Reg, step=1, cv=loo, verbose=0, scoring='neg_mean_squared_error')
    selector.fit(Xs, Y)
    # grid_scores_ was removed from scikit-learn; cv_results_ replaces it
    if hasattr(selector, 'cv_results_'):
        select_mse = selector.cv_results_['mean_test_score']
    else:
        select_mse = selector.grid_scores_
    Ypred = selector.predict(Xs)
    rs = metrics.r2_score(Y, Ypred)
    n_opt = selector.n_features_
    opt_features = col_array[selector.support_]
    ranked_features = [[col_array[i], ranks]
                       for i, ranks in enumerate(selector.ranking_)]
    ranked_features = sorted(ranked_features, key=lambda x: x[1])
    print(ranked_features)
    if max_descriptors:  # check if we need to reduce further
        print(('a max of ' + str(max_descriptors) + ' were requested'))
        n_max = int(max_descriptors)
        if n_opt > n_max:
            print(('the RFE process selected ' +
                   str(n_opt) + ' varibles as optimal'))
            print(('discarding an additional ' + str(n_opt-n_max) + ' variables'))
            # NOTE(review): the limit is only reported. Everything below still
            # uses the full RFECV selection - confirm whether the outputs
            # should be truncated to the n_max best-ranked descriptors.
    # report results to user
    print(('analzyed ' + str(n_obs) + ' molecules'))
    print(('the full-space R2 is '+str("%0.2f" %
                                       rs_all_all) + ' with ' + str(n_tot) + ' features'))
    print(('optimal number of features is ' +
           str(n_opt) + ' of total ' + str(n_tot)))
    print(('the opt R2 is '+str("%0.2f" % rs)))

    # refit on the selected descriptors only
    X_r = selector.transform(Xs)
    reg_red = linear_model.LinearRegression()
    reg_red.fit(X_r, Y)
    Ypred_r = reg_red.predict(X_r)
    coefs = reg_red.coef_
    intercept = reg_red.intercept_
    mse_all = metrics.mean_squared_error(Y, Ypred_all_all)
    mse_r = metrics.mean_squared_error(Y, Ypred_r)
    if n_opt < 30:
        print(('the optimal variables are: ' + str(opt_features)))
        print(('the coefficients are' + str(coefs)))
    else:
        print(('the (first 30) optimal variables are: ' +
               str(opt_features[0:30])))
        print(('the (first 30) coefficients are' + str(coefs[0:30])))
    print(('the intercept is ' + str("%0.2f" % intercept)))
    print(('the training MSE with the best feature set is ' + str("%0.2f" % mse_r)))
    print(('the MSE with all features is ' + str("%0.2f" % mse_all)))
    # The CV scores are ordered by number of features kept (1 .. n_tot), so
    # the full model is the last entry and the optimum sits at n_opt - 1.
    print(('by eliminating ' + str(n_tot - n_opt) + ' features,' +
           ' CV-prediction MSE decreased from ' + str("%0.0f" % abs(select_mse[-1])) + ' to ' + str("%00f" % abs(select_mse[n_opt - 1]))))
    with open(rootdir+'RFECV_rankings.csv', 'w') as f:
        f.write('RFE_rank,RFE_col,RFECV_rank,RFECV_col, \n')
        for rfe_item, rfecv_item in zip(reductor_features, ranked_features):
            f.write(str(rfe_item[0]) + ',' + str(rfe_item[1]) + ',' +
                    str(rfecv_item[0]) + ',' + str(rfecv_item[1]) + '\n')
    with open(rootdir + 'y_data.csv', 'w') as f:
        for items in Y:
            f.write(str(items) + '\n')
    with open(rootdir + 'y_pred_r.csv', 'w') as f:
        for items in Ypred_r:
            f.write(str(items) + '\n')
    with open(rootdir+'optimal_decriptor_space.csv', 'w') as f:
        for i in range(0, n_obs):
            f.write(','.join(str(X_r[i][j]) for j in range(0, n_opt)) + '\n')
    with open(rootdir+'full_descriptor_space.csv', 'w') as f:
        for names in col_names:
            f.write(names+',')
        f.write('\n')
        for i in range(0, n_obs):
            f.write(','.join(str(Xs[i][j]) for j in range(0, n_tot)) + '\n')
    with open(rootdir+'scaling.csv', 'w') as f:
        means = Scaler.mean_
        var = Scaler.var_
        f.write('name, mean,variance \n')
        for i in range(0, n_tot):
            f.write(str(col_names[i])+','+str(means[i]) + ',' +
                    str(var[i])+','+str(selector.ranking_[i])+'\n')
    with open(rootdir+'coeficients.csv', 'w') as f:
        f.write('intercept,'+str(intercept) + '\n')
        for i in range(0, n_opt):
            f.write(str(opt_features[i])+','+str(coefs[i])+'\n')
    with open(rootdir + 'rfe_mse.csv', 'w') as f:
        # NOTE(review): the values written are the training MSEs collected in
        # the RFE loop above, and the header also carries the intercept -
        # confirm the intended header.
        f.write('features removed,mean CV error,'+str(intercept) + '\n')
        for count, items in enumerate(mse_reduce):
            f.write(str(count)+','+str(items) + '\n')
275
+ # with open('y_full_all.csv','w') as f:
276
+ # for items in Ypred_all_all:
277
+ # f.write(str(items) + '\n')
278
+ # with open('rfe_r.csv','w') as f:
279
+ # for items in r_reduce:
280
+ # f.write(str(items) + '\n')
281
+
282
+ # with open('select_mse.csv','w') as f:
283
+ # for items in select_mse:
284
+ # f.write(str(items) + '\n')
285
+ # with open('errors.csv','w') as f:
286
+ # for items in errors:
287
+ # f.write(str(items) + '\n')
@@ -0,0 +1,267 @@
1
+ # @file generator.py
2
+ # Main script that coordinates all parts of the program.
3
+ #
4
+ # Written by Kulik Group
5
+ #
6
+ # Department of Chemical Engineering, MIT
7
+
8
+ import os
9
+ import sys
10
+ import glob
11
+ import argparse
12
+ import copy
13
+ from molSimplify.Classes.globalvars import (globalvars)
14
+ from molSimplify.Scripts.addtodb import addtoldb
15
+ from molSimplify.Scripts.io import loadcdxml
16
+ from molSimplify.Scripts.cellbuilder import (slab_module_supervisor)
17
+ from molSimplify.Scripts.chains import (chain_builder_supervisor)
18
+ from molSimplify.Scripts.dbinteract import (dbsearch)
19
+ from molSimplify.Scripts.findcorrelations import (analysis_supervisor)
20
+ from molSimplify.Scripts.inparse import (checkinput,
21
+ cleaninput,
22
+ parseall,
23
+ parseinputfile)
24
+ from molSimplify.Scripts.postproc import (postproc)
25
+ from molSimplify.Scripts.rungen import (constrgen,
26
+ multigenruns,
27
+ draw_supervisor)
28
+
29
+
30
def startgen_pythonic(input_dict=None,
                      argv=None,
                      flag=True,
                      gui=False,
                      write=False):
    """This is the main way to generate structures completely within Python.

    Parameters
    ----------
        input_dict : dict, optional
            Argument list in the form of a dictionary; keys and values must
            be strings. Default is
            ``{'-core': 'fe', '-lig': 'cl,cl,cl,cl,cl,cl'}``.
        argv : list, optional
            Argument list used to "fool" startgen into accepting input_dict.
            Default is ``['main.py', '-i', 'asdfasdfasdfasdf']``.
        flag : bool, optional
            Passed to startgen, which prints its welcome and closing banners
            only when this is falsy. Default is True.
        gui : bool, optional
            Flag for GUI. Default is False.
        write : bool, optional
            Flag to generate outputfile from python. Default is False.

    Returns
    -------
        strfiles : str
            Folder containing the runs.
        emsg : bool
            Flag for error. If error, returns a string with error.
        this_diag : rundiag
            Rundiag class instance that contains ANN attributes (this_diag.ANN_attributes)
            and a mol3D class instance (this_diag.mol).

        The tuple is returned only when ``write`` is False; with ``write``
        set to True the function returns None.

    """
    # Build the defaults per call: a shared default list would end up bound
    # to sys.argv inside startgen and could be modified through it.
    if input_dict is None:
        input_dict = {'-core': 'fe', '-lig': 'cl,cl,cl,cl,cl,cl'}
    if argv is None:
        argv = ['main.py', '-i', 'asdfasdfasdfasdf']
    # one "key value" line per option, as in an input file
    inputfile_str = '\n'.join([k + ' ' + v for k, v in input_dict.items()])
    if write:
        startgen(argv, flag, gui, inputfile_str, write_files=write)
    else:
        strfiles, emsg, this_diag = startgen(argv, flag, gui, inputfile_str, write_files=write)
        return (strfiles, emsg, this_diag)
68
+
69
+
70
+ # Coordinates subroutines
71
+ # @param argv Argument list
72
+ # @param flag Flag for printing information
73
+ # @param gui Flag for GUI
74
+ # @return Error messages
75
def startgen(argv, flag, gui, inputfile_str=None, write_files=True):
    """Coordinates subroutines.

    Parses the arguments, validates them for generation-type runs, prepares
    the run directory and dispatches to the requested mode: post-processing,
    database search, random generation, draw mode, slab building, chain
    building, correlation analysis, ligand database addition or (default)
    structure / transition state generation.

    Parameters
    ----------
        argv : list
            Argument list. It is assigned to ``sys.argv`` before parsing.
        flag : bool
            The welcome and closing banners are printed only when this is
            falsy.
        gui : bool
            Flag for GUI.
        inputfile_str : str, optional
            Optional input passed in as a string. Default is None.
        write_files : bool, optional
            Flag for whether or not files should be written. Should set to false for pythonic generation.

    Returns
    -------
        emsg : bool or str
            False when no error was reported, otherwise the value reported by
            the failing step.
            NOTE(review): startgen_pythonic unpacks three values from this
            return when write_files is False, so multigenruns presumably
            returns a tuple in that mode - verify against multigenruns.

    """
    emsg = False
    # ########### GLOBALS DEFINITION ############
    globs = globalvars()
    rundir = globs.rundir
    PROGRAM = globs.PROGRAM
    # ##### END GLOBALS DEFINITION ##############
    # print welcome message
    ss = "\n************************************************************"
    ss += "\n******** Welcome to "+PROGRAM+"! Let's get started. ********\n"
    ss += "************************************************************\n\n"
    if not flag:
        print(ss)
    sys.argv = argv
    parser = argparse.ArgumentParser()
    args = parseall(parser)
    # check if input file exists; the file is only needed (and only globbed)
    # when no input string was supplied, and a missing -i yields the error
    # message below instead of an exception from glob
    if not inputfile_str and not (args.i and glob.glob(args.i)):
        emsg = 'Input file '+str(args.i)+' does not exist. Please specify a valid input file.\n'
        print(emsg)
        return emsg
    args.gui = gui  # add gui flag
    # parse input file
    if args.i or inputfile_str:
        parseinputfile(args, inputfile_str=inputfile_str)
    if args.cdxml:
        print('converting cdxml file into xyz')
        cdxml = args.cdxml[0]
        fname, msg = loadcdxml(cdxml)
        print(msg)
        if 'two' in msg:
            core = fname + '_cat.xyz'
            sub = fname + '_sub.xyz'
            args.core = [core]
            args.substrate = [sub]
            args.tsgen = True
    if args.custom_data_dir is not None:
        globs.custom_path = args.custom_data_dir

    # check input arguments (generation-type runs only)
    if (not args.postp and not args.dbsearch and not args.dbfinger
            and not (args.slab_gen or args.place_on_slab)
            and not (args.chain) and not (args.correlate)):
        print('Checking input...')
        if args.tsgen:
            emsg = checkinput(args, calctype="tsgen")
        elif args.ligadd:
            emsg = checkinput(args, calctype="dbadd")
        else:
            emsg = checkinput(args)
        # check before cleaning input arguments and clean only if checked
        cleaninput(args)
    args.gui = False  # deepcopy will give error
    if emsg:
        return emsg
    # check for jobs directory
    rundir = args.rundir+'/' if (args.rundir) else rundir
    if write_files:
        # also creates missing parent directories and tolerates a directory
        # that already exists
        os.makedirs(rundir, exist_ok=True)
    # ################## START MAIN ####################
    args0 = copy.deepcopy(args)  # save initial arguments
    # add gui flag
    args.gui = gui
    # postprocessing run?
    if (args.postp):
        postproc(rundir, args, globs)
    # database search?
    elif (args.dbsearch or args.dbfinger):
        emsg = dbsearch(rundir, args, globs)
        if emsg:
            return emsg
        print('Successful database search!\n')
    # random generation?
    elif (args.rgen):  # check if random generation was requested
        if args.charge:
            args.charge = args.charge[0]
        if args.spin:
            args.spin = args.spin[0]
        corests = args.core
        for cc in corests:
            # every core starts again from the saved initial arguments
            args = copy.deepcopy(args0)
            # add gui flag
            args.gui = gui
            args.core = cc
            if (args.lig or args.coord or args.lignum or args.ligocc):  # constraints given?
                args, emsg = constrgen(rundir, args)
                if emsg:
                    return emsg
            else:
                emsg = 'For random generation specify at least a ligand, coordination or ligand types.\n'
                print(emsg)
                return emsg
    elif args.drawmode:
        draw_supervisor(args, rundir)
    # slab/place on slab?
    elif (args.slab_gen or args.place_on_slab):
        emsg = slab_module_supervisor(args, rundir)
    # chain builder
    elif (args.chain):
        print('chain on')
        emsg = chain_builder_supervisor(args, rundir)
    # correlation analysis
    elif (args.correlate):
        print('analysis is looking for correlations')
        analysis_supervisor(args, rundir)
    # add ligand to list
    elif (args.ligadd):
        print(('adding ' + str(args.ligadd) + ' to ligand database with name ' +
               args.ligname + ' and connection atom(s) ' + str(args.ligcon)))
        addtoldb(smimol=args.ligadd, sminame=args.ligname, smident=len(args.ligcon),
                 smicat=str(args.ligcon).strip('[]'), smigrps="custom", smictg="custom", ffopt=args.ligffopt)
    # normal structure generation or transition state building
    else:
        args = copy.deepcopy(args0)
        # add gui flag
        args.gui = gui
        corests = args.core
        if args.tsgen:  # goes through multigenruns for maximum interoperability
            print('building a transition state')
        else:
            print('building an equilibrium complex')
        for cc in corests:
            args.core = cc
            emsg = multigenruns(rundir, args, write_files=write_files)
            # any truthy result ends the run and is handed back to the caller
            if emsg:
                print(emsg)
                return emsg
    ss = "\n**************************************************************"
    ss += "\n***** Thank you for using "+PROGRAM+". Have a nice day! ******\n"
    ss += "**************************************************************"
    ss += globs.about
    if not flag:
        print(ss)
    return emsg