molSimplify 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (651) hide show
  1. docs/source/conf.py +224 -0
  2. molSimplify/Classes/__init__.py +6 -0
  3. molSimplify/Classes/atom3D.py +235 -0
  4. molSimplify/Classes/dft_obs.py +130 -0
  5. molSimplify/Classes/globalvars.py +827 -0
  6. molSimplify/Classes/helpers.py +161 -0
  7. molSimplify/Classes/ligand.py +2330 -0
  8. molSimplify/Classes/mGUI.py +2493 -0
  9. molSimplify/Classes/mWidgets.py +438 -0
  10. molSimplify/Classes/miniGUI.py +41 -0
  11. molSimplify/Classes/mol2D.py +260 -0
  12. molSimplify/Classes/mol3D.py +5846 -0
  13. molSimplify/Classes/monomer3D.py +253 -0
  14. molSimplify/Classes/partialcharges.py +226 -0
  15. molSimplify/Classes/protein3D.py +1178 -0
  16. molSimplify/Classes/rundiag.py +151 -0
  17. molSimplify/Data/ML.dat +212 -0
  18. molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
  19. molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
  20. molSimplify/Data/MLS_angle_for_click.dat +8 -0
  21. molSimplify/Data/MLS_angle_for_inter.dat +23 -0
  22. molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
  23. molSimplify/Data/MLS_angle_for_intra.dat +10 -0
  24. molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
  25. molSimplify/Data/MLS_angle_for_oa.dat +18 -0
  26. molSimplify/Data/ML_FSR_for_inter.dat +112 -0
  27. molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
  28. molSimplify/Data/ML_bond_for_cat.dat +8 -0
  29. molSimplify/Data/ML_bond_for_click.dat +8 -0
  30. molSimplify/Data/ML_bond_for_inter.dat +48 -0
  31. molSimplify/Data/ML_bond_for_inter2.dat +48 -0
  32. molSimplify/Data/ML_bond_for_intra.dat +10 -0
  33. molSimplify/Data/ML_bond_for_intra2.dat +6 -0
  34. molSimplify/Data/ML_bond_for_oa.dat +18 -0
  35. molSimplify/Data/bp1.dat +21 -0
  36. molSimplify/Data/li.dat +3 -0
  37. molSimplify/Data/no.dat +2 -0
  38. molSimplify/Data/oct.dat +7 -0
  39. molSimplify/Data/pbp.dat +8 -0
  40. molSimplify/Data/spy.dat +6 -0
  41. molSimplify/Data/sqap.dat +9 -0
  42. molSimplify/Data/sqp.dat +5 -0
  43. molSimplify/Data/tbp.dat +6 -0
  44. molSimplify/Data/tdhd.dat +9 -0
  45. molSimplify/Data/thd.dat +5 -0
  46. molSimplify/Data/tpl.dat +4 -0
  47. molSimplify/Data/tpr.dat +7 -0
  48. molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
  49. molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
  50. molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
  51. molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
  52. molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
  53. molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
  54. molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
  55. molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
  56. molSimplify/Informatics/MOF/__init__.py +0 -0
  57. molSimplify/Informatics/MOF/atomic.py +267 -0
  58. molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
  59. molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
  60. molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
  61. molSimplify/Informatics/Mol2Parser.py +46 -0
  62. molSimplify/Informatics/RACassemble.py +408 -0
  63. molSimplify/Informatics/__init__.py +0 -0
  64. molSimplify/Informatics/active_learning/__init__.py +0 -0
  65. molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
  66. molSimplify/Informatics/autocorrelation.py +1930 -0
  67. molSimplify/Informatics/clean_autocorrelation.py +778 -0
  68. molSimplify/Informatics/coulomb_analyze.py +67 -0
  69. molSimplify/Informatics/decoration_manager.py +193 -0
  70. molSimplify/Informatics/geo_analyze.py +88 -0
  71. molSimplify/Informatics/geometrics.py +56 -0
  72. molSimplify/Informatics/graph_analyze.py +163 -0
  73. molSimplify/Informatics/graph_racs.py +288 -0
  74. molSimplify/Informatics/jupyter_vis.py +172 -0
  75. molSimplify/Informatics/lacRACAssemble.py +2192 -0
  76. molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
  77. molSimplify/Informatics/misc_descriptors.py +198 -0
  78. molSimplify/Informatics/organic_fingerprints.py +61 -0
  79. molSimplify/Informatics/partialcharges.py +345 -0
  80. molSimplify/Informatics/protein/activesite.py +53 -0
  81. molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
  82. molSimplify/Informatics/rac155_geo.py +48 -0
  83. molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
  84. molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
  85. molSimplify/Ligands/12crown4.mol +62 -0
  86. molSimplify/Ligands/Antipyrine.mol +58 -0
  87. molSimplify/Ligands/BPAbipy.mol +106 -0
  88. molSimplify/Ligands/Hpyrrole.mol +26 -0
  89. molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
  90. molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
  91. molSimplify/Ligands/NMe2_-1.xyz +11 -0
  92. molSimplify/Ligands/PCy3.mol +111 -0
  93. molSimplify/Ligands/PMe3.xyz +15 -0
  94. molSimplify/Ligands/PPh3.mol +76 -0
  95. molSimplify/Ligands/Propyphenazone.mol +77 -0
  96. molSimplify/Ligands/acac.mol +33 -0
  97. molSimplify/Ligands/acacen.mol +76 -0
  98. molSimplify/Ligands/acetate.smi +1 -0
  99. molSimplify/Ligands/acetate.xyz +9 -0
  100. molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
  101. molSimplify/Ligands/acetonitrile.mol +17 -0
  102. molSimplify/Ligands/alanine.mol +30 -0
  103. molSimplify/Ligands/alphabetizer.py +21 -0
  104. molSimplify/Ligands/amine.mol +11 -0
  105. molSimplify/Ligands/ammonia.mol +12 -0
  106. molSimplify/Ligands/arginine.mol +58 -0
  107. molSimplify/Ligands/asparagine.mol +38 -0
  108. molSimplify/Ligands/aspartic_acid.mol +35 -0
  109. molSimplify/Ligands/azide.mol +11 -0
  110. molSimplify/Ligands/benzene.mol +28 -0
  111. molSimplify/Ligands/benzene_pi.mol +30 -0
  112. molSimplify/Ligands/benzenedithiol.mol +30 -0
  113. molSimplify/Ligands/benzenethiol.mol +30 -0
  114. molSimplify/Ligands/benzylisocy.mol +38 -0
  115. molSimplify/Ligands/bidiazine.mol +42 -0
  116. molSimplify/Ligands/bidiazole.mol +38 -0
  117. molSimplify/Ligands/bifuran.mol +38 -0
  118. molSimplify/Ligands/bihydrodiazine.mol +58 -0
  119. molSimplify/Ligands/bihydrodiazole.mol +46 -0
  120. molSimplify/Ligands/bihydrooxazine.mol +54 -0
  121. molSimplify/Ligands/bihydrooxazole.mol +42 -0
  122. molSimplify/Ligands/bihydrothiazine.mol +54 -0
  123. molSimplify/Ligands/bihydrothiazole.mol +42 -0
  124. molSimplify/Ligands/biimidazole.mol +38 -0
  125. molSimplify/Ligands/bioxazole.mol +34 -0
  126. molSimplify/Ligands/bipy.mol +46 -0
  127. molSimplify/Ligands/bipyrazine.xyz +20 -0
  128. molSimplify/Ligands/bipyrimidine.mol +42 -0
  129. molSimplify/Ligands/bipyrrole.mol +42 -0
  130. molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
  131. molSimplify/Ligands/bithiazole.mol +34 -0
  132. molSimplify/Ligands/bromide.mol +7 -0
  133. molSimplify/Ligands/bromide.smi +1 -0
  134. molSimplify/Ligands/c2.mol +9 -0
  135. molSimplify/Ligands/caprolactone.mol +41 -0
  136. molSimplify/Ligands/carbonyl.mol +8 -0
  137. molSimplify/Ligands/carboxyl.mol +13 -0
  138. molSimplify/Ligands/cat.mol +30 -0
  139. molSimplify/Ligands/chloride.mol +7 -0
  140. molSimplify/Ligands/chloride.smi +1 -0
  141. molSimplify/Ligands/chloropyridine.mol +27 -0
  142. molSimplify/Ligands/co2.mol +10 -0
  143. molSimplify/Ligands/corrolazine.mol +72 -0
  144. molSimplify/Ligands/cs.mol +8 -0
  145. molSimplify/Ligands/cyanate.xyz +5 -0
  146. molSimplify/Ligands/cyanide.mol +9 -0
  147. molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
  148. molSimplify/Ligands/cyanopyridine.mol +29 -0
  149. molSimplify/Ligands/cyclam.mol +81 -0
  150. molSimplify/Ligands/cyclen.mol +69 -0
  151. molSimplify/Ligands/cyclopentadienyl.mol +26 -0
  152. molSimplify/Ligands/cysteine.mol +32 -0
  153. molSimplify/Ligands/diaminomethyl.mol +19 -0
  154. molSimplify/Ligands/diazine.mol +25 -0
  155. molSimplify/Ligands/diazole.mol +23 -0
  156. molSimplify/Ligands/dicyanamide.mol +15 -0
  157. molSimplify/Ligands/dihydrofuran.mol +27 -0
  158. molSimplify/Ligands/dmap.xyz +35 -0
  159. molSimplify/Ligands/dmf.mol +28 -0
  160. molSimplify/Ligands/dmi.mol +41 -0
  161. molSimplify/Ligands/dmpe.mol +52 -0
  162. molSimplify/Ligands/dpmu.mol +47 -0
  163. molSimplify/Ligands/dppe.mol +112 -0
  164. molSimplify/Ligands/edta.mol +69 -0
  165. molSimplify/Ligands/en.mol +28 -0
  166. molSimplify/Ligands/ethanethiol.mol +21 -0
  167. molSimplify/Ligands/ethanolamine.mol +26 -0
  168. molSimplify/Ligands/ethbipy.mol +70 -0
  169. molSimplify/Ligands/ethyl.mol +19 -0
  170. molSimplify/Ligands/ethylamine.mol +24 -0
  171. molSimplify/Ligands/ethylene.mol +16 -0
  172. molSimplify/Ligands/ethylesteracac.mol +57 -0
  173. molSimplify/Ligands/fluoride.mol +7 -0
  174. molSimplify/Ligands/fluoride.smi +1 -0
  175. molSimplify/Ligands/formaldehyde.mol +12 -0
  176. molSimplify/Ligands/formamidate.xyz +8 -0
  177. molSimplify/Ligands/formate.xyz +6 -0
  178. molSimplify/Ligands/furan.mol +23 -0
  179. molSimplify/Ligands/glutamic_acid.mol +42 -0
  180. molSimplify/Ligands/glutamine.mol +44 -0
  181. molSimplify/Ligands/glycinate.mol +23 -0
  182. molSimplify/Ligands/glycine.mol +24 -0
  183. molSimplify/Ligands/h2s.mol +10 -0
  184. molSimplify/Ligands/helium.mol +6 -0
  185. molSimplify/Ligands/histidine.mol +45 -0
  186. molSimplify/Ligands/hmpa.mol +62 -0
  187. molSimplify/Ligands/hs-.mol +9 -0
  188. molSimplify/Ligands/hydride.mol +7 -0
  189. molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
  190. molSimplify/Ligands/hydrocyanide.mol +10 -0
  191. molSimplify/Ligands/hydrodiazine.mol +33 -0
  192. molSimplify/Ligands/hydrodiazole.mol +27 -0
  193. molSimplify/Ligands/hydrogensulfide.mol +10 -0
  194. molSimplify/Ligands/hydroisocyanide.mol +11 -0
  195. molSimplify/Ligands/hydrooxazine.mol +31 -0
  196. molSimplify/Ligands/hydrooxazole.mol +25 -0
  197. molSimplify/Ligands/hydrothiazine.mol +31 -0
  198. molSimplify/Ligands/hydrothiazole.mol +25 -0
  199. molSimplify/Ligands/hydroxyl.mol +9 -0
  200. molSimplify/Ligands/imidazole.mol +23 -0
  201. molSimplify/Ligands/imidazolidinone.mol +29 -0
  202. molSimplify/Ligands/imine.mol +13 -0
  203. molSimplify/Ligands/iminodiacetic.mol +33 -0
  204. molSimplify/Ligands/iodide.mol +7 -0
  205. molSimplify/Ligands/iodobenzene.xyz +14 -0
  206. molSimplify/Ligands/isoleucine.mol +48 -0
  207. molSimplify/Ligands/isothiocyanate.mol +11 -0
  208. molSimplify/Ligands/leucine.mol +48 -0
  209. molSimplify/Ligands/ligands.dict +257 -0
  210. molSimplify/Ligands/lysine.mol +54 -0
  211. molSimplify/Ligands/mebenzenedithiol.mol +36 -0
  212. molSimplify/Ligands/mebim_py.xyz +29 -0
  213. molSimplify/Ligands/mebim_pz.xyz +28 -0
  214. molSimplify/Ligands/mebipy.mol +58 -0
  215. molSimplify/Ligands/mecat.mol +36 -0
  216. molSimplify/Ligands/methanal.mol +11 -0
  217. molSimplify/Ligands/methanethiol.mol +15 -0
  218. molSimplify/Ligands/methanol.mol +16 -0
  219. molSimplify/Ligands/methionine.mol +44 -0
  220. molSimplify/Ligands/methyl.mol +13 -0
  221. molSimplify/Ligands/methylacetylide.xyz +8 -0
  222. molSimplify/Ligands/methylamine.mol +19 -0
  223. molSimplify/Ligands/methylazide.xyz +9 -0
  224. molSimplify/Ligands/methylisocy.mol +17 -0
  225. molSimplify/Ligands/methylpyridine.mol +33 -0
  226. molSimplify/Ligands/n2.mol +8 -0
  227. molSimplify/Ligands/n4py.xyz +51 -0
  228. molSimplify/Ligands/nch.mol +10 -0
  229. molSimplify/Ligands/nco-.mol +11 -0
  230. molSimplify/Ligands/nethanolamine.mol +26 -0
  231. molSimplify/Ligands/nitrate.mol +14 -0
  232. molSimplify/Ligands/nitrite.mol +11 -0
  233. molSimplify/Ligands/nitro.mol +11 -0
  234. molSimplify/Ligands/nitrobipy.mol +54 -0
  235. molSimplify/Ligands/nitroso.mol +8 -0
  236. molSimplify/Ligands/nme3.mol +30 -0
  237. molSimplify/Ligands/no-.mol +10 -0
  238. molSimplify/Ligands/no2-.mol +11 -0
  239. molSimplify/Ligands/noxygen.mol +8 -0
  240. molSimplify/Ligands/ns-.mol +10 -0
  241. molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
  242. molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
  243. molSimplify/Ligands/o2-.mol +9 -0
  244. molSimplify/Ligands/o2.xyz +4 -0
  245. molSimplify/Ligands/och2.mol +12 -0
  246. molSimplify/Ligands/oethanolamine.mol +26 -0
  247. molSimplify/Ligands/ome2.mol +22 -0
  248. molSimplify/Ligands/ooh.xyz +5 -0
  249. molSimplify/Ligands/oxalate.mol +17 -0
  250. molSimplify/Ligands/oxalate.smi +1 -0
  251. molSimplify/Ligands/oxygen.mol +7 -0
  252. molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
  253. molSimplify/Ligands/ph2-.mol +11 -0
  254. molSimplify/Ligands/ph3.mol +12 -0
  255. molSimplify/Ligands/phen.mol +51 -0
  256. molSimplify/Ligands/phenacac.mol +63 -0
  257. molSimplify/Ligands/phenalalanine.mol +51 -0
  258. molSimplify/Ligands/phendione.mol +51 -0
  259. molSimplify/Ligands/phenphen.mol +75 -0
  260. molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
  261. molSimplify/Ligands/phenylcyc.mol +99 -0
  262. molSimplify/Ligands/phenylenediamine.mol +37 -0
  263. molSimplify/Ligands/phenylisocy.mol +32 -0
  264. molSimplify/Ligands/phosacidbipy.mol +66 -0
  265. molSimplify/Ligands/phosphine.mol +13 -0
  266. molSimplify/Ligands/phosphorine.mol +27 -0
  267. molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
  268. molSimplify/Ligands/phthalocyanine.mol +126 -0
  269. molSimplify/Ligands/pme3o.mol +32 -0
  270. molSimplify/Ligands/porphyrin.mol +82 -0
  271. molSimplify/Ligands/pph3o.mol +77 -0
  272. molSimplify/Ligands/proline.mol +39 -0
  273. molSimplify/Ligands/propdiol.mol +21 -0
  274. molSimplify/Ligands/propylene.mol +23 -0
  275. molSimplify/Ligands/pyridine.mol +27 -0
  276. molSimplify/Ligands/pyrimidone.mol +27 -0
  277. molSimplify/Ligands/pyrrole.mol +24 -0
  278. molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
  279. molSimplify/Ligands/s2-.mol +9 -0
  280. molSimplify/Ligands/salen.mol +75 -0
  281. molSimplify/Ligands/salphen.mol +84 -0
  282. molSimplify/Ligands/serine.mol +32 -0
  283. molSimplify/Ligands/simple_ligands.dict +14 -0
  284. molSimplify/Ligands/sulfacidbipy.mol +63 -0
  285. molSimplify/Ligands/tbucat.mol +54 -0
  286. molSimplify/Ligands/tbuphisocy.mol +56 -0
  287. molSimplify/Ligands/tbutylcyclen.mol +166 -0
  288. molSimplify/Ligands/tbutylisocy.mol +35 -0
  289. molSimplify/Ligands/tbutylthiol.mol +33 -0
  290. molSimplify/Ligands/tcnoet.mol +43 -0
  291. molSimplify/Ligands/tcnoetOH.mol +45 -0
  292. molSimplify/Ligands/terpy.mol +65 -0
  293. molSimplify/Ligands/tetrahydrofuran.mol +31 -0
  294. molSimplify/Ligands/thiane.mol +37 -0
  295. molSimplify/Ligands/thiazole.mol +21 -0
  296. molSimplify/Ligands/thiocyanate.mol +11 -0
  297. molSimplify/Ligands/thiol.mol +9 -0
  298. molSimplify/Ligands/thiophene.mol +23 -0
  299. molSimplify/Ligands/thiopyridine.mol +29 -0
  300. molSimplify/Ligands/threonine.mol +38 -0
  301. molSimplify/Ligands/tpp.mol +165 -0
  302. molSimplify/Ligands/tricyanomethyl.mol +19 -0
  303. molSimplify/Ligands/trifluoromethyl.mol +13 -0
  304. molSimplify/Ligands/tryptophan.mol +60 -0
  305. molSimplify/Ligands/tyrosine.mol +53 -0
  306. molSimplify/Ligands/uthiol.mol +11 -0
  307. molSimplify/Ligands/uthiolme2.mol +23 -0
  308. molSimplify/Ligands/valine.mol +42 -0
  309. molSimplify/Ligands/water.mol +10 -0
  310. molSimplify/Ligands/x.mol +6 -0
  311. molSimplify/Scripts/__init__.py +0 -0
  312. molSimplify/Scripts/addtodb.py +308 -0
  313. molSimplify/Scripts/cellbuilder.py +1592 -0
  314. molSimplify/Scripts/cellbuilder_tools.py +701 -0
  315. molSimplify/Scripts/chains.py +342 -0
  316. molSimplify/Scripts/convert_2to3.py +23 -0
  317. molSimplify/Scripts/dbinteract.py +631 -0
  318. molSimplify/Scripts/distgeom.py +617 -0
  319. molSimplify/Scripts/findcorrelations.py +287 -0
  320. molSimplify/Scripts/generator.py +267 -0
  321. molSimplify/Scripts/geometry.py +1224 -0
  322. molSimplify/Scripts/grabguivars.py +845 -0
  323. molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
  324. molSimplify/Scripts/inparse.py +1673 -0
  325. molSimplify/Scripts/io.py +1149 -0
  326. molSimplify/Scripts/isomers.py +415 -0
  327. molSimplify/Scripts/jobgen.py +247 -0
  328. molSimplify/Scripts/krr_prep.py +1262 -0
  329. molSimplify/Scripts/molSimplify_io.py +18 -0
  330. molSimplify/Scripts/molden2psi4wfn.py +166 -0
  331. molSimplify/Scripts/namegen.py +32 -0
  332. molSimplify/Scripts/nn_prep.py +561 -0
  333. molSimplify/Scripts/oct_check_mols.py +782 -0
  334. molSimplify/Scripts/periodic_QE.py +97 -0
  335. molSimplify/Scripts/postmold.py +304 -0
  336. molSimplify/Scripts/postmwfn.py +709 -0
  337. molSimplify/Scripts/postparse.py +488 -0
  338. molSimplify/Scripts/postproc.py +139 -0
  339. molSimplify/Scripts/qcgen.py +1450 -0
  340. molSimplify/Scripts/rmsd.py +489 -0
  341. molSimplify/Scripts/rungen.py +670 -0
  342. molSimplify/Scripts/structgen.py +3040 -0
  343. molSimplify/Scripts/tf_nn_prep.py +894 -0
  344. molSimplify/Scripts/tsgen.py +295 -0
  345. molSimplify/Scripts/uq_calibration.py +69 -0
  346. molSimplify/__init__.py +0 -0
  347. molSimplify/__main__.py +197 -0
  348. molSimplify/icons/chemdb.png +0 -0
  349. molSimplify/icons/hjklogo.png +0 -0
  350. molSimplify/icons/icon.png +0 -0
  351. molSimplify/icons/logo.png +0 -0
  352. molSimplify/icons/logo_old.png +0 -0
  353. molSimplify/icons/petachem.png +0 -0
  354. molSimplify/icons/petachem2.png +0 -0
  355. molSimplify/icons/petachem_full.png +0 -0
  356. molSimplify/icons/pythonlogo.png +0 -0
  357. molSimplify/icons/sge copy.png +0 -0
  358. molSimplify/icons/sge.png +0 -0
  359. molSimplify/icons/slurm.png +0 -0
  360. molSimplify/icons/wft1.png +0 -0
  361. molSimplify/icons/wft2.png +0 -0
  362. molSimplify/icons/wft3.png +0 -0
  363. molSimplify/ml/__init__.py +0 -0
  364. molSimplify/ml/kernels.py +36 -0
  365. molSimplify/ml/layers.py +29 -0
  366. molSimplify/molscontrol/__init__.py +14 -0
  367. molSimplify/molscontrol/_version.py +521 -0
  368. molSimplify/molscontrol/clf_tools.py +144 -0
  369. molSimplify/molscontrol/data/README.md +21 -0
  370. molSimplify/molscontrol/data/look_and_say.dat +15 -0
  371. molSimplify/molscontrol/dynamic_classifier.py +514 -0
  372. molSimplify/molscontrol/io_tools.py +363 -0
  373. molSimplify/molscontrol/molscontrol.py +49 -0
  374. molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
  375. molSimplify/molscontrol/terachem/terachem_input +22 -0
  376. molSimplify/python_krr/X_train_TS.csv +535 -0
  377. molSimplify/python_krr/__init__.py +0 -0
  378. molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
  379. molSimplify/python_krr/hat2_feature_names.csv +1 -0
  380. molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
  381. molSimplify/python_krr/hat_X_mean_std.csv +6 -0
  382. molSimplify/python_krr/hat_feature_names.csv +1 -0
  383. molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
  384. molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
  385. molSimplify/python_krr/hat_y_mean_std.csv +2 -0
  386. molSimplify/python_krr/sklearn_models.py +34 -0
  387. molSimplify/python_krr/y_train_TS.csv +535 -0
  388. molSimplify/python_nn/ANN.py +198 -0
  389. molSimplify/python_nn/__init__.py +0 -0
  390. molSimplify/python_nn/clf_analysis_tool.py +125 -0
  391. molSimplify/python_nn/dictionary_toolbox.py +49 -0
  392. molSimplify/python_nn/ensemble_test.py +309 -0
  393. molSimplify/python_nn/hs_center.csv +26 -0
  394. molSimplify/python_nn/hs_scale.csv +26 -0
  395. molSimplify/python_nn/ls_center.csv +26 -0
  396. molSimplify/python_nn/ls_scale.csv +26 -0
  397. molSimplify/python_nn/ms_hs_b1.csv +50 -0
  398. molSimplify/python_nn/ms_hs_b2.csv +50 -0
  399. molSimplify/python_nn/ms_hs_b3.csv +1 -0
  400. molSimplify/python_nn/ms_hs_w1.csv +50 -0
  401. molSimplify/python_nn/ms_hs_w2.csv +50 -0
  402. molSimplify/python_nn/ms_hs_w3.csv +1 -0
  403. molSimplify/python_nn/ms_ls_b1.csv +50 -0
  404. molSimplify/python_nn/ms_ls_b2.csv +50 -0
  405. molSimplify/python_nn/ms_ls_b3.csv +1 -0
  406. molSimplify/python_nn/ms_ls_w1.csv +50 -0
  407. molSimplify/python_nn/ms_ls_w2.csv +50 -0
  408. molSimplify/python_nn/ms_ls_w3.csv +1 -0
  409. molSimplify/python_nn/ms_slope_b1.csv +50 -0
  410. molSimplify/python_nn/ms_slope_b2.csv +50 -0
  411. molSimplify/python_nn/ms_slope_b3.csv +1 -0
  412. molSimplify/python_nn/ms_slope_w1.csv +50 -0
  413. molSimplify/python_nn/ms_slope_w2.csv +50 -0
  414. molSimplify/python_nn/ms_slope_w3.csv +1 -0
  415. molSimplify/python_nn/ms_split_b1.csv +50 -0
  416. molSimplify/python_nn/ms_split_b2.csv +50 -0
  417. molSimplify/python_nn/ms_split_b3.csv +1 -0
  418. molSimplify/python_nn/ms_split_w1.csv +50 -0
  419. molSimplify/python_nn/ms_split_w2.csv +50 -0
  420. molSimplify/python_nn/ms_split_w3.csv +1 -0
  421. molSimplify/python_nn/slope_center.csv +25 -0
  422. molSimplify/python_nn/slope_scale.csv +25 -0
  423. molSimplify/python_nn/split_center.csv +26 -0
  424. molSimplify/python_nn/split_scale.csv +26 -0
  425. molSimplify/python_nn/tf_ANN.py +762 -0
  426. molSimplify/python_nn/train_data.csv +1211 -0
  427. molSimplify/tf_nn/__init__.py +0 -0
  428. molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
  429. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
  430. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
  431. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
  432. molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
  433. molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
  434. molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
  435. molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
  436. molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
  437. molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
  438. molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
  439. molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
  440. molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
  441. molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
  442. molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
  443. molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
  444. molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
  445. molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
  446. molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
  447. molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
  448. molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
  449. molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
  450. molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
  451. molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
  452. molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
  453. molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
  454. molSimplify/tf_nn/homolumo/gap_model.json +1 -0
  455. molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
  456. molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
  457. molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
  458. molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
  459. molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
  460. molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
  461. molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
  462. molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
  463. molSimplify/tf_nn/homolumo/homo_model.json +126 -0
  464. molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
  465. molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
  466. molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
  467. molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
  468. molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
  469. molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
  470. molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
  471. molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
  472. molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
  473. molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
  474. molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
  475. molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
  476. molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
  477. molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
  478. molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
  479. molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
  480. molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
  481. molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
  482. molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
  483. molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
  484. molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
  485. molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
  486. molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
  487. molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
  488. molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
  489. molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
  490. molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
  491. molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
  492. molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
  493. molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
  494. molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
  495. molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
  496. molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
  497. molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
  498. molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
  499. molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
  500. molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
  501. molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
  502. molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
  503. molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
  504. molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
  505. molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
  506. molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
  507. molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
  508. molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
  509. molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
  510. molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
  511. molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
  512. molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
  513. molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
  514. molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
  515. molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
  516. molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
  517. molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
  518. molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
  519. molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
  520. molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
  521. molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
  522. molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
  523. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
  524. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
  525. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
  526. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
  527. molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
  528. molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
  529. molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
  530. molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
  531. molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
  532. molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
  533. molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
  534. molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
  535. molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
  536. molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
  537. molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
  538. molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
  539. molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
  540. molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
  541. molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
  542. molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
  543. molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
  544. molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
  545. molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
  546. molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
  547. molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
  548. molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
  549. molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
  550. molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
  551. molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
  552. molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
  553. molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
  554. molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
  555. molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
  556. molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
  557. molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
  558. molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
  559. molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
  560. molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
  561. molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
  562. molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
  563. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
  564. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
  565. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
  566. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
  567. molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
  568. molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
  569. molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
  570. molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
  571. molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
  572. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
  573. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
  574. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
  575. molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
  576. molSimplify/tf_nn/split/split_model.h5 +0 -0
  577. molSimplify/tf_nn/split/split_model.json +1 -0
  578. molSimplify/tf_nn/split/split_vars.csv +155 -0
  579. molSimplify/tf_nn/split/split_x.csv +1902 -0
  580. molSimplify/tf_nn/split/split_y.csv +1902 -0
  581. molSimplify/tf_nn/split/train_names.csv +1901 -0
  582. molSimplify/utils/__init__.py +0 -0
  583. molSimplify/utils/decorators.py +16 -0
  584. molSimplify/utils/metaclasses.py +12 -0
  585. molSimplify/utils/tensorflow.py +23 -0
  586. molSimplify/utils/timer.py +16 -0
  587. molSimplify-1.7.4.dist-info/LICENSE +674 -0
  588. molSimplify-1.7.4.dist-info/METADATA +821 -0
  589. molSimplify-1.7.4.dist-info/RECORD +651 -0
  590. molSimplify-1.7.4.dist-info/WHEEL +5 -0
  591. molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
  592. molSimplify-1.7.4.dist-info/top_level.txt +4 -0
  593. tests/generateTests.py +122 -0
  594. tests/helperFuncs.py +658 -0
  595. tests/informatics/test_MOF_descriptors.py +128 -0
  596. tests/informatics/test_active_learning.py +113 -0
  597. tests/informatics/test_coulomb_analyze.py +24 -0
  598. tests/informatics/test_graph_racs.py +193 -0
  599. tests/ml/test_kernels.py +20 -0
  600. tests/ml/test_layers.py +47 -0
  601. tests/runtest.py +10 -0
  602. tests/test_Mol2D.py +128 -0
  603. tests/test_basic_imports.py +62 -0
  604. tests/test_bidentate.py +25 -0
  605. tests/test_cli.py +20 -0
  606. tests/test_distgeom.py +106 -0
  607. tests/test_example_1.py +29 -0
  608. tests/test_example_3.py +31 -0
  609. tests/test_example_5.py +43 -0
  610. tests/test_example_7.py +28 -0
  611. tests/test_example_8.py +15 -0
  612. tests/test_example_tbp.py +15 -0
  613. tests/test_ff_xtb.py +111 -0
  614. tests/test_geocheck_oct.py +26 -0
  615. tests/test_geocheck_one_empty.py +15 -0
  616. tests/test_geometry.py +44 -0
  617. tests/test_inparse.py +76 -0
  618. tests/test_io.py +84 -0
  619. tests/test_jobgen.py +84 -0
  620. tests/test_joption_pythonic.py +27 -0
  621. tests/test_ligand_assign.py +58 -0
  622. tests/test_ligand_assign_consistent.py +60 -0
  623. tests/test_ligand_class.py +26 -0
  624. tests/test_ligand_from_mol_file.py +35 -0
  625. tests/test_ligands.py +86 -0
  626. tests/test_mol3D.py +337 -0
  627. tests/test_molcas_caspt2.py +15 -0
  628. tests/test_molcas_casscf.py +15 -0
  629. tests/test_old_ANNs.py +68 -0
  630. tests/test_orca_ccsdt.py +15 -0
  631. tests/test_orca_dft.py +15 -0
  632. tests/test_qcgen.py +50 -0
  633. tests/test_racs.py +124 -0
  634. tests/test_rmsd.py +68 -0
  635. tests/test_structgen_functions.py +198 -0
  636. tests/test_tetrahedral.py +29 -0
  637. tests/test_tutorial_10_part_one.py +16 -0
  638. tests/test_tutorial_10_part_two.py +15 -0
  639. tests/test_tutorial_2.py +11 -0
  640. tests/test_tutorial_3.py +15 -0
  641. tests/test_tutorial_4.py +57 -0
  642. tests/test_tutorial_6.py +10 -0
  643. tests/test_tutorial_8.py +29 -0
  644. tests/test_tutorial_9_part_one.py +15 -0
  645. tests/test_tutorial_9_part_two.py +15 -0
  646. tests/test_tutorial_qm9_part_one.py +6 -0
  647. tests/testresources/refs/racs/generate_references.py +85 -0
  648. workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
  649. workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
  650. workflows/NandyJACSAu2022/fragment_classes.py +586 -0
  651. workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
@@ -0,0 +1,589 @@
1
+ from molSimplify.Scripts.cellbuilder_tools import *
2
+ from molSimplify.Classes.mol3D import mol3D
3
+ from molSimplify.Informatics.autocorrelation import *
4
+ from molSimplify.Informatics.misc_descriptors import *
5
+ from molSimplify.Informatics.graph_analyze import *
6
+ from molSimplify.Informatics.RACassemble import *
7
+ import os
8
+ import numpy as np
9
+ import pandas as pd
10
+ from scipy import sparse
11
+ import itertools
12
+ from molSimplify.Informatics.MOF.PBC_functions import *
13
+
14
+ #### NOTE: In addition to molSimplify's dependencies, this portion requires
15
+ #### pymatgen to be installed. The RACs are intended to be computed
16
+ #### on the primitive cell of the material. You can compute them
17
+ #### using the get_primitive helper defined below if necessary.
18
+
19
+ # Example usage is given at the bottom of the script.
20
+
21
+ '''<<<< CODE TO COMPUTE PRIMITIVE UNIT CELLS >>>>'''
22
+ #########################################################################################
23
+ # This MOF RAC generator assumes that pymatgen is installed. #
24
+ # Pymatgen is used to get the primitive cell. #
25
+ #########################################################################################
26
+
27
+
28
def get_primitive(datapath, writepath):
    """Write the primitive cell of the first structure in a CIF file.

    Parameters
    ----------
    datapath : str
        Path of the CIF file to read.
    writepath : str
        Path of the CIF file to write the primitive structure to.
    """
    # Imported here so that pymatgen is only required when this helper is called.
    from pymatgen.io.cif import CifParser

    structure = CifParser(datapath, occupancy_tolerance=1).get_structures()[0]
    primitive = structure.get_primitive_structure()
    # Keyword arguments are used because the positional order of ``fmt`` and
    # ``filename`` is not the same across pymatgen releases.
    primitive.to(fmt="cif", filename=writepath)
33
+
34
+
35
+ '''<<<< END OF CODE TO COMPUTE PRIMITIVE UNIT CELLS >>>>'''
36
+
37
+ #########################################################################################
38
+ # The RAC functions here average over the different SBUs or linkers present. This is #
39
+ # because one MOF could have multiple different linkers or multiple SBUs, and we need #
40
+ # the vector to be of constant dimension so we can correlate the output property. #
41
+ #########################################################################################
42
+
43
def identify_main_chain(temp_mol, link_list):
    """Return the atom indices that make up the main chain of a linker.

    The main chain is built from the shortest paths between every pair of
    anchoring atoms, and is then grown by every minimum-basis cycle that
    shares at least one atom with it (repeated until nothing more is added).

    Parameters
    ----------
    temp_mol : mol3D
        Linker molecule; only its ``graph`` adjacency matrix is read.
    link_list : list of int
        Indices (within ``temp_mol``) of the anchoring atoms.

    Returns
    -------
    list of int
        Node indices on the main chain (may contain repeated entries).
        When there is exactly one anchoring atom every node of the graph
        is returned.
    """
    # Local import: the module otherwise only sees ``nx`` through star imports.
    import networkx as nx

    # ``from_numpy_matrix`` was removed in networkx 3.0; ``from_numpy_array``
    # accepts a plain ndarray, so the (possibly np.matrix) graph is converted.
    G = nx.from_numpy_array(np.asarray(temp_mol.graph))
    if len(link_list) == 1:
        return list(G.nodes)

    # Seed the chain with the shortest path between each pair of anchors.
    chain = []
    for start, end in itertools.combinations(link_list, 2):
        chain.extend(nx.shortest_path(G, source=start, target=end))
    on_chain = set(chain)

    # Absorb rings touching the chain until a full pass adds nothing new.
    pending = list(nx.minimum_cycle_basis(G))
    grew = True
    while grew:
        grew = False
        for cycle in list(pending):
            ring = set(cycle)
            if ring & on_chain and not ring <= on_chain:
                chain.extend(ring)
                on_chain |= ring
                pending.remove(cycle)
                grew = True
    return chain
78
+
79
def make_MOF_SBU_RACs(SBUlist, SBU_subgraph, molcif, depth, name, cell, anchoring_atoms, sbupath=False, connections_list=False, connections_subgraphlist=False):
    """Compute SBU RACs and linker-connecting (lc) RACs, averaged over a MOF.

    Parameters
    ----------
    SBUlist : list
        One collection of atom indices (into ``molcif``) per SBU.
    SBU_subgraph : list
        One adjacency matrix per SBU, exposing ``todense()``.
    molcif : mol3D
        The full periodic cell.
    depth : int
        Maximum autocorrelation depth.
    name : str
        Identifier stored in the ``name`` column of the CSV rows and used
        in the XYZ file names.
    cell : array-like
        Cell vectors, forwarded to ``XYZ_connected`` / ``writeXYZandGraph``.
    anchoring_atoms : collection of int
        Atom indices (into ``molcif``) treated as linker anchors.
    sbupath : str or False
        Directory for SBU XYZ files. The descriptor CSV files are read from
        and written to its parent directory. When falsy nothing is written.
    connections_list : list or False
        One collection of atom indices per organic fragment.
    connections_subgraphlist : list or False
        Adjacency matrices matching ``connections_list``.

    Returns
    -------
    tuple
        ``(names, averaged_SBU_descriptors, lc_names, averaged_lc_descriptors)``.

    Raises
    ------
    AssertionError
        If an lc descriptor value is neither ``float`` nor ``np.float64``.
    """
    descriptor_list = []
    lc_descriptor_list = []
    lc_names = []
    names = []
    sbu_rows = []
    # The defaults are ``False``; treat them as "no organic fragments".
    connections_list = connections_list or []
    connections_subgraphlist = connections_subgraphlist or []

    # Start from empty frames so the function also works without ``sbupath``.
    sbu_descriptors = pd.DataFrame()
    lc_descriptors = pd.DataFrame()
    if sbupath:
        sbu_descriptor_path = os.path.dirname(sbupath)
        sbu_csv = sbu_descriptor_path + '/sbu_descriptors.csv'
        lc_csv = sbu_descriptor_path + '/lc_descriptors.csv'
        if os.path.getsize(sbu_csv) > 0:
            sbu_descriptors = pd.read_csv(sbu_csv)
        if os.path.getsize(lc_csv) > 0:
            lc_descriptors = pd.read_csv(lc_csv)

    # Loop over all SBUs as identified by subgraphs and build a mol3D for each.
    for i, SBU in enumerate(SBUlist):
        SBU_mol = mol3D()
        for val in SBU:
            SBU_mol.addAtom(molcif.getAtom(val))
        SBU_mol.graph = SBU_subgraph[i].todense()
        sbu_atom_set = set(SBU)
        lc_rows = []

        # For each fragment sharing atoms with this SBU, compute the lc-RACs.
        for j, linker in enumerate(connections_list):
            if not sbu_atom_set.intersection(linker):
                continue
            descriptor_names = []
            descriptors = []
            temp_mol = mol3D()
            link_list = []
            for jj, val2 in enumerate(linker):
                if val2 in anchoring_atoms:
                    link_list.append(jj)
                # Build the fragment molecule atom by atom, in fragment order.
                temp_mol.addAtom(molcif.getAtom(val2))
            temp_mol.graph = connections_subgraphlist[j].todense()

            # lc autocorrelations and delta metrics, started from the anchors.
            results_dictionary = generate_atomonly_autocorrelations(
                temp_mol, link_list, loud=False, depth=depth, oct=False,
                polarizability=False, Gval=True)
            descriptor_names, descriptors = append_descriptors(
                descriptor_names, descriptors, results_dictionary['colnames'],
                results_dictionary['results'], 'lc', 'all')
            results_dictionary = generate_atomonly_deltametrics(
                temp_mol, link_list, loud=False, depth=depth, oct=False,
                polarizability=False, Gval=True)
            descriptor_names, descriptors = append_descriptors(
                descriptor_names, descriptors, results_dictionary['colnames'],
                results_dictionary['results'], 'D_lc', 'all')

            # Functional atoms: every non-hydrogen atom off the main chain.
            main = set(identify_main_chain(temp_mol, link_list))
            functional_atoms = [
                jj for jj in range(len(temp_mol.graph))
                if jj not in main and temp_mol.atoms[jj].sym != "H"
            ]

            if functional_atoms:
                results_dictionary = generate_atomonly_autocorrelations(
                    temp_mol, functional_atoms, loud=False, depth=depth,
                    oct=False, polarizability=False, Gval=True)
                descriptor_names, descriptors = append_descriptors(
                    descriptor_names, descriptors, results_dictionary['colnames'],
                    results_dictionary['results'], 'func', 'all')
                results_dictionary = generate_atomonly_deltametrics(
                    temp_mol, functional_atoms, loud=False, depth=depth,
                    oct=False, polarizability=False, Gval=True)
                descriptor_names, descriptors = append_descriptors(
                    descriptor_names, descriptors, results_dictionary['colnames'],
                    results_dictionary['results'], 'D_func', 'all')
            else:
                # No functional atoms: pad with zeros, reusing the column names
                # of the preceding lc result so the vector length stays fixed.
                colnames = results_dictionary['colnames']
                descriptor_names, descriptors = append_descriptors(
                    descriptor_names, descriptors, colnames,
                    [np.zeros(int(6 * (depth + 1)))], 'func', 'all')
                descriptor_names, descriptors = append_descriptors(
                    descriptor_names, descriptors, colnames,
                    [np.zeros(int(6 * (depth + 1)))], 'D_func', 'all')

            for val in descriptors:
                if not isinstance(val, (float, np.float64)):
                    print('Mixed typing. Please convert to python float, and avoid np float')
                    raise AssertionError('Mixed typing creates issues. Please convert your typing.')

            row = dict(zip(descriptor_names, descriptors))
            row['name'] = name
            lc_rows.append(row)
            lc_descriptor_list.append(descriptors)
            # Take the names from the first attached fragment, whichever index
            # it has (previously only fragment 0 could provide them).
            if not lc_names:
                lc_names = descriptor_names

        if sbupath:
            if lc_rows:
                new_rows = pd.DataFrame(lc_rows)
                if lc_descriptors.empty:
                    lc_descriptors = new_rows
                else:
                    lc_descriptors = pd.concat([lc_descriptors, new_rows], ignore_index=True)
            lc_descriptors.to_csv(lc_csv, index=False)

        # Write the SBU geometry.
        # NOTE(review): the existence check looks for "<name><i>.xyz" while the
        # file written is "<name>_sbu_<i>.xyz", so the file is always rewritten.
        # Behaviour kept as-is; confirm which of the two was intended.
        if sbupath and not os.path.exists(sbupath + "/" + str(name) + str(i) + '.xyz'):
            xyzname = sbupath + "/" + str(name) + "_sbu_" + str(i) + ".xyz"
            SBU_mol_cart_coords = np.array([atom.coords() for atom in SBU_mol.atoms])
            SBU_mol_atom_labels = [atom.sym for atom in SBU_mol.atoms]
            SBU_mol_adj_mat = np.array(SBU_mol.graph)
            SBU_mol_fcoords_connected = XYZ_connected(cell, SBU_mol_cart_coords, SBU_mol_adj_mat)
            writeXYZandGraph(xyzname, SBU_mol_atom_labels, cell, SBU_mol_fcoords_connected, SBU_mol_adj_mat)

        # SBU-based RACs: full scope on the SBU, metal-centred on the full cell.
        descriptor_names = []
        descriptors = []
        results_dictionary = generate_full_complex_autocorrelations(
            SBU_mol, depth=depth, loud=False, flag_name=False, Gval=True)
        descriptor_names, descriptors = append_descriptors(
            descriptor_names, descriptors, results_dictionary['colnames'],
            results_dictionary['results'], 'f', 'all')
        # The metal-centred terms are evaluated on ``molcif`` (the whole cell).
        results_dictionary = generate_multimetal_autocorrelations(
            molcif, depth=depth, loud=False, Gval=True)
        descriptor_names, descriptors = append_descriptors(
            descriptor_names, descriptors, results_dictionary['colnames'],
            results_dictionary['results'], 'mc', 'all')
        results_dictionary = generate_multimetal_deltametrics(
            molcif, depth=depth, loud=False, Gval=True)
        descriptor_names, descriptors = append_descriptors(
            descriptor_names, descriptors, results_dictionary['colnames'],
            results_dictionary['results'], 'D_mc', 'all')

        row = dict(zip(descriptor_names, descriptors))
        row['name'] = name
        sbu_rows.append(row)
        descriptor_list.append(descriptors)
        if i == 0:
            names = descriptor_names

    if sbupath:
        if sbu_rows:
            new_rows = pd.DataFrame(sbu_rows)
            if sbu_descriptors.empty:
                sbu_descriptors = new_rows
            else:
                sbu_descriptors = pd.concat([sbu_descriptors, new_rows], ignore_index=True)
        sbu_descriptors.to_csv(sbu_csv, index=False)

    averaged_SBU_descriptors = np.mean(np.array(descriptor_list), axis=0)
    averaged_lc_descriptors = np.mean(np.array(lc_descriptor_list), axis=0)
    return names, averaged_SBU_descriptors, lc_names, averaged_lc_descriptors
217
+
218
def make_MOF_linker_RACs(linkerlist, linker_subgraphlist, molcif, depth, name, cell, linkerpath=False):
    """Compute full-scope linker RACs, averaged over all linkers of a MOF.

    Parameters
    ----------
    linkerlist : list
        One collection of atom indices (into ``molcif``) per linker.
    linker_subgraphlist : list
        One adjacency matrix per linker, exposing ``todense()``.
    molcif : mol3D
        The full periodic cell.
    depth : int
        Maximum autocorrelation depth.
    name : str
        Identifier stored in the ``name`` column of the CSV rows and used
        in the XYZ file names.
    cell : array-like
        Cell vectors, forwarded to ``XYZ_connected`` / ``writeXYZandGraph``.
    linkerpath : str or False
        Directory for linker XYZ files. ``linker_descriptors.csv`` is read
        from and written to its parent directory. When falsy nothing is
        written.

    Returns
    -------
    tuple
        ``(colnames, averaged_ligand_descriptors)``.
    """
    allowed_strings = ['electronegativity', 'nuclear_charge', 'ident', 'topology', 'size', 'effective_nuclear_charge']
    labels_strings = ['chi', 'Z', 'I', 'T', 'S', 'Gval']

    descriptor_list = []
    colnames = []
    linker_rows = []
    # Start from an empty frame so the function also works without ``linkerpath``.
    linker_descriptors = pd.DataFrame()
    if linkerpath:
        linker_descriptor_path = os.path.dirname(linkerpath)
        linker_csv = linker_descriptor_path + '/linker_descriptors.csv'
        if os.path.getsize(linker_csv) > 0:
            linker_descriptors = pd.read_csv(linker_csv)

    for i, linker in enumerate(linkerlist):
        linker_mol = mol3D()
        for val in linker:
            linker_mol.addAtom(molcif.getAtom(val))
        linker_mol.graph = linker_subgraphlist[i].todense()

        # Write the linker geometry.
        # NOTE(review): the existence check looks for "<name><i>.xyz" while the
        # file written is "<name>_linker_<i>.xyz", so the file is always
        # rewritten. Behaviour kept as-is; confirm which was intended.
        if linkerpath and not os.path.exists(linkerpath + "/" + str(name) + str(i) + ".xyz"):
            xyzname = linkerpath + "/" + str(name) + "_linker_" + str(i) + ".xyz"
            linker_mol_cart_coords = np.array([atom.coords() for atom in linker_mol.atoms])
            linker_mol_atom_labels = [atom.sym for atom in linker_mol.atoms]
            linker_mol_adj_mat = np.array(linker_mol.graph)
            linker_mol_fcoords_connected = XYZ_connected(cell, linker_mol_cart_coords, linker_mol_adj_mat)
            writeXYZandGraph(xyzname, linker_mol_atom_labels, cell, linker_mol_fcoords_connected, linker_mol_adj_mat)

        # One autocorrelation vector (depth 0..depth) per property.
        colnames = []
        lig_full = []
        for label, prop in zip(labels_strings, allowed_strings):
            lig_full.extend(full_autocorrelation(linker_mol, prop, depth))
            colnames.extend('f-lig-' + label + '-' + str(j) for j in range(0, depth + 1))

        row = dict(zip(colnames, lig_full))
        row['name'] = name
        linker_rows.append(row)
        descriptor_list.append(lig_full)

    # The per-linker rows are dumped without averaging so that users can
    # aggregate them differently; the averaged vector is what is returned.
    if linkerpath:
        if linker_rows:
            new_rows = pd.DataFrame(linker_rows)
            if linker_descriptors.empty:
                linker_descriptors = new_rows
            else:
                linker_descriptors = pd.concat([linker_descriptors, new_rows], ignore_index=True)
        linker_descriptors.to_csv(linker_csv, index=False)

    averaged_ligand_descriptors = np.mean(np.array(descriptor_list), axis=0)
    return colnames, averaged_ligand_descriptors
272
+
273
+
274
def get_MOF_descriptors(data, depth, path=False, xyzpath = False):
    """Featurize one MOF CIF file into SBU, linker and lc RAC descriptors.

    The structure is read, its bonding graph is built, atoms are split into
    SBUs and organic fragments (linkers vs. ligands), and the RACs are then
    computed by ``make_MOF_SBU_RACs`` and ``make_MOF_linker_RACs``.

    Parameters
    ----------
    data : str
        Path of the CIF file (expected to hold the primitive cell).
    depth : int
        Maximum autocorrelation depth.
    path : str
        Base output directory. ``ligands``, ``linkers``, ``sbus``, ``xyz``
        and ``logs`` sub-directories and the descriptor CSV files are
        created inside it when missing.
    xyzpath : str or False
        File the full-cell XYZ/graph is written to. When falsy it defaults
        to ``<path>/xyz/<name>.xyz``.

    Returns
    -------
    tuple
        ``(full_names, full_descriptors)``. Both are ``[0]`` when the
        structure could not be featurized; the reason is appended to
        ``<path>/FailedStructures.log``.

    Raises
    ------
    ValueError
        If ``path`` is not given.
    """
    # Local import: the module otherwise only sees ``copy`` through star imports.
    import copy

    if not path:
        print('Need a directory to place all of the linker, SBU, and ligand objects. Exiting now.')
        raise ValueError('Base path must be specified in order to write descriptors.')
    if path.endswith('/'):
        path = path[:-1]
    for subdir in ('ligands', 'linkers', 'sbus', 'xyz', 'logs'):
        os.makedirs(path + '/' + subdir, exist_ok=True)
    # The RAC helpers call os.path.getsize on these files, so they must exist.
    for csv_name in ('sbu_descriptors.csv', 'linker_descriptors.csv', 'lc_descriptors.csv'):
        if not os.path.exists(path + '/' + csv_name):
            with open(path + '/' + csv_name, 'w'):
                pass
    ligandpath = path + '/ligands'
    linkerpath = path + '/linkers'
    sbupath = path + '/sbus'
    logpath = path + "/logs"

    # Read the CIF: cell parameters, atom types and fractional coordinates.
    cpar, allatomtypes, fcoords = readcif(data)
    cell_v = mkcell(cpar)
    cart_coords = fractional2cart(fcoords, cell_v)
    # Drop the extension explicitly; str.strip(".cif") would remove any of the
    # characters '.', 'c', 'i', 'f' from both ends of the name.
    name = os.path.basename(data)
    if name.endswith('.cif'):
        name = name[:-len('.cif')]
    if not xyzpath:
        xyzpath = path + '/xyz/' + name + '.xyz'

    def _failed(reason):
        # Record the failure and return the sentinel result.
        write2file(path, "/FailedStructures.log", "Failed to featurize %s: %s\n" % (name, reason))
        return [0], [0]

    if len(cart_coords) > 2000:
        print("Too large cif file, skipping it for now...")
        return _failed("large primitive cell")
    distance_mat = compute_distance_matrix2(cell_v, cart_coords)
    try:
        adj_matrix, _ = compute_adj_matrix(distance_mat, allatomtypes)
    except NotImplementedError:
        return _failed("atomic overlap")

    writeXYZandGraph(xyzpath, allatomtypes, cell_v, fcoords, adj_matrix.todense())
    # mol3D of the full cell, used for all later RAC featurization.
    molcif, _, _, _, _ = import_from_cif(data, True)
    molcif.graph = adj_matrix.todense()

    # Every connected component must contain a metal; otherwise the structure
    # holds free (e.g. solvent) molecules and is rejected.
    n_components, labels_components = sparse.csgraph.connected_components(
        csgraph=adj_matrix, directed=False, return_labels=True)
    metal_list = set(molcif.findMetal(transition_metals_only=False))
    if not metal_list:
        return _failed("no metal found")

    for comp in range(n_components):
        inds_in_comp = [idx for idx, label in enumerate(labels_components) if label == comp]
        if not set(inds_in_comp) & metal_list:
            return _failed("solvent molecules")

    if n_components > 1:
        print("structure is interpenetrated")
        tmpstr = "%s found to be an interpenetrated structure\n" % (name)
        write2file(logpath, "/%s.log" % name, tmpstr)

    # step 1: metallic part
    #   removelist = metals (1) + atoms only connected to metals (2)
    #                + H connected to (1+2)
    #   SBUlist    = metals + 1st coordination shell of the metals
    SBUlist = set(metal_list)
    for metal in metal_list:
        SBUlist.update(molcif.getBondedAtomsSmart(metal))
    removelist = set(metal_list)
    # Log the coordination number and coordinating atom types of each metal.
    for metal in removelist:
        bonded_atoms = set(molcif.getBondedAtomsSmart(metal))
        bonded_atoms_types = set(str(allatomtypes[at]) for at in bonded_atoms)
        cn = len(bonded_atoms)
        cn_atom = ",".join(bonded_atoms_types)
        tmpstr = "atom %i with type of %s found to have %i coordinates with atom types of %s\n" % (metal, allatomtypes[metal], cn, cn_atom)
        write2file(logpath, "/%s.log" % name, tmpstr)
    # Atoms bonded only to metals and/or hydrogens belong to the SBU.
    removelist.update(
        atom for atom in SBUlist
        if all((molcif.getAtom(val).ismetal() or molcif.getAtom(val).symbol().upper() == 'H')
               for val in molcif.getBondedAtomsSmart(atom)))
    # Hydrogens on first-shell atoms (e.g. interstitial OH, as in the UiO SBU).
    for atom in SBUlist:
        for val in molcif.getBondedAtomsSmart(atom):
            if molcif.getAtom(val).symbol().upper() == 'H':
                removelist.add(val)

    # step 2: organic part = all atoms minus removelist. The coordinating
    # atoms are therefore counted in both the SBU and the organic fragments.
    allatoms = set(range(0, adj_matrix.shape[0]))
    linkers = allatoms - removelist
    linker_list, linker_subgraphlist = get_closed_subgraph(linkers.copy(), removelist.copy(), adj_matrix)
    # Keep every organic fragment (linkers and ligands) for the lc-RACs.
    connections_list = copy.deepcopy(linker_list)
    connections_subgraphlist = copy.deepcopy(linker_subgraphlist)

    # Anchoring atoms: organic atoms bonded to at least one metal.
    anc_atoms = set()
    for linker in linker_list:
        for atom_linker in linker:
            bonded2atom = np.nonzero(adj_matrix[atom_linker, :])[1]
            if set(bonded2atom) & metal_list:
                anc_atoms.add(atom_linker)

    # step 3: linker or ligand?
    #   fewer than 2 anchors                 -> ligand
    #   >= 2 anchors and >= 2 connected SBUs -> linker
    #   otherwise decide from the periodic images found by ligand_detect
    initial_SBU_list, _ = get_closed_subgraph(removelist.copy(), linkers.copy(), adj_matrix)
    long_ligands = False
    max_min_linker_length, min_max_linker_length = (0, 100)
    # Iterate in reverse so popping index ii leaves the unvisited indices valid.
    for ii, atoms_list in reversed(list(enumerate(linker_list))):
        linkeranchors_list = set()
        linkeranchors_atoms = set()
        sbuanchors_list = set()
        sbu_connect_list = set()
        for iii, atoms in enumerate(atoms_list):
            connected_atoms = np.nonzero(adj_matrix[atoms, :])[1]
            for kk, sbu_atoms_list in enumerate(initial_SBU_list):
                for sbu_atoms in sbu_atoms_list:
                    if sbu_atoms in connected_atoms:
                        linkeranchors_list.add(iii)
                        linkeranchors_atoms.add(atoms)
                        sbuanchors_list.add(sbu_atoms)
                        sbu_connect_list.add(kk)  # unique SBUs touched
        min_length, max_length = linker_length(linker_subgraphlist[ii].todense(), linkeranchors_list)

        if len(linkeranchors_list) >= 2:
            if len(sbu_connect_list) >= 2:
                # Connects two distinct SBUs: certainly a linker.
                max_min_linker_length = max(min_length, max_min_linker_length)
                min_max_linker_length = min(max_length, min_max_linker_length)
                continue
            # Ambiguous case: several anchors but a single SBU.
            # TODO: we still can fail in multidentate ligands!
            linker_cart_coords = np.array([molcif.getAtom(val).coords() for val in atoms_list])
            linker_adjmat = np.array(linker_subgraphlist[ii].todense())
            pr_image_organic = ligand_detect(cell_v, linker_cart_coords, linker_adjmat, linkeranchors_list)
            sbu_temp = linkeranchors_atoms.copy()
            sbu_temp.update(initial_SBU_list[next(iter(sbu_connect_list))])
            sbu_temp = list(sbu_temp)
            sbu_cart_coords = np.array([molcif.getAtom(val).coords() for val in sbu_temp])
            sbu_adjmat = slice_mat(adj_matrix.todense(), sbu_temp)
            pr_image_sbu = ligand_detect(cell_v, sbu_cart_coords, sbu_adjmat, set(range(len(linkeranchors_list))))
            same_image = (len(np.unique(pr_image_sbu, axis=0)) == 1
                          and len(np.unique(pr_image_organic, axis=0)) == 1)
            if not same_image:  # linker
                max_min_linker_length = max(min_length, max_min_linker_length)
                min_max_linker_length = min(max_length, min_max_linker_length)
                tmpstr = str(name) + ',' + ' Anchors list: ' + str(sbuanchors_list) \
                    + ',' + ' SBU connectlist: ' + str(sbu_connect_list) + ' set to be linker\n'
                write2file(ligandpath, "/ambiguous.txt", tmpstr)
                continue
            # All anchoring atoms are in the same unit cell -> ligand.
            removelist.update(set(atoms_list))
            SBUlist.update(set(atoms_list))
            linker_list.pop(ii)
            linker_subgraphlist.pop(ii)
            tmpstr = str(name) + ',' + ' Anchors list: ' + str(sbuanchors_list) \
                + ',' + ' SBU connectlist: ' + str(sbu_connect_list) + ' set to be ligand\n'
            write2file(ligandpath, "/ambiguous.txt", tmpstr)
            tmpstr = str(name) + str(ii) + ',' + ' Anchors list: ' + \
                str(sbuanchors_list) + ',' + ' SBU connectlist: ' + str(sbu_connect_list) + '\n'
            write2file(ligandpath, "/ligand.txt", tmpstr)
        else:  # definite ligand
            write2file(logpath, "/%s.log" % name, "found ligand\n")
            removelist.update(set(atoms_list))
            SBUlist.update(set(atoms_list))
            linker_list.pop(ii)
            linker_subgraphlist.pop(ii)
            tmpstr = str(name) + ',' + ' Anchors list: ' + str(sbuanchors_list) \
                + ',' + ' SBU connectlist: ' + str(sbu_connect_list) + '\n'
            write2file(ligandpath, "/ligand.txt", tmpstr)

    tmpstr = str(name) + ", (min_max_linker_length,max_min_linker_length): " + \
        str(min_max_linker_length) + " , " + str(max_min_linker_length) + "\n"
    write2file(logpath, "/%s.log" % name, tmpstr)
    if min_max_linker_length < 3:
        write2file(linkerpath, "/short_ligands.txt", tmpstr)
    if min_max_linker_length > 2:
        # for N-C-C-N ligand ligand
        if max_min_linker_length == min_max_linker_length:
            long_ligands = True
        elif min_max_linker_length > 3:
            long_ligands = True

    # Long linkers: add the second coordination shell without further checks.
    # Short linkers: grow outwards from the metal with include_extra_shells.
    linker_length_list = [len(linker_val) for linker_val in linker_list]
    if len(set(linker_length_list)) != 1:
        write2file(linkerpath, "/uneven.txt", str(name) + '\n')
    if not min_max_linker_length < 2:  # 2-atom ligands are treated differently
        if long_ligands:
            tmpstr = "\nStructure has LONG ligand\n\n"
            write2file(logpath, "/%s.log" % name, tmpstr)
            # Add the neighbours of the current SBU atoms (e.g. the carbons of
            # carboxylate linkers); iterate over a snapshot while growing.
            for zero_first_shell in list(SBUlist):
                SBUlist.update(molcif.getBondedAtomsSmart(zero_first_shell))
        truncated_linkers = allatoms - SBUlist
        SBU_list, SBU_subgraphlist = get_closed_subgraph(SBUlist, truncated_linkers, adj_matrix)
        if not long_ligands:
            tmpstr = "\nStructure has SHORT ligand\n\n"
            write2file(logpath, "/%s.log" % name, tmpstr)
            SBU_list, SBU_subgraphlist = include_extra_shells(SBU_list, SBU_subgraphlist, molcif, adj_matrix)
    else:
        tmpstr = "Structure %s has extreamly short ligands, check the outputs\n" % name
        write2file(ligandpath, "/ambiguous.txt", tmpstr)
        # The log line is written twice, as in the original implementation.
        tmpstr = "Structure has extreamly short ligands\n"
        write2file(logpath, "/%s.log" % name, tmpstr)
        write2file(logpath, "/%s.log" % name, tmpstr)
        truncated_linkers = allatoms - removelist
        SBU_list, SBU_subgraphlist = get_closed_subgraph(removelist, truncated_linkers, adj_matrix)
        SBU_list, SBU_subgraphlist = include_extra_shells(SBU_list, SBU_subgraphlist, molcif, adj_matrix)
        SBU_list, SBU_subgraphlist = include_extra_shells(SBU_list, SBU_subgraphlist, molcif, adj_matrix)

    # Featurize only when at least one linker subgraph remains.
    if len(linker_subgraphlist) >= 1:
        try:
            descriptor_names, descriptors, lc_descriptor_names, lc_descriptors = make_MOF_SBU_RACs(
                SBU_list, SBU_subgraphlist, molcif, depth, name, cell_v, anc_atoms,
                sbupath, connections_list, connections_subgraphlist)
            lig_descriptor_names, lig_descriptors = make_MOF_linker_RACs(
                linker_list, linker_subgraphlist, molcif, depth, name, cell_v, linkerpath)
            full_names = descriptor_names + lig_descriptor_names + lc_descriptor_names
            full_descriptors = list(descriptors) + list(lig_descriptors) + list(lc_descriptors)
            print(len(full_names), len(full_descriptors))
        except Exception as err:
            # Best effort: a failing structure must not stop a batch run, but
            # the reason is recorded instead of being silently discarded.
            write2file(logpath, "/%s.log" % name,
                       "Featurization raised %s: %s\n" % (type(err).__name__, err))
            full_names = [0]
            full_descriptors = [0]
    else:
        print('Failed to featurize this MOF.')
        full_names = [0]
        full_descriptors = [0]
    if (len(full_names) <= 1) and (len(full_descriptors) <= 1):
        tmpstr = "Failed to featurize %s\n" % (name)
        write2file(path, "/FailedStructures.log", tmpstr)
    return full_names, full_descriptors
573
+
574
+
575
+ ##### Example of usage over a set of cif files.
576
+ # featurization_list = []
577
+ # import sys
578
+ # featurization_directory = sys.argv[1]
579
+ # for cif_file in os.listdir(featurization_directory+'/cif/'):
580
+ # #### This first part gets the primitive cells ####
581
+ # get_primitive(featurization_directory+'/cif/'+cif_file, featurization_directory+'/primitive/'+cif_file)
582
+ # full_names, full_descriptors = get_MOF_descriptors(featurization_directory+'/primitive/'+cif_file,3,path=featurization_directory+'/',
583
+ # xyzpath=featurization_directory+'/xyz/'+cif_file.replace('cif','xyz'))
584
+ # full_names.append('filename')
585
+ # full_descriptors.append(cif_file)
586
+ # featurization = dict(zip(full_names, full_descriptors))
587
+ # featurization_list.append(featurization)
588
+ # df = pd.DataFrame(featurization_list)
589
+ # df.to_csv('./full_featurization_frame.csv',index=False)