molSimplify 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (651) hide show
  1. docs/source/conf.py +224 -0
  2. molSimplify/Classes/__init__.py +6 -0
  3. molSimplify/Classes/atom3D.py +235 -0
  4. molSimplify/Classes/dft_obs.py +130 -0
  5. molSimplify/Classes/globalvars.py +827 -0
  6. molSimplify/Classes/helpers.py +161 -0
  7. molSimplify/Classes/ligand.py +2330 -0
  8. molSimplify/Classes/mGUI.py +2493 -0
  9. molSimplify/Classes/mWidgets.py +438 -0
  10. molSimplify/Classes/miniGUI.py +41 -0
  11. molSimplify/Classes/mol2D.py +260 -0
  12. molSimplify/Classes/mol3D.py +5846 -0
  13. molSimplify/Classes/monomer3D.py +253 -0
  14. molSimplify/Classes/partialcharges.py +226 -0
  15. molSimplify/Classes/protein3D.py +1178 -0
  16. molSimplify/Classes/rundiag.py +151 -0
  17. molSimplify/Data/ML.dat +212 -0
  18. molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
  19. molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
  20. molSimplify/Data/MLS_angle_for_click.dat +8 -0
  21. molSimplify/Data/MLS_angle_for_inter.dat +23 -0
  22. molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
  23. molSimplify/Data/MLS_angle_for_intra.dat +10 -0
  24. molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
  25. molSimplify/Data/MLS_angle_for_oa.dat +18 -0
  26. molSimplify/Data/ML_FSR_for_inter.dat +112 -0
  27. molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
  28. molSimplify/Data/ML_bond_for_cat.dat +8 -0
  29. molSimplify/Data/ML_bond_for_click.dat +8 -0
  30. molSimplify/Data/ML_bond_for_inter.dat +48 -0
  31. molSimplify/Data/ML_bond_for_inter2.dat +48 -0
  32. molSimplify/Data/ML_bond_for_intra.dat +10 -0
  33. molSimplify/Data/ML_bond_for_intra2.dat +6 -0
  34. molSimplify/Data/ML_bond_for_oa.dat +18 -0
  35. molSimplify/Data/bp1.dat +21 -0
  36. molSimplify/Data/li.dat +3 -0
  37. molSimplify/Data/no.dat +2 -0
  38. molSimplify/Data/oct.dat +7 -0
  39. molSimplify/Data/pbp.dat +8 -0
  40. molSimplify/Data/spy.dat +6 -0
  41. molSimplify/Data/sqap.dat +9 -0
  42. molSimplify/Data/sqp.dat +5 -0
  43. molSimplify/Data/tbp.dat +6 -0
  44. molSimplify/Data/tdhd.dat +9 -0
  45. molSimplify/Data/thd.dat +5 -0
  46. molSimplify/Data/tpl.dat +4 -0
  47. molSimplify/Data/tpr.dat +7 -0
  48. molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
  49. molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
  50. molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
  51. molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
  52. molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
  53. molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
  54. molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
  55. molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
  56. molSimplify/Informatics/MOF/__init__.py +0 -0
  57. molSimplify/Informatics/MOF/atomic.py +267 -0
  58. molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
  59. molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
  60. molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
  61. molSimplify/Informatics/Mol2Parser.py +46 -0
  62. molSimplify/Informatics/RACassemble.py +408 -0
  63. molSimplify/Informatics/__init__.py +0 -0
  64. molSimplify/Informatics/active_learning/__init__.py +0 -0
  65. molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
  66. molSimplify/Informatics/autocorrelation.py +1930 -0
  67. molSimplify/Informatics/clean_autocorrelation.py +778 -0
  68. molSimplify/Informatics/coulomb_analyze.py +67 -0
  69. molSimplify/Informatics/decoration_manager.py +193 -0
  70. molSimplify/Informatics/geo_analyze.py +88 -0
  71. molSimplify/Informatics/geometrics.py +56 -0
  72. molSimplify/Informatics/graph_analyze.py +163 -0
  73. molSimplify/Informatics/graph_racs.py +288 -0
  74. molSimplify/Informatics/jupyter_vis.py +172 -0
  75. molSimplify/Informatics/lacRACAssemble.py +2192 -0
  76. molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
  77. molSimplify/Informatics/misc_descriptors.py +198 -0
  78. molSimplify/Informatics/organic_fingerprints.py +61 -0
  79. molSimplify/Informatics/partialcharges.py +345 -0
  80. molSimplify/Informatics/protein/activesite.py +53 -0
  81. molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
  82. molSimplify/Informatics/rac155_geo.py +48 -0
  83. molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
  84. molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
  85. molSimplify/Ligands/12crown4.mol +62 -0
  86. molSimplify/Ligands/Antipyrine.mol +58 -0
  87. molSimplify/Ligands/BPAbipy.mol +106 -0
  88. molSimplify/Ligands/Hpyrrole.mol +26 -0
  89. molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
  90. molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
  91. molSimplify/Ligands/NMe2_-1.xyz +11 -0
  92. molSimplify/Ligands/PCy3.mol +111 -0
  93. molSimplify/Ligands/PMe3.xyz +15 -0
  94. molSimplify/Ligands/PPh3.mol +76 -0
  95. molSimplify/Ligands/Propyphenazone.mol +77 -0
  96. molSimplify/Ligands/acac.mol +33 -0
  97. molSimplify/Ligands/acacen.mol +76 -0
  98. molSimplify/Ligands/acetate.smi +1 -0
  99. molSimplify/Ligands/acetate.xyz +9 -0
  100. molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
  101. molSimplify/Ligands/acetonitrile.mol +17 -0
  102. molSimplify/Ligands/alanine.mol +30 -0
  103. molSimplify/Ligands/alphabetizer.py +21 -0
  104. molSimplify/Ligands/amine.mol +11 -0
  105. molSimplify/Ligands/ammonia.mol +12 -0
  106. molSimplify/Ligands/arginine.mol +58 -0
  107. molSimplify/Ligands/asparagine.mol +38 -0
  108. molSimplify/Ligands/aspartic_acid.mol +35 -0
  109. molSimplify/Ligands/azide.mol +11 -0
  110. molSimplify/Ligands/benzene.mol +28 -0
  111. molSimplify/Ligands/benzene_pi.mol +30 -0
  112. molSimplify/Ligands/benzenedithiol.mol +30 -0
  113. molSimplify/Ligands/benzenethiol.mol +30 -0
  114. molSimplify/Ligands/benzylisocy.mol +38 -0
  115. molSimplify/Ligands/bidiazine.mol +42 -0
  116. molSimplify/Ligands/bidiazole.mol +38 -0
  117. molSimplify/Ligands/bifuran.mol +38 -0
  118. molSimplify/Ligands/bihydrodiazine.mol +58 -0
  119. molSimplify/Ligands/bihydrodiazole.mol +46 -0
  120. molSimplify/Ligands/bihydrooxazine.mol +54 -0
  121. molSimplify/Ligands/bihydrooxazole.mol +42 -0
  122. molSimplify/Ligands/bihydrothiazine.mol +54 -0
  123. molSimplify/Ligands/bihydrothiazole.mol +42 -0
  124. molSimplify/Ligands/biimidazole.mol +38 -0
  125. molSimplify/Ligands/bioxazole.mol +34 -0
  126. molSimplify/Ligands/bipy.mol +46 -0
  127. molSimplify/Ligands/bipyrazine.xyz +20 -0
  128. molSimplify/Ligands/bipyrimidine.mol +42 -0
  129. molSimplify/Ligands/bipyrrole.mol +42 -0
  130. molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
  131. molSimplify/Ligands/bithiazole.mol +34 -0
  132. molSimplify/Ligands/bromide.mol +7 -0
  133. molSimplify/Ligands/bromide.smi +1 -0
  134. molSimplify/Ligands/c2.mol +9 -0
  135. molSimplify/Ligands/caprolactone.mol +41 -0
  136. molSimplify/Ligands/carbonyl.mol +8 -0
  137. molSimplify/Ligands/carboxyl.mol +13 -0
  138. molSimplify/Ligands/cat.mol +30 -0
  139. molSimplify/Ligands/chloride.mol +7 -0
  140. molSimplify/Ligands/chloride.smi +1 -0
  141. molSimplify/Ligands/chloropyridine.mol +27 -0
  142. molSimplify/Ligands/co2.mol +10 -0
  143. molSimplify/Ligands/corrolazine.mol +72 -0
  144. molSimplify/Ligands/cs.mol +8 -0
  145. molSimplify/Ligands/cyanate.xyz +5 -0
  146. molSimplify/Ligands/cyanide.mol +9 -0
  147. molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
  148. molSimplify/Ligands/cyanopyridine.mol +29 -0
  149. molSimplify/Ligands/cyclam.mol +81 -0
  150. molSimplify/Ligands/cyclen.mol +69 -0
  151. molSimplify/Ligands/cyclopentadienyl.mol +26 -0
  152. molSimplify/Ligands/cysteine.mol +32 -0
  153. molSimplify/Ligands/diaminomethyl.mol +19 -0
  154. molSimplify/Ligands/diazine.mol +25 -0
  155. molSimplify/Ligands/diazole.mol +23 -0
  156. molSimplify/Ligands/dicyanamide.mol +15 -0
  157. molSimplify/Ligands/dihydrofuran.mol +27 -0
  158. molSimplify/Ligands/dmap.xyz +35 -0
  159. molSimplify/Ligands/dmf.mol +28 -0
  160. molSimplify/Ligands/dmi.mol +41 -0
  161. molSimplify/Ligands/dmpe.mol +52 -0
  162. molSimplify/Ligands/dpmu.mol +47 -0
  163. molSimplify/Ligands/dppe.mol +112 -0
  164. molSimplify/Ligands/edta.mol +69 -0
  165. molSimplify/Ligands/en.mol +28 -0
  166. molSimplify/Ligands/ethanethiol.mol +21 -0
  167. molSimplify/Ligands/ethanolamine.mol +26 -0
  168. molSimplify/Ligands/ethbipy.mol +70 -0
  169. molSimplify/Ligands/ethyl.mol +19 -0
  170. molSimplify/Ligands/ethylamine.mol +24 -0
  171. molSimplify/Ligands/ethylene.mol +16 -0
  172. molSimplify/Ligands/ethylesteracac.mol +57 -0
  173. molSimplify/Ligands/fluoride.mol +7 -0
  174. molSimplify/Ligands/fluoride.smi +1 -0
  175. molSimplify/Ligands/formaldehyde.mol +12 -0
  176. molSimplify/Ligands/formamidate.xyz +8 -0
  177. molSimplify/Ligands/formate.xyz +6 -0
  178. molSimplify/Ligands/furan.mol +23 -0
  179. molSimplify/Ligands/glutamic_acid.mol +42 -0
  180. molSimplify/Ligands/glutamine.mol +44 -0
  181. molSimplify/Ligands/glycinate.mol +23 -0
  182. molSimplify/Ligands/glycine.mol +24 -0
  183. molSimplify/Ligands/h2s.mol +10 -0
  184. molSimplify/Ligands/helium.mol +6 -0
  185. molSimplify/Ligands/histidine.mol +45 -0
  186. molSimplify/Ligands/hmpa.mol +62 -0
  187. molSimplify/Ligands/hs-.mol +9 -0
  188. molSimplify/Ligands/hydride.mol +7 -0
  189. molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
  190. molSimplify/Ligands/hydrocyanide.mol +10 -0
  191. molSimplify/Ligands/hydrodiazine.mol +33 -0
  192. molSimplify/Ligands/hydrodiazole.mol +27 -0
  193. molSimplify/Ligands/hydrogensulfide.mol +10 -0
  194. molSimplify/Ligands/hydroisocyanide.mol +11 -0
  195. molSimplify/Ligands/hydrooxazine.mol +31 -0
  196. molSimplify/Ligands/hydrooxazole.mol +25 -0
  197. molSimplify/Ligands/hydrothiazine.mol +31 -0
  198. molSimplify/Ligands/hydrothiazole.mol +25 -0
  199. molSimplify/Ligands/hydroxyl.mol +9 -0
  200. molSimplify/Ligands/imidazole.mol +23 -0
  201. molSimplify/Ligands/imidazolidinone.mol +29 -0
  202. molSimplify/Ligands/imine.mol +13 -0
  203. molSimplify/Ligands/iminodiacetic.mol +33 -0
  204. molSimplify/Ligands/iodide.mol +7 -0
  205. molSimplify/Ligands/iodobenzene.xyz +14 -0
  206. molSimplify/Ligands/isoleucine.mol +48 -0
  207. molSimplify/Ligands/isothiocyanate.mol +11 -0
  208. molSimplify/Ligands/leucine.mol +48 -0
  209. molSimplify/Ligands/ligands.dict +257 -0
  210. molSimplify/Ligands/lysine.mol +54 -0
  211. molSimplify/Ligands/mebenzenedithiol.mol +36 -0
  212. molSimplify/Ligands/mebim_py.xyz +29 -0
  213. molSimplify/Ligands/mebim_pz.xyz +28 -0
  214. molSimplify/Ligands/mebipy.mol +58 -0
  215. molSimplify/Ligands/mecat.mol +36 -0
  216. molSimplify/Ligands/methanal.mol +11 -0
  217. molSimplify/Ligands/methanethiol.mol +15 -0
  218. molSimplify/Ligands/methanol.mol +16 -0
  219. molSimplify/Ligands/methionine.mol +44 -0
  220. molSimplify/Ligands/methyl.mol +13 -0
  221. molSimplify/Ligands/methylacetylide.xyz +8 -0
  222. molSimplify/Ligands/methylamine.mol +19 -0
  223. molSimplify/Ligands/methylazide.xyz +9 -0
  224. molSimplify/Ligands/methylisocy.mol +17 -0
  225. molSimplify/Ligands/methylpyridine.mol +33 -0
  226. molSimplify/Ligands/n2.mol +8 -0
  227. molSimplify/Ligands/n4py.xyz +51 -0
  228. molSimplify/Ligands/nch.mol +10 -0
  229. molSimplify/Ligands/nco-.mol +11 -0
  230. molSimplify/Ligands/nethanolamine.mol +26 -0
  231. molSimplify/Ligands/nitrate.mol +14 -0
  232. molSimplify/Ligands/nitrite.mol +11 -0
  233. molSimplify/Ligands/nitro.mol +11 -0
  234. molSimplify/Ligands/nitrobipy.mol +54 -0
  235. molSimplify/Ligands/nitroso.mol +8 -0
  236. molSimplify/Ligands/nme3.mol +30 -0
  237. molSimplify/Ligands/no-.mol +10 -0
  238. molSimplify/Ligands/no2-.mol +11 -0
  239. molSimplify/Ligands/noxygen.mol +8 -0
  240. molSimplify/Ligands/ns-.mol +10 -0
  241. molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
  242. molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
  243. molSimplify/Ligands/o2-.mol +9 -0
  244. molSimplify/Ligands/o2.xyz +4 -0
  245. molSimplify/Ligands/och2.mol +12 -0
  246. molSimplify/Ligands/oethanolamine.mol +26 -0
  247. molSimplify/Ligands/ome2.mol +22 -0
  248. molSimplify/Ligands/ooh.xyz +5 -0
  249. molSimplify/Ligands/oxalate.mol +17 -0
  250. molSimplify/Ligands/oxalate.smi +1 -0
  251. molSimplify/Ligands/oxygen.mol +7 -0
  252. molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
  253. molSimplify/Ligands/ph2-.mol +11 -0
  254. molSimplify/Ligands/ph3.mol +12 -0
  255. molSimplify/Ligands/phen.mol +51 -0
  256. molSimplify/Ligands/phenacac.mol +63 -0
  257. molSimplify/Ligands/phenalalanine.mol +51 -0
  258. molSimplify/Ligands/phendione.mol +51 -0
  259. molSimplify/Ligands/phenphen.mol +75 -0
  260. molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
  261. molSimplify/Ligands/phenylcyc.mol +99 -0
  262. molSimplify/Ligands/phenylenediamine.mol +37 -0
  263. molSimplify/Ligands/phenylisocy.mol +32 -0
  264. molSimplify/Ligands/phosacidbipy.mol +66 -0
  265. molSimplify/Ligands/phosphine.mol +13 -0
  266. molSimplify/Ligands/phosphorine.mol +27 -0
  267. molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
  268. molSimplify/Ligands/phthalocyanine.mol +126 -0
  269. molSimplify/Ligands/pme3o.mol +32 -0
  270. molSimplify/Ligands/porphyrin.mol +82 -0
  271. molSimplify/Ligands/pph3o.mol +77 -0
  272. molSimplify/Ligands/proline.mol +39 -0
  273. molSimplify/Ligands/propdiol.mol +21 -0
  274. molSimplify/Ligands/propylene.mol +23 -0
  275. molSimplify/Ligands/pyridine.mol +27 -0
  276. molSimplify/Ligands/pyrimidone.mol +27 -0
  277. molSimplify/Ligands/pyrrole.mol +24 -0
  278. molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
  279. molSimplify/Ligands/s2-.mol +9 -0
  280. molSimplify/Ligands/salen.mol +75 -0
  281. molSimplify/Ligands/salphen.mol +84 -0
  282. molSimplify/Ligands/serine.mol +32 -0
  283. molSimplify/Ligands/simple_ligands.dict +14 -0
  284. molSimplify/Ligands/sulfacidbipy.mol +63 -0
  285. molSimplify/Ligands/tbucat.mol +54 -0
  286. molSimplify/Ligands/tbuphisocy.mol +56 -0
  287. molSimplify/Ligands/tbutylcyclen.mol +166 -0
  288. molSimplify/Ligands/tbutylisocy.mol +35 -0
  289. molSimplify/Ligands/tbutylthiol.mol +33 -0
  290. molSimplify/Ligands/tcnoet.mol +43 -0
  291. molSimplify/Ligands/tcnoetOH.mol +45 -0
  292. molSimplify/Ligands/terpy.mol +65 -0
  293. molSimplify/Ligands/tetrahydrofuran.mol +31 -0
  294. molSimplify/Ligands/thiane.mol +37 -0
  295. molSimplify/Ligands/thiazole.mol +21 -0
  296. molSimplify/Ligands/thiocyanate.mol +11 -0
  297. molSimplify/Ligands/thiol.mol +9 -0
  298. molSimplify/Ligands/thiophene.mol +23 -0
  299. molSimplify/Ligands/thiopyridine.mol +29 -0
  300. molSimplify/Ligands/threonine.mol +38 -0
  301. molSimplify/Ligands/tpp.mol +165 -0
  302. molSimplify/Ligands/tricyanomethyl.mol +19 -0
  303. molSimplify/Ligands/trifluoromethyl.mol +13 -0
  304. molSimplify/Ligands/tryptophan.mol +60 -0
  305. molSimplify/Ligands/tyrosine.mol +53 -0
  306. molSimplify/Ligands/uthiol.mol +11 -0
  307. molSimplify/Ligands/uthiolme2.mol +23 -0
  308. molSimplify/Ligands/valine.mol +42 -0
  309. molSimplify/Ligands/water.mol +10 -0
  310. molSimplify/Ligands/x.mol +6 -0
  311. molSimplify/Scripts/__init__.py +0 -0
  312. molSimplify/Scripts/addtodb.py +308 -0
  313. molSimplify/Scripts/cellbuilder.py +1592 -0
  314. molSimplify/Scripts/cellbuilder_tools.py +701 -0
  315. molSimplify/Scripts/chains.py +342 -0
  316. molSimplify/Scripts/convert_2to3.py +23 -0
  317. molSimplify/Scripts/dbinteract.py +631 -0
  318. molSimplify/Scripts/distgeom.py +617 -0
  319. molSimplify/Scripts/findcorrelations.py +287 -0
  320. molSimplify/Scripts/generator.py +267 -0
  321. molSimplify/Scripts/geometry.py +1224 -0
  322. molSimplify/Scripts/grabguivars.py +845 -0
  323. molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
  324. molSimplify/Scripts/inparse.py +1673 -0
  325. molSimplify/Scripts/io.py +1149 -0
  326. molSimplify/Scripts/isomers.py +415 -0
  327. molSimplify/Scripts/jobgen.py +247 -0
  328. molSimplify/Scripts/krr_prep.py +1262 -0
  329. molSimplify/Scripts/molSimplify_io.py +18 -0
  330. molSimplify/Scripts/molden2psi4wfn.py +166 -0
  331. molSimplify/Scripts/namegen.py +32 -0
  332. molSimplify/Scripts/nn_prep.py +561 -0
  333. molSimplify/Scripts/oct_check_mols.py +782 -0
  334. molSimplify/Scripts/periodic_QE.py +97 -0
  335. molSimplify/Scripts/postmold.py +304 -0
  336. molSimplify/Scripts/postmwfn.py +709 -0
  337. molSimplify/Scripts/postparse.py +488 -0
  338. molSimplify/Scripts/postproc.py +139 -0
  339. molSimplify/Scripts/qcgen.py +1450 -0
  340. molSimplify/Scripts/rmsd.py +489 -0
  341. molSimplify/Scripts/rungen.py +670 -0
  342. molSimplify/Scripts/structgen.py +3040 -0
  343. molSimplify/Scripts/tf_nn_prep.py +894 -0
  344. molSimplify/Scripts/tsgen.py +295 -0
  345. molSimplify/Scripts/uq_calibration.py +69 -0
  346. molSimplify/__init__.py +0 -0
  347. molSimplify/__main__.py +197 -0
  348. molSimplify/icons/chemdb.png +0 -0
  349. molSimplify/icons/hjklogo.png +0 -0
  350. molSimplify/icons/icon.png +0 -0
  351. molSimplify/icons/logo.png +0 -0
  352. molSimplify/icons/logo_old.png +0 -0
  353. molSimplify/icons/petachem.png +0 -0
  354. molSimplify/icons/petachem2.png +0 -0
  355. molSimplify/icons/petachem_full.png +0 -0
  356. molSimplify/icons/pythonlogo.png +0 -0
  357. molSimplify/icons/sge copy.png +0 -0
  358. molSimplify/icons/sge.png +0 -0
  359. molSimplify/icons/slurm.png +0 -0
  360. molSimplify/icons/wft1.png +0 -0
  361. molSimplify/icons/wft2.png +0 -0
  362. molSimplify/icons/wft3.png +0 -0
  363. molSimplify/ml/__init__.py +0 -0
  364. molSimplify/ml/kernels.py +36 -0
  365. molSimplify/ml/layers.py +29 -0
  366. molSimplify/molscontrol/__init__.py +14 -0
  367. molSimplify/molscontrol/_version.py +521 -0
  368. molSimplify/molscontrol/clf_tools.py +144 -0
  369. molSimplify/molscontrol/data/README.md +21 -0
  370. molSimplify/molscontrol/data/look_and_say.dat +15 -0
  371. molSimplify/molscontrol/dynamic_classifier.py +514 -0
  372. molSimplify/molscontrol/io_tools.py +363 -0
  373. molSimplify/molscontrol/molscontrol.py +49 -0
  374. molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
  375. molSimplify/molscontrol/terachem/terachem_input +22 -0
  376. molSimplify/python_krr/X_train_TS.csv +535 -0
  377. molSimplify/python_krr/__init__.py +0 -0
  378. molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
  379. molSimplify/python_krr/hat2_feature_names.csv +1 -0
  380. molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
  381. molSimplify/python_krr/hat_X_mean_std.csv +6 -0
  382. molSimplify/python_krr/hat_feature_names.csv +1 -0
  383. molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
  384. molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
  385. molSimplify/python_krr/hat_y_mean_std.csv +2 -0
  386. molSimplify/python_krr/sklearn_models.py +34 -0
  387. molSimplify/python_krr/y_train_TS.csv +535 -0
  388. molSimplify/python_nn/ANN.py +198 -0
  389. molSimplify/python_nn/__init__.py +0 -0
  390. molSimplify/python_nn/clf_analysis_tool.py +125 -0
  391. molSimplify/python_nn/dictionary_toolbox.py +49 -0
  392. molSimplify/python_nn/ensemble_test.py +309 -0
  393. molSimplify/python_nn/hs_center.csv +26 -0
  394. molSimplify/python_nn/hs_scale.csv +26 -0
  395. molSimplify/python_nn/ls_center.csv +26 -0
  396. molSimplify/python_nn/ls_scale.csv +26 -0
  397. molSimplify/python_nn/ms_hs_b1.csv +50 -0
  398. molSimplify/python_nn/ms_hs_b2.csv +50 -0
  399. molSimplify/python_nn/ms_hs_b3.csv +1 -0
  400. molSimplify/python_nn/ms_hs_w1.csv +50 -0
  401. molSimplify/python_nn/ms_hs_w2.csv +50 -0
  402. molSimplify/python_nn/ms_hs_w3.csv +1 -0
  403. molSimplify/python_nn/ms_ls_b1.csv +50 -0
  404. molSimplify/python_nn/ms_ls_b2.csv +50 -0
  405. molSimplify/python_nn/ms_ls_b3.csv +1 -0
  406. molSimplify/python_nn/ms_ls_w1.csv +50 -0
  407. molSimplify/python_nn/ms_ls_w2.csv +50 -0
  408. molSimplify/python_nn/ms_ls_w3.csv +1 -0
  409. molSimplify/python_nn/ms_slope_b1.csv +50 -0
  410. molSimplify/python_nn/ms_slope_b2.csv +50 -0
  411. molSimplify/python_nn/ms_slope_b3.csv +1 -0
  412. molSimplify/python_nn/ms_slope_w1.csv +50 -0
  413. molSimplify/python_nn/ms_slope_w2.csv +50 -0
  414. molSimplify/python_nn/ms_slope_w3.csv +1 -0
  415. molSimplify/python_nn/ms_split_b1.csv +50 -0
  416. molSimplify/python_nn/ms_split_b2.csv +50 -0
  417. molSimplify/python_nn/ms_split_b3.csv +1 -0
  418. molSimplify/python_nn/ms_split_w1.csv +50 -0
  419. molSimplify/python_nn/ms_split_w2.csv +50 -0
  420. molSimplify/python_nn/ms_split_w3.csv +1 -0
  421. molSimplify/python_nn/slope_center.csv +25 -0
  422. molSimplify/python_nn/slope_scale.csv +25 -0
  423. molSimplify/python_nn/split_center.csv +26 -0
  424. molSimplify/python_nn/split_scale.csv +26 -0
  425. molSimplify/python_nn/tf_ANN.py +762 -0
  426. molSimplify/python_nn/train_data.csv +1211 -0
  427. molSimplify/tf_nn/__init__.py +0 -0
  428. molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
  429. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
  430. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
  431. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
  432. molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
  433. molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
  434. molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
  435. molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
  436. molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
  437. molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
  438. molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
  439. molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
  440. molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
  441. molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
  442. molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
  443. molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
  444. molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
  445. molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
  446. molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
  447. molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
  448. molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
  449. molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
  450. molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
  451. molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
  452. molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
  453. molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
  454. molSimplify/tf_nn/homolumo/gap_model.json +1 -0
  455. molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
  456. molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
  457. molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
  458. molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
  459. molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
  460. molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
  461. molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
  462. molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
  463. molSimplify/tf_nn/homolumo/homo_model.json +126 -0
  464. molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
  465. molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
  466. molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
  467. molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
  468. molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
  469. molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
  470. molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
  471. molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
  472. molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
  473. molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
  474. molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
  475. molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
  476. molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
  477. molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
  478. molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
  479. molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
  480. molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
  481. molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
  482. molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
  483. molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
  484. molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
  485. molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
  486. molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
  487. molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
  488. molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
  489. molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
  490. molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
  491. molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
  492. molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
  493. molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
  494. molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
  495. molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
  496. molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
  497. molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
  498. molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
  499. molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
  500. molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
  501. molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
  502. molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
  503. molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
  504. molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
  505. molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
  506. molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
  507. molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
  508. molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
  509. molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
  510. molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
  511. molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
  512. molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
  513. molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
  514. molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
  515. molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
  516. molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
  517. molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
  518. molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
  519. molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
  520. molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
  521. molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
  522. molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
  523. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
  524. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
  525. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
  526. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
  527. molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
  528. molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
  529. molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
  530. molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
  531. molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
  532. molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
  533. molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
  534. molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
  535. molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
  536. molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
  537. molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
  538. molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
  539. molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
  540. molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
  541. molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
  542. molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
  543. molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
  544. molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
  545. molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
  546. molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
  547. molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
  548. molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
  549. molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
  550. molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
  551. molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
  552. molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
  553. molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
  554. molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
  555. molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
  556. molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
  557. molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
  558. molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
  559. molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
  560. molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
  561. molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
  562. molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
  563. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
  564. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
  565. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
  566. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
  567. molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
  568. molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
  569. molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
  570. molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
  571. molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
  572. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
  573. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
  574. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
  575. molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
  576. molSimplify/tf_nn/split/split_model.h5 +0 -0
  577. molSimplify/tf_nn/split/split_model.json +1 -0
  578. molSimplify/tf_nn/split/split_vars.csv +155 -0
  579. molSimplify/tf_nn/split/split_x.csv +1902 -0
  580. molSimplify/tf_nn/split/split_y.csv +1902 -0
  581. molSimplify/tf_nn/split/train_names.csv +1901 -0
  582. molSimplify/utils/__init__.py +0 -0
  583. molSimplify/utils/decorators.py +16 -0
  584. molSimplify/utils/metaclasses.py +12 -0
  585. molSimplify/utils/tensorflow.py +23 -0
  586. molSimplify/utils/timer.py +16 -0
  587. molSimplify-1.7.4.dist-info/LICENSE +674 -0
  588. molSimplify-1.7.4.dist-info/METADATA +821 -0
  589. molSimplify-1.7.4.dist-info/RECORD +651 -0
  590. molSimplify-1.7.4.dist-info/WHEEL +5 -0
  591. molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
  592. molSimplify-1.7.4.dist-info/top_level.txt +4 -0
  593. tests/generateTests.py +122 -0
  594. tests/helperFuncs.py +658 -0
  595. tests/informatics/test_MOF_descriptors.py +128 -0
  596. tests/informatics/test_active_learning.py +113 -0
  597. tests/informatics/test_coulomb_analyze.py +24 -0
  598. tests/informatics/test_graph_racs.py +193 -0
  599. tests/ml/test_kernels.py +20 -0
  600. tests/ml/test_layers.py +47 -0
  601. tests/runtest.py +10 -0
  602. tests/test_Mol2D.py +128 -0
  603. tests/test_basic_imports.py +62 -0
  604. tests/test_bidentate.py +25 -0
  605. tests/test_cli.py +20 -0
  606. tests/test_distgeom.py +106 -0
  607. tests/test_example_1.py +29 -0
  608. tests/test_example_3.py +31 -0
  609. tests/test_example_5.py +43 -0
  610. tests/test_example_7.py +28 -0
  611. tests/test_example_8.py +15 -0
  612. tests/test_example_tbp.py +15 -0
  613. tests/test_ff_xtb.py +111 -0
  614. tests/test_geocheck_oct.py +26 -0
  615. tests/test_geocheck_one_empty.py +15 -0
  616. tests/test_geometry.py +44 -0
  617. tests/test_inparse.py +76 -0
  618. tests/test_io.py +84 -0
  619. tests/test_jobgen.py +84 -0
  620. tests/test_joption_pythonic.py +27 -0
  621. tests/test_ligand_assign.py +58 -0
  622. tests/test_ligand_assign_consistent.py +60 -0
  623. tests/test_ligand_class.py +26 -0
  624. tests/test_ligand_from_mol_file.py +35 -0
  625. tests/test_ligands.py +86 -0
  626. tests/test_mol3D.py +337 -0
  627. tests/test_molcas_caspt2.py +15 -0
  628. tests/test_molcas_casscf.py +15 -0
  629. tests/test_old_ANNs.py +68 -0
  630. tests/test_orca_ccsdt.py +15 -0
  631. tests/test_orca_dft.py +15 -0
  632. tests/test_qcgen.py +50 -0
  633. tests/test_racs.py +124 -0
  634. tests/test_rmsd.py +68 -0
  635. tests/test_structgen_functions.py +198 -0
  636. tests/test_tetrahedral.py +29 -0
  637. tests/test_tutorial_10_part_one.py +16 -0
  638. tests/test_tutorial_10_part_two.py +15 -0
  639. tests/test_tutorial_2.py +11 -0
  640. tests/test_tutorial_3.py +15 -0
  641. tests/test_tutorial_4.py +57 -0
  642. tests/test_tutorial_6.py +10 -0
  643. tests/test_tutorial_8.py +29 -0
  644. tests/test_tutorial_9_part_one.py +15 -0
  645. tests/test_tutorial_9_part_two.py +15 -0
  646. tests/test_tutorial_qm9_part_one.py +6 -0
  647. tests/testresources/refs/racs/generate_references.py +85 -0
  648. workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
  649. workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
  650. workflows/NandyJACSAu2022/fragment_classes.py +586 -0
  651. workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
@@ -0,0 +1,895 @@
1
+ from molSimplify.Classes.mol3D import mol3D
2
+ from molSimplify.Classes.atom3D import atom3D
3
+ from molSimplify.Informatics.MOF.PBC_functions import (
4
+ compute_adj_matrix,
5
+ compute_distance_matrix3,
6
+ fractional2cart,
7
+ get_closed_subgraph,
8
+ include_extra_shells,
9
+ ligand_detect,
10
+ linker_length,
11
+ mkcell,
12
+ readcif,
13
+ returnXYZandGraph,
14
+ slice_mat,
15
+ write2file,
16
+ writeXYZandGraph,
17
+ XYZ_connected,
18
+ )
19
+ from molSimplify.Scripts.cellbuilder_tools import import_from_cif
20
+ import numpy as np
21
+ from scipy import sparse
22
+ import networkx as nx
23
+ import copy
24
+ import itertools
25
+ import os
26
+
27
+
28
def periodic_checker(graph, coords, cutoff=4.0):
    """
    Checks if a graph is periodic or not.
    This does the same task as molSimplify.Informatics.MOF.MOF_descriptors.detect_1D_rod, but in a different way.

    Parameters
    ----------
    graph : numpy.matrix
        Adjacency matrix. Shape is (number of atoms, number of atoms).
    coords : list of list of float
        Cartesian coordinates of atoms. Length of the outer list is the number of atoms, while each inner list is length 3.
    cutoff : float
        Distance (in the units of coords) above which two bonded atoms are taken to be
        offset by a cell vector. Defaults to 4, the value that used to be hard-coded.

    Returns
    -------
    periodic : bool
        Whether or not a graph is periodic.

    """
    from scipy.sparse import csgraph

    # Zero entries are dropped by csgraph_from_dense, so nonzero() lists exactly the bonded (i, j) pairs.
    bonded = csgraph.csgraph_from_dense(graph)
    rows, cols = bonded.nonzero()
    if len(rows) == 0:
        # No bonds at all: nothing can be stretched across a cell boundary.
        return False

    positions = np.asarray(coords, dtype=float)
    bond_lengths = np.linalg.norm(positions[rows] - positions[cols], axis=1)
    # If any connected atoms are further apart than the cutoff, they are very likely
    # to be offset by a cell vector. Periodic.
    return bool(bond_lengths.max() > cutoff)
61
+
62
+
63
+ def branch(molcif, main_paths, atoms_in_sbu, new_atoms=None):
64
+ """
65
+ Climbs out from a given atom and adds the atoms that are in the branch.
66
+ This is important for getting all atoms in a branched functional group of a linker.
67
+
68
+ Parameters
69
+ ----------
70
+ molcif : molSimplify.Classes.mol3D.mol3D
71
+ The cell of the cif file being analyzed.
72
+ main_paths : list of int
73
+ Indices of main path atoms (atoms that are part of a linker).
74
+ atoms_in_sbu : list of numpy.int64
75
+ Indices of atoms in the SBU.
76
+ new_atoms : list of numpy.int64
77
+ Indices of new atoms to be included.
78
+
79
+ Returns
80
+ -------
81
+ new_atoms : list of numpy.int64
82
+ Indices of new atoms to be included.
83
+ atoms_in_sbu : list of numpy.int64
84
+ Indices of atoms in the SBU.
85
+
86
+ """
# NOTE(review): indentation is not visible in this rendering, so the nesting of the
# statements below has to be confirmed against the original file.
87
# A None default is used instead of a mutable [] default; a fresh list is made per call.
+ if new_atoms is None:
88
+ new_atoms = []
89
# Snapshot of the incoming list; only its length is used, for the termination test further down.
+ original_atoms = atoms_in_sbu.copy()
90
# NOTE(review): new_atoms is extended with += (which mutates the list object being iterated)
# and is then rebound to a de-duplicated list; statement order matters here.
+ for atom in new_atoms:
91
+ bonded_list = molcif.getBondedAtoms(atom)
92
# True when this atom has at least one neighbour that is neither on a main path nor already in the SBU.
+ if (len(set(bonded_list)-set(main_paths)-set(atoms_in_sbu)) > 0):
93
+ new_atoms += list(set(bonded_list)-set(main_paths))
94
+ new_atoms = list(set(new_atoms))
95
# += extends atoms_in_sbu in place, so the list passed in by the caller is modified
# (and may receive repeated indices).
+ atoms_in_sbu += new_atoms
96
# No growth of atoms_in_sbu means nothing new was found: stop recursing.
+ if len(original_atoms) == len(atoms_in_sbu):
97
+ return new_atoms, atoms_in_sbu
98
+ else:
99
# Otherwise recurse on the enlarged lists and append whatever the recursive call returns.
+ branch_atoms, branch_atoms_in_sbu = branch(molcif, main_paths, atoms_in_sbu, new_atoms)
100
+ new_atoms += branch_atoms
101
+ atoms_in_sbu += branch_atoms_in_sbu
102
+ return new_atoms, atoms_in_sbu
103
+
104
def identify_main_chain(temp_mol, link_list):
    """
    Identifies the atoms that lie directly on the paths from one
    connecting point to another, together with rings that share atoms
    with those paths. Identifies cases that can be functional groups.

    Parameters
    ----------
    temp_mol : molSimplify.Classes.mol3D.mol3D
        mol3D of a linker.
    link_list : list of int
        The indices of the anchoring atoms of the linker.

    Returns
    -------
    main : list of int
        Any atoms that lie on the path between two connection points. May contain repeated indices.
    shortest : int
        The shortest path length (counted in atoms) between two anchoring atoms.
    longest : int
        The longest path length (counted in atoms) between two anchoring atoms.

    Raises
    ------
    ValueError
        If link_list is empty, because there is then no pair of anchors to measure.

    """
    # nx.from_numpy_matrix was removed in networkx 3.0; from_numpy_array is its replacement.
    G = nx.from_numpy_array(temp_mol.graph)

    if len(link_list) == 1:
        # A single anchor has no anchor-to-anchor path: the whole linker is the main chain.
        return list(G.nodes), 1, 1

    # Shortest path (as a list of nodes) for every pair of anchoring atoms.
    shorts = [
        list(nx.shortest_path(G, source=start, target=end))
        for start, end in itertools.combinations(link_list, 2)
    ]
    path_lengths = [len(short) for short in shorts]
    shortest, longest = min(path_lengths), max(path_lengths)
    paths = list(itertools.chain(*shorts))

    min_cycles = nx.minimum_cycle_basis(G)  # gets all closed rings in graph
    pending_cycles = min_cycles.copy()
    grown_paths = paths.copy()
    previous_pending = 0
    # Keep sweeping over the rings until a full sweep absorbs no further ring.
    while len(pending_cycles) != previous_pending:
        previous_pending = len(pending_cycles)
        for cycle in min_cycles:
            paths = grown_paths.copy()
            on_path = set(paths)
            ring = set(cycle)
            # Absorb minimum cycles that have atoms in common with any anchor-to-anchor
            # shortest path but are not yet fully part of it (potential functional groups).
            if on_path & ring and not ring.issubset(on_path):
                grown_paths += ring
                pending_cycles.remove(cycle)

    main = paths
    return main, shortest, longest
162
+
163
+
164
def get_molcif_cycles_no_metal(molcif):
    """
    Makes the graph and gets all cycles in the graph that contain no metal atom.

    Parameters
    ----------
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the cif file being analyzed.

    Returns
    -------
    subcycle_list : list of list of int
        The individual subcycles. Each inner list is a subcycle.
    flat_subcycle_list : list of int
        Flattened list of subcycle atoms (indices).

    """
    # nx.from_numpy_matrix was removed in networkx 3.0; from_numpy_array is its replacement.
    G = nx.from_numpy_array(molcif.graph)
    cycles = nx.minimum_cycle_basis(G)  # gets all closed rings in graph
    # Don't include any cycles with a metal in them.
    # This is necessary to not get malformed cycles.
    subcycle_list = [
        cycle for cycle in cycles
        if not any(molcif.getAtom(element).ismetal() for element in cycle)
    ]
    # Flatten list to contain all atoms in subcycles
    flat_subcycle_list = [item for sublist in subcycle_list for item in sublist]
    return subcycle_list, flat_subcycle_list
197
+
198
+ def breakdown_MOF(SBUlist, SBU_subgraph, molcif, name, cell, anchoring_atoms, sbupath=False, connections_list=False, connections_subgraphlist=False, linkerpath=False):
199
+ """
200
+ Writes SBU and linker XYZ files.
201
+
202
+ Output codes are as follows:
203
+ 2: There exist short (i.e. 2 atom) and longer linkers. We could not split the MOF apart consistently.
204
+ 4: The MOF contains a 1D rod, which cannot be easily reassembled into a new MOF.
205
+ None: The MOF was split correctly
206
+
207
+ Parameters
208
+ ----------
209
+ SBUlist : list of list of numpy.int64
210
+ Each inner list is its own separate SBU. The ints are the atom indices of that SBU. Length is # of SBUs.
211
+ SBU_subgraph : list of scipy.sparse.csr.csr_matrix
212
+ The atom connections in the SBU subgraph. Length is # of SBUs.
213
+ molcif : molSimplify.Classes.mol3D.mol3D
214
+ The cell of the cif file being analyzed.
215
+ name : str
216
+ The name of the cif being analyzed.
217
+ cell : numpy.ndarray
218
+ The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
219
+ anchoring_atoms : set of numpy.int64
220
+ The indices of the anchoring atoms of the linkers.
221
+ sbupath : str
222
+ The path to which the SBU XYZ files will be written.
223
+ connections_list : list of list of int
224
+ Each inner list is its own separate linker. The ints are the atom indices of that linker. Length is # of linkers.
225
+ connections_subgraphlist : list of numpy.matrix
226
+ The atom connections in the linker subgraph. Length is # of linkers.
227
+ linkerpath : str
228
+ The path to which the linker XYZ files will be written.
229
+
230
+ Returns
231
+ -------
232
+ None
233
+
234
+ """
# NOTE(review): the "Returns" section above lists only None, but the body also returns 2 and 4
# (the output codes listed at the top of the docstring).
# NOTE(review): the SBU_subgraph parameter is not read anywhere in this function.
# NOTE(review): indentation is not visible in this rendering, so the nesting of the statements
# below has to be confirmed against the original file.
235
# NOTE(review): n_sbu and all_SBU_X_atoms are assigned here but not read again in this function.
+ n_sbu = len(SBUlist)
236
+ all_SBU_atoms = []
237
+ all_SBU_X_atoms = []
238
+
239
+ # make the graph and get all cycles in the graph
240
+ # return the flattened list of the subcycle atoms
241
+ # The subcycle list contains all of the individual subcycles (if they need to be compared)
242
+ subcycle_list, flat_subcycle_list = get_molcif_cycles_no_metal(molcif)
243
+
244
+ '''
245
+ Loop over all SBUs as identified by subgraphs. Then create the mol3Ds for each SBU.
246
+ '''
247
+ for i, SBU in enumerate(SBUlist):
248
+ # For a given SBU, make a list of main paths. This contains atoms that are part of the linker.
249
+ main_paths = []
250
+ linker_length_dict = {}
251
+ current_longest = 0
252
+ for j, linker in enumerate(connections_list):
253
+ # For each SBU and linker combo, make a mol3D define the linklist for that linker
254
+ linker_mol = mol3D()
255
+ # keep track of added atoms
256
+ linker_added = []
257
+ link_list = []
258
+ linker_dict = {}
259
+ for jj, val2 in enumerate(linker):
260
+ # add anchoring atom to link list. Val2 has molcif numbering
261
+ linker_dict[jj] = val2
262
+ if val2 in anchoring_atoms:
263
+ link_list.append(jj)
264
+ # This builds a mol object for the linker --> even though it is in the SBU section.
265
+ if not (val2 in linker_added):
266
+ linker_mol.addAtom(molcif.getAtom(val2))
267
+ linker_added.append(val2)
268
+ linker_mol.graph = connections_subgraphlist[j]
269
+ # This identifies anything on the simple path from end to end
270
+ main, shortest, longest = identify_main_chain(linker_mol, link_list)
271
+ if longest > current_longest:
272
+ current_longest = longest
273
+ # Currently, main is in linker indices. Get them back in molcif indices.
274
+ # This is the main chain for a given linker.
275
+ main = [linker_dict[val] for val in main]
276
+ main_paths.extend(main)
277
+ min_length, max_length = linker_length(connections_subgraphlist[j],link_list)
278
+ linker_length_j = max(min_length, max_length)
279
+ # Make a dictionary that will identify the linker length and atoms in the linker by the linker number
280
# NOTE(review): linker_length_dict is filled here but not read again in this function.
+ linker_length_dict[j] = {'length':linker_length_j, 'atoms':linker, 'longest':longest}
281
# Output code 2: current_longest holds the largest `longest` value (a path length counted in atoms,
# as returned by identify_main_chain) seen for the linkers; at most 2 means only short linkers.
+ if current_longest <= 2:
282
+ return 2
283
+ # put all main path atoms into the main path list
284
+ main_paths = list(set(main_paths))
285
+ SBU_mol = mol3D()
286
+
287
+ # This list keeps track of if an atom has been added to the SBU
288
+ SBU_added = []
289
+ # This dictionary keeps a mapping between molcif indices and SBU_mol indices
290
+ SBU_dict = {}
291
+ # Keeps track of the branches off of a linker for instance.
292
+ branches = []
293
+ # Keeps track of the atoms bonded to a cycle.
294
+ bonded_atoms_to_cycle = []
295
+ # Tuple list keeps track of the atoms that are coordinated to X atoms
296
+ tuple_list_sbu = []
297
+ # Keep track of the indices that should be the X atoms
298
+ atoms_that_are_X = []
299
+ X_checked_list = []
300
+ # Make an atom3D list of the X atoms. These atoms should be added to the end of the XYZ.
301
+ X_atom3D_list = []
302
+ for val in SBU:
303
+ # make SBU mol, add new atom if never added before.
304
+ if val not in SBU_added:
305
+ SBU_mol.addAtom(molcif.getAtom(val))
306
+ # Create a mapping between the molcif indices (values) and the SBUmol indices (keys)
307
+ SBU_dict[SBU_mol.natoms-1] = val
308
+ SBU_added.append(val)
309
+ # Check if any of the atoms added to the SBU are part of a cycle. Checks overlap between first
310
+ # two coordination shells and any rings in the SBU.
311
+ in_cycles = any([val in cycle for cycle in subcycle_list])
312
+ if in_cycles:
313
+ # Some atoms overlap with the cycles that are formally part of a linker.
314
+ cycles_with_overlap = []
315
+ for cycle in subcycle_list:
316
+ if val in cycle:
317
+ if cycle not in cycles_with_overlap:
318
+ cycles_with_overlap.append(cycle)
319
+ temp_bonded_list = []
320
+ for cycle_val in cycle:
321
+ temp_bonded = molcif.getBondedAtoms(cycle_val)
322
# Keep only the neighbours of this ring atom that lie outside the ring itself.
+ temp_bonded = list(set(temp_bonded)-set(cycle))
323
+ temp_bonded_list.extend(temp_bonded)
324
+ if cycle_val not in SBU_added:
325
+ SBU_mol.addAtom(molcif.getAtom(cycle_val))
326
+ SBU_dict[SBU_mol.natoms-1] = cycle_val
327
+ SBU_added.append(cycle_val)
328
+ bonded_atoms_to_cycle.append(temp_bonded_list)
329
+ # Check how many atoms are branched
330
# A copy of SBU_added is passed because branch() extends the list it is given in place.
+ additional_branched_atoms,_ = branch(molcif, main_paths, SBU_added.copy(), [val])
331
+ for branched_atom in additional_branched_atoms:
332
+ if branched_atom not in SBU_added:
333
+ SBU_mol.addAtom(molcif.getAtom(branched_atom))
334
+ SBU_dict[SBU_mol.natoms-1] = branched_atom
335
+ SBU_added.append(branched_atom)
336
+ if len(bonded_atoms_to_cycle)>1:
337
+ new_bonded_atoms_to_cycle = []
338
+ # Don't let things that are part of another cycle be included here
339
+ for bonded_atoms_to_indiv_cycle in bonded_atoms_to_cycle:
340
+ new_bonded_atoms_to_cycle.append(list(set(bonded_atoms_to_indiv_cycle)-set(flat_subcycle_list)))
341
+ combos = itertools.combinations(new_bonded_atoms_to_cycle, 2)
342
+ for comboval in combos:
343
+ if comboval[0] == comboval[1]:
344
+ continue
345
# Atoms bonded to both rings of the pair (and in no ring themselves) are added to the SBU.
+ intersection = list(set(comboval[0])&set(comboval[1]))
346
+ if len(intersection)>0:
347
+ for comboval_intersection in intersection:
348
+ if comboval_intersection not in SBU_added:
349
+ SBU_mol.addAtom(molcif.getAtom(comboval_intersection))
350
+ SBU_dict[SBU_mol.natoms-1] = comboval_intersection
351
+ SBU_added.append(comboval_intersection)
352
# Atoms bonded to every recorded ring; note this uses the unfiltered bonded_atoms_to_cycle lists.
+ intersection_atoms = list(set.intersection(*map(set,bonded_atoms_to_cycle)))
353
+ for intersection_atom in intersection_atoms:
354
+ if intersection_atom not in SBU_added:
355
+ SBU_mol.addAtom(molcif.getAtom(intersection_atom))
356
+ SBU_dict[SBU_mol.natoms-1] = intersection_atom
357
+ SBU_added.append(intersection_atom)
358
# Iterate over a copy because SBU_added is appended to inside the loop.
+ for SBU_added_atoms in SBU_added.copy():
359
+ bonded_atoms = molcif.getBondedAtoms(SBU_added_atoms)
360
+ for bonded_atom in bonded_atoms:
361
+ if molcif.getAtom(bonded_atom).symbol() == 'H':
362
+ if bonded_atom not in SBU_added:
363
+ SBU_mol.addAtom(molcif.getAtom(bonded_atom))
364
+ SBU_dict[SBU_mol.natoms-1] = bonded_atom
365
+ SBU_added.append(bonded_atom)
366
# A main-path neighbour that is not in the SBU becomes an 'X' placeholder atom at that neighbour's position.
+ if (bonded_atom in main_paths) and (not ((bonded_atom in SBU_added) or (bonded_atom in X_checked_list))):
367
+ temp_atom = molcif.getAtom(bonded_atom)
368
+ temp_atom_coords = temp_atom.coords()
369
+ new_atom = atom3D(Sym='X', xyz=temp_atom_coords.copy())
370
# Each entry is (X atom3D, molcif index it stands in for, molcif index of the SBU atom it is bonded to).
+ X_atom3D_list.append((new_atom, bonded_atom, SBU_added_atoms))
371
+ X_checked_list.append(bonded_atom)
372
+
373
+ final_X_indices = []
374
+ for X_atom in X_atom3D_list:
375
+ if X_atom[1] in SBU_added:
376
+ continue
377
+ else:
378
+ SBU_added.append(X_atom[1])
379
+ SBU_mol.addAtom(X_atom[0])
380
+ SBU_dict[SBU_mol.natoms-1] = X_atom[1]
381
+ tuple_list_sbu.append((SBU_mol.natoms-1, X_atom[2]))
382
+ final_X_indices.append(SBU_mol.natoms-1)
383
+ atoms_that_are_X.append(X_atom[1])
384
+ SBU_added_no_X = list(set(SBU_added)-set(atoms_that_are_X))
385
# Inverse mapping: molcif index -> SBU_mol index.
+ inv_SBU_dict = {v: k for k, v in SBU_dict.items()}
386
+ tempgraph = molcif.graph[np.ix_(SBU_added, SBU_added)]
387
# NOTE(review): no_X_graph is not read again in this function.
+ no_X_graph = molcif.graph[np.ix_(SBU_added_no_X, SBU_added_no_X)]
388
+ SBU_mol.graph = tempgraph
389
+ SBU_mol_cart_coords = np.array([atom.coords() for atom in SBU_mol.atoms])
390
+ SBU_mol_atom_labels =[atom.sym for atom in SBU_mol.atoms]
391
+ SBU_mol_adj_mat = np.array(SBU_mol.graph)
392
+
393
# XYZ_connected / returnXYZandGraph come from PBC_functions; presumably they unwrap the fragment across
# periodic boundaries and return Cartesian coordinates — TODO confirm against that module.
+ SBU_mol_fcoords_connected = XYZ_connected(cell, SBU_mol_cart_coords, SBU_mol_adj_mat)
394
+ coord_list, molgraph = returnXYZandGraph(None, SBU_mol_atom_labels, cell, SBU_mol_fcoords_connected, SBU_mol_adj_mat)
395
+ for r in range(SBU_mol.natoms):
396
+ SBU_mol.getAtom(r).setcoords(coord_list[r])
397
+ for val in tuple_list_sbu:
398
# presumably moves each X placeholder to 0.75 (distance units) from the SBU atom it is bonded to;
# mol3D.BCM is defined outside this file — TODO confirm.
+ SBU_mol.BCM(val[0],inv_SBU_dict[val[1]],0.75)
399
# Parses coordinates back out of the text returned by SBU_mol.coords(): the first two lines and the
# last piece of the split are skipped, and the first token of every remaining line is dropped.
+ new_coords = [[float(val2) for val2 in val.split()[1:]] for val in SBU_mol.coords().split('\n')[2:-1]]
400
+ is_periodic = periodic_checker(tempgraph, new_coords)
401
+ # if is_periodic is true, the SBU is periodic in nature --> 1D rod.
402
+
403
+ ###### WRITE THE SBU MOL TO THE PLACE
404
# NOTE(review): the existence check tests <name><i>.xyz, whereas the files written below are named
# <name>_sbu_<i>.xyz or <name>_sbu1Drod_<i>.xyz.
+ if sbupath and not os.path.exists(sbupath+"/"+str(name)+str(i)+'.xyz'):
405
+ if is_periodic:
406
+ xyzname = sbupath+"/"+str(name)+"_sbu1Drod_"+str(i)+".xyz"
407
+ else:
408
+ xyzname = sbupath+"/"+str(name)+"_sbu_"+str(i)+".xyz"
409
+
410
+ if len(final_X_indices)>0:
411
+ X_string = ' '.join([str(val) for val in final_X_indices])
412
+ else:
413
+ X_string = ' '
414
+ coord_list, molgraph = returnXYZandGraph(xyzname, SBU_mol_atom_labels, cell, SBU_mol_fcoords_connected, SBU_mol_adj_mat)
415
+ SBU_mol.writexyz(xyzname, withgraph=True, specialheader=' '+X_string)
416
+ all_SBU_atoms.extend(SBU_added)
417
# NOTE(review): xyzname is only assigned inside the sbupath branch above. Whether this check sits
# inside that branch cannot be told from this rendering; if it sits outside, a falsy sbupath would
# leave xyzname unbound (or stale from an earlier SBU) here. Testing is_periodic may be what is meant — confirm.
+ if '1Drod' in xyzname:
418
+ # if SBU is a 1D rod, end it here
419
+ return 4
420
+ atoms_to_be_deleted_from_linker = list(set(all_SBU_atoms))
421
+ for i, linker in enumerate(connections_list):
422
+ linker_mol = mol3D()
423
+ # This list keeps track of if an atom has been added to the SBU
424
+ linker_added = []
425
+ # This dictionary keeps a mapping between molcif indices and SBU_mol indices
426
+ linker_dict = {}
427
+ # Tuple list keeps track of the atoms that are coordinated to X atoms
428
+ tuple_list_linker = []
429
+ # Keep track of the indices that should be the X atoms
430
+ atoms_that_are_X_linker = []
431
+ X_checked_list_linker = []
432
+ # Make an atom3D list of the X atoms. These atoms should be added to the end of the XYZ.
433
+ X_atom3D_list_linker = []
434
+ for val in linker.copy():
435
+ # loop over atoms in linker
436
+ if (val not in atoms_to_be_deleted_from_linker):
437
+ # if current atom should not be deleted (not X), add it.
438
+ linker_mol.addAtom(molcif.getAtom(val))
439
+ linker_added.append(val)
440
+ # keep mapping between linker and molcif
441
+ linker_dict[linker_mol.natoms-1] = val
442
# NOTE(review): current_atom is not read again in this function.
+ current_atom = linker_mol.natoms-1
443
+ # get all of the atoms bonded to the original atom
444
+ for bonded_atom in molcif.getBondedAtoms(val):
445
+ # add the atom if it's in the SBU set
446
+ if (bonded_atom in all_SBU_atoms) and (bonded_atom not in linker_added):
447
+ linker_mol.addAtom(molcif.getAtom(bonded_atom))
448
+ linker_added.append(bonded_atom)
449
+ linker_dict[linker_mol.natoms-1] = bonded_atom
450
+ subatoms = molcif.getBondedAtoms(bonded_atom)
451
+ for subatom in subatoms:
452
+ if (subatom in atoms_to_be_deleted_from_linker) and (not ((subatom in linker_added) or (subatom in X_checked_list_linker))):
453
+ temp_atom_linker = molcif.getAtom(subatom)
454
+ temp_atom_coords_linker = temp_atom_linker.coords()
455
+ new_atom_linker = atom3D(Sym='X', xyz=temp_atom_coords_linker.copy())
456
+ X_atom3D_list_linker.append((new_atom_linker,subatom,bonded_atom))
457
# NOTE(review): this appends to X_checked_list (the list from the SBU loop above, unbound if SBUlist
# was empty); X_checked_list_linker, which the condition above tests, is never appended to in this
# function. Possibly intended: X_checked_list_linker.append(subatom) — confirm.
+ X_checked_list.append(bonded_atom)
458
+ final_X_indices_linker = []
459
+ for X_atom_linker in X_atom3D_list_linker:
460
# Entries whose stand-in atom is already part of the linker mol (including repeats) are skipped.
+ if X_atom_linker[1] in linker_added:
461
+ continue
462
+ else:
463
+ linker_added.append(X_atom_linker[1])
464
+ linker_mol.addAtom(X_atom_linker[0])
465
+ linker_dict[linker_mol.natoms-1] = X_atom_linker[1]
466
+ tuple_list_linker.append((linker_mol.natoms-1, X_atom_linker[2]))
467
+ final_X_indices_linker.append(linker_mol.natoms-1)
468
+ atoms_that_are_X_linker.append(X_atom_linker[1])
469
+
470
+ tempgraph = molcif.graph[np.ix_(linker_added, linker_added)]
471
+ linker_added_no_X = list(set(linker_added)-set(atoms_that_are_X_linker))
472
+ no_X_graph_linker = molcif.graph[np.ix_(linker_added_no_X, linker_added_no_X)]
473
+ linker_mol.graph = tempgraph
474
+
475
+ # make sure that the single graph is not multiple
476
+ n_components, labels_components = sparse.csgraph.connected_components(csgraph=no_X_graph_linker)
477
+ linker_mol_cart_coords = np.array([atom.coords() for atom in linker_mol.atoms])
478
+ linker_mol_atom_labels = [atom.sym for atom in linker_mol.atoms]
479
+ linker_mol_adj_mat = np.array(linker_mol.graph)
480
+ inv_linker_dict = {v: k for k, v in linker_dict.items()}
481
+ heavy_atom_count = linker_mol.count_atoms()
482
# Skip linkers that are empty, whose non-X atoms form more than one connected component,
# or for which count_atoms() reports fewer than 3.
+ if (linker_mol.natoms == 0) or (n_components > 1) or (heavy_atom_count < 3):
483
+ continue
484
+ linker_mol_fcoords_connected = XYZ_connected(cell, linker_mol_cart_coords , linker_mol_adj_mat )
485
+ coord_list, molgraph = returnXYZandGraph(None , linker_mol_atom_labels , cell , linker_mol_fcoords_connected, linker_mol_adj_mat)
486
+ for r in range(linker_mol.natoms):
487
+ linker_mol.getAtom(r).setcoords(coord_list[r])
488
+ for val in tuple_list_linker:
489
+ linker_mol.BCM(val[0],inv_linker_dict[val[1]],0.75)
490
+ ###### WRITE THE LINKER MOL TO THE PLACE
491
# NOTE(review): as for the SBUs, the existence check tests <name><i>.xyz while the file written
# below is <name>_linker_<i>.xyz.
+ if linkerpath and not os.path.exists(linkerpath+"/"+str(name)+str(i)+".xyz"):
492
+ xyzname = linkerpath+"/"+str(name)+"_linker_"+str(i)+".xyz"
493
+
494
+ if len(final_X_indices_linker)>0:
495
+ X_string = ' '.join([str(val) for val in final_X_indices_linker])
496
+ else:
497
+ X_string = ' '
498
+ coord_list, molgraph = returnXYZandGraph(xyzname, linker_mol_atom_labels , cell , linker_mol_fcoords_connected, linker_mol_adj_mat)
499
+ linker_mol.writexyz(xyzname, withgraph=True, specialheader=' '+X_string)
500
+ return None
501
+
502
def prepare_initial_SBU(molcif, allatomtypes, metal_list, logpath, name):
    """
    Builds removelist and SBUlist: the atoms to strip out of the linkers and the atoms that belong to SBUs.

    Parameters
    ----------
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the cif file being analyzed.
    allatomtypes : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    metal_list : set of int
        The indices of metal atoms in the mol3D. Not consulted; the metals are looked up again with molcif.findMetal.
    logpath : str
        The path to which log files are written.
    name : str
        The name of the cif being analyzed.

    Returns
    -------
    removelist : set of int
        The indices of atoms to remove.
    SBUlist : set of numpy.int64
        The indices of atoms in SBUs. removelist + 1st coordination shell of the metals

    """
    metals = molcif.findMetal(transition_metals_only=False)

    # The SBU holds every metal and every metal's first coordination shell.
    SBUlist = set()
    for metal in metals:
        SBUlist.update({metal})
    for metal in metals:
        SBUlist.update(set(molcif.getBondedAtomsSmart(metal)))

    # Remove all metals as part of the SBU.
    removelist = set()
    for metal in metals:
        removelist.update({metal})

    # Log the coordination number and coordinating atom types of each metal.
    log_name = "/%s.log" % name
    for metal in removelist:
        bonded_atoms = set(molcif.getBondedAtomsSmart(metal))
        bonded_atoms_types = set([str(allatomtypes[at]) for at in bonded_atoms])
        cn = len(bonded_atoms)
        cn_atom = ",".join(bonded_atoms_types)
        tmpstr = "atom %i with type of %s found to have %i coordinates with atom types of %s\n" % (metal, allatomtypes[metal], cn, cn_atom)
        write2file(logpath, log_name, tmpstr)

    def _is_metal_or_hydrogen(index):
        # True when the atom at this index is a metal or a hydrogen.
        return molcif.getAtom(index).ismetal() or molcif.getAtom(index).symbol().upper() == 'H'

    # SBU atoms whose neighbours are all metals or hydrogens are removed as well.
    for atom in SBUlist:
        if all(_is_metal_or_hydrogen(val) for val in molcif.getBondedAtomsSmart(atom)):
            removelist.update({atom})

    # Adding hydrogens connected to atoms which are only connected to metals.
    # In particular interstitial OH, like in UiO SBU.
    for atom in SBUlist:
        for val in molcif.getBondedAtomsSmart(atom):
            if molcif.getAtom(val).symbol().upper() == 'H':
                removelist.update({val})

    return removelist, SBUlist
549
+
550
def identify_lc_atoms(molcif, removelist, metal_list):
    """
    Returns linker information including the indices of atoms that anchor onto SBUs.

    Parameters
    ----------
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the cif file being analyzed.
    removelist : set of int
        The indices of atoms to remove, i.e. the SBU atoms.
    metal_list : set of int
        The indices of metal atoms in the mol3D. Any iterable of indices is accepted.

    Returns
    -------
    anc_atoms : set of numpy.int64
        The indices of the anchoring atoms of the linkers.
    linkers : set of int
        The indices of linkers.
    linker_list : list of list of int
        Each inner list is its own separate linker. The ints are the atom indices of that linker. Length is # of linkers.
    linker_subgraphlist : list of numpy.matrix
        The atom connections in the linker subgraph. Length is # of linkers.
    allatoms : set of int
        The indices of all of the atoms in the MOF.
    connections_list : list of list of int
        Deep copy of linker_list, so that later edits to linker_list do not affect it.
    connections_subgraphlist : list of numpy.matrix
        Deep copy of linker_subgraphlist.

    """
    allatoms = set(range(0, molcif.graph.shape[0]))
    # Anything that is in the remove list (SBU) is removed, leaving linkers
    linkers = allatoms - removelist
    # Use the atoms for linkers and the remove list, along with the original full unit cell graph to make the linker subgraphs
    linker_list, linker_subgraphlist = get_closed_subgraph(linkers.copy(), removelist.copy(), molcif.graph)
    connections_list = copy.deepcopy(linker_list)
    connections_subgraphlist = copy.deepcopy(linker_subgraphlist)

    # Coerce once so that a list (or any iterable) of metal indices works as well as a set.
    metal_set = set(metal_list)

    # Find all anchoring atoms on linkers and ligands (lc identification).
    anc_atoms = set()
    for linker in linker_list:
        for atom_linker in linker:
            # An atom is an anchoring atom if the graph shows it bonded to a metal.
            # The graph row is indexed as 2D, so the column indices are element [1] of nonzero().
            bonded2atom = np.nonzero(molcif.graph[atom_linker, :])[1]
            if not metal_set.isdisjoint(bonded2atom):
                anc_atoms.add(atom_linker)
    # return the anchoring atoms, the atoms we leave as linkers
    return anc_atoms, linkers, linker_list, linker_subgraphlist, allatoms, connections_list, connections_subgraphlist
602
+
603
+ def identify_short_linkers(molcif, initial_SBU_list, initial_SBU_subgraphlist, removelist, linkers, linker_list, linker_subgraphlist, adj_matrix, SBUlist, logpath, linkerpath, name, cell_v):
604
+ """
605
+ Helps determine whether a MOF has long or short linkers.
606
+
607
+ Parameters
608
+ ----------
609
+ molcif : molSimplify.Classes.mol3D.mol3D
610
+ The cell of the cif file being analyzed.
611
+ initial_SBU_list : list of list of numpy.int32
612
+ Each inner list is its own separate SBU. The ints are the atom indices of that SBU. Length is # of SBUs.
613
+ initial_SBU_subgraphlist : list of scipy.sparse.csr.csr_matrix
614
+ The atom connections in the SBU subgraph. Length is # of SBUs.
615
+ removelist : set of int
616
+ The indices of atoms to remove.
617
+ linkers : set of int
618
+ The indices of linkers.
619
+ linker_list : list of list of int
620
+ Each inner list is its own separate linker. The ints are the atom indices of that linker. Length is # of linkers.
621
+ linker_subgraphlist : list of numpy.matrix
622
+ The atom connections in the linker subgraph. Length is # of linkers.
623
+ adj_matrix : scipy.sparse.csr.csr_matrix
624
+ Adjacency matrix. 1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).
625
+ SBUlist : set of numpy.int64
626
+ The indices of atoms in SBUs. removelist + 1st coordination shell of the metals
627
+ logpath : str
628
+ The path to which log files are written.
629
+ linkerpath : str
630
+ Path of the folder to make TXT files in.
631
+ name : str
632
+ The name of the cif being analyzed.
633
+ cell_v : numpy.ndarray
634
+ The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
635
+
636
+ Returns
637
+ -------
638
+ min_max_linker_length : int
639
+ The longest path length between two anchors in a linker.
640
+ long_ligands : bool
641
+ Indicates whether the linkers are short.
642
+ SBUlist : set of numpy.int64
643
+ The indices of atoms in SBUs. removelist + 1st coordination shell of the metals
644
+ removelist : set of int
645
+ The indices of atoms to remove.
646
+ linker_list : list of list of int
647
+ Each inner list is its own separate linker. The ints are the atom indices of that linker. Length is # of linkers.
648
+ linker_subgraphlist : list of numpy.matrix
649
+ The atom connections in the linker subgraph. Length is # of linkers.
650
+
651
+ """
# NOTE(review): the docstring describes long_ligands as indicating short linkers; the code at the end of
# this function sets it to True for the longer linker lengths.
# NOTE(review): initial_SBU_subgraphlist and linkers are not read in this function.
# NOTE(review): indentation is not visible in this rendering, so the nesting of the statements
# below has to be confirmed against the original file.
652
# Shallow copies taken before linker_list / linker_subgraphlist are popped from below.
# tempgraphlist is not read again in this function.
+ templist = linker_list[:]
653
+ tempgraphlist = linker_subgraphlist[:]
654
+ long_ligands = False
655
+ max_min_linker_length, min_max_linker_length = (0,100) # The maximum value of the minimum linker length, and the minimum value of the maximum linker length. Updated later.
656
# Iterating in reverse index order keeps the not-yet-visited indices valid when entries are
# popped from linker_list / linker_subgraphlist inside the loop.
+ for ii, atoms_list in reversed(list(enumerate(linker_list))): #Loop over all linker subgraphs
657
+ linkeranchors_list = set()
658
+ linkeranchors_atoms = set()
659
+ sbuanchors_list = set()
660
+ sbu_connect_list = set()
661
+ """""""""
662
+ Here, we are trying to identify what is actually a linker and what is a ligand.
663
+ To do this, we check if something is connected to more than one SBU. Set to
664
+ handle cases where primitive cell is small, ambiguous cases are recorded.
665
+ """""""""
666
+ for iii, atoms in enumerate(atoms_list): #loop over all atoms in a linker
667
+ connected_atoms = np.nonzero(adj_matrix[atoms,:])[1]
668
+ for kk, sbu_atoms_list in enumerate(initial_SBU_list): #loop over all SBU subgraphs
669
+ for sbu_atoms in sbu_atoms_list: #Loop over SBU
670
+ if sbu_atoms in connected_atoms:
671
# linkeranchors_list holds positions within this linker (iii); linkeranchors_atoms holds the
# corresponding full-cell atom indices.
+ linkeranchors_list.add(iii)
672
+ linkeranchors_atoms.add(atoms)
673
+ sbuanchors_list.add(sbu_atoms)
674
+ sbu_connect_list.add(kk) #Add if unique SBUs
675
# linker_length is imported from PBC_functions; presumably it returns the (min, max)
# anchor-to-anchor path length for this linker subgraph — TODO confirm.
+ min_length, max_length = linker_length(linker_subgraphlist[ii], linkeranchors_list)
676
+
677
+ if len(linkeranchors_list) >=2 : # linker, and in one ambiguous case, could be a ligand.
678
+ if len(sbu_connect_list) >= 2: #Something that connects two SBUs is certain to be a linker
679
+ max_min_linker_length = max(min_length, max_min_linker_length)
680
+ min_max_linker_length = min(max_length, min_max_linker_length)
681
+ continue
682
+ else:
683
+ # check number of times we cross PBC :
684
+ # TODO: we still can fail in multidentate ligands!
685
+ linker_cart_coords = np.array([at.coords() for at in [molcif.getAtom(val) for val in atoms_list]])
686
+ linker_adjmat = np.array(linker_subgraphlist[ii])
687
+ pr_image_organic = ligand_detect(cell_v, linker_cart_coords, linker_adjmat, linkeranchors_list)
688
# Build a temporary atom set: this linker's anchoring atoms plus the atoms of the one SBU it touches.
+ sbu_temp = linkeranchors_atoms.copy()
689
+ sbu_temp.update({val for val in initial_SBU_list[list(sbu_connect_list)[0]]})
690
+ sbu_temp = list(sbu_temp)
691
+ sbu_cart_coords = np.array([at.coords() for at in [molcif.getAtom(val) for val in sbu_temp]])
692
+ sbu_adjmat = slice_mat(adj_matrix.todense(), sbu_temp)
693
+ pr_image_sbu = ligand_detect(cell_v, sbu_cart_coords, sbu_adjmat,set(range(len(linkeranchors_list))))
694
# Treated as a linker unless both ligand_detect results contain exactly one unique row.
+ if not (len(np.unique(pr_image_sbu, axis=0))==1 and len(np.unique(pr_image_organic, axis=0))==1): # linker
695
+ max_min_linker_length = max(min_length, max_min_linker_length)
696
+ min_max_linker_length = min(max_length, min_max_linker_length)
697
+ tmpstr = str(name)+','+' Anchors list: '+str(sbuanchors_list) \
698
+ +','+' SBU connectlist: '+str(sbu_connect_list)+' set to be linker\n'
699
+ write2file(linkerpath, "/ambiguous.txt", tmpstr)
700
+ continue
701
+ else: # all anchoring atoms are in the same unitcell -> ligand
702
# removelist and SBUlist are the sets passed in by the caller; they are updated in place
# (and are also returned at the end).
+ removelist.update(set(templist[ii])) # we also want to remove these ligands
703
+ SBUlist.update(set(templist[ii])) # we also want to remove these ligands
704
+ linker_list.pop(ii)
705
+ linker_subgraphlist.pop(ii)
706
+ tmpstr = str(name)+','+' Anchors list: '+str(sbuanchors_list) \
707
+ +','+' SBU connectlist: '+str(sbu_connect_list)+' set to be ligand\n'
708
+ write2file(linkerpath, "/ambiguous.txt", tmpstr)
709
+ tmpstr = str(name)+str(ii)+','+' Anchors list: '+ \
710
+ str(sbuanchors_list)+','+' SBU connectlist: '+str(sbu_connect_list)+'\n'
711
+ write2file(linkerpath, "/ligand.txt", tmpstr)
712
+ else: #definite ligand
713
+ write2file(logpath, "/%s.log"%name, "found ligand\n")
714
+ removelist.update(set(templist[ii])) # we also want to remove these ligands
715
+ SBUlist.update(set(templist[ii])) # we also want to remove these ligands
716
+ linker_list.pop(ii)
717
+ linker_subgraphlist.pop(ii)
718
+ tmpstr = str(name)+','+' Anchors list: '+str(sbuanchors_list) \
719
+ +','+' SBU connectlist: '+str(sbu_connect_list)+'\n'
720
+ write2file(linkerpath, "/ligand.txt", tmpstr)
721
+
722
+ tmpstr = str(name) + ", (min_max_linker_length,max_min_linker_length): " + \
723
+ str(min_max_linker_length) + " , " +str(max_min_linker_length) + "\n"
724
+ write2file(logpath, "/%s.log"%name, tmpstr)
725
+ if min_max_linker_length < 3:
726
+ write2file(linkerpath, "/short_ligands.txt", tmpstr)
727
# long_ligands is set when min_max_linker_length > 2 and equals max_min_linker_length, or
# (reading the elif as paired with the inner if) when min_max_linker_length > 3.
+ if min_max_linker_length > 2:
728
+ # for N-C-C-N ligand ligand
729
+ if max_min_linker_length == min_max_linker_length:
730
+ long_ligands = True
731
+ elif min_max_linker_length > 3:
732
+ long_ligands = True
733
+ return min_max_linker_length, long_ligands, SBUlist, removelist, linker_list, linker_subgraphlist
734
+
735
def make_MOF_fragments(data, path=False, xyzpath=False):
    """
    Breaks a MOF into fragments for use with pormake (in silico MOF construction).
    cif for MOF should have P1 symmetry.

    Output codes are as follows:
    2: There exist short (i.e. 2 atom) and longer linkers. We could not split the MOF apart consistently.
    3: The MOF consists only of very short 2 atom linkers.
    4: The MOF contains a 1D rod, which cannot be easily reassembled into a new MOF.
    None: The MOF was split correctly

    Parameters
    ----------
        data : str
            The path to the cif file for which SBUs and linkers will be identified.
        path : str
            The parent path to which output will be written. Will contain a folder for SBUs and another for linkers.
            The 'linkers', 'sbus', 'xyz' and 'logs' subfolders are created if missing.
        xyzpath : str
            The path to which an xyz file and a net (connectivity) file of the MOF will be written.
            Passed through unchanged to writeXYZandGraph.

    Returns
    -------
        return_code : int or None
            See function description for possible return codes and their meanings.
            NOTE: when the structure is rejected before fragmentation (more than
            2000 atoms, atomic overlap, no metal, or a connected component without
            a metal) the tuple ``(None, None)`` is returned instead and the reason
            is appended to ``<path>/FailedStructures.log``.

    Raises
    ------
        ValueError
            If no base path is given.

    """
    if not path:
        print('Need a directory to place all of the linker and SBU objects. Exiting now.')
        raise ValueError('Base path must be specified in order to write descriptors.')

    if path.endswith('/'):
        path = path[:-1]
    # exist_ok avoids the check-then-create race and also creates a missing base directory.
    for subdir in ('linkers', 'sbus', 'xyz', 'logs'):
        os.makedirs(path + '/' + subdir, exist_ok=True)
    linkerpath = path+'/linkers'
    sbupath = path+'/sbus'
    logpath = path+"/logs"

    # Input cif file and get the cell parameters and adjacency matrix. If overlap, do not featurize.
    # Simultaneously prepare mol3D class for MOF for future RAC featurization (molcif)
    cpar, allatomtypes, fcoords = readcif(data)
    cell_v = mkcell(cpar)
    cart_coords = fractional2cart(fcoords, cell_v)

    # Remove only a trailing ".cif" extension. str.strip(".cif") would instead
    # strip any of the characters '.', 'c', 'i', 'f' from BOTH ends of the name
    # (e.g. "cif_fic.cif" -> "_"), corrupting the names used for all outputs.
    name = os.path.basename(data)
    if name.endswith(".cif"):
        name = name[:-len(".cif")]

    if len(cart_coords) > 2000:
        print("cif file is too large, skipping it for now...")
        tmpstr = "Failed to featurize %s: large primitive cell\n"%(name)
        write2file(path,"/FailedStructures.log", tmpstr)
        return None, None
    distance_mat = compute_distance_matrix3(cell_v, cart_coords)
    try:
        adj_matrix, _ = compute_adj_matrix(distance_mat, allatomtypes)
    except NotImplementedError:
        tmpstr = "Failed to featurize %s: atomic overlap\n"%(name)
        write2file(path,"/FailedStructures.log", tmpstr)
        return None, None

    writeXYZandGraph(xyzpath, allatomtypes, cell_v, fcoords, adj_matrix.todense())
    molcif,_,_,_,_ = import_from_cif(data, True)
    molcif.graph = adj_matrix.todense()

    # Check number of connected components.
    # If more than 1: the structure is treated as interpenetrated. Fails if there is
    # no metal in one of the connected components (identified by the graph).
    # This includes floating solvent molecules.
    n_components, labels_components = sparse.csgraph.connected_components(csgraph=adj_matrix, directed=False, return_labels=True)
    metal_list = set(molcif.findMetal(transition_metals_only=False))
    if not metal_list:
        tmpstr = "Failed to featurize %s: no metal found\n"%(name)
        write2file(path,"/FailedStructures.log", tmpstr)
        return None, None

    # Component labels run from 0 to n_components-1, so a component lacks a metal
    # exactly when fewer distinct labels are hit by the metal indices than exist.
    components_with_metal = {labels_components[metal_ind] for metal_ind in metal_list}
    if len(components_with_metal) < n_components:
        tmpstr = "Failed to featurize %s: solvent molecules\n"%(name)
        write2file(path,"/FailedStructures.log", tmpstr)
        return None, None

    if n_components > 1:
        print("structure is interpenetrated")
        tmpstr = "%s found to be an interpenetrated structure\n"%(name)
        write2file(logpath, "/%s.log"%name, tmpstr)

    # step 1: metallic part
    #     removelist = metals (1) + atoms only connected to metals (2) + H connected to (1+2)
    #     SBUlist = removelist + 1st coordination shell of the metals
    # Logs the atom types of the connecting atoms to the metal in logpath.
    removelist, SBUlist = prepare_initial_SBU(molcif, allatomtypes, metal_list, logpath, name)

    # At this point:
    # The remove list only removes metals and things ONLY connected to metals or hydrogens.
    # Thus the coordinating atoms are double counted in the linker.
    #
    # step 2: organic part
    #     linkers are all atoms - the removelist (assuming no bond between
    #     organic linkers)
    anc_atoms, linkers, linker_list, linker_subgraphlist, allatoms, connections_list, connections_subgraphlist = identify_lc_atoms(molcif, removelist, metal_list)

    # step 3: linker or ligand ?
    # checking to find the anchors and #SBUs that are connected to an organic part
    # anchor <= 1 -> ligand
    # anchor > 1 and #SBU > 1 -> linker
    # else: walk over the linker graph and count #crossing PBC
    #     if #crossing is odd -> linker
    #     else -> ligand
    initial_SBU_list, initial_SBU_subgraphlist = get_closed_subgraph(removelist.copy(), linkers.copy(), adj_matrix)
    min_max_linker_length, long_ligands, SBUlist, removelist, linker_list, linker_subgraphlist = identify_short_linkers(molcif, initial_SBU_list, initial_SBU_subgraphlist, removelist, linkers, linker_list, linker_subgraphlist, adj_matrix, SBUlist, logpath, linkerpath, name, cell_v)

    # In the case of long linkers, add second coordination shell without further checks.
    # In the case of short linkers, start from metal and grow outwards using the
    # include_extra_shells function.
    linker_length_list = [len(linker_val) for linker_val in linker_list]
    if len(set(linker_length_list)) != 1:
        write2file(linkerpath, "/uneven.txt", str(name)+'\n')

    if min_max_linker_length > 2: # treating the 2 atom ligands differently! Need caution
        if long_ligands:
            tmpstr = "\nStructure has LONG LINKER\n\n"
            write2file(logpath, "/%s.log"%name, tmpstr)
            # First account for all of the carboxylic acid type linkers, add in the carbons.
            # Iterate over a snapshot because SBUlist grows inside the loop.
            for zero_first_shell in SBUlist.copy():
                SBUlist.update(molcif.getBondedAtomsSmart(zero_first_shell))
        truncated_linkers = allatoms - SBUlist
        SBU_list, SBU_subgraphlist = get_closed_subgraph(SBUlist, truncated_linkers, adj_matrix)
        if not long_ligands:
            tmpstr = "\nStructure has SHORT LINKER\n\n"
            write2file(logpath, "/%s.log"%name, tmpstr)
            # The result is not used before returning; the call is retained as in the original.
            SBU_list, SBU_subgraphlist = include_extra_shells(SBU_list, SBU_subgraphlist, molcif, adj_matrix)
            print('=== SKIPPING DUE TO LINKER BEING TOO SHORT!')
            return 2
    else:
        tmpstr = "Structure %s has extremely short linkers, check the outputs\n"%name
        write2file(linkerpath, "/short.txt", tmpstr)
        tmpstr = "Structure has extremely short linkers\n"
        write2file(logpath, "/%s.log"%name, tmpstr)
        truncated_linkers = allatoms - removelist
        SBU_list, SBU_subgraphlist = get_closed_subgraph(removelist, truncated_linkers, adj_matrix)
        # The results are not used before returning; the calls are retained as in the original.
        SBU_list, SBU_subgraphlist = include_extra_shells(SBU_list, SBU_subgraphlist, molcif, adj_matrix)
        SBU_list, SBU_subgraphlist = include_extra_shells(SBU_list, SBU_subgraphlist, molcif, adj_matrix)
        print('=== SKIPPING DUE TO LINKER BEING TOO SHORT!')
        return 3

    return_code = breakdown_MOF(SBU_list, SBU_subgraphlist, molcif, name, cell_v, anc_atoms, sbupath, connections_list, connections_subgraphlist, linkerpath)
    return return_code