molSimplify 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (651)
  1. docs/source/conf.py +224 -0
  2. molSimplify/Classes/__init__.py +6 -0
  3. molSimplify/Classes/atom3D.py +235 -0
  4. molSimplify/Classes/dft_obs.py +130 -0
  5. molSimplify/Classes/globalvars.py +827 -0
  6. molSimplify/Classes/helpers.py +161 -0
  7. molSimplify/Classes/ligand.py +2330 -0
  8. molSimplify/Classes/mGUI.py +2493 -0
  9. molSimplify/Classes/mWidgets.py +438 -0
  10. molSimplify/Classes/miniGUI.py +41 -0
  11. molSimplify/Classes/mol2D.py +260 -0
  12. molSimplify/Classes/mol3D.py +5846 -0
  13. molSimplify/Classes/monomer3D.py +253 -0
  14. molSimplify/Classes/partialcharges.py +226 -0
  15. molSimplify/Classes/protein3D.py +1178 -0
  16. molSimplify/Classes/rundiag.py +151 -0
  17. molSimplify/Data/ML.dat +212 -0
  18. molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
  19. molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
  20. molSimplify/Data/MLS_angle_for_click.dat +8 -0
  21. molSimplify/Data/MLS_angle_for_inter.dat +23 -0
  22. molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
  23. molSimplify/Data/MLS_angle_for_intra.dat +10 -0
  24. molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
  25. molSimplify/Data/MLS_angle_for_oa.dat +18 -0
  26. molSimplify/Data/ML_FSR_for_inter.dat +112 -0
  27. molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
  28. molSimplify/Data/ML_bond_for_cat.dat +8 -0
  29. molSimplify/Data/ML_bond_for_click.dat +8 -0
  30. molSimplify/Data/ML_bond_for_inter.dat +48 -0
  31. molSimplify/Data/ML_bond_for_inter2.dat +48 -0
  32. molSimplify/Data/ML_bond_for_intra.dat +10 -0
  33. molSimplify/Data/ML_bond_for_intra2.dat +6 -0
  34. molSimplify/Data/ML_bond_for_oa.dat +18 -0
  35. molSimplify/Data/bp1.dat +21 -0
  36. molSimplify/Data/li.dat +3 -0
  37. molSimplify/Data/no.dat +2 -0
  38. molSimplify/Data/oct.dat +7 -0
  39. molSimplify/Data/pbp.dat +8 -0
  40. molSimplify/Data/spy.dat +6 -0
  41. molSimplify/Data/sqap.dat +9 -0
  42. molSimplify/Data/sqp.dat +5 -0
  43. molSimplify/Data/tbp.dat +6 -0
  44. molSimplify/Data/tdhd.dat +9 -0
  45. molSimplify/Data/thd.dat +5 -0
  46. molSimplify/Data/tpl.dat +4 -0
  47. molSimplify/Data/tpr.dat +7 -0
  48. molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
  49. molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
  50. molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
  51. molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
  52. molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
  53. molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
  54. molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
  55. molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
  56. molSimplify/Informatics/MOF/__init__.py +0 -0
  57. molSimplify/Informatics/MOF/atomic.py +267 -0
  58. molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
  59. molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
  60. molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
  61. molSimplify/Informatics/Mol2Parser.py +46 -0
  62. molSimplify/Informatics/RACassemble.py +408 -0
  63. molSimplify/Informatics/__init__.py +0 -0
  64. molSimplify/Informatics/active_learning/__init__.py +0 -0
  65. molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
  66. molSimplify/Informatics/autocorrelation.py +1930 -0
  67. molSimplify/Informatics/clean_autocorrelation.py +778 -0
  68. molSimplify/Informatics/coulomb_analyze.py +67 -0
  69. molSimplify/Informatics/decoration_manager.py +193 -0
  70. molSimplify/Informatics/geo_analyze.py +88 -0
  71. molSimplify/Informatics/geometrics.py +56 -0
  72. molSimplify/Informatics/graph_analyze.py +163 -0
  73. molSimplify/Informatics/graph_racs.py +288 -0
  74. molSimplify/Informatics/jupyter_vis.py +172 -0
  75. molSimplify/Informatics/lacRACAssemble.py +2192 -0
  76. molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
  77. molSimplify/Informatics/misc_descriptors.py +198 -0
  78. molSimplify/Informatics/organic_fingerprints.py +61 -0
  79. molSimplify/Informatics/partialcharges.py +345 -0
  80. molSimplify/Informatics/protein/activesite.py +53 -0
  81. molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
  82. molSimplify/Informatics/rac155_geo.py +48 -0
  83. molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
  84. molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
  85. molSimplify/Ligands/12crown4.mol +62 -0
  86. molSimplify/Ligands/Antipyrine.mol +58 -0
  87. molSimplify/Ligands/BPAbipy.mol +106 -0
  88. molSimplify/Ligands/Hpyrrole.mol +26 -0
  89. molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
  90. molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
  91. molSimplify/Ligands/NMe2_-1.xyz +11 -0
  92. molSimplify/Ligands/PCy3.mol +111 -0
  93. molSimplify/Ligands/PMe3.xyz +15 -0
  94. molSimplify/Ligands/PPh3.mol +76 -0
  95. molSimplify/Ligands/Propyphenazone.mol +77 -0
  96. molSimplify/Ligands/acac.mol +33 -0
  97. molSimplify/Ligands/acacen.mol +76 -0
  98. molSimplify/Ligands/acetate.smi +1 -0
  99. molSimplify/Ligands/acetate.xyz +9 -0
  100. molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
  101. molSimplify/Ligands/acetonitrile.mol +17 -0
  102. molSimplify/Ligands/alanine.mol +30 -0
  103. molSimplify/Ligands/alphabetizer.py +21 -0
  104. molSimplify/Ligands/amine.mol +11 -0
  105. molSimplify/Ligands/ammonia.mol +12 -0
  106. molSimplify/Ligands/arginine.mol +58 -0
  107. molSimplify/Ligands/asparagine.mol +38 -0
  108. molSimplify/Ligands/aspartic_acid.mol +35 -0
  109. molSimplify/Ligands/azide.mol +11 -0
  110. molSimplify/Ligands/benzene.mol +28 -0
  111. molSimplify/Ligands/benzene_pi.mol +30 -0
  112. molSimplify/Ligands/benzenedithiol.mol +30 -0
  113. molSimplify/Ligands/benzenethiol.mol +30 -0
  114. molSimplify/Ligands/benzylisocy.mol +38 -0
  115. molSimplify/Ligands/bidiazine.mol +42 -0
  116. molSimplify/Ligands/bidiazole.mol +38 -0
  117. molSimplify/Ligands/bifuran.mol +38 -0
  118. molSimplify/Ligands/bihydrodiazine.mol +58 -0
  119. molSimplify/Ligands/bihydrodiazole.mol +46 -0
  120. molSimplify/Ligands/bihydrooxazine.mol +54 -0
  121. molSimplify/Ligands/bihydrooxazole.mol +42 -0
  122. molSimplify/Ligands/bihydrothiazine.mol +54 -0
  123. molSimplify/Ligands/bihydrothiazole.mol +42 -0
  124. molSimplify/Ligands/biimidazole.mol +38 -0
  125. molSimplify/Ligands/bioxazole.mol +34 -0
  126. molSimplify/Ligands/bipy.mol +46 -0
  127. molSimplify/Ligands/bipyrazine.xyz +20 -0
  128. molSimplify/Ligands/bipyrimidine.mol +42 -0
  129. molSimplify/Ligands/bipyrrole.mol +42 -0
  130. molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
  131. molSimplify/Ligands/bithiazole.mol +34 -0
  132. molSimplify/Ligands/bromide.mol +7 -0
  133. molSimplify/Ligands/bromide.smi +1 -0
  134. molSimplify/Ligands/c2.mol +9 -0
  135. molSimplify/Ligands/caprolactone.mol +41 -0
  136. molSimplify/Ligands/carbonyl.mol +8 -0
  137. molSimplify/Ligands/carboxyl.mol +13 -0
  138. molSimplify/Ligands/cat.mol +30 -0
  139. molSimplify/Ligands/chloride.mol +7 -0
  140. molSimplify/Ligands/chloride.smi +1 -0
  141. molSimplify/Ligands/chloropyridine.mol +27 -0
  142. molSimplify/Ligands/co2.mol +10 -0
  143. molSimplify/Ligands/corrolazine.mol +72 -0
  144. molSimplify/Ligands/cs.mol +8 -0
  145. molSimplify/Ligands/cyanate.xyz +5 -0
  146. molSimplify/Ligands/cyanide.mol +9 -0
  147. molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
  148. molSimplify/Ligands/cyanopyridine.mol +29 -0
  149. molSimplify/Ligands/cyclam.mol +81 -0
  150. molSimplify/Ligands/cyclen.mol +69 -0
  151. molSimplify/Ligands/cyclopentadienyl.mol +26 -0
  152. molSimplify/Ligands/cysteine.mol +32 -0
  153. molSimplify/Ligands/diaminomethyl.mol +19 -0
  154. molSimplify/Ligands/diazine.mol +25 -0
  155. molSimplify/Ligands/diazole.mol +23 -0
  156. molSimplify/Ligands/dicyanamide.mol +15 -0
  157. molSimplify/Ligands/dihydrofuran.mol +27 -0
  158. molSimplify/Ligands/dmap.xyz +35 -0
  159. molSimplify/Ligands/dmf.mol +28 -0
  160. molSimplify/Ligands/dmi.mol +41 -0
  161. molSimplify/Ligands/dmpe.mol +52 -0
  162. molSimplify/Ligands/dpmu.mol +47 -0
  163. molSimplify/Ligands/dppe.mol +112 -0
  164. molSimplify/Ligands/edta.mol +69 -0
  165. molSimplify/Ligands/en.mol +28 -0
  166. molSimplify/Ligands/ethanethiol.mol +21 -0
  167. molSimplify/Ligands/ethanolamine.mol +26 -0
  168. molSimplify/Ligands/ethbipy.mol +70 -0
  169. molSimplify/Ligands/ethyl.mol +19 -0
  170. molSimplify/Ligands/ethylamine.mol +24 -0
  171. molSimplify/Ligands/ethylene.mol +16 -0
  172. molSimplify/Ligands/ethylesteracac.mol +57 -0
  173. molSimplify/Ligands/fluoride.mol +7 -0
  174. molSimplify/Ligands/fluoride.smi +1 -0
  175. molSimplify/Ligands/formaldehyde.mol +12 -0
  176. molSimplify/Ligands/formamidate.xyz +8 -0
  177. molSimplify/Ligands/formate.xyz +6 -0
  178. molSimplify/Ligands/furan.mol +23 -0
  179. molSimplify/Ligands/glutamic_acid.mol +42 -0
  180. molSimplify/Ligands/glutamine.mol +44 -0
  181. molSimplify/Ligands/glycinate.mol +23 -0
  182. molSimplify/Ligands/glycine.mol +24 -0
  183. molSimplify/Ligands/h2s.mol +10 -0
  184. molSimplify/Ligands/helium.mol +6 -0
  185. molSimplify/Ligands/histidine.mol +45 -0
  186. molSimplify/Ligands/hmpa.mol +62 -0
  187. molSimplify/Ligands/hs-.mol +9 -0
  188. molSimplify/Ligands/hydride.mol +7 -0
  189. molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
  190. molSimplify/Ligands/hydrocyanide.mol +10 -0
  191. molSimplify/Ligands/hydrodiazine.mol +33 -0
  192. molSimplify/Ligands/hydrodiazole.mol +27 -0
  193. molSimplify/Ligands/hydrogensulfide.mol +10 -0
  194. molSimplify/Ligands/hydroisocyanide.mol +11 -0
  195. molSimplify/Ligands/hydrooxazine.mol +31 -0
  196. molSimplify/Ligands/hydrooxazole.mol +25 -0
  197. molSimplify/Ligands/hydrothiazine.mol +31 -0
  198. molSimplify/Ligands/hydrothiazole.mol +25 -0
  199. molSimplify/Ligands/hydroxyl.mol +9 -0
  200. molSimplify/Ligands/imidazole.mol +23 -0
  201. molSimplify/Ligands/imidazolidinone.mol +29 -0
  202. molSimplify/Ligands/imine.mol +13 -0
  203. molSimplify/Ligands/iminodiacetic.mol +33 -0
  204. molSimplify/Ligands/iodide.mol +7 -0
  205. molSimplify/Ligands/iodobenzene.xyz +14 -0
  206. molSimplify/Ligands/isoleucine.mol +48 -0
  207. molSimplify/Ligands/isothiocyanate.mol +11 -0
  208. molSimplify/Ligands/leucine.mol +48 -0
  209. molSimplify/Ligands/ligands.dict +257 -0
  210. molSimplify/Ligands/lysine.mol +54 -0
  211. molSimplify/Ligands/mebenzenedithiol.mol +36 -0
  212. molSimplify/Ligands/mebim_py.xyz +29 -0
  213. molSimplify/Ligands/mebim_pz.xyz +28 -0
  214. molSimplify/Ligands/mebipy.mol +58 -0
  215. molSimplify/Ligands/mecat.mol +36 -0
  216. molSimplify/Ligands/methanal.mol +11 -0
  217. molSimplify/Ligands/methanethiol.mol +15 -0
  218. molSimplify/Ligands/methanol.mol +16 -0
  219. molSimplify/Ligands/methionine.mol +44 -0
  220. molSimplify/Ligands/methyl.mol +13 -0
  221. molSimplify/Ligands/methylacetylide.xyz +8 -0
  222. molSimplify/Ligands/methylamine.mol +19 -0
  223. molSimplify/Ligands/methylazide.xyz +9 -0
  224. molSimplify/Ligands/methylisocy.mol +17 -0
  225. molSimplify/Ligands/methylpyridine.mol +33 -0
  226. molSimplify/Ligands/n2.mol +8 -0
  227. molSimplify/Ligands/n4py.xyz +51 -0
  228. molSimplify/Ligands/nch.mol +10 -0
  229. molSimplify/Ligands/nco-.mol +11 -0
  230. molSimplify/Ligands/nethanolamine.mol +26 -0
  231. molSimplify/Ligands/nitrate.mol +14 -0
  232. molSimplify/Ligands/nitrite.mol +11 -0
  233. molSimplify/Ligands/nitro.mol +11 -0
  234. molSimplify/Ligands/nitrobipy.mol +54 -0
  235. molSimplify/Ligands/nitroso.mol +8 -0
  236. molSimplify/Ligands/nme3.mol +30 -0
  237. molSimplify/Ligands/no-.mol +10 -0
  238. molSimplify/Ligands/no2-.mol +11 -0
  239. molSimplify/Ligands/noxygen.mol +8 -0
  240. molSimplify/Ligands/ns-.mol +10 -0
  241. molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
  242. molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
  243. molSimplify/Ligands/o2-.mol +9 -0
  244. molSimplify/Ligands/o2.xyz +4 -0
  245. molSimplify/Ligands/och2.mol +12 -0
  246. molSimplify/Ligands/oethanolamine.mol +26 -0
  247. molSimplify/Ligands/ome2.mol +22 -0
  248. molSimplify/Ligands/ooh.xyz +5 -0
  249. molSimplify/Ligands/oxalate.mol +17 -0
  250. molSimplify/Ligands/oxalate.smi +1 -0
  251. molSimplify/Ligands/oxygen.mol +7 -0
  252. molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
  253. molSimplify/Ligands/ph2-.mol +11 -0
  254. molSimplify/Ligands/ph3.mol +12 -0
  255. molSimplify/Ligands/phen.mol +51 -0
  256. molSimplify/Ligands/phenacac.mol +63 -0
  257. molSimplify/Ligands/phenalalanine.mol +51 -0
  258. molSimplify/Ligands/phendione.mol +51 -0
  259. molSimplify/Ligands/phenphen.mol +75 -0
  260. molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
  261. molSimplify/Ligands/phenylcyc.mol +99 -0
  262. molSimplify/Ligands/phenylenediamine.mol +37 -0
  263. molSimplify/Ligands/phenylisocy.mol +32 -0
  264. molSimplify/Ligands/phosacidbipy.mol +66 -0
  265. molSimplify/Ligands/phosphine.mol +13 -0
  266. molSimplify/Ligands/phosphorine.mol +27 -0
  267. molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
  268. molSimplify/Ligands/phthalocyanine.mol +126 -0
  269. molSimplify/Ligands/pme3o.mol +32 -0
  270. molSimplify/Ligands/porphyrin.mol +82 -0
  271. molSimplify/Ligands/pph3o.mol +77 -0
  272. molSimplify/Ligands/proline.mol +39 -0
  273. molSimplify/Ligands/propdiol.mol +21 -0
  274. molSimplify/Ligands/propylene.mol +23 -0
  275. molSimplify/Ligands/pyridine.mol +27 -0
  276. molSimplify/Ligands/pyrimidone.mol +27 -0
  277. molSimplify/Ligands/pyrrole.mol +24 -0
  278. molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
  279. molSimplify/Ligands/s2-.mol +9 -0
  280. molSimplify/Ligands/salen.mol +75 -0
  281. molSimplify/Ligands/salphen.mol +84 -0
  282. molSimplify/Ligands/serine.mol +32 -0
  283. molSimplify/Ligands/simple_ligands.dict +14 -0
  284. molSimplify/Ligands/sulfacidbipy.mol +63 -0
  285. molSimplify/Ligands/tbucat.mol +54 -0
  286. molSimplify/Ligands/tbuphisocy.mol +56 -0
  287. molSimplify/Ligands/tbutylcyclen.mol +166 -0
  288. molSimplify/Ligands/tbutylisocy.mol +35 -0
  289. molSimplify/Ligands/tbutylthiol.mol +33 -0
  290. molSimplify/Ligands/tcnoet.mol +43 -0
  291. molSimplify/Ligands/tcnoetOH.mol +45 -0
  292. molSimplify/Ligands/terpy.mol +65 -0
  293. molSimplify/Ligands/tetrahydrofuran.mol +31 -0
  294. molSimplify/Ligands/thiane.mol +37 -0
  295. molSimplify/Ligands/thiazole.mol +21 -0
  296. molSimplify/Ligands/thiocyanate.mol +11 -0
  297. molSimplify/Ligands/thiol.mol +9 -0
  298. molSimplify/Ligands/thiophene.mol +23 -0
  299. molSimplify/Ligands/thiopyridine.mol +29 -0
  300. molSimplify/Ligands/threonine.mol +38 -0
  301. molSimplify/Ligands/tpp.mol +165 -0
  302. molSimplify/Ligands/tricyanomethyl.mol +19 -0
  303. molSimplify/Ligands/trifluoromethyl.mol +13 -0
  304. molSimplify/Ligands/tryptophan.mol +60 -0
  305. molSimplify/Ligands/tyrosine.mol +53 -0
  306. molSimplify/Ligands/uthiol.mol +11 -0
  307. molSimplify/Ligands/uthiolme2.mol +23 -0
  308. molSimplify/Ligands/valine.mol +42 -0
  309. molSimplify/Ligands/water.mol +10 -0
  310. molSimplify/Ligands/x.mol +6 -0
  311. molSimplify/Scripts/__init__.py +0 -0
  312. molSimplify/Scripts/addtodb.py +308 -0
  313. molSimplify/Scripts/cellbuilder.py +1592 -0
  314. molSimplify/Scripts/cellbuilder_tools.py +701 -0
  315. molSimplify/Scripts/chains.py +342 -0
  316. molSimplify/Scripts/convert_2to3.py +23 -0
  317. molSimplify/Scripts/dbinteract.py +631 -0
  318. molSimplify/Scripts/distgeom.py +617 -0
  319. molSimplify/Scripts/findcorrelations.py +287 -0
  320. molSimplify/Scripts/generator.py +267 -0
  321. molSimplify/Scripts/geometry.py +1224 -0
  322. molSimplify/Scripts/grabguivars.py +845 -0
  323. molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
  324. molSimplify/Scripts/inparse.py +1673 -0
  325. molSimplify/Scripts/io.py +1149 -0
  326. molSimplify/Scripts/isomers.py +415 -0
  327. molSimplify/Scripts/jobgen.py +247 -0
  328. molSimplify/Scripts/krr_prep.py +1262 -0
  329. molSimplify/Scripts/molSimplify_io.py +18 -0
  330. molSimplify/Scripts/molden2psi4wfn.py +166 -0
  331. molSimplify/Scripts/namegen.py +32 -0
  332. molSimplify/Scripts/nn_prep.py +561 -0
  333. molSimplify/Scripts/oct_check_mols.py +782 -0
  334. molSimplify/Scripts/periodic_QE.py +97 -0
  335. molSimplify/Scripts/postmold.py +304 -0
  336. molSimplify/Scripts/postmwfn.py +709 -0
  337. molSimplify/Scripts/postparse.py +488 -0
  338. molSimplify/Scripts/postproc.py +139 -0
  339. molSimplify/Scripts/qcgen.py +1450 -0
  340. molSimplify/Scripts/rmsd.py +489 -0
  341. molSimplify/Scripts/rungen.py +670 -0
  342. molSimplify/Scripts/structgen.py +3040 -0
  343. molSimplify/Scripts/tf_nn_prep.py +894 -0
  344. molSimplify/Scripts/tsgen.py +295 -0
  345. molSimplify/Scripts/uq_calibration.py +69 -0
  346. molSimplify/__init__.py +0 -0
  347. molSimplify/__main__.py +197 -0
  348. molSimplify/icons/chemdb.png +0 -0
  349. molSimplify/icons/hjklogo.png +0 -0
  350. molSimplify/icons/icon.png +0 -0
  351. molSimplify/icons/logo.png +0 -0
  352. molSimplify/icons/logo_old.png +0 -0
  353. molSimplify/icons/petachem.png +0 -0
  354. molSimplify/icons/petachem2.png +0 -0
  355. molSimplify/icons/petachem_full.png +0 -0
  356. molSimplify/icons/pythonlogo.png +0 -0
  357. molSimplify/icons/sge copy.png +0 -0
  358. molSimplify/icons/sge.png +0 -0
  359. molSimplify/icons/slurm.png +0 -0
  360. molSimplify/icons/wft1.png +0 -0
  361. molSimplify/icons/wft2.png +0 -0
  362. molSimplify/icons/wft3.png +0 -0
  363. molSimplify/ml/__init__.py +0 -0
  364. molSimplify/ml/kernels.py +36 -0
  365. molSimplify/ml/layers.py +29 -0
  366. molSimplify/molscontrol/__init__.py +14 -0
  367. molSimplify/molscontrol/_version.py +521 -0
  368. molSimplify/molscontrol/clf_tools.py +144 -0
  369. molSimplify/molscontrol/data/README.md +21 -0
  370. molSimplify/molscontrol/data/look_and_say.dat +15 -0
  371. molSimplify/molscontrol/dynamic_classifier.py +514 -0
  372. molSimplify/molscontrol/io_tools.py +363 -0
  373. molSimplify/molscontrol/molscontrol.py +49 -0
  374. molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
  375. molSimplify/molscontrol/terachem/terachem_input +22 -0
  376. molSimplify/python_krr/X_train_TS.csv +535 -0
  377. molSimplify/python_krr/__init__.py +0 -0
  378. molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
  379. molSimplify/python_krr/hat2_feature_names.csv +1 -0
  380. molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
  381. molSimplify/python_krr/hat_X_mean_std.csv +6 -0
  382. molSimplify/python_krr/hat_feature_names.csv +1 -0
  383. molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
  384. molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
  385. molSimplify/python_krr/hat_y_mean_std.csv +2 -0
  386. molSimplify/python_krr/sklearn_models.py +34 -0
  387. molSimplify/python_krr/y_train_TS.csv +535 -0
  388. molSimplify/python_nn/ANN.py +198 -0
  389. molSimplify/python_nn/__init__.py +0 -0
  390. molSimplify/python_nn/clf_analysis_tool.py +125 -0
  391. molSimplify/python_nn/dictionary_toolbox.py +49 -0
  392. molSimplify/python_nn/ensemble_test.py +309 -0
  393. molSimplify/python_nn/hs_center.csv +26 -0
  394. molSimplify/python_nn/hs_scale.csv +26 -0
  395. molSimplify/python_nn/ls_center.csv +26 -0
  396. molSimplify/python_nn/ls_scale.csv +26 -0
  397. molSimplify/python_nn/ms_hs_b1.csv +50 -0
  398. molSimplify/python_nn/ms_hs_b2.csv +50 -0
  399. molSimplify/python_nn/ms_hs_b3.csv +1 -0
  400. molSimplify/python_nn/ms_hs_w1.csv +50 -0
  401. molSimplify/python_nn/ms_hs_w2.csv +50 -0
  402. molSimplify/python_nn/ms_hs_w3.csv +1 -0
  403. molSimplify/python_nn/ms_ls_b1.csv +50 -0
  404. molSimplify/python_nn/ms_ls_b2.csv +50 -0
  405. molSimplify/python_nn/ms_ls_b3.csv +1 -0
  406. molSimplify/python_nn/ms_ls_w1.csv +50 -0
  407. molSimplify/python_nn/ms_ls_w2.csv +50 -0
  408. molSimplify/python_nn/ms_ls_w3.csv +1 -0
  409. molSimplify/python_nn/ms_slope_b1.csv +50 -0
  410. molSimplify/python_nn/ms_slope_b2.csv +50 -0
  411. molSimplify/python_nn/ms_slope_b3.csv +1 -0
  412. molSimplify/python_nn/ms_slope_w1.csv +50 -0
  413. molSimplify/python_nn/ms_slope_w2.csv +50 -0
  414. molSimplify/python_nn/ms_slope_w3.csv +1 -0
  415. molSimplify/python_nn/ms_split_b1.csv +50 -0
  416. molSimplify/python_nn/ms_split_b2.csv +50 -0
  417. molSimplify/python_nn/ms_split_b3.csv +1 -0
  418. molSimplify/python_nn/ms_split_w1.csv +50 -0
  419. molSimplify/python_nn/ms_split_w2.csv +50 -0
  420. molSimplify/python_nn/ms_split_w3.csv +1 -0
  421. molSimplify/python_nn/slope_center.csv +25 -0
  422. molSimplify/python_nn/slope_scale.csv +25 -0
  423. molSimplify/python_nn/split_center.csv +26 -0
  424. molSimplify/python_nn/split_scale.csv +26 -0
  425. molSimplify/python_nn/tf_ANN.py +762 -0
  426. molSimplify/python_nn/train_data.csv +1211 -0
  427. molSimplify/tf_nn/__init__.py +0 -0
  428. molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
  429. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
  430. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
  431. molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
  432. molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
  433. molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
  434. molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
  435. molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
  436. molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
  437. molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
  438. molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
  439. molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
  440. molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
  441. molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
  442. molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
  443. molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
  444. molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
  445. molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
  446. molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
  447. molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
  448. molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
  449. molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
  450. molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
  451. molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
  452. molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
  453. molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
  454. molSimplify/tf_nn/homolumo/gap_model.json +1 -0
  455. molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
  456. molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
  457. molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
  458. molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
  459. molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
  460. molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
  461. molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
  462. molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
  463. molSimplify/tf_nn/homolumo/homo_model.json +126 -0
  464. molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
  465. molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
  466. molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
  467. molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
  468. molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
  469. molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
  470. molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
  471. molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
  472. molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
  473. molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
  474. molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
  475. molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
  476. molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
  477. molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
  478. molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
  479. molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
  480. molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
  481. molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
  482. molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
  483. molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
  484. molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
  485. molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
  486. molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
  487. molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
  488. molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
  489. molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
  490. molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
  491. molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
  492. molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
  493. molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
  494. molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
  495. molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
  496. molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
  497. molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
  498. molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
  499. molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
  500. molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
  501. molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
  502. molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
  503. molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
  504. molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
  505. molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
  506. molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
  507. molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
  508. molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
  509. molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
  510. molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
  511. molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
  512. molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
  513. molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
  514. molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
  515. molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
  516. molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
  517. molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
  518. molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
  519. molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
  520. molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
  521. molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
  522. molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
  523. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
  524. molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
  525. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
  526. molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
  527. molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
  528. molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
  529. molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
  530. molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
  531. molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
  532. molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
  533. molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
  534. molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
  535. molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
  536. molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
  537. molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
  538. molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
  539. molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
  540. molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
  541. molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
  542. molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
  543. molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
  544. molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
  545. molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
  546. molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
  547. molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
  548. molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
  549. molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
  550. molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
  551. molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
  552. molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
  553. molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
  554. molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
  555. molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
  556. molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
  557. molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
  558. molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
  559. molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
  560. molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
  561. molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
  562. molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
  563. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
  564. molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
  565. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
  566. molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
  567. molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
  568. molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
  569. molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
  570. molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
  571. molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
  572. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
  573. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
  574. molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
  575. molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
  576. molSimplify/tf_nn/split/split_model.h5 +0 -0
  577. molSimplify/tf_nn/split/split_model.json +1 -0
  578. molSimplify/tf_nn/split/split_vars.csv +155 -0
  579. molSimplify/tf_nn/split/split_x.csv +1902 -0
  580. molSimplify/tf_nn/split/split_y.csv +1902 -0
  581. molSimplify/tf_nn/split/train_names.csv +1901 -0
  582. molSimplify/utils/__init__.py +0 -0
  583. molSimplify/utils/decorators.py +16 -0
  584. molSimplify/utils/metaclasses.py +12 -0
  585. molSimplify/utils/tensorflow.py +23 -0
  586. molSimplify/utils/timer.py +16 -0
  587. molSimplify-1.7.4.dist-info/LICENSE +674 -0
  588. molSimplify-1.7.4.dist-info/METADATA +821 -0
  589. molSimplify-1.7.4.dist-info/RECORD +651 -0
  590. molSimplify-1.7.4.dist-info/WHEEL +5 -0
  591. molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
  592. molSimplify-1.7.4.dist-info/top_level.txt +4 -0
  593. tests/generateTests.py +122 -0
  594. tests/helperFuncs.py +658 -0
  595. tests/informatics/test_MOF_descriptors.py +128 -0
  596. tests/informatics/test_active_learning.py +113 -0
  597. tests/informatics/test_coulomb_analyze.py +24 -0
  598. tests/informatics/test_graph_racs.py +193 -0
  599. tests/ml/test_kernels.py +20 -0
  600. tests/ml/test_layers.py +47 -0
  601. tests/runtest.py +10 -0
  602. tests/test_Mol2D.py +128 -0
  603. tests/test_basic_imports.py +62 -0
  604. tests/test_bidentate.py +25 -0
  605. tests/test_cli.py +20 -0
  606. tests/test_distgeom.py +106 -0
  607. tests/test_example_1.py +29 -0
  608. tests/test_example_3.py +31 -0
  609. tests/test_example_5.py +43 -0
  610. tests/test_example_7.py +28 -0
  611. tests/test_example_8.py +15 -0
  612. tests/test_example_tbp.py +15 -0
  613. tests/test_ff_xtb.py +111 -0
  614. tests/test_geocheck_oct.py +26 -0
  615. tests/test_geocheck_one_empty.py +15 -0
  616. tests/test_geometry.py +44 -0
  617. tests/test_inparse.py +76 -0
  618. tests/test_io.py +84 -0
  619. tests/test_jobgen.py +84 -0
  620. tests/test_joption_pythonic.py +27 -0
  621. tests/test_ligand_assign.py +58 -0
  622. tests/test_ligand_assign_consistent.py +60 -0
  623. tests/test_ligand_class.py +26 -0
  624. tests/test_ligand_from_mol_file.py +35 -0
  625. tests/test_ligands.py +86 -0
  626. tests/test_mol3D.py +337 -0
  627. tests/test_molcas_caspt2.py +15 -0
  628. tests/test_molcas_casscf.py +15 -0
  629. tests/test_old_ANNs.py +68 -0
  630. tests/test_orca_ccsdt.py +15 -0
  631. tests/test_orca_dft.py +15 -0
  632. tests/test_qcgen.py +50 -0
  633. tests/test_racs.py +124 -0
  634. tests/test_rmsd.py +68 -0
  635. tests/test_structgen_functions.py +198 -0
  636. tests/test_tetrahedral.py +29 -0
  637. tests/test_tutorial_10_part_one.py +16 -0
  638. tests/test_tutorial_10_part_two.py +15 -0
  639. tests/test_tutorial_2.py +11 -0
  640. tests/test_tutorial_3.py +15 -0
  641. tests/test_tutorial_4.py +57 -0
  642. tests/test_tutorial_6.py +10 -0
  643. tests/test_tutorial_8.py +29 -0
  644. tests/test_tutorial_9_part_one.py +15 -0
  645. tests/test_tutorial_9_part_two.py +15 -0
  646. tests/test_tutorial_qm9_part_one.py +6 -0
  647. tests/testresources/refs/racs/generate_references.py +85 -0
  648. workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
  649. workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
  650. workflows/NandyJACSAu2022/fragment_classes.py +586 -0
  651. workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
@@ -0,0 +1,1347 @@
1
+ import numpy as np
2
+ import itertools
3
+ import networkx as nx
4
+ from scipy.spatial import distance
5
+ from scipy import sparse
6
+ import copy
7
+ from molSimplify.Scripts.cellbuilder_tools import import_from_cif
8
+ from molSimplify.Informatics.MOF.atomic import (
9
+ COVALENT_RADII,
10
+ alkali,
11
+ lanthanides,
12
+ metals,
13
+ )
14
+
15
+ # PBC: periodic boundary conditions
16
+
17
+ deg2rad = np.pi/180.0
18
def _cif_number(token):
    """Parse a CIF numeric token, dropping a trailing standard uncertainty such as ``(5)``."""
    # "10.000(5)" means 10.000 with an uncertainty of 5 in the last digit.
    # Merely deleting the parentheses would yield 10.0005, which is not the reported value.
    return float(token.split("(", 1)[0])


def readcif(name):
    """
    Reads a cif file and returns information about its structure and composition.

    Parameters
    ----------
        name : str
            The path of the cif file to be read.

    Returns
    -------
        cpar : numpy.ndarray
            The parameters (i.e. lattice constants) of the MOF cell. Specifically, A, B, C, alpha, beta, and gamma. Shape is (6,).
        atomtypes : list of str
            The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
        positions : numpy.ndarray
            The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).

    Raises
    ------
        ValueError
            If one of the six cell parameters is missing, or if atom rows were found
            but the type symbol / fractional coordinate columns were not declared.

    """
    cell_tags = ("_cell_length_a", "_cell_length_b", "_cell_length_c",
                 "_cell_angle_alpha", "_cell_angle_beta", "_cell_angle_gamma")
    site_tags = ("_atom_site_type_symbol", "_atom_site_fract_x",
                 "_atom_site_fract_y", "_atom_site_fract_z")

    cell_values = {}      # cell tag -> parsed value
    site_columns = {}     # atom-site tag -> column index within an atom row
    in_atom_block = False  # True once the _atom_site_ loop header has been entered
    atom_props_count = 0   # number of columns declared in the _atom_site_ loop
    atom_rows = []

    with open(name, 'r', errors='ignore') as fi:  # ignore takes care of unicode errors in some cifs
        for line in fi:
            stripped = line.strip()
            # Blank lines (also between the header and the atom rows) and comments carry no data.
            if not stripped or stripped.startswith("#"):
                continue
            tokens = stripped.split()

            cell_tag = next((tag for tag in cell_tags if stripped.startswith(tag)), None)
            if cell_tag is not None:
                cell_values[cell_tag] = _cif_number(tokens[1])
                continue

            if stripped.startswith("_atom"):
                # The order of these two tags varies between cifs; either one opens the block.
                if stripped in ("_atom_site_label", "_atom_site_type_symbol"):
                    in_atom_block = True
                if stripped in site_tags:
                    site_columns[stripped] = atom_props_count
                if in_atom_block:
                    atom_props_count += 1  # Another atom property in the block we are interested in.
            elif in_atom_block:
                if len(tokens) == atom_props_count:
                    atom_rows.append(tokens)
                else:
                    break  # We left the atom block; everything needed has been read.

    missing = [tag for tag in cell_tags if tag not in cell_values]
    if atom_rows:
        missing += [tag for tag in site_tags if tag not in site_columns]
    if missing:
        raise ValueError("cif file %s is missing: %s" % (name, ", ".join(missing)))

    positions = []
    atomtypes = []
    for tokens in atom_rows:
        positions.append([_cif_number(tokens[site_columns["_atom_site_fract_x"]]),
                          _cif_number(tokens[site_columns["_atom_site_fract_y"]]),
                          _cif_number(tokens[site_columns["_atom_site_fract_z"]])])
        at_type = tokens[site_columns["_atom_site_type_symbol"]].strip("_")
        atomtypes.append(at_type.capitalize())

    cpar = np.array([cell_values[tag] for tag in cell_tags])
    positions = np.array(positions)
    return cpar, atomtypes, positions
129
+
130
def compute_image_flag(cell, fcoord1, fcoord2):
    """
    Find the cell shift that brings fcoord2 closest to fcoord1.

    All 27 shifts with components in {-1, 0, 1} are tried; distances are measured in Cartesian space.

    Parameters
    ----------
        cell : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
        fcoord1 : numpy.ndarray
            Fractional coordinates of atom 1. Shape is (3,).
        fcoord2 : numpy.ndarray
            Fractional coordinates of atom 2. Shape is (3,).

    Returns
    -------
        numpy.ndarray
            The cell shift of fcoord2 that minimizes its distance to fcoord1. Shape is (3,). Values are -1, 0, or 1.

    """
    shifts = np.array(list(itertools.product((-1, 0, 1), repeat=3)))
    # One shifted copy of atom 2 per candidate shift, converted to Cartesian coordinates.
    cart_candidates = np.array([np.dot(frac, cell) for frac in fcoord2 + shifts])
    reference = np.dot(fcoord1, cell)
    separations = distance.cdist([reference], cart_candidates)[0]
    # argmin returns the first minimum, matching a first-occurrence search.
    closest = int(np.argmin(separations))
    return shifts[closest]
157
+
158
+
159
def linker_length(adjmat, anchors):
    """
    Computes the shortest and longest anchor-to-anchor path lengths in a linker.

    For every pair of anchors the shortest path (in bonds) through the linker graph is found;
    the minimum and maximum of those per-pair values are returned.

    Parameters
    ----------
        adjmat : numpy.matrix
            The atom connections in the linker subgraph.
        anchors : set of ints
            The indices of linker atoms that are bonded to SBUs.

    Returns
    -------
        (min_length,max_length) : tuple of ints
            min_length is the shortest path length between two anchors in a linker.
            max_length is the longest path length between two anchors in a linker.
            With fewer than two anchors the initial values (1000, 0) are returned unchanged.

    """
    rows, cols = np.where(adjmat == 1)
    gr = nx.Graph()
    gr.add_edges_from(zip(rows.tolist(), cols.tolist()))

    # Start max_length and min_length off with values that will most likely be overwritten.
    max_length = 0
    min_length = 1000

    for i, j in itertools.combinations(anchors, 2):
        # Number of bonds on the shortest path; computed once per pair.
        hops = nx.shortest_path_length(gr, i, j)
        max_length = max(hops, max_length)
        min_length = min(hops, min_length)
    return (min_length, max_length)
190
+
191
def slice_mat(mat, atoms):
    """
    Extract the square sub-matrix of mat restricted to the given atom indices.

    Parameters
    ----------
        mat : numpy.matrix
            The adjacency matrix. Shape is (number of atoms, number of atoms).
        atoms : list of numpy.int32
            The indices of atoms that determine the matrix slice.

    Returns
    -------
        numpy.ndarray
            The matrix slice. Shape is (len(atoms), len(atoms)).

    """
    selection = list(atoms)
    # np.ix_ builds the open mesh that picks the same rows and columns.
    submatrix = mat[np.ix_(selection, selection)]
    return np.array(submatrix)
209
+
210
+
211
def ligand_detect(cell, cart_coords, adj_mat, anchorlist):
    """
    Calculates how to shift anchor atoms so that they are close to atoms bonded to them.

    Starting from atom 0, bonded atoms are visited breadth-first. Each newly reached atom is
    shifted by whole cell vectors so that it lies next to the atom it was reached from; the
    shift applied to every anchor atom is recorded.

    Parameters
    ----------
        cell : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
        cart_coords : numpy.ndarray
            Cartesian coordinates of the atoms in the linker or sbu. Shape is (number of atoms, 3).
        adj_mat : numpy.ndarray
            Adjacency matrix. 1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).
        anchorlist : set of ints
            The indices of the anchor atoms in the linker or sbu.

    Returns
    -------
        np.array(periodic_images) : numpy.ndarray
            The cell shifts that get the anchor atoms closest to an atom (current_node) they are bonded with.
            Rows are in the order the anchors were reached. Shape is (len(anchorlist), 3).

    Raises
    ------
        IndexError
            If not every atom can be reached from atom 0 through bonds.

    """
    invcell = np.linalg.inv(cell)
    fcoords = np.dot(cart_coords, invcell)  # fractional coordinates
    visit_order = [0]  # Atoms in the order they were reached; doubles as the traversal queue.
    visited = {0}      # Same atoms as a set, for constant-time membership tests.
    periodic_images = []
    if 0 in anchorlist:
        periodic_images.append(np.array([0, 0, 0]))
    counter = 0
    while len(visit_order) < len(cart_coords):
        current_node = visit_order[counter]
        for j, v in enumerate(adj_mat[current_node]):
            if v == 1 and j not in visited:  # A bonded atom that has not been reached yet
                image_flag = compute_image_flag(cell, fcoords[current_node], fcoords[j])
                fcoords[j] += image_flag  # Shift by the number of cells specified by compute_image_flag
                visit_order.append(j)
                visited.add(j)
                if j in anchorlist:
                    periodic_images.append(image_flag)
        counter += 1

    return np.array(periodic_images)
254
+
255
+
256
def XYZ_connected(cell, cart_coords, adj_mat):
    """
    Calculate fractional coordinates of atoms for the specified connected component, shifted by cell vectors to make the coordinates close to each other.

    Atoms are visited starting from atom 0; each newly reached bonded atom is shifted by whole
    cell vectors so that it lies next to the atom it was reached from. When the traversal
    queue runs out before all atoms were reached, atoms are drawn one by one from the
    connected components of adj_mat (in label order) to continue the traversal.

    Parameters
    ----------
        cell : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
        cart_coords : numpy.ndarray
            Cartesian coordinates of the atoms in this component. Shape is (number of atoms, 3).
        adj_mat : numpy.ndarray
            Adjacency matrix. 1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).

    Returns
    -------
        fcoords : numpy.ndarray
            Fractional coordinates of the atoms in this component. Shape is (number of atoms, 3).

    """
    invcell = np.linalg.inv(cell)
    fcoords = np.dot(cart_coords, invcell)  # fractional coordinates
    visit_order = [0]  # Atoms in the order they were reached; doubles as the traversal queue.
    visited = {0}      # Same atoms as a set, for constant-time membership tests.
    counter = 0
    n_components, labels_components = sparse.csgraph.connected_components(csgraph=adj_mat, directed=False, return_labels=True)
    tested_index = 0  # The label for the connected components. 0 indicates the first connected component, etc.
    index_counter = 0
    while len(visit_order) < len(cart_coords):
        try:
            current_node = visit_order[counter]
        except IndexError:
            # The queue is exhausted: fall back to the next atom of the component being tested.
            indices = [i for i, x in enumerate(labels_components) if x == tested_index]  # Atoms in the component labelled tested_index
            current_node = indices[index_counter]

            if index_counter == (len(indices) - 1):
                # Last atom of this component; move on to the next component.
                tested_index += 1
                index_counter = 0
            else:
                index_counter += 1
        for j, v in enumerate(adj_mat[current_node]):
            if v == 1 and j not in visited:  # A bonded atom that has not been reached yet
                fcoords[j] += compute_image_flag(cell, fcoords[current_node], fcoords[j])  # Shift by the number of cells specified by compute_image_flag
                visit_order.append(j)
                visited.add(j)
        counter += 1
    return fcoords
306
+
307
def writeXYZfcoords(filename, atoms, cell, fcoords):
    """
    Write an XYZ file from fractional coordinates.

    Each fractional coordinate is converted to Cartesian with the cell vectors before writing.

    Parameters
    ----------
        filename : str
            The path to where the xyz of the MOF structure will be written.
        atoms : list of str
            The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
        cell : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
        fcoords : numpy.ndarray
            The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).

    Returns
    -------
        None

    """
    with open(filename, "w") as xyz_file:
        # Header: atom count followed by an empty comment line.
        xyz_file.write("%i\n\n" % len(atoms))
        for atom_idx, frac in enumerate(fcoords):
            cart = np.dot(frac, cell)
            xyz_text = "%10.2f %10.2f %10.2f" % (cart[0], cart[1], cart[2])
            xyz_file.write("%s %s\n" % (atoms[atom_idx], xyz_text))
333
+
334
def writeXYZandGraph(filename, atoms, cell, fcoords, molgraph):
    """
    Write the xyz file for the MOF structure, as well as the net file containing the MOF's graph.

    The net file is written next to the xyz file, with the extension of filename replaced by ".net".

    Parameters
    ----------
        filename : str
            The path to where the xyz of the MOF structure will be written.
        atoms : list of str
            The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
        cell : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
        fcoords : numpy.ndarray
            The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).
        molgraph : numpy.matrix or numpy.ndarray
            The adjacency matrix, which indicates which atoms are connected to which atoms. Shape is (number of atoms, number of atoms).

    Returns
    -------
        None

    """
    import os

    with open(filename, "w") as fo:
        fo.write("%i\n\n" % len(atoms))  # The first line indicates the number of atoms in the cell of the structure.
        for i, fcoord in enumerate(fcoords):
            cart_coord = np.dot(fcoord, cell)  # Go from fractional coordinates to Cartesian coordinates.
            s = "%10.2f %10.2f %10.2f" % (cart_coord[0], cart_coord[1], cart_coord[2])  # X, Y, Z
            fo.write("%s %s\n" % (atoms[i], s))  # Writing the coordinates of each atom.
    tmpstr = ",".join(atoms)
    # splitext replaces whatever extension is present; slicing off four characters
    # is only correct for three-letter extensions such as ".xyz".
    net_path = os.path.splitext(filename)[0] + ".net"
    np.savetxt(net_path, molgraph, fmt="%i", delimiter=",", header=tmpstr)  # Save a net file.
365
+
366
+
367
def returnXYZandGraph(filename, atoms, cell, fcoords, molgraph):
    """
    Convert fractional coordinates to Cartesian and return them together with the graph.

    If filename is not None, the graph is additionally saved as a net file whose path is
    filename with its extension replaced by ".net". No xyz file is written.

    Parameters
    ----------
        filename : str or None
            Path used to derive the net file path. If None, nothing is written to disk.
        atoms : list of str
            The atom types, indicated by periodic symbols like 'O' and 'Cu'. Used as the header of the net file.
        cell : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
        fcoords : numpy.ndarray
            The fractional positions of the atoms. Shape is (number of atoms, 3).
        molgraph : numpy.matrix or numpy.ndarray
            The adjacency matrix, which indicates which atoms are connected to which atoms.

    Returns
    -------
        coord_list : list of list of float
            The Cartesian coordinates [x, y, z] of each atom.
        molgraph : numpy.matrix or numpy.ndarray
            The adjacency matrix that was passed in, returned unchanged.

    """
    import os

    coord_list = []
    for fcoord in fcoords:
        cart_coord = np.dot(fcoord, cell)  # Go from fractional coordinates to Cartesian coordinates.
        coord_list.append([cart_coord[0], cart_coord[1], cart_coord[2]])
    tmpstr = ",".join(atoms)
    if filename is not None:
        # splitext replaces whatever extension is present; slicing off four characters
        # is only correct for three-letter extensions such as ".xyz".
        net_path = os.path.splitext(filename)[0] + ".net"
        np.savetxt(net_path, molgraph, fmt="%i", delimiter=",", header=tmpstr)
    return coord_list, molgraph
398
+
399
def writeXYZcoords(filename, atoms, coords):
    """
    Write an XYZ file from Cartesian coordinates, leaving the comment line empty.

    Parameters
    ----------
        filename : str
            The path to where the xyz of the MOF structure will be written.
        atoms : list of str
            The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
        coords : numpy.ndarray
            The Cartesian positions of the atoms of the cif file. Shape is (number of atoms, 3).

    Returns
    -------
        None

    """
    with open(filename, "w") as xyz_file:
        # Header: atom count followed by an empty comment line.
        xyz_file.write("%i\n\n" % len(atoms))
        for atom_idx, point in enumerate(coords):
            xyz_text = "%10.2f %10.2f %10.2f" % (point[0], point[1], point[2])
            xyz_file.write("%s %s\n" % (atoms[atom_idx], xyz_text))
    return
423
+
424
def writeXYZcoords_withcomment(filename, atoms, coords, comment):
    """
    Write an XYZ file from Cartesian coordinates, using comment as the second line.

    Parameters
    ----------
        filename : str
            The path to where the xyz of the MOF structure will be written.
        atoms : list of str
            The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
        coords : numpy.ndarray
            The Cartesian positions of the atoms of the cif file. Shape is (number of atoms, 3).
        comment : str
            The comment to include in the XYZ file.

    Returns
    -------
        None

    """
    with open(filename, "w") as xyz_file:
        # Header: atom count, then the caller-supplied comment line.
        xyz_file.write("%i\n" % len(atoms))
        xyz_file.write("%s\n" % comment)
        for atom_idx, point in enumerate(coords):
            xyz_text = "%10.2f %10.2f %10.2f" % (point[0], point[1], point[2])
            xyz_file.write("%s %s\n" % (atoms[atom_idx], xyz_text))
    return
447
+
448
def write2file(pt, fn, st):
    """
    Append the string st to the file at pt + fn.

    The folder path and the file name are concatenated as-is, so pt must already
    end with a path separator. The file is created if it does not exist.

    Parameters
    ----------
        pt : str
            Path of the folder to make a file in.
        fn : str
            Name of the file to write to.
        st : str
            What to write in the file.

    Returns
    -------
        None

    """
    target = pt + fn
    with open(target, "a") as handle:
        handle.write(st)
468
+
469
def write_cif(fname, cellprm, fcoords, atom_labels):
    """
    Writes a cif file with the provided parameters.

    The structure is written in space group P 1, with one atom-site row per entry of
    atom_labels. Each label is used both as the site label and as the type symbol.

    Parameters
    ----------
        fname : str
            The path to the cif file to be written.
        cellprm : numpy.ndarray
            The parameters (i.e. lattice constants) of the MOF cell. Specifically, A, B, C, alpha, beta, and gamma. Shape is (6,).
        fcoords : numpy.ndarray
            The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).
        atom_labels : list of str
            The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.

    Returns
    -------
        None

    """
    # Only drop a trailing ".cif". str.strip(".cif") would instead remove any of the
    # characters '.', 'c', 'i', 'f' from both ends of the name (e.g. "fic.cif" -> "").
    common_name = fname[:-len(".cif")] if fname.endswith(".cif") else fname
    with open(fname, 'w') as f_cif:
        f_cif.write("data_I\n")
        f_cif.write("_chemical_name_common \'%s\'\n" % (common_name))
        f_cif.write("_cell_length_a %8.05f\n" % (cellprm[0]))
        f_cif.write("_cell_length_b %8.05f\n" % (cellprm[1]))
        f_cif.write("_cell_length_c %8.05f\n" % (cellprm[2]))
        f_cif.write("_cell_angle_alpha %4.05f\n" % (cellprm[3]))
        f_cif.write("_cell_angle_beta %4.05f\n" % (cellprm[4]))
        f_cif.write("_cell_angle_gamma %4.05f\n" % (cellprm[5]))
        f_cif.write("_space_group_name_H-M_alt \'P 1\'\n\n\n")
        f_cif.write("loop_\n_space_group_symop_operation_xyz\n 'x, y, z' \n\n")
        f_cif.write("loop_\n")
        f_cif.write("_atom_site_label\n")
        f_cif.write("_atom_site_fract_x\n")
        f_cif.write("_atom_site_fract_y\n")
        f_cif.write("_atom_site_fract_z\n")
        f_cif.write("_atom_site_type_symbol\n")
        for i, atom in enumerate(atom_labels):
            f_cif.write("%-5s %8s %8s %8s %5s\n" % (atom, fcoords[i, 0], fcoords[i, 1], fcoords[i, 2], "%s" % (atom)))
508
+
509
def cell_to_cellpar(cell, radians=False):
    """
    Convert cell vectors to cell parameters (three lengths and three angles).

    Parameters
    ----------
        cell : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
        radians : bool
            If True, the angles are returned in radians; otherwise in degrees.

    Returns
    -------
        numpy.ndarray
            The lengths of the three cell vectors followed by the angles between
            vectors 1 and 2, vectors 2 and 0, and vectors 0 and 1 (zero-indexed). Shape is (6,).
            An angle involving a vector of (near) zero length is reported as 90 degrees.

    """
    lengths = [np.linalg.norm(v) for v in cell]
    angles = []
    for i in range(3):
        # Negative indices wrap around, so (j, k) are the two vectors other than i.
        j = i - 1
        k = i - 2
        ll = lengths[j] * lengths[k]
        if ll > 1e-16:
            # Rounding can push the cosine marginally outside [-1, 1], which would make arccos return NaN.
            x = np.clip(np.dot(cell[j], cell[k]) / ll, -1.0, 1.0)
            angle = 180.0 / np.pi * np.arccos(x)
        else:
            angle = 90.0
        angles.append(angle)
    if radians:
        angles = [angle * np.pi / 180 for angle in angles]
    return np.array(lengths + angles)
547
+
548
def findPaths(G, u, n):
    """
    Enumerate the paths of n bonds that start at atom u.

    Parameters
    ----------
        G : networkx.classes.graph.Graph
            networkx graph for the linker of interest.
        u : int
            The index of the starting atom in the linker list of indices.
        n : int
            The number of bonds each path should span.

    Returns
    -------
        paths : list of list of int
            Each inner list has n + 1 entries and starts with u.
            For n equal to zero the result is [[u]].

    """
    if n == 0:
        return [[u]]
    paths = []
    for neighbor in G.neighbors(u):
        # Extend every (n-1)-bond path leaving the neighbor, recursively.
        for tail in findPaths(G, neighbor, n - 1):
            if u in tail:
                continue  # The path would pass through u a second time.
            paths.append([u] + tail)
    # Example of paths: [[12, 3, 7, 6], [12, 3, 7, 14], [12, 4, 0, 14], [12, 4, 0, 15], [12, 4, 9, 5], [12, 4, 9, 11]]
    return paths
574
+
575
def fractional2cart(fcoords, cell):
    """
    Convert fractional coordinates to Cartesian coordinates.

    Parameters
    ----------
        fcoords : numpy.ndarray
            The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).
        cell : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).

    Returns
    -------
        numpy.ndarray
            The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).

    """
    # Each Cartesian position is the combination of the cell vectors weighted by the fractional coordinates.
    cart_coords = np.dot(fcoords, cell)
    return cart_coords
593
+
594
def frac_coord(coord, cell):
    """
    Convert Cartesian coordinates to fractional coordinates.

    Parameters
    ----------
        coord : numpy.ndarray
            The Cartesian coordinates of the atoms of the cif file. Shape is (number of atoms, 3).
        cell : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).

    Returns
    -------
        numpy.ndarray
            The fractional positions of the crystal atoms. Shape is (number of atoms, 3).

    """
    # Multiplying by the inverse cell undoes the fractional -> Cartesian transform.
    cell_inverse = np.linalg.inv(cell)
    fractional = np.dot(coord, cell_inverse)
    return fractional
613
+
614
def compute_distance_matrix3(cell, cart_coords, num_cells=1):
    """
    Compute the minimum-image distance between every pair of atoms in the crystal cell.

    Every atom is replicated into the neighbouring cells (num_cells in each direction);
    the distance between two atoms is the smallest distance over all replicas.

    Parameters
    ----------
        cell : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
        cart_coords : numpy.ndarray
            The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).
        num_cells : int
            The number of crystal cells to put together for the evaluation of distances.

    Returns
    -------
        distance_matrix : numpy.ndarray
            The distance of each atom to each other atom. Shape is (number of atoms, number of atoms).

    """
    cell_arr = np.asarray(cell)
    coords_arr = np.asarray(cart_coords)

    offsets = np.arange(-num_cells, num_cells+1, 1)  # [-1, 0, 1] if num_cells is 1
    combos = np.array(np.meshgrid(offsets, offsets, offsets)).T.reshape(-1, 3)  # All integer shift triples
    # Cartesian translation for each shift triple. Shape is (number of combos, 3).
    shifts = np.sum(cell_arr[np.newaxis, :, :] * combos[:, :, np.newaxis], axis=1)

    # Every atom in every shifted cell. Shape is (number of atoms, number of combos, 3).
    images = coords_arr[:, np.newaxis, :] + shifts[np.newaxis, :, :]

    # Displacement from atom a to each image of atom b. Shape is (atoms, atoms, combos, 3).
    deltas = coords_arr[:, np.newaxis, np.newaxis, :] - images[np.newaxis, :, :, :]

    # Euclidean length of each displacement. Shape is (atoms, atoms, combos).
    lengths = np.sqrt(np.sum(np.square(deltas), axis=-1))

    # Keep, for every pair, the image at which the two atoms are closest.
    distance_matrix = np.min(lengths, axis=-1)
    return distance_matrix
651
+
652
def position_nearest_atom(cell, cart_coords, index_of_interest, num_cells=1):
    """
    Find the atom nearest to the atom at index_of_interest, taking periodic images into account.

    Every atom is replicated into the neighbouring cells (num_cells in each direction); the
    closest replica of any atom other than the atom of interest itself is returned.

    Parameters
    ----------
        cell : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
        cart_coords : numpy.ndarray
            The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).
        index_of_interest : int
            The index of the atom to which we want to find the nearest atom's position.
        num_cells : int
            The number of crystal cells to put together for the evaluation of distances.

    Returns
    -------
        nearest_position : numpy.ndarray
            The Cartesian coordinates of the nearest atom (of its closest periodic image). Shape is (3,).
        nearest_index : numpy.int64
            The index of the nearest atom.
        shift_for_nearest_atom : numpy.ndarray
            The crystal cell shifts that position the nearest atom closest to the atom of interest. Shape is (3,). Will look something like [-1 0 -1] or [0 0 0] or etc.

    """
    pos = np.arange(-num_cells, num_cells+1, 1)  # [-1, 0, 1] if num_cells is 1
    combos = np.array(np.meshgrid(pos, pos, pos)).T.reshape(-1, 3)  # The 27 combinations of -1, 0, 1 if num_cells is 1
    shifts = np.sum(np.expand_dims(cell, axis=0)*np.expand_dims(combos, axis=-1), axis=1)  # The possible shifts by the crystal cell vectors.
    # The shifted Cartesian coordinates. Shape is (number of atoms, number of combinations in combos, 3)
    shifted = np.expand_dims(cart_coords, axis=1) + np.expand_dims(shifts, axis=0)

    # Displacement from the atom of interest to every image of every atom.
    # Broadcasting the (1, 1, 3) reference against shifted gives shape (number of atoms, number of combos, 3).
    dist = np.expand_dims(np.expand_dims(cart_coords[index_of_interest], axis=0), axis=0) - shifted

    # The standard distance formula of square root of x^2 + y^2 + z^2
    dist = np.sqrt(np.sum(np.square(dist), axis=-1))  # Shape is (number of atoms, number of combinations in combos)

    # Exclude the atom of interest (and all of its own images) from the search.
    # np.inf is used because the np.Inf alias no longer exists in NumPy 2.0.
    dist[index_of_interest, :] = np.inf
    # Find the (atom index, shift index) of the closest image.
    index_nearest_atom = np.unravel_index(np.argmin(dist), np.shape(dist))

    # Get the Cartesian coordinates of the nearest atom
    nearest_position = shifted[index_nearest_atom[0], index_nearest_atom[1], :]
    nearest_index = index_nearest_atom[0]
    shift_for_nearest_atom = combos[index_nearest_atom[1], :]

    return nearest_position, nearest_index, shift_for_nearest_atom
704
+
705
def make_graph_from_nodes_edges(nodes, edges, attribs):
    """
    Build an undirected networkx graph from nodes, edges, and one attribute value per node.

    Parameters
    ----------
        nodes : iterable
            The nodes to add to the graph.
        edges : iterable of tuple
            The edges to add to the graph, as accepted by networkx's add_edges_from.
        attribs : iterable
            One value per node, stored on the node under the attribute name 'atomicNum'.
            Paired with nodes positionally; the longer of the two is truncated.

    Returns
    -------
        gr : networkx.classes.graph.Graph
            The graph holding the given nodes (with their 'atomicNum' attribute) and edges.

    """
    gr = nx.Graph()
    # Plain loop: the nodes are added for their side effect, no list needs to be built.
    for n, at in zip(nodes, attribs):
        gr.add_node(n, atomicNum=at)
    gr.add_edges_from(edges)
    return gr
733
+
734
def mkcell(cpar):
    """
    Build the Cartesian cell vectors from the six lattice parameters.

    The first vector is placed along the x axis and the second in the x-y plane.

    Parameters
    ----------
        cpar : numpy.ndarray
            The parameters (i.e. lattice constants) of the MOF cell. Specifically, A, B, C, alpha, beta, and gamma. Shape is (6,).

    Returns
    -------
        vectors : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).

    """
    a_mag, b_mag, c_mag = cpar[:3]
    # Angles are given in degrees; convert to radians.
    alpha, beta, gamma = [angle * deg2rad for angle in cpar[3:]]

    # The c vector's x and y components follow from its angles to a (beta) and b (alpha).
    c_x = c_mag * np.cos(beta)
    c_y = c_mag * (np.cos(alpha) - np.cos(gamma) * np.cos(beta)) / np.sin(gamma)
    # The z component is whatever remains of the length: c_x**2 + c_y**2 + c_z**2 = c_mag**2
    c_z = (c_mag**2 - c_x**2 - c_y**2)**0.5

    vectors = np.array([
        np.array([a_mag, 0.0, 0.0]),
        np.array([b_mag * np.cos(gamma), b_mag * np.sin(gamma), 0.0]),
        np.array([c_x, c_y, c_z]),
    ])
    return vectors
759
+
760
def make_supercell(cell, atoms, fcoords, exp_coeff):
    """
    Replicate a cell and its atoms an integer number of times along each cell vector.

    Parameters
    ----------
        cell : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
        atoms : list of str
            The atom types of the original cell. Length is the number of atoms.
        fcoords : numpy.ndarray
            The fractional positions of the atoms in the original cell. Shape is (number of atoms, 3).
        exp_coeff : sequence of int
            The number of repetitions along the first, second, and third cell vector. Length is 3.

    Returns
    -------
        supercell : numpy.ndarray
            The cell vectors of the supercell; vector i is the original vector i scaled by exp_coeff[i]. Shape is (3,3).
        superatoms : list of str
            The atom types of the supercell, one copy of atoms per replicated cell.
        superfcoords : numpy.ndarray
            The positions of the supercell atoms, as fractions of the supercell vectors.

    """
    supercell = np.multiply(cell.T, exp_coeff).T
    n_a, n_b, n_c = exp_coeff[0], exp_coeff[1], exp_coeff[2]

    superatoms = []
    superfcoords = []
    # product iterates with the last index varying fastest, like three nested loops.
    for i, j, k in itertools.product(range(n_a), range(n_b), range(n_c)):
        for atom_idx, atom in enumerate(atoms):
            frac = fcoords[atom_idx]
            # Rescale into supercell fractions, then offset into replica (i, j, k).
            superfcoords.append([
                frac[0]/n_a + float(i)/n_a,
                frac[1]/n_b + float(j)/n_b,
                frac[2]/n_c + float(k)/n_c,
            ])
            superatoms.append(atom)
    superfcoords = np.array(superfcoords)
    return supercell, superatoms, superfcoords
798
+
799
+
800
def compute_adj_matrix(distance_mat, allatomtypes, wiggle_room=1, handle_overlap=False):
    """
    Calculates what atoms are bonded to each other.

    Bonding is trickier in MOFs than in TM complexes due to metal-metal bonding, motivating the existence of this function
    even though a similar one exists in mol3D.

    Two atoms are considered bonded when dist * tempsf < (sum of covalent radii) * wiggle_room, where tempsf is an
    empirical scale factor chosen from the pair of elements. Pairs in which every element is a metal are skipped entirely.

    Parameters
    ----------
    distance_mat : numpy.ndarray
        The distance of each atom to each other atom. Shape is (number of atoms, number of atoms).
    allatomtypes : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    wiggle_room : float
        A multiplier that allows for more or less strict bond distance cutoffs.
    handle_overlap : bool
        Indicates whether to provide diagnostics for atoms that are overlapping, or to just raise an error if it finds overlaps

    Returns
    -------
    sparse.csr_matrix(adj_matrix) : scipy.sparse.csr.csr_matrix
        Adjacency matrix. 1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).
    overlap_atoms : list
        Indices of atoms that overlap with any atom of a lower index. Duplicate-free and sorted in ascending order.

    Raises
    ------
    NotImplementedError
        If two atoms overlap and handle_overlap is False.

    """

    overlap_atoms = []
    adj_matrix = np.zeros(distance_mat.shape)
    for i, e1 in enumerate(allatomtypes[:-1]):  # Iterating through all pairs of atoms.
        for j, e2 in enumerate(allatomtypes[i+1:]):
            partner = i + j + 1  # Index of the second atom of the pair; always greater than i.
            elements = set([e1, e2])

            # In the context of sets, < is the proper-subset test: every element of the pair is in the set metals.
            if elements < metals:  # FIXME no metal-metal bond allowed
                continue

            rad = COVALENT_RADII[e1] + COVALENT_RADII[e2]
            dist = distance_mat[i, partner]

            # Check for atomic overlap: the atoms are closer than the smaller of the two covalent radii.
            overlap_cutoff = min(COVALENT_RADII[e1], COVALENT_RADII[e2])
            if dist < overlap_cutoff:
                print(f"Atomic overlap involving atom {i} and {partner}! Zero-indexed.")
                print(f"dist is {dist} and the cutoff is {overlap_cutoff}")
                if handle_overlap:
                    # Check whether atom i is already in overlap_atoms and will be removed.
                    # If so, no need to remove an atom that overlaps with atom i.
                    if i not in overlap_atoms:
                        overlap_atoms.append(partner)  # The atom with index partner overlapped with another atom.
                else:
                    print('Overlapping atoms! Error')
                    raise NotImplementedError  # Exit the function.

            # Empirical scale factor, adjusted below to account for looser or tighter bonding.
            # The checks are independent `if`s on purpose: when several match, the LAST match wins,
            # so their order must not be changed.
            # `set("X") < elements` is true only when the pair contains X and one other, different element.
            # `elements & metals` is the intersection; it is truthy when the pair contains a metal.
            tempsf = 0.9
            if (set("F") < elements) and (elements & metals):  # Fluorine and a metal.
                tempsf = 0.8
            if (set("C") < elements) and (elements & metals):
                tempsf = 0.95
            if (set("H") < elements) and (elements & metals) and (not elements & alkali):
                tempsf = 0.75

            if (set("O") < elements) and (elements & metals):
                tempsf = 0.85
            if (set("N") < elements) and (elements & metals):
                tempsf = 0.82
            # fix for water particle recognition.
            if set(["O", "H"]) <= elements:
                tempsf = 0.8
            # very specific fix for Michelle's amine appended MOF
            if set(["N", "H"]) <= elements:
                tempsf = 0.67
            if set(["Mg", "N"]) <= elements:
                tempsf = 0.80
            if set(["C", "H"]) <= elements:
                tempsf = 0.80
            if set(["K"]) <= elements:  # Any pair involving potassium.
                tempsf = 0.95
            if lanthanides & elements:  # Any pair involving a lanthanide.
                tempsf = 0.95
            if elements == set(["C"]):  # Carbon-carbon pair.
                tempsf = 0.85

            if dist * tempsf < rad * wiggle_room:  # and not (alkali & elements):
                # Entering this if statement means there is a bond between the two atoms.
                adj_matrix[i, partner] = 1
                adj_matrix[partner, i] = 1

    # Removing duplicates and sorting, so the returned order is deterministic.
    overlap_atoms = sorted(set(overlap_atoms))
    return sparse.csr_matrix(adj_matrix), overlap_atoms
889
+
890
+
891
+
892
def get_closed_subgraph(linkers, SBUlist, adj_matrix):
    """
    Separates a set of atoms into its connected fragments, never crossing into the excluded atoms.

    The first argument is the part of the structure to find subgraphs of; the second is the part to ignore.
    Pass the linker atoms first and the SBU atoms second to get the individual linkers.
    Pass them the other way around to get the individual SBUs.

    Parameters
    ----------
    linkers : set of int
        Indices corresponding to atoms in the linkers (or SBUs) of the MOF. The part of the matrix to analyze.
    SBUlist : set of numpy.int64
        Indices corresponding to atoms in the SBUs (or linkers) of the MOF. The part of the matrix to ignore.
    adj_matrix : scipy.sparse.csr.csr_matrix
        Adjacency matrix. 1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).

    Returns
    -------
    linker_list : list of lists of ints
        Each inner list is its own separate linker (or SBU). The ints are the atom indices of that linker (or SBU).
    linker_subgraphlist : list of scipy.sparse.csr.csr_matrix
        The atom connections within each fragment, in the same order as linker_list.

    """

    remaining = linkers.copy()  # Atoms not yet assigned to a fragment.
    linker_list = []
    linker_subgraphlist = []
    passes = 0
    while len(remaining) > 0:
        # Each pass through this loop identifies one complete fragment.
        passes += 1
        if passes > 5000:  # Safety cap on the number of fragments.
            break

        seed = next(iter(remaining))  # An atom belonging to a not-yet-identified fragment.
        fragment = {seed}  # Grows as atoms of the fragment are discovered.
        visited = set()  # Atoms that have already been used as the seed.
        while len(visited) <= len(fragment):
            # Column indices of the nonzero entries in the seed's row, i.e. the atoms bonded to the seed.
            neighbors = np.nonzero(adj_matrix[seed])[1]
            fragment.update(neighbors)
            fragment = fragment - SBUlist
            visited.add(seed)
            for nb in neighbors:
                if nb in SBUlist:
                    continue  # Do not expand through excluded atoms.
                fragment.update(np.nonzero(adj_matrix[nb])[1])  # Atoms bonded to the neighbor.

            pending = fragment - visited - SBUlist  # Fragment atoms whose own bonds still need checking.
            if len(pending) == 0:
                break
            seed = next(iter(pending))

        fragment = fragment - SBUlist
        remaining = remaining - fragment

        # Return both the fragment itself and the block of the adjacency matrix that belongs to it.
        members = list(fragment)
        linker_list.append(members)
        linker_subgraphlist.append(adj_matrix[np.ix_(members, members)])

    return linker_list, linker_subgraphlist
953
+
954
def include_extra_shells(SBUlists, subgraphlists, molcif, adjmat):
    """
    Include extra atoms in the SBUs. One more shell.

    Every atom bonded to an atom of an SBU (according to molcif.getBondedAtomsSmart) is added to that SBU.
    The input lists are left untouched; the expanded SBUs are returned as new lists.

    Parameters
    ----------
    SBUlists : list of lists of ints
        Each inner list is its own separate SBU. The ints are the atom indices of that SBU. Length is # of SBUs.
    subgraphlists : list of scipy.sparse.csr.csr_matrix
        The atom connections in the SBU subgraph. Length is # of SBUs. Not used by this function.
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the cif file being analyzed.
    adjmat : scipy.sparse.csr.csr_matrix
        1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).

    Returns
    -------
    SBUs : list of lists of numpy.int64
        The expanded atom indices of each SBU.
    subgraphs : list of scipy.sparse.csr.csr_matrix
        The atom bonding information of the SBUs in the variable `SBUs`. Which atoms are bonded to which.

    """

    SBUs = []
    subgraphs = []
    for SBU in SBUlists:
        # Work on a copy so the caller's list is not modified.
        expanded = list(SBU)
        for zero_first_shell in SBU:
            # Include in the SBU every atom that is bonded to the SBU.
            expanded.extend(molcif.getBondedAtomsSmart(zero_first_shell))
        members = list(set(expanded))  # Removing duplicate atom indices.
        SBUs.append(members)
        subgraphs.append(adjmat[np.ix_(members, members)])

    return SBUs, subgraphs
989
+
990
def disorder_detector(name):
    """
    Reads a cif file and returns information on which atoms have fractional occupancy.

    Parameters
    ----------
    name : str
        The path of the cif file to be read.

    Returns
    -------
    disordered_atom_indices : list of ints
        The indices of atoms with fractional occupancies. Zero-indexed position within the atom block of the file.
    disordered_atom_types : list of str
        The elemental symbols of atoms with fractional occupancies.
    disordered_atom_occupancies : list of floats
        The fractional occupancies of the atoms with fractional occupancies.

    Raises
    ------
    ValueError
        If an atom with fractional occupancy is found but the cif has no _atom_site_type_symbol column,
        so its element cannot be reported.

    """
    with open(name, 'r', errors='ignore') as fi:  # ignore takes care of unicode errors in some cifs
        cif_lines = fi.readlines()

    in_atom_block = False  # True once the block with the desired atom information has been entered.
    type_index = None  # Column of _atom_site_type_symbol, if the cif has one.
    occupancy_index = None  # Column of _atom_site_occupancy, if the cif has one.
    atom_props_count = 0  # Number of _atom* properties (columns) seen since the block was entered.
    atomlines = []
    for line in cif_lines:
        line_stripped = line.strip()
        if line_stripped.startswith("#"):
            continue

        if line_stripped.startswith("_atom"):
            if line_stripped in ("_atom_site_label", "_atom_site_type_symbol"):
                # Either tag opens the block, since their order can vary depending on the cif.
                in_atom_block = True
                if line_stripped == "_atom_site_type_symbol":
                    type_index = atom_props_count
            elif line_stripped == "_atom_site_occupancy":
                occupancy_index = atom_props_count

            if in_atom_block:
                atom_props_count += 1  # Another atom property in the block we are interested in.

        elif in_atom_block:
            if len(line.split()) == atom_props_count:
                atomlines.append(line)
            else:
                # First line that does not have one value per property (blank lines included): we left the block.
                break

    disordered_atom_indices = []
    disordered_atom_types = []
    disordered_atom_occupancies = []

    # None means the cif has no occupancy column. 0 can only happen when the occupancy tag came before the
    # label/type tag, i.e. before column counting started, so the real column is unknown. Report nothing in both cases.
    if not occupancy_index:
        return disordered_atom_indices, disordered_atom_types, disordered_atom_occupancies

    for idx, atom_line in enumerate(atomlines):  # Go through the atom lines of the cif, atom by atom.
        fields = atom_line.split()

        # Drop a trailing "(...)" part, e.g. "0.5(2)", in order to convert to float.
        occupancy = float(fields[occupancy_index].split('(')[0])
        if occupancy == 1:
            continue  # Fully occupied site.

        # Disordered atom.
        if type_index is None:
            raise ValueError(
                f"{name}: found an atom with fractional occupancy, but the cif has no "
                "_atom_site_type_symbol column to read its element from."
            )
        disordered_atom_indices.append(idx)
        disordered_atom_types.append(fields[type_index].strip("_"))
        disordered_atom_occupancies.append(occupancy)

    return disordered_atom_indices, disordered_atom_types, disordered_atom_occupancies
1063
+
1064
def remove_duplicate_atoms(allatomtypes, fcoords):
    """
    Drops every atom whose fractional coordinates exactly match those of a lower index atom.
    Such duplicates pop up after removing symmetry with Vesta. Symmetry removal helps the molSimplify code get connectivity right.

    Note that the surviving atoms come back in the order produced by numpy.unique (sorted coordinate rows),
    not in their input order. Atom types and coordinates stay paired with each other.

    Parameters
    ----------
    allatomtypes : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    fcoords : numpy.ndarray
        The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).

    Returns
    -------
    allatomtypes_trim : list of str
        The atom types of the remaining atoms, one per row of fcoords_trim.
    fcoords_trim : numpy.ndarray
        The unique fractional positions. Shape is (number of remaining atoms, 3).

    """

    # unique_rows holds each distinct coordinate row once; first_seen holds the index of its first occurrence.
    unique_rows, first_seen = np.unique(fcoords, axis=0, return_index=True)

    # Look up the atom type that goes with each surviving coordinate row.
    kept_types = []
    for atom_index in first_seen:
        kept_types.append(allatomtypes[atom_index])

    return kept_types, unique_rows
1093
+
1094
def remove_undesired_atoms(undesired_indices, allatomtypes, fcoords):
    """
    Takes a list of indices, and removes those elements from allatomtypes and fcoords.

    Parameters
    ----------
    undesired_indices : list
        The indices of the atoms to remove.
    allatomtypes : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    fcoords : numpy.ndarray
        The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).

    Returns
    -------
    allatomtypes_trim : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'.
        All undesired atoms removed.
    fcoords_trim : numpy.ndarray
        The fractional positions of the atoms of the cif file. Shape is (number of remaining atoms, 3).
        All undesired atoms removed.

    """
    # A set gives constant-time membership tests; scanning a list for every atom is quadratic.
    undesired = set(undesired_indices)
    desired_indices = [_i for _i in range(len(allatomtypes)) if _i not in undesired]  # The indices we want to keep.

    allatomtypes_trim = [allatomtypes[_i] for _i in desired_indices]
    fcoords_trim = fcoords[desired_indices]

    return allatomtypes_trim, fcoords_trim
1123
+
1124
+
1125
def overlap_removal(cif_path, new_cif_path):
    """
    Loads a cif file, strips out overlapping atoms, and saves the result as a cif at the provided path.
    For a new CIF, recommended to remove symmetry (either with Vesta or with get_primitive), then run overlap_removal, then run solvent_removal.

    Parameters
    ----------
    cif_path : str
        The path of the cif file to be read.
    new_cif_path : str
        The path to which the modified cif file will be written.

    Returns
    -------
    None

    """

    # Much of this code parallels that in the beginning of the MOF_descriptors.get_MOF_descriptors function

    # Load the structure, drop exact duplicates, and build the cell vectors.
    cpar, allatomtypes, fcoords = readcif(cif_path)
    allatomtypes, fcoords = remove_duplicate_atoms(allatomtypes, fcoords)
    cell_v = mkcell(cpar)

    # The cif is assumed to carry no graph information, so distances are computed from the coordinates.
    distance_mat = compute_distance_matrix3(cell_v, fractional2cart(fcoords, cell_v))

    # Only the list of overlapping atoms is needed here; the adjacency matrix is discarded.
    _, overlap_atoms = compute_adj_matrix(distance_mat, allatomtypes, handle_overlap=True)

    if overlap_atoms:
        print('Dealing with overlap')
        allatomtypes, fcoords = remove_undesired_atoms(overlap_atoms, allatomtypes, fcoords)

    # Save the cleaned structure.
    write_cif(new_cif_path, cpar, fcoords, allatomtypes)
1164
+
1165
def solvent_removal(cif_path, new_cif_path, wiggle_room=1):
    """
    Reads a cif file, removes floating solvent atoms, and writes the cif to the provided path.
    Assumes cif has P1 symmetry.

    An atom counts as solvent when the connected component of the bond graph it belongs to contains no metal.

    Parameters
    ----------
    cif_path : str
        The path of the cif file to be read.
    new_cif_path : str
        The path to which the modified cif file will be written.
    wiggle_room : float
        A multiplier that allows for more or less strict bond distance cutoffs.
        Useful for some trouble CIFs with long bonds.

    Returns
    -------
    None

    Raises
    ------
    Exception
        If atoms overlap (chained to the underlying NotImplementedError), or if the structure contains no metal.

    """

    # Much of this code parallels that in the beginning of the MOF_descriptors.get_MOF_descriptors function

    # Loading the cif and getting information about the crystal cell.
    cpar, allatomtypes, fcoords = readcif(cif_path)
    cell_v = mkcell(cpar)
    cart_coords = fractional2cart(fcoords, cell_v)

    # Assuming that the cif does not have graph information of the structure.
    distance_mat = compute_distance_matrix3(cell_v, cart_coords)
    try:
        adj_matrix, _ = compute_adj_matrix(distance_mat, allatomtypes, wiggle_room=wiggle_room, handle_overlap=False)
    except NotImplementedError as err:
        # Keep the original error attached so the traceback shows where the overlap was detected.
        raise Exception("Failed due to atomic overlap") from err

    # Getting the adjacency matrix (bond information).
    adj_matrix = sparse.csr_matrix(adj_matrix)
    molcif, _, _, _, _ = import_from_cif(cif_path, True)  # molcif is a mol3D class of a single unit cell (or the cell of the cif file)
    molcif.graph = adj_matrix.todense()

    # Finding the connected components
    n_components, labels_components = sparse.csgraph.connected_components(csgraph=adj_matrix, directed=False, return_labels=True)
    print(f'n_components: {n_components}')
    print(f'labels_components: {labels_components}')
    print(f'len is {len(labels_components)}')
    metal_list = set(molcif.findMetal(transition_metals_only=False))  # the atom indices of the metals
    if not metal_list:
        raise Exception("No metal in the structure.")

    # Labels of the components that contain at least one metal. Metal indices outside the label array are ignored.
    n_atoms = len(labels_components)
    metal_components = {labels_components[at] for at in metal_list if 0 <= at < n_atoms}

    # Every atom in a component with no metal has no connection to any metal, i.e. it is solvent.
    solvent_indices = [i for i, label in enumerate(labels_components) if label not in metal_components]

    # Removing the atoms corresponding to the solvent.
    allatomtypes, fcoords = remove_undesired_atoms(solvent_indices, allatomtypes, fcoords)

    # Writing the cif files
    write_cif(new_cif_path, cpar, fcoords, allatomtypes)
1231
+
1232
+
1233
+
1234
+
1235
+
1236
+
1237
+ ##### Deprecated #####
1238
+
1239
+ # The functions compute_distance_matrix, compute_distance_matrix2, and compute_distance_matrix3 all do the same thing.
1240
+ # However, compute_distance_matrix3 is significantly faster than compute_distance_matrix2, which in turn is faster than compute_distance_matrix.
1241
+ # This is due to the use of for loops in compute_distance_matrix and compute_distance_matrix2, versus the vectorized (pre-compiled C code) numpy functions in compute_distance_matrix3.
1242
+
1243
def compute_distance_matrix(cell, cart_coords):
    """
    Builds the table of pairwise minimum-image distances between the atoms of the crystal cell.
    First version of this function; each pair is handled by min_img_distance.

    Parameters
    ----------
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    cart_coords : numpy.ndarray
        The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).

    Returns
    -------
    distance_matrix : numpy.ndarray
        The distance of each atom to each other atom. Shape is (number of atoms, number of atoms).
        Symmetric, with zeros on the diagonal.

    """
    n_atoms = len(cart_coords)
    distance_matrix = np.zeros([n_atoms, n_atoms])  # Diagonal stays zero.

    # Visit each unordered pair (i < j) once and mirror the result across the diagonal.
    for i, j in itertools.combinations(range(n_atoms), 2):
        separation = min_img_distance(cart_coords[i], cart_coords[j], cell)
        distance_matrix[i, j] = separation
        distance_matrix[j, i] = separation

    return distance_matrix
1268
+
1269
def min_img_distance(coords1, coords2, cell):
    """
    Returns the minimum image distance between the two atoms located at coords1 and coords2.
    That is the shortest distance between the two atoms when the repeating periodic structure of the MOF is taken into account.

    Parameters
    ----------
    coords1 : numpy.ndarray
        The Cartesian coordinates of the first atom under consideration. Shape is (3,).
    coords2 : numpy.ndarray
        The Cartesian coordinates of the second atom under consideration. Shape is (3,).
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).

    Returns
    -------
    numpy.float64
        The distance between the two atoms.

    """
    to_fractional = np.linalg.inv(cell)  # Converts Cartesian coordinates to fractional ones.

    # Fractional coordinates of both atoms, wrapped into [0, 1) by the modulo.
    frac1 = np.dot(coords1, to_fractional) % 1
    frac2 = np.dot(coords2, to_fractional) % 1

    # Rounding the difference gives, per axis, the cell shift (-1, 0 or 1) that brings the atoms closest.
    delta = frac1 - frac2
    wrapped_delta = delta - np.around(delta)

    # Back to Cartesian coordinates before measuring the length.
    return np.linalg.norm(np.dot(wrapped_delta, cell))
1295
+
1296
def compute_distance_matrix2(cell, cart_coords):
    """
    Builds the table of pairwise minimum-image distances between the atoms of the crystal cell.
    Second version of this function; each pair is handled by min_img_distance2.

    Parameters
    ----------
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    cart_coords : numpy.ndarray
        The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).

    Returns
    -------
    distance_matrix : numpy.ndarray
        The distance of each atom to each other atom. Shape is (number of atoms, number of atoms).
        Symmetric, with zeros on the diagonal.

    """
    n_atoms = len(cart_coords)
    distance_matrix = np.zeros([n_atoms, n_atoms])  # Diagonal stays zero.

    # Visit each unordered pair (i < j) once and mirror the result across the diagonal.
    for i, j in itertools.combinations(range(n_atoms), 2):
        separation = min_img_distance2(cart_coords[i], cart_coords[j], cell)
        distance_matrix[i, j] = separation
        distance_matrix[j, i] = separation

    return distance_matrix
1321
+
1322
def min_img_distance2(coords1, coords2, cell):
    """
    Returns the minimum image distance between the two atoms located at coords1 and coords2.
    That is the shortest distance between the two atoms when the repeating periodic structure of the MOF is taken into account.

    The second atom is copied into the 27 cells obtained by shifting -1, 0 or 1 cell along each cell vector,
    and the smallest distance from the first atom to any of those copies is returned.

    Parameters
    ----------
    coords1 : numpy.ndarray
        The Cartesian coordinates of the first atom under consideration. Shape is (3,).
    coords2 : numpy.ndarray
        The Cartesian coordinates of the second atom under consideration. Shape is (3,).
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).

    Returns
    -------
    numpy.float64
        The distance between the two atoms.

    """
    to_fractional = np.linalg.inv(cell)  # Converts Cartesian coordinates to fractional ones.

    # All 27 combinations of shifting by -1, 0 or 1 cell along each of the three cell vectors.
    cell_shifts = np.array(list(itertools.product((-1, 0, 1), repeat=3)))

    # Fractional coordinates of every shifted copy of the second atom.
    shifted_fractional = np.dot(coords2, to_fractional) + cell_shifts

    # The same copies, converted back to Cartesian coordinates.
    images = np.array([np.dot(frac, cell) for frac in shifted_fractional])

    # Euclidean distance from the first atom to each copy; the closest copy defines the distance.
    return np.amin(distance.cdist([coords1], images))