molSimplify 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +224 -0
- molSimplify/Classes/__init__.py +6 -0
- molSimplify/Classes/atom3D.py +235 -0
- molSimplify/Classes/dft_obs.py +130 -0
- molSimplify/Classes/globalvars.py +827 -0
- molSimplify/Classes/helpers.py +161 -0
- molSimplify/Classes/ligand.py +2330 -0
- molSimplify/Classes/mGUI.py +2493 -0
- molSimplify/Classes/mWidgets.py +438 -0
- molSimplify/Classes/miniGUI.py +41 -0
- molSimplify/Classes/mol2D.py +260 -0
- molSimplify/Classes/mol3D.py +5846 -0
- molSimplify/Classes/monomer3D.py +253 -0
- molSimplify/Classes/partialcharges.py +226 -0
- molSimplify/Classes/protein3D.py +1178 -0
- molSimplify/Classes/rundiag.py +151 -0
- molSimplify/Data/ML.dat +212 -0
- molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
- molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
- molSimplify/Data/MLS_angle_for_click.dat +8 -0
- molSimplify/Data/MLS_angle_for_inter.dat +23 -0
- molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
- molSimplify/Data/MLS_angle_for_intra.dat +10 -0
- molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
- molSimplify/Data/MLS_angle_for_oa.dat +18 -0
- molSimplify/Data/ML_FSR_for_inter.dat +112 -0
- molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
- molSimplify/Data/ML_bond_for_cat.dat +8 -0
- molSimplify/Data/ML_bond_for_click.dat +8 -0
- molSimplify/Data/ML_bond_for_inter.dat +48 -0
- molSimplify/Data/ML_bond_for_inter2.dat +48 -0
- molSimplify/Data/ML_bond_for_intra.dat +10 -0
- molSimplify/Data/ML_bond_for_intra2.dat +6 -0
- molSimplify/Data/ML_bond_for_oa.dat +18 -0
- molSimplify/Data/bp1.dat +21 -0
- molSimplify/Data/li.dat +3 -0
- molSimplify/Data/no.dat +2 -0
- molSimplify/Data/oct.dat +7 -0
- molSimplify/Data/pbp.dat +8 -0
- molSimplify/Data/spy.dat +6 -0
- molSimplify/Data/sqap.dat +9 -0
- molSimplify/Data/sqp.dat +5 -0
- molSimplify/Data/tbp.dat +6 -0
- molSimplify/Data/tdhd.dat +9 -0
- molSimplify/Data/thd.dat +5 -0
- molSimplify/Data/tpl.dat +4 -0
- molSimplify/Data/tpr.dat +7 -0
- molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
- molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
- molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
- molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
- molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
- molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
- molSimplify/Informatics/MOF/__init__.py +0 -0
- molSimplify/Informatics/MOF/atomic.py +267 -0
- molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
- molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
- molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
- molSimplify/Informatics/Mol2Parser.py +46 -0
- molSimplify/Informatics/RACassemble.py +408 -0
- molSimplify/Informatics/__init__.py +0 -0
- molSimplify/Informatics/active_learning/__init__.py +0 -0
- molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
- molSimplify/Informatics/autocorrelation.py +1930 -0
- molSimplify/Informatics/clean_autocorrelation.py +778 -0
- molSimplify/Informatics/coulomb_analyze.py +67 -0
- molSimplify/Informatics/decoration_manager.py +193 -0
- molSimplify/Informatics/geo_analyze.py +88 -0
- molSimplify/Informatics/geometrics.py +56 -0
- molSimplify/Informatics/graph_analyze.py +163 -0
- molSimplify/Informatics/graph_racs.py +288 -0
- molSimplify/Informatics/jupyter_vis.py +172 -0
- molSimplify/Informatics/lacRACAssemble.py +2192 -0
- molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
- molSimplify/Informatics/misc_descriptors.py +198 -0
- molSimplify/Informatics/organic_fingerprints.py +61 -0
- molSimplify/Informatics/partialcharges.py +345 -0
- molSimplify/Informatics/protein/activesite.py +53 -0
- molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
- molSimplify/Informatics/rac155_geo.py +48 -0
- molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
- molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
- molSimplify/Ligands/12crown4.mol +62 -0
- molSimplify/Ligands/Antipyrine.mol +58 -0
- molSimplify/Ligands/BPAbipy.mol +106 -0
- molSimplify/Ligands/Hpyrrole.mol +26 -0
- molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
- molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
- molSimplify/Ligands/NMe2_-1.xyz +11 -0
- molSimplify/Ligands/PCy3.mol +111 -0
- molSimplify/Ligands/PMe3.xyz +15 -0
- molSimplify/Ligands/PPh3.mol +76 -0
- molSimplify/Ligands/Propyphenazone.mol +77 -0
- molSimplify/Ligands/acac.mol +33 -0
- molSimplify/Ligands/acacen.mol +76 -0
- molSimplify/Ligands/acetate.smi +1 -0
- molSimplify/Ligands/acetate.xyz +9 -0
- molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
- molSimplify/Ligands/acetonitrile.mol +17 -0
- molSimplify/Ligands/alanine.mol +30 -0
- molSimplify/Ligands/alphabetizer.py +21 -0
- molSimplify/Ligands/amine.mol +11 -0
- molSimplify/Ligands/ammonia.mol +12 -0
- molSimplify/Ligands/arginine.mol +58 -0
- molSimplify/Ligands/asparagine.mol +38 -0
- molSimplify/Ligands/aspartic_acid.mol +35 -0
- molSimplify/Ligands/azide.mol +11 -0
- molSimplify/Ligands/benzene.mol +28 -0
- molSimplify/Ligands/benzene_pi.mol +30 -0
- molSimplify/Ligands/benzenedithiol.mol +30 -0
- molSimplify/Ligands/benzenethiol.mol +30 -0
- molSimplify/Ligands/benzylisocy.mol +38 -0
- molSimplify/Ligands/bidiazine.mol +42 -0
- molSimplify/Ligands/bidiazole.mol +38 -0
- molSimplify/Ligands/bifuran.mol +38 -0
- molSimplify/Ligands/bihydrodiazine.mol +58 -0
- molSimplify/Ligands/bihydrodiazole.mol +46 -0
- molSimplify/Ligands/bihydrooxazine.mol +54 -0
- molSimplify/Ligands/bihydrooxazole.mol +42 -0
- molSimplify/Ligands/bihydrothiazine.mol +54 -0
- molSimplify/Ligands/bihydrothiazole.mol +42 -0
- molSimplify/Ligands/biimidazole.mol +38 -0
- molSimplify/Ligands/bioxazole.mol +34 -0
- molSimplify/Ligands/bipy.mol +46 -0
- molSimplify/Ligands/bipyrazine.xyz +20 -0
- molSimplify/Ligands/bipyrimidine.mol +42 -0
- molSimplify/Ligands/bipyrrole.mol +42 -0
- molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
- molSimplify/Ligands/bithiazole.mol +34 -0
- molSimplify/Ligands/bromide.mol +7 -0
- molSimplify/Ligands/bromide.smi +1 -0
- molSimplify/Ligands/c2.mol +9 -0
- molSimplify/Ligands/caprolactone.mol +41 -0
- molSimplify/Ligands/carbonyl.mol +8 -0
- molSimplify/Ligands/carboxyl.mol +13 -0
- molSimplify/Ligands/cat.mol +30 -0
- molSimplify/Ligands/chloride.mol +7 -0
- molSimplify/Ligands/chloride.smi +1 -0
- molSimplify/Ligands/chloropyridine.mol +27 -0
- molSimplify/Ligands/co2.mol +10 -0
- molSimplify/Ligands/corrolazine.mol +72 -0
- molSimplify/Ligands/cs.mol +8 -0
- molSimplify/Ligands/cyanate.xyz +5 -0
- molSimplify/Ligands/cyanide.mol +9 -0
- molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
- molSimplify/Ligands/cyanopyridine.mol +29 -0
- molSimplify/Ligands/cyclam.mol +81 -0
- molSimplify/Ligands/cyclen.mol +69 -0
- molSimplify/Ligands/cyclopentadienyl.mol +26 -0
- molSimplify/Ligands/cysteine.mol +32 -0
- molSimplify/Ligands/diaminomethyl.mol +19 -0
- molSimplify/Ligands/diazine.mol +25 -0
- molSimplify/Ligands/diazole.mol +23 -0
- molSimplify/Ligands/dicyanamide.mol +15 -0
- molSimplify/Ligands/dihydrofuran.mol +27 -0
- molSimplify/Ligands/dmap.xyz +35 -0
- molSimplify/Ligands/dmf.mol +28 -0
- molSimplify/Ligands/dmi.mol +41 -0
- molSimplify/Ligands/dmpe.mol +52 -0
- molSimplify/Ligands/dpmu.mol +47 -0
- molSimplify/Ligands/dppe.mol +112 -0
- molSimplify/Ligands/edta.mol +69 -0
- molSimplify/Ligands/en.mol +28 -0
- molSimplify/Ligands/ethanethiol.mol +21 -0
- molSimplify/Ligands/ethanolamine.mol +26 -0
- molSimplify/Ligands/ethbipy.mol +70 -0
- molSimplify/Ligands/ethyl.mol +19 -0
- molSimplify/Ligands/ethylamine.mol +24 -0
- molSimplify/Ligands/ethylene.mol +16 -0
- molSimplify/Ligands/ethylesteracac.mol +57 -0
- molSimplify/Ligands/fluoride.mol +7 -0
- molSimplify/Ligands/fluoride.smi +1 -0
- molSimplify/Ligands/formaldehyde.mol +12 -0
- molSimplify/Ligands/formamidate.xyz +8 -0
- molSimplify/Ligands/formate.xyz +6 -0
- molSimplify/Ligands/furan.mol +23 -0
- molSimplify/Ligands/glutamic_acid.mol +42 -0
- molSimplify/Ligands/glutamine.mol +44 -0
- molSimplify/Ligands/glycinate.mol +23 -0
- molSimplify/Ligands/glycine.mol +24 -0
- molSimplify/Ligands/h2s.mol +10 -0
- molSimplify/Ligands/helium.mol +6 -0
- molSimplify/Ligands/histidine.mol +45 -0
- molSimplify/Ligands/hmpa.mol +62 -0
- molSimplify/Ligands/hs-.mol +9 -0
- molSimplify/Ligands/hydride.mol +7 -0
- molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
- molSimplify/Ligands/hydrocyanide.mol +10 -0
- molSimplify/Ligands/hydrodiazine.mol +33 -0
- molSimplify/Ligands/hydrodiazole.mol +27 -0
- molSimplify/Ligands/hydrogensulfide.mol +10 -0
- molSimplify/Ligands/hydroisocyanide.mol +11 -0
- molSimplify/Ligands/hydrooxazine.mol +31 -0
- molSimplify/Ligands/hydrooxazole.mol +25 -0
- molSimplify/Ligands/hydrothiazine.mol +31 -0
- molSimplify/Ligands/hydrothiazole.mol +25 -0
- molSimplify/Ligands/hydroxyl.mol +9 -0
- molSimplify/Ligands/imidazole.mol +23 -0
- molSimplify/Ligands/imidazolidinone.mol +29 -0
- molSimplify/Ligands/imine.mol +13 -0
- molSimplify/Ligands/iminodiacetic.mol +33 -0
- molSimplify/Ligands/iodide.mol +7 -0
- molSimplify/Ligands/iodobenzene.xyz +14 -0
- molSimplify/Ligands/isoleucine.mol +48 -0
- molSimplify/Ligands/isothiocyanate.mol +11 -0
- molSimplify/Ligands/leucine.mol +48 -0
- molSimplify/Ligands/ligands.dict +257 -0
- molSimplify/Ligands/lysine.mol +54 -0
- molSimplify/Ligands/mebenzenedithiol.mol +36 -0
- molSimplify/Ligands/mebim_py.xyz +29 -0
- molSimplify/Ligands/mebim_pz.xyz +28 -0
- molSimplify/Ligands/mebipy.mol +58 -0
- molSimplify/Ligands/mecat.mol +36 -0
- molSimplify/Ligands/methanal.mol +11 -0
- molSimplify/Ligands/methanethiol.mol +15 -0
- molSimplify/Ligands/methanol.mol +16 -0
- molSimplify/Ligands/methionine.mol +44 -0
- molSimplify/Ligands/methyl.mol +13 -0
- molSimplify/Ligands/methylacetylide.xyz +8 -0
- molSimplify/Ligands/methylamine.mol +19 -0
- molSimplify/Ligands/methylazide.xyz +9 -0
- molSimplify/Ligands/methylisocy.mol +17 -0
- molSimplify/Ligands/methylpyridine.mol +33 -0
- molSimplify/Ligands/n2.mol +8 -0
- molSimplify/Ligands/n4py.xyz +51 -0
- molSimplify/Ligands/nch.mol +10 -0
- molSimplify/Ligands/nco-.mol +11 -0
- molSimplify/Ligands/nethanolamine.mol +26 -0
- molSimplify/Ligands/nitrate.mol +14 -0
- molSimplify/Ligands/nitrite.mol +11 -0
- molSimplify/Ligands/nitro.mol +11 -0
- molSimplify/Ligands/nitrobipy.mol +54 -0
- molSimplify/Ligands/nitroso.mol +8 -0
- molSimplify/Ligands/nme3.mol +30 -0
- molSimplify/Ligands/no-.mol +10 -0
- molSimplify/Ligands/no2-.mol +11 -0
- molSimplify/Ligands/noxygen.mol +8 -0
- molSimplify/Ligands/ns-.mol +10 -0
- molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
- molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
- molSimplify/Ligands/o2-.mol +9 -0
- molSimplify/Ligands/o2.xyz +4 -0
- molSimplify/Ligands/och2.mol +12 -0
- molSimplify/Ligands/oethanolamine.mol +26 -0
- molSimplify/Ligands/ome2.mol +22 -0
- molSimplify/Ligands/ooh.xyz +5 -0
- molSimplify/Ligands/oxalate.mol +17 -0
- molSimplify/Ligands/oxalate.smi +1 -0
- molSimplify/Ligands/oxygen.mol +7 -0
- molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
- molSimplify/Ligands/ph2-.mol +11 -0
- molSimplify/Ligands/ph3.mol +12 -0
- molSimplify/Ligands/phen.mol +51 -0
- molSimplify/Ligands/phenacac.mol +63 -0
- molSimplify/Ligands/phenalalanine.mol +51 -0
- molSimplify/Ligands/phendione.mol +51 -0
- molSimplify/Ligands/phenphen.mol +75 -0
- molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
- molSimplify/Ligands/phenylcyc.mol +99 -0
- molSimplify/Ligands/phenylenediamine.mol +37 -0
- molSimplify/Ligands/phenylisocy.mol +32 -0
- molSimplify/Ligands/phosacidbipy.mol +66 -0
- molSimplify/Ligands/phosphine.mol +13 -0
- molSimplify/Ligands/phosphorine.mol +27 -0
- molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
- molSimplify/Ligands/phthalocyanine.mol +126 -0
- molSimplify/Ligands/pme3o.mol +32 -0
- molSimplify/Ligands/porphyrin.mol +82 -0
- molSimplify/Ligands/pph3o.mol +77 -0
- molSimplify/Ligands/proline.mol +39 -0
- molSimplify/Ligands/propdiol.mol +21 -0
- molSimplify/Ligands/propylene.mol +23 -0
- molSimplify/Ligands/pyridine.mol +27 -0
- molSimplify/Ligands/pyrimidone.mol +27 -0
- molSimplify/Ligands/pyrrole.mol +24 -0
- molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
- molSimplify/Ligands/s2-.mol +9 -0
- molSimplify/Ligands/salen.mol +75 -0
- molSimplify/Ligands/salphen.mol +84 -0
- molSimplify/Ligands/serine.mol +32 -0
- molSimplify/Ligands/simple_ligands.dict +14 -0
- molSimplify/Ligands/sulfacidbipy.mol +63 -0
- molSimplify/Ligands/tbucat.mol +54 -0
- molSimplify/Ligands/tbuphisocy.mol +56 -0
- molSimplify/Ligands/tbutylcyclen.mol +166 -0
- molSimplify/Ligands/tbutylisocy.mol +35 -0
- molSimplify/Ligands/tbutylthiol.mol +33 -0
- molSimplify/Ligands/tcnoet.mol +43 -0
- molSimplify/Ligands/tcnoetOH.mol +45 -0
- molSimplify/Ligands/terpy.mol +65 -0
- molSimplify/Ligands/tetrahydrofuran.mol +31 -0
- molSimplify/Ligands/thiane.mol +37 -0
- molSimplify/Ligands/thiazole.mol +21 -0
- molSimplify/Ligands/thiocyanate.mol +11 -0
- molSimplify/Ligands/thiol.mol +9 -0
- molSimplify/Ligands/thiophene.mol +23 -0
- molSimplify/Ligands/thiopyridine.mol +29 -0
- molSimplify/Ligands/threonine.mol +38 -0
- molSimplify/Ligands/tpp.mol +165 -0
- molSimplify/Ligands/tricyanomethyl.mol +19 -0
- molSimplify/Ligands/trifluoromethyl.mol +13 -0
- molSimplify/Ligands/tryptophan.mol +60 -0
- molSimplify/Ligands/tyrosine.mol +53 -0
- molSimplify/Ligands/uthiol.mol +11 -0
- molSimplify/Ligands/uthiolme2.mol +23 -0
- molSimplify/Ligands/valine.mol +42 -0
- molSimplify/Ligands/water.mol +10 -0
- molSimplify/Ligands/x.mol +6 -0
- molSimplify/Scripts/__init__.py +0 -0
- molSimplify/Scripts/addtodb.py +308 -0
- molSimplify/Scripts/cellbuilder.py +1592 -0
- molSimplify/Scripts/cellbuilder_tools.py +701 -0
- molSimplify/Scripts/chains.py +342 -0
- molSimplify/Scripts/convert_2to3.py +23 -0
- molSimplify/Scripts/dbinteract.py +631 -0
- molSimplify/Scripts/distgeom.py +617 -0
- molSimplify/Scripts/findcorrelations.py +287 -0
- molSimplify/Scripts/generator.py +267 -0
- molSimplify/Scripts/geometry.py +1224 -0
- molSimplify/Scripts/grabguivars.py +845 -0
- molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
- molSimplify/Scripts/inparse.py +1673 -0
- molSimplify/Scripts/io.py +1149 -0
- molSimplify/Scripts/isomers.py +415 -0
- molSimplify/Scripts/jobgen.py +247 -0
- molSimplify/Scripts/krr_prep.py +1262 -0
- molSimplify/Scripts/molSimplify_io.py +18 -0
- molSimplify/Scripts/molden2psi4wfn.py +166 -0
- molSimplify/Scripts/namegen.py +32 -0
- molSimplify/Scripts/nn_prep.py +561 -0
- molSimplify/Scripts/oct_check_mols.py +782 -0
- molSimplify/Scripts/periodic_QE.py +97 -0
- molSimplify/Scripts/postmold.py +304 -0
- molSimplify/Scripts/postmwfn.py +709 -0
- molSimplify/Scripts/postparse.py +488 -0
- molSimplify/Scripts/postproc.py +139 -0
- molSimplify/Scripts/qcgen.py +1450 -0
- molSimplify/Scripts/rmsd.py +489 -0
- molSimplify/Scripts/rungen.py +670 -0
- molSimplify/Scripts/structgen.py +3040 -0
- molSimplify/Scripts/tf_nn_prep.py +894 -0
- molSimplify/Scripts/tsgen.py +295 -0
- molSimplify/Scripts/uq_calibration.py +69 -0
- molSimplify/__init__.py +0 -0
- molSimplify/__main__.py +197 -0
- molSimplify/icons/chemdb.png +0 -0
- molSimplify/icons/hjklogo.png +0 -0
- molSimplify/icons/icon.png +0 -0
- molSimplify/icons/logo.png +0 -0
- molSimplify/icons/logo_old.png +0 -0
- molSimplify/icons/petachem.png +0 -0
- molSimplify/icons/petachem2.png +0 -0
- molSimplify/icons/petachem_full.png +0 -0
- molSimplify/icons/pythonlogo.png +0 -0
- molSimplify/icons/sge copy.png +0 -0
- molSimplify/icons/sge.png +0 -0
- molSimplify/icons/slurm.png +0 -0
- molSimplify/icons/wft1.png +0 -0
- molSimplify/icons/wft2.png +0 -0
- molSimplify/icons/wft3.png +0 -0
- molSimplify/ml/__init__.py +0 -0
- molSimplify/ml/kernels.py +36 -0
- molSimplify/ml/layers.py +29 -0
- molSimplify/molscontrol/__init__.py +14 -0
- molSimplify/molscontrol/_version.py +521 -0
- molSimplify/molscontrol/clf_tools.py +144 -0
- molSimplify/molscontrol/data/README.md +21 -0
- molSimplify/molscontrol/data/look_and_say.dat +15 -0
- molSimplify/molscontrol/dynamic_classifier.py +514 -0
- molSimplify/molscontrol/io_tools.py +363 -0
- molSimplify/molscontrol/molscontrol.py +49 -0
- molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
- molSimplify/molscontrol/terachem/terachem_input +22 -0
- molSimplify/python_krr/X_train_TS.csv +535 -0
- molSimplify/python_krr/__init__.py +0 -0
- molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
- molSimplify/python_krr/hat2_feature_names.csv +1 -0
- molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
- molSimplify/python_krr/hat_X_mean_std.csv +6 -0
- molSimplify/python_krr/hat_feature_names.csv +1 -0
- molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
- molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
- molSimplify/python_krr/hat_y_mean_std.csv +2 -0
- molSimplify/python_krr/sklearn_models.py +34 -0
- molSimplify/python_krr/y_train_TS.csv +535 -0
- molSimplify/python_nn/ANN.py +198 -0
- molSimplify/python_nn/__init__.py +0 -0
- molSimplify/python_nn/clf_analysis_tool.py +125 -0
- molSimplify/python_nn/dictionary_toolbox.py +49 -0
- molSimplify/python_nn/ensemble_test.py +309 -0
- molSimplify/python_nn/hs_center.csv +26 -0
- molSimplify/python_nn/hs_scale.csv +26 -0
- molSimplify/python_nn/ls_center.csv +26 -0
- molSimplify/python_nn/ls_scale.csv +26 -0
- molSimplify/python_nn/ms_hs_b1.csv +50 -0
- molSimplify/python_nn/ms_hs_b2.csv +50 -0
- molSimplify/python_nn/ms_hs_b3.csv +1 -0
- molSimplify/python_nn/ms_hs_w1.csv +50 -0
- molSimplify/python_nn/ms_hs_w2.csv +50 -0
- molSimplify/python_nn/ms_hs_w3.csv +1 -0
- molSimplify/python_nn/ms_ls_b1.csv +50 -0
- molSimplify/python_nn/ms_ls_b2.csv +50 -0
- molSimplify/python_nn/ms_ls_b3.csv +1 -0
- molSimplify/python_nn/ms_ls_w1.csv +50 -0
- molSimplify/python_nn/ms_ls_w2.csv +50 -0
- molSimplify/python_nn/ms_ls_w3.csv +1 -0
- molSimplify/python_nn/ms_slope_b1.csv +50 -0
- molSimplify/python_nn/ms_slope_b2.csv +50 -0
- molSimplify/python_nn/ms_slope_b3.csv +1 -0
- molSimplify/python_nn/ms_slope_w1.csv +50 -0
- molSimplify/python_nn/ms_slope_w2.csv +50 -0
- molSimplify/python_nn/ms_slope_w3.csv +1 -0
- molSimplify/python_nn/ms_split_b1.csv +50 -0
- molSimplify/python_nn/ms_split_b2.csv +50 -0
- molSimplify/python_nn/ms_split_b3.csv +1 -0
- molSimplify/python_nn/ms_split_w1.csv +50 -0
- molSimplify/python_nn/ms_split_w2.csv +50 -0
- molSimplify/python_nn/ms_split_w3.csv +1 -0
- molSimplify/python_nn/slope_center.csv +25 -0
- molSimplify/python_nn/slope_scale.csv +25 -0
- molSimplify/python_nn/split_center.csv +26 -0
- molSimplify/python_nn/split_scale.csv +26 -0
- molSimplify/python_nn/tf_ANN.py +762 -0
- molSimplify/python_nn/train_data.csv +1211 -0
- molSimplify/tf_nn/__init__.py +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
- molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/gap_model.json +1 -0
- molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
- molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/homo_model.json +126 -0
- molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/split/split_model.h5 +0 -0
- molSimplify/tf_nn/split/split_model.json +1 -0
- molSimplify/tf_nn/split/split_vars.csv +155 -0
- molSimplify/tf_nn/split/split_x.csv +1902 -0
- molSimplify/tf_nn/split/split_y.csv +1902 -0
- molSimplify/tf_nn/split/train_names.csv +1901 -0
- molSimplify/utils/__init__.py +0 -0
- molSimplify/utils/decorators.py +16 -0
- molSimplify/utils/metaclasses.py +12 -0
- molSimplify/utils/tensorflow.py +23 -0
- molSimplify/utils/timer.py +16 -0
- molSimplify-1.7.4.dist-info/LICENSE +674 -0
- molSimplify-1.7.4.dist-info/METADATA +821 -0
- molSimplify-1.7.4.dist-info/RECORD +651 -0
- molSimplify-1.7.4.dist-info/WHEEL +5 -0
- molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
- molSimplify-1.7.4.dist-info/top_level.txt +4 -0
- tests/generateTests.py +122 -0
- tests/helperFuncs.py +658 -0
- tests/informatics/test_MOF_descriptors.py +128 -0
- tests/informatics/test_active_learning.py +113 -0
- tests/informatics/test_coulomb_analyze.py +24 -0
- tests/informatics/test_graph_racs.py +193 -0
- tests/ml/test_kernels.py +20 -0
- tests/ml/test_layers.py +47 -0
- tests/runtest.py +10 -0
- tests/test_Mol2D.py +128 -0
- tests/test_basic_imports.py +62 -0
- tests/test_bidentate.py +25 -0
- tests/test_cli.py +20 -0
- tests/test_distgeom.py +106 -0
- tests/test_example_1.py +29 -0
- tests/test_example_3.py +31 -0
- tests/test_example_5.py +43 -0
- tests/test_example_7.py +28 -0
- tests/test_example_8.py +15 -0
- tests/test_example_tbp.py +15 -0
- tests/test_ff_xtb.py +111 -0
- tests/test_geocheck_oct.py +26 -0
- tests/test_geocheck_one_empty.py +15 -0
- tests/test_geometry.py +44 -0
- tests/test_inparse.py +76 -0
- tests/test_io.py +84 -0
- tests/test_jobgen.py +84 -0
- tests/test_joption_pythonic.py +27 -0
- tests/test_ligand_assign.py +58 -0
- tests/test_ligand_assign_consistent.py +60 -0
- tests/test_ligand_class.py +26 -0
- tests/test_ligand_from_mol_file.py +35 -0
- tests/test_ligands.py +86 -0
- tests/test_mol3D.py +337 -0
- tests/test_molcas_caspt2.py +15 -0
- tests/test_molcas_casscf.py +15 -0
- tests/test_old_ANNs.py +68 -0
- tests/test_orca_ccsdt.py +15 -0
- tests/test_orca_dft.py +15 -0
- tests/test_qcgen.py +50 -0
- tests/test_racs.py +124 -0
- tests/test_rmsd.py +68 -0
- tests/test_structgen_functions.py +198 -0
- tests/test_tetrahedral.py +29 -0
- tests/test_tutorial_10_part_one.py +16 -0
- tests/test_tutorial_10_part_two.py +15 -0
- tests/test_tutorial_2.py +11 -0
- tests/test_tutorial_3.py +15 -0
- tests/test_tutorial_4.py +57 -0
- tests/test_tutorial_6.py +10 -0
- tests/test_tutorial_8.py +29 -0
- tests/test_tutorial_9_part_one.py +15 -0
- tests/test_tutorial_9_part_two.py +15 -0
- tests/test_tutorial_qm9_part_one.py +6 -0
- tests/testresources/refs/racs/generate_references.py +85 -0
- workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
- workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
- workflows/NandyJACSAu2022/fragment_classes.py +586 -0
- workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
|
@@ -0,0 +1,1648 @@
|
|
|
1
|
+
from molSimplify.Classes.mol3D import mol3D
|
|
2
|
+
from molSimplify.Classes.atom3D import atom3D
|
|
3
|
+
from molSimplify.Scripts.cellbuilder_tools import import_from_cif
|
|
4
|
+
from molSimplify.Informatics.MOF.MOF_descriptors import get_primitive
|
|
5
|
+
from molSimplify.Informatics.MOF.monofunctionalized_BDC.index_information import INDEX_INFO
|
|
6
|
+
from molSimplify.Scripts.geometry import checkplanar, PointRotateAxis, distance, rotate_around_axis
|
|
7
|
+
from molSimplify.Informatics.MOF.PBC_functions import (
|
|
8
|
+
compute_adj_matrix,
|
|
9
|
+
compute_distance_matrix3,
|
|
10
|
+
compute_image_flag,
|
|
11
|
+
findPaths,
|
|
12
|
+
frac_coord,
|
|
13
|
+
fractional2cart,
|
|
14
|
+
get_closed_subgraph,
|
|
15
|
+
readcif,
|
|
16
|
+
XYZ_connected,
|
|
17
|
+
write_cif,
|
|
18
|
+
)
|
|
19
|
+
from importlib_resources import files as resource_files
|
|
20
|
+
import numpy as np
|
|
21
|
+
import scipy
|
|
22
|
+
import networkx as nx
|
|
23
|
+
import spglib
|
|
24
|
+
import os
|
|
25
|
+
|
|
26
|
+
### Beginning of functions ###
|
|
27
|
+
|
|
28
|
+
##### THE INPUT REQUIRES A P1 CELL ######
|
|
29
|
+
# If not P1, will functionalize wrong. #
|
|
30
|
+
#########################################
|
|
31
|
+
|
|
32
|
+
# This script can only functionalize C-H bonds.
|
|
33
|
+
|
|
34
|
+
def functionalize_MOF(cif_file, path2write, functional_group='F', functionalization_limit=1, path_between_functionalizations=3, additional_atom_offset=0):
    """
    Functionalizes the provided MOF and writes the functionalized version to a cif file.
    Loops through the atoms of a MOF and functionalizes at suitable carbon atoms.

    A carbon is considered suitable when it is bonded to exactly three atoms,
    exactly one of which is a hydrogen. That hydrogen is replaced by the functional group.
    Each linker is functionalized functionalization_limit times, or as close to
    that many times as possible.

    The input cif must describe a P1 cell; otherwise the functionalization will be wrong.

    The result is written to
    <path2write>/cif/functionalized_<cif name>_<functional_group>_<functionalization_limit>.cif
    and symmetry_check is run on both the original and the functionalized cell.

    Parameters
    ----------
        cif_file : str
            The path to the cif file to be functionalized.
        path2write : str
            The folder path where the cif of the functionalized MOF will be written.
            The cif is placed in a `cif` subfolder of this folder.
            A trailing path separator is optional.
        functional_group : str
            The functional group to use for MOF functionalization.
            If 'H', no atoms are changed but the cif is still written.
        functionalization_limit : int
            The number of functionalizations per linker.
        path_between_functionalizations : int
            How many bonds away one functionalized atom should be from another, if functionalized_limit is greater than one.
        additional_atom_offset : float
            Extent to which to rotate the placement of depth 2 functional group atoms. Give in degrees.
            Useful for preventing atomic overlap / unintended bonds.

    Returns
    -------
        functionalized_atoms : list of int
            Which indices of the original cif file were functionalized.

    Raises
    ------
        ValueError
            If check_support reports that functional_group is not supported.

    """
    if not check_support(functional_group):
        raise ValueError('That functional group is not supported through the dictionary approach.')

    base_mof_name = os.path.basename(cif_file)
    if base_mof_name.endswith('.cif'):
        base_mof_name = base_mof_name[:-4]

    # Read the cif file and build the cell, Cartesian coordinates, and bond graph.
    cpar, allatomtypes, fcoords = readcif(cif_file)
    molcif, cell_vector, _alpha, _beta, _gamma = import_from_cif(cif_file, True)
    cell_v = np.array(cell_vector)
    original_fcoords = fcoords.copy()
    cart_coords = fractional2cart(fcoords, cell_v)
    distance_mat = compute_distance_matrix3(cell_v, cart_coords)
    adj_matrix, _ = compute_adj_matrix(distance_mat, allatomtypes)
    molcif.graph = adj_matrix.todense()

    # Break the MOF down into linkers. This uses what was developed for MOF featurization.
    linker_list, linker_subgraphlist = get_linkers(molcif, adj_matrix, allatomtypes)

    checkedlist = set()  # Atoms that have already been checked for functionalization.
    original_allatomtypes = allatomtypes.copy()  # The chemical symbols as read from the cif.
    delete_list = []  # The H atoms that need to be deleted later.
    extra_atom_coords = []
    extra_atom_types = []
    functionalized_atoms = []

    if functional_group != 'H':  # We don't do anything for -H functionalization.

        # Iterate over atoms until we find one suitable for functionalization.
        for i, atom in enumerate(original_allatomtypes):
            if i in checkedlist:
                continue  # Move on to the next atom.
            if atom != 'C':  # Assumes that functionalization is performed on a C atom.
                checkedlist.add(i)
                continue

            # Atoms that are connected to atom i.
            connected_atom_list, connected_atom_types = connected_atoms_from_adjmat(adj_matrix, i, original_allatomtypes)

            # Must functionalize where an H was, and needs an sp2 C:
            # exactly one hydrogen among exactly three neighbors.
            # apply_functionalization assumes two neighbors to the carbon excluding the hydrogen.
            # Can only replace a hydrogen at the moment; can't replace a methyl, hydroxyl, etc.
            if connected_atom_types.count('H') != 1 or len(connected_atom_types) != 3:
                checkedlist.add(i)
                continue

            # Found a suitable location for functionalization.
            functionalized = False
            functionalization_counter = functionalization_limit

            # Identify the linker that has atom i.
            # This also adds all atoms of that linker to checkedlist, so the linker is not checked again.
            linker_to_analyze, linker_to_analyze_index, checkedlist = linker_identification(linker_list, i, checkedlist)

            linker_atom_types, linker_graph, linker_cart_coords = analyze_linker(cart_coords,
                linker_to_analyze,
                allatomtypes,
                linker_subgraphlist,
                linker_to_analyze_index,
                cell_v,
                )

            # The first linker functionalization: replace the single hydrogen bonded to atom i.
            k = connected_atom_types.index('H')
            molcif, functionalization_counter, functionalized, delete_list, extra_atom_coords, extra_atom_types, functionalized_atoms = first_functionalization(molcif,
                allatomtypes,
                i,
                connected_atom_list,
                k,
                functional_group,
                linker_cart_coords,
                linker_to_analyze,
                linker_atom_types,
                linker_graph,
                functionalization_counter,
                delete_list,
                extra_atom_coords,
                extra_atom_types,
                functionalized_atoms,
                additional_atom_offset=additional_atom_offset
                )

            # Any additional linker functionalizations.
            # additional_functionalization sets the counter to zero when no further site is found,
            # so this loop terminates.
            while functionalization_counter > 0:  # Still have some more functionalizations to make.
                molcif, functionalization_counter, delete_list, extra_atom_coords, extra_atom_types, functionalized_atoms = additional_functionalization(i,
                    linker_to_analyze,
                    linker_subgraphlist,
                    linker_to_analyze_index,
                    path_between_functionalizations,
                    functionalized,
                    adj_matrix,
                    allatomtypes,
                    molcif,
                    functional_group,
                    linker_cart_coords,
                    linker_atom_types,
                    linker_graph,
                    functionalization_counter,
                    delete_list,
                    extra_atom_coords,
                    extra_atom_types,
                    functionalized_atoms,
                    additional_atom_offset=additional_atom_offset
                    )

    # Apply delete_list and extra_atom_types to make final_atom_types and new_coord_list.
    # Deleting atoms (hydrogens that are replaced by functional groups).
    new_coord_list, final_atom_types = atom_deletion(cart_coords, allatomtypes, delete_list)

    # Adding atoms (the atoms in the functional groups).
    allatomtypes, fcoords = atom_addition(extra_atom_types, final_atom_types, new_coord_list, extra_atom_coords, cell_v)

    # Write the cif. os.path.join is used so that path2write works with or without a trailing separator.
    cif_folder = os.path.join(path2write, 'cif')
    mkdir_if_absent(cif_folder)
    cif_name = f'functionalized_{base_mof_name}_{functional_group}_{functionalization_limit}.cif'
    write_cif(os.path.join(cif_folder, cif_name), cpar, fcoords, allatomtypes)

    # Check on how the functionalization affected the symmetry.
    print('------- UNFUNCTIONALIZED CASE --------')
    symmetry_check(original_allatomtypes, original_fcoords, cell_v)

    # Same analysis on the functionalized cell (allatomtypes and fcoords instead of the originals).
    print('------- FUNCTIONALIZED CASE --------')
    symmetry_check(allatomtypes, fcoords, cell_v)

    return functionalized_atoms
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def first_functionalization(molcif,
    allatomtypes,
    i,
    connected_atom_list,
    k,
    functional_group,
    linker_cart_coords,
    linker_to_functionalize,
    linker_atom_types,
    linker_graph,
    functionalization_counter,
    delete_list,
    extra_atom_coords,
    extra_atom_types,
    functionalized_atoms,
    additional_atom_offset=0
    ):
    """
    Performs the first functionalization of a linker: atom `i` receives `functional_group`
    in place of the neighbor at position `k` of `connected_atom_list`.

    The bookkeeping lists (delete_list, extra_atom_coords, extra_atom_types,
    functionalized_atoms) are appended to in place and also returned.

    Parameters
    ----------
        molcif : molSimplify.Classes.mol3D.mol3D
            The cell of the cif file to be functionalized.
        allatomtypes : list of str
            The atom types of the MOF, indicated by chemical symbols like 'O' and 'Cu'. Length is the number of atoms.
        i : int
            The global index of the atom to be functionalized.
        connected_atom_list : numpy.ndarray of numpy.int32
            The indices of the atoms connected to the atom of interest i.
        k : int
            The position in connected_atom_list of the hydrogen that will be replaced with the functional group.
        functional_group : str
            The functional group to use for MOF functionalization.
        linker_cart_coords : numpy.ndarray of numpy.float64
            The Cartesian coordinates of the atoms in the linker. Shape is (number of atoms in linker, 3).
        linker_to_functionalize : list of numpy.int32
            A list of the global atom indices of the atoms in the identified linker.
            The identified linker is the one that has atom i.
        linker_atom_types : list of str
            The chemical symbols of the atoms in the linker. Length is the number of atoms in the linker.
        linker_graph : numpy.ndarray of numpy.float64
            The adjacency matrix of the linker. Shape is (number of atoms in linker, number of atoms in linker).
        functionalization_counter : int
            The number of functionalizations left to be done on the linker.
        delete_list : list of numpy.int32
            The indices of atoms that are deleted because they are replaced by functional groups.
        extra_atom_coords : list of numpy.ndarray of numpy.float64
            The Cartesian coordinates of the atoms added during functionalization.
            Each item of the list is a new functional group.
            Shape of each numpy.ndarray is (number of atoms in functional group, 3).
        extra_atom_types : list of list of str
            The chemical symbols of the atoms added through functional groups. Each inner list is a functional group.
        functionalized_atoms : list of int
            The global indices of atoms that have been functionalized.
        additional_atom_offset : float
            Extent to which to rotate the placement of depth 2 functional group atoms. Give in degrees.
            Useful for preventing atomic overlap / unintended bonds.

    Returns
    -------
        molcif : molSimplify.Classes.mol3D.mol3D
            The cell of the functionalized MOF.
        functionalization_counter : int
            The number of functionalizations left to be done on the linker, as returned by apply_functionalization.
        functionalized : bool
            Indicates whether the number of linker functionalizations requested by the user have been made. True if so, False otherwise.
        delete_list : list of numpy.int32
            The updated indices of atoms that are deleted because they are replaced by functional groups.
        extra_atom_coords : list of numpy.ndarray of numpy.float64
            The updated Cartesian coordinates of the atoms added during functionalization.
        extra_atom_types : list of list of str
            The updated chemical symbols of the atoms added through functional groups. Each inner list is a functional group.
        functionalized_atoms : list of int
            The updated global indices of atoms that have been functionalized.

    """
    # The neighbor being swapped out, e.g. the hydrogen that a fluorine will replace.
    replaced_atom = connected_atom_list[k]

    # Build the functional group on the MOF.
    outcome = apply_functionalization(
        molcif,
        allatomtypes,
        i,
        replaced_atom,
        connected_atom_list,
        functional_group,
        linker_cart_coords,
        linker_to_functionalize,
        linker_atom_types,
        linker_graph,
        functionalization_counter,
        additional_atom_offset=additional_atom_offset,
    )
    molcif, group_atom_types, group_cart_coords, functionalization_counter, functionalized = outcome

    # The replaced atom must not be kept in the final structure.
    delete_list.append(replaced_atom)

    # Record the atoms that the functional group adds to the structure.
    extra_atom_coords.append(group_cart_coords)
    extra_atom_types.append(group_atom_types)

    # Record which atom has been functionalized.
    functionalized_atoms.append(i)

    return (molcif, functionalization_counter, functionalized, delete_list,
            extra_atom_coords, extra_atom_types, functionalized_atoms)
|
|
318
|
+
|
|
319
|
+
def additional_functionalization(i,
    linker_to_functionalize,
    linker_subgraphlist,
    linker_to_functionalize_index,
    path_between_functionalizations,
    functionalized,
    adj_matrix,
    allatomtypes,
    molcif,
    functional_group,
    linker_cart_coords,
    linker_atom_types,
    linker_graph,
    functionalization_counter,
    delete_list,
    extra_atom_coords,
    extra_atom_types,
    functionalized_atoms,
    additional_atom_offset=0):
    """
    Executes one additional functionalization on the specified linker,
    at a position (path_between_functionalizations) bonds away from the atom with index i.

    At most one atom is functionalized per call. A candidate is only used if it has not
    been functionalized yet and meets the same criteria that functionalize_MOF applies to
    the first site: it is a carbon bonded to exactly three atoms, exactly one of which
    is a hydrogen. Candidates that do not meet the criteria are skipped, since
    apply_functionalization raises a ValueError for atoms with more than two remaining neighbors.

    If no functionalization is made, functionalization_counter is returned as zero
    to signal that no more locations on the linker can be functionalized.

    Parameters
    ----------
        i : int
            The global index of the atom from which atoms that are path_between_functionalizations bonds away will be considered.
        linker_to_functionalize : list of numpy.int32
            A list of the global atom indices of the atoms in the identified linker.
            The identified linker is the one that has atom i.
        linker_subgraphlist : list of scipy.sparse.csr.csr_matrix
            The atom connections in the linker subgraph. Length is # of linkers.
        linker_to_functionalize_index : int
            The number identifier of the linker that contains the atom of interest.
        path_between_functionalizations : int
            How many bonds away one functionalized atom should be from another, if functionalized_limit is greater than one.
        functionalized : bool
            Indicates whether the number of linker functionalizations requested by the user have been made. True if so, False otherwise.
            If True, no functionalization is attempted.
        adj_matrix : scipy.sparse.csr.csr_matrix
            1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).
        allatomtypes : list of str
            The atom types of the MOF, indicated by chemical symbols like 'O' and 'Cu'. Length is the number of atoms.
        molcif : molSimplify.Classes.mol3D.mol3D
            The cell of the cif file to be functionalized.
        functional_group : str
            The functional group to use for MOF functionalization.
        linker_cart_coords : numpy.ndarray of numpy.float64
            The Cartesian coordinates of the atoms in the linker. Shape is (number of atoms in linker, 3).
        linker_atom_types : list of str
            The chemical symbols of the atoms in the linker. Length is the number of atoms in the linker.
        linker_graph : numpy.ndarray of numpy.float64
            The adjacency matrix of the linker. Shape is (number of atoms in linker, number of atoms in linker).
        functionalization_counter : int
            The number of functionalizations left to be done on the linker.
        delete_list : list of numpy.int32
            The indices of atoms that are deleted because they are replaced by functional groups.
        extra_atom_coords : list of numpy.ndarray of numpy.float64
            The Cartesian coordinates of the atoms added during functionalization.
            Each item of the list is a new functional group.
            Shape of each numpy.ndarray is (number of atoms in functional group, 3).
        extra_atom_types : list of list of str
            The chemical symbols of the atoms added through functional groups. Each inner list is a functional group.
        functionalized_atoms : list of int
            The global indices of atoms that have been functionalized.
        additional_atom_offset : float
            Extent to which to rotate the placement of depth 2 functional group atoms. Give in degrees.
            Useful for preventing atomic overlap / unintended bonds.

    Returns
    -------
        molcif : molSimplify.Classes.mol3D.mol3D
            The cell of the functionalized MOF.
        functionalization_counter : int
            The number of functionalizations left to be done on the linker.
            Zero if no functionalization could be made by this call.
        delete_list : list of numpy.int32
            The updated indices of atoms that are deleted because they are replaced by functional groups.
        extra_atom_coords : list of numpy.ndarray of numpy.float64
            The updated Cartesian coordinates of the atoms added during functionalization.
        extra_atom_types : list of list of str
            The updated chemical symbols of the atoms added through functional groups. Each inner list is a functional group.
        functionalized_atoms : list of int
            The updated global indices of atoms that have been functionalized.

    """
    original_functionalization_counter = functionalization_counter

    # linker_to_functionalize holds global atom indices; its positions are the linker-local indices.
    anchor_idx = linker_to_functionalize.index(i)
    G = make_networkx_graph(linker_subgraphlist[linker_to_functionalize_index])  # The graph for the linker of interest.
    # Find paths that end path_between_functionalizations bonds away from the anchor.
    n_path_lengths_away = findPaths(G, anchor_idx, path_between_functionalizations)

    if not functionalized:
        for path in n_path_lengths_away:
            # The last point on the path is the candidate, as a linker-local index.
            potential_functionalization = path[-1]
            functionalization_index = linker_to_functionalize[potential_functionalization]  # Global index of the candidate.

            if functionalization_index in functionalized_atoms:
                continue  # This atom has already been functionalized.
            if allatomtypes[functionalization_index] != 'C':
                continue  # Only C-H bonds are functionalized.

            # Get the neighbors of the atom that we are considering for functionalization.
            secondary_connected_atom_list, secondary_connected_atom_types = connected_atoms_from_adjmat(adj_matrix, functionalization_index, allatomtypes)

            # Same suitability test as the first functionalization: exactly one hydrogen among
            # exactly three neighbors. Other sites would make apply_functionalization raise or
            # fall outside what it supports, so they are skipped.
            if secondary_connected_atom_types.count('H') != 1 or len(secondary_connected_atom_types) != 3:
                continue

            hydrogen_position = secondary_connected_atom_types.index('H')
            hydrogen_index = secondary_connected_atom_list[hydrogen_position]

            molcif, atom_types_to_add, additions_to_cart, functionalization_counter, functionalized = apply_functionalization(molcif,
                allatomtypes, functionalization_index, hydrogen_index, secondary_connected_atom_list,
                functional_group, linker_cart_coords, linker_to_functionalize, linker_atom_types, linker_graph,
                functionalization_counter, additional_atom_offset=additional_atom_offset)
            delete_list.append(hydrogen_index)
            extra_atom_coords.append(additions_to_cart)
            extra_atom_types.append(atom_types_to_add)
            functionalized_atoms.append(functionalization_index)
            break  # Only one functionalization per call.

    if functionalization_counter == original_functionalization_counter:
        # No functionalization was made, so there are no more locations on the linker that can be functionalized.
        functionalization_counter = 0  # No more functionalizations to be done.

    return molcif, functionalization_counter, delete_list, extra_atom_coords, extra_atom_types, functionalized_atoms
|
|
443
|
+
|
|
444
|
+
def apply_functionalization(molcif, allatomtypes, position_to_functionalize, atom_to_replace, position_to_functionalize_neighbors,
    functional_group, linker_cart_coords, linker_to_analyze, linker_atom_types, linker_graph, functionalization_counter, additional_atom_offset=0):
    """
    Places the functional group at the atom with index position_to_functionalize.
    Supports some multi-atom functional groups.

    position_to_functionalize is distinct from atom_to_replace: when functionalizing a C-H bond,
    position_to_functionalize is the C and atom_to_replace is the H.
    The atom to functionalize belongs to the linker described by linker_cart_coords,
    linker_to_analyze, linker_atom_types, and linker_graph.
    The original author's note lists the functionalizations that can be handled as
    NH2, CH3, NO2, CF3, CN, OH, SH; position_to_functionalize_neighbors allows determination
    of the plane of functionalization (important for symmetry preservation).

    Parameters
    ----------
        molcif : molSimplify.Classes.mol3D.mol3D
            The cell of the cif file to be functionalized.
        allatomtypes : list of str
            The atom types of the cif file, indicated by chemical symbols like 'O' and 'Cu'. Length is the number of atoms.
            Not referenced in the body of this function.
        position_to_functionalize : int
            The global index of the atom to functionalize.
            For the current state of the code, this is a carbon.
        atom_to_replace : numpy.int32
            The index of the atom to replace with the specified functional group.
            For the current state of the code, this is a hydrogen.
        position_to_functionalize_neighbors : numpy.ndarray of numpy.int32
            The indices of atoms bonded to the atom with index position_to_functionalize.
        functional_group : str
            The functional group to use for MOF functionalization.
        linker_cart_coords : numpy.ndarray of numpy.float64
            The Cartesian coordinates of the atoms in the linker. Shape is (number of atoms in linker, 3).
        linker_to_analyze : list of numpy.int32
            The indices of the atoms in the linker.
        linker_atom_types : list of str
            The chemical symbols of the atoms in the linker.
        linker_graph : numpy.ndarray of numpy.float64
            The adjacency matrix of the linker. 1 indicates a bond. 0 indicates the absence of a bond.
        functionalization_counter : int
            The number of functionalizations left to be done on the linker.
        additional_atom_offset : float
            Extent to which to rotate the placement of depth 2 functional group atoms. Give in degrees.
            Useful for preventing atomic overlap / unintended bonds.

    Returns
    -------
        molcif : molSimplify.Classes.mol3D.mol3D
            The cell of the functionalized MOF.
        atom_types_to_add : list of str
            The chemical symbols of the atoms added. These are the atoms in the functional group.
        additions_to_cart : numpy.ndarray of numpy.float64
            The Cartesian coordinates of the atoms added during functionalization. Shape is (number of atoms in functional group, 3).
        functionalization_counter : int
            The number of functionalizations left to be done on the linker. One less than the value passed in.
        functionalized : bool
            Indicates whether the number of linker functionalizations requested by the user have been made. True if so, False otherwise.

    Raises
    ------
        ValueError
            If the atom to functionalize has more than two neighbors besides atom_to_replace.

    """
    (connection_length_dict,
     connection_atom_dict,
     bond_length_dict,
     bond_angle_dict,
     bond_rotation_dict) = geo_dict_loader()

    # Construct the plane to work on from the neighbors that stay in place.
    neighbors_not_to_replace = list(set(position_to_functionalize_neighbors) - set([atom_to_replace]))
    if len(neighbors_not_to_replace) > 2:
        # More than three atoms are bonded to the atom to functionalize.
        raise ValueError('currently can only work with sp2 carbons. This is > sp2.')

    # Linker-local indices of the functionalization site and of the first remaining neighbor.
    functionalization_position_on_linker = linker_to_analyze.index(position_to_functionalize)
    reference_neighbor_on_linker = linker_to_analyze.index(neighbors_not_to_replace[0])

    # Chemical symbols of the atoms bonded to that first remaining neighbor.
    sparse_linker_graph = scipy.sparse.csr_matrix(linker_graph)
    _, reference_neighbor_types = connected_atoms_from_adjmat(sparse_linker_graph, reference_neighbor_on_linker, linker_atom_types)

    # Vector preparation.
    initial_placement, directional_unit_vector, norm_cp = vector_preparation(
        reference_neighbor_types,
        neighbors_not_to_replace,
        linker_to_analyze,
        linker_cart_coords,
        functionalization_position_on_linker,
        connection_length_dict,
        functional_group,
    )

    # Add the atom3D for the connecting atom.
    # For a monoatomic functional group this is the whole functionalization.
    molcif, atom_types_to_add, additions_to_cart = connecting_atom_functionalization(
        connection_atom_dict,
        functional_group,
        initial_placement,
        molcif,
    )

    # Multiatomic functionalization, needed for groups like CF3:
    # adds the atoms (e.g. the fluorines) that are not the connecting atom (e.g. the C).
    group_is_multiatomic = len(connection_atom_dict[functional_group]) > 1
    if group_is_multiatomic:
        molcif, atom_types_to_add, additions_to_cart = multiatomic_functionalization(
            connection_atom_dict,
            bond_length_dict,
            bond_angle_dict,
            bond_rotation_dict,
            functional_group,
            directional_unit_vector,
            norm_cp,
            initial_placement,
            additions_to_cart,
            atom_types_to_add,
            molcif,
            additional_atom_offset=additional_atom_offset,
        )

    # One functionalization has been used up. The linker is done once none are left.
    functionalization_counter = functionalization_counter - 1
    functionalized = True if functionalization_counter == 0 else False

    return molcif, atom_types_to_add, additions_to_cart, functionalization_counter, functionalized
|
|
562
|
+
|
|
563
|
+
def analyze_linker(cart_coords,
    linker_to_analyze,
    allatomtypes,
    linker_subgraphlist,
    linker_to_analyze_index,
    cell_v):
    """
    Gathers the atom types, adjacency matrix, and Cartesian coordinates of one linker.

    The coordinates are passed through XYZ_connected and then converted back to
    Cartesian coordinates with fractional2cart.

    Parameters
    ----------
        cart_coords : numpy.ndarray of numpy.float64
            The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).
        linker_to_analyze : list of numpy.int32
            A list of the global atom indices of the atoms in the identified linker.
            The identified linker is the one that has atom i.
        allatomtypes : list of str
            The atom types of the MOF, indicated by chemical symbols like 'O' and 'Cu'. Length is the number of atoms.
        linker_subgraphlist : list of scipy.sparse.csr.csr_matrix
            The atom connections in the linker subgraph. Length is # of linkers.
        linker_to_analyze_index : int
            The number identifier of the linker that contains the atom of interest.
        cell_v : numpy.ndarray of numpy.float64
            Each row corresponds to one of the cell vectors. Shape is (3, 3).

    Returns
    -------
        linker_atom_types : list of str
            The chemical symbols of the atoms in the linker. Length is the number of atoms in the linker.
        linker_graph : numpy.ndarray of numpy.float64
            The adjacency matrix of the linker. Shape is (number of atoms in linker, number of atoms in linker).
        linker_cart_coords : numpy.ndarray of numpy.float64
            The Cartesian coordinates of the atoms in the linker. Shape is (number of atoms in linker, 3).

    """
    # Collect the position and the chemical symbol of every atom in the linker.
    linker_coords = []
    linker_atom_types = []
    for atom_index in linker_to_analyze:
        linker_coords.append(cart_coords[atom_index, :])
        linker_atom_types.append(allatomtypes[atom_index])

    # Dense adjacency matrix of this linker; tells what is connected to what.
    linker_graph = np.asarray(linker_subgraphlist[linker_to_analyze_index].todense())

    # XYZ_connected gives the connected atoms in fractional coordinates (positions may shift);
    # the cell vectors translate those back to Cartesian coordinates.
    linker_f_coords = XYZ_connected(cell_v, linker_coords, linker_graph)
    linker_cart_coords = fractional2cart(linker_f_coords, cell_v)

    return linker_atom_types, linker_graph, linker_cart_coords
|
|
611
|
+
|
|
612
|
+
def symmetry_check(allatomtypes, fcoords, cell_v, precision=1):
    """
    Prints the space group symbol and number of the provided MOF.

    The report is made twice: once for the cell as given, and once after the cell has been
    standardized with spglib and its lattice has been Niggli reduced (maximally reduced cell).

    Parameters
    ----------
    allatomtypes : list of str
        The atom types of the MOF, indicated by chemical symbols like 'O' and 'Cu'. Length is the number of atoms.
    fcoords : numpy.ndarray of numpy.float64
        The fractional positions of the crystal atoms. Shape is (number of atoms, 3).
    cell_v : numpy.ndarray of numpy.float64
        Each row corresponds to one of the cell vectors. Shape is (3, 3).
    precision : float
        Cartesian distance tolerance and angle tolerance.
        https://spglib.github.io/spglib/variable.html#variables-symprec

    Returns
    -------
    None

    """
    # spglib wants one integer label per atom, with atoms of the same element sharing a label.
    # Labels are assigned in order of first appearance so the mapping is reproducible between runs.
    label_of_type = {}
    for atom_type in allatomtypes:
        label_of_type.setdefault(atom_type, len(label_of_type) + 1)
    numbers = [label_of_type[atom_type] for atom_type in allatomtypes]

    full_cell_for_spg = (cell_v, fcoords, numbers)
    spcg = spglib.get_spacegroup(full_cell_for_spg, symprec=precision)
    # Use the same tolerance as get_spacegroup; otherwise the number printed below is computed
    # with spglib's default symprec and can disagree with the symbol printed next to it.
    dataset = spglib.get_symmetry_dataset(full_cell_for_spg, symprec=precision)
    space_group_number = int(dataset['number'])
    print('spacegroup before:', spcg)
    print('space group number before:', space_group_number)

    lattice_new, scaled_positions_new, numbers_new = spglib.standardize_cell(
        full_cell_for_spg, to_primitive=False, no_idealize=False,
        symprec=precision, angle_tolerance=precision)
    niggli_lattice = spglib.niggli_reduce(lattice_new, eps=1e-5)  # Niggli reduction
    standardized_cell = (niggli_lattice, scaled_positions_new, numbers_new)

    spcg = spglib.get_spacegroup(standardized_cell, symprec=precision)
    dataset = spglib.get_symmetry_dataset(standardized_cell, symprec=precision)
    space_group_number = int(dataset['number'])
    print('spacegroup after standardization:', spcg)
    print('space group number after standardization:', space_group_number)
|
|
652
|
+
|
|
653
|
+
def linker_identification(linker_list, i, checkedlist):
    """
    Identifies which linker the atom i is in.

    Parameters
    ----------
    linker_list : list of lists of ints
        Each inner list is its own separate linker. The ints are the global atom indices of that linker. Length is # of linkers.
    i : int
        The global index of the atom of interest.
    checkedlist : set of int
        The indices of atoms that have already been checked for functionalization.

    Returns
    -------
    linker_to_analyze : list of numpy.int32
        A list of the global atom indices of the atoms in the identified linker.
        The identified linker is the first one in linker_list that has atom i.
        This is the inner list itself, not a copy.
    linker_to_analyze_index : int
        The number identifier of the linker that contains the atom of interest.
    checkedlist : set of int
        The indices of atoms that have already been checked for functionalization.
        Updated in place to include the atoms in the identified linker.

    Raises
    ------
    ValueError
        If atom i is not in any of the linkers.

    """
    for linker_num, linker in enumerate(linker_list):  # Iterate over the linkers of the MOF.
        if i in linker:  # The atom i (which is to be functionalized) is in the current linker.
            # Mark the whole linker as checked, so that it is not picked for functionalization again.
            checkedlist.update(linker)
            return linker, linker_num, checkedlist

    # No linker contained atom i.
    raise ValueError(f"Atom {i} was not in any linker - something has gone wrong.")
|
|
690
|
+
|
|
691
|
+
def geo_dict_loader():
    """
    Returns geometry information on the supported functional groups.
    Currently, these are F, Cl, Br, I, CH3, CN, NH2, NO2, CF3, OH, and SH.

    Parameters
    ----------
    None

    Returns
    -------
    connection_length_dict : dict
        For each functional group, the bond length between the connecting atom of the functional group and a carbon atom, in angstroms.
    connection_atom_dict : dict
        For each functional group, a list whose first entry is the element that connects to the carbon atom.
        For multiatomic functional groups, the second entry is the element of the remaining atoms.
    bond_length_dict : dict
        For each multiatomic functional group, the bond length between the connecting atom and the other atoms of the functional group, in angstroms.
    bond_angle_dict : dict
        For each multiatomic functional group, the angle (in degrees) between the non-connecting atoms and the directional unit vector
        that goes through the connecting carbon and the connecting atom of the functional group.
        For example, for CF3, this is the C-C-F angle, where the first C is the carbon being functionalized and the second C is part of the CF3 functional group.
    bond_rotation_dict : dict
        For each multiatomic functional group, either [rotation angle in degrees, number of additional atoms to place by rotation],
        or 0 when there is no additional atom to place. E.g. for CH3, the hydrogens are 120 degrees rotated apart.

    """
    # How far to place the connecting atom of the functional group from the connecting carbon.
    # According to the original authors, all lengths and angles come from DFT calculations on the solo linker.
    connection_length_dict = dict(
        F=1.37, Cl=1.80, Br=2.03, I=2.23,
        CH3=1.52, CN=1.44, NH2=1.38, NO2=1.49, CF3=1.51, OH=1.36, SH=1.82,
    )

    # Entry 0 of each list is the connecting atom. For example, CH3 connects through the carbon.
    connection_atom_dict = dict(
        F=['F'], Cl=['Cl'], Br=['Br'], I=['I'],
        CH3=['C', 'H'], CN=['C', 'N'], NH2=['N', 'H'], NO2=['N', 'O'],
        CF3=['C', 'F'], OH=['O', 'H'], SH=['S', 'H'],
    )

    # Bond length within the functional group (i.e. C-H, N-H, C-F, N-O, etc.).
    bond_length_dict = dict(CH3=1.09, CN=1.17, NH2=1.02, NO2=1.25, CF3=1.36, OH=1.03, SH=1.34)

    # Bond angle within the functional group, in degrees.
    bond_angle_dict = dict(CH3=110, CN=180, NH2=120, NO2=122.5, CF3=112, OH=100, SH=100)

    # [angle to rotate by, number of times the motif is repeated on top of the initial bond placement].
    # E.g. CH3 needs 2 more C-H bonds in addition to the one originally placed.
    bond_rotation_dict = dict(
        CH3=[120, 2], CN=0, NH2=[180, 1], NO2=[180, 1], CF3=[120, 2], OH=0, SH=0,
    )

    return connection_length_dict, connection_atom_dict, bond_length_dict, bond_angle_dict, bond_rotation_dict
|
|
737
|
+
|
|
738
|
+
def vector_preparation(connected_atom_types, neighbors_not_to_replace, linker_to_analyze, linker_cart_coords, functionalization_position_on_linker,
    connection_length_dict, functional_group):
    """
    Prepares placement information for monoatomic and multiatomic functionalization.

    Parameters
    ----------
    connected_atom_types : list of str
        The chemical symbols of the atoms bonded to one of the not-to-be-replaced atoms (which is in turn bonded to the atom to functionalize).
    neighbors_not_to_replace : list of numpy.int32
        The global indices of not-to-be-replaced atoms bonded to the atom to functionalize.
        Only the first two entries are used.
    linker_to_analyze : list of numpy.int32
        The indices of the atoms in the linker.
    linker_cart_coords : numpy.ndarray of numpy.float64
        The Cartesian coordinates of the atoms in the linker. Shape is (number of atoms in linker, 3).
    functionalization_position_on_linker : int
        The linker index of the atom to functionalize.
    connection_length_dict : dict
        How far to place the functional group from the connecting carbon.
    functional_group : str
        The functional group to use for MOF functionalization.

    Returns
    -------
    initial_placement : numpy.ndarray of numpy.float64
        The Cartesian coordinates of the connecting atom of the functional group.
        Pertinent to mono and multiatomic functionalization. Shape is (3,).
    directional_unit_vector : numpy.ndarray of numpy.float64
        Normalized sum of the two vectors that point from the two not-to-be-replaced neighbor atoms to the atom to be functionalized.
        Pertinent to multiatomic functionalization later. Shape is (3,).
    norm_cp : numpy.ndarray of numpy.float64
        Normalized cross product of the two vectors that point from the two not-to-be-replaced neighbor atoms to the atom to be functionalized.
        Pertinent to multiatomic functionalization. Shape is (3,).

    Raises
    ------
    ValueError
        If checkplanar reports that the placed atom is not in the plane of the atom to functionalize and its two neighbors.

    """
    # Decide which of the two not-to-be-replaced neighbors comes first.
    # The order fixes the sign of the cross product computed below.
    if 'H' in connected_atom_types:
        print('First type of vector_a')
        first_global, second_global = neighbors_not_to_replace[0], neighbors_not_to_replace[1]
    else:
        print('Second type of vector_a')
        first_global, second_global = neighbors_not_to_replace[1], neighbors_not_to_replace[0]

    # Translate the global atom indices to positions within the linker.
    neighbor1 = linker_to_analyze.index(first_global)
    neighbor2 = linker_to_analyze.index(second_global)

    anchor_xyz = linker_cart_coords[functionalization_position_on_linker]
    first_xyz = linker_cart_coords[neighbor1]
    second_xyz = linker_cart_coords[neighbor2]

    # Vectors from each neighbor to the atom to functionalize.
    v1 = anchor_xyz - first_xyz
    v2 = anchor_xyz - second_xyz

    # The sum of the two vectors points away from both neighbors: the direction in which to place the functional group.
    directional_unit_vector = (v1 + v2) / np.linalg.norm(v1 + v2)

    # Unit vector perpendicular to both v1 and v2.
    cp = np.cross(v1, v2)
    norm_cp = cp / np.linalg.norm(cp)

    # Step from the atom to functionalize along the direction, by the connection bond length.
    initial_placement = anchor_xyz + directional_unit_vector * connection_length_dict[functional_group]

    # The placed atom should be in the plane with the original 3 atoms.
    if not checkplanar(initial_placement, anchor_xyz, first_xyz, second_xyz):
        raise ValueError('This atom is not planar to the original 3 atoms. Issue detected. Exiting.')

    return initial_placement, directional_unit_vector, norm_cp
|
|
809
|
+
|
|
810
|
+
def connecting_atom_functionalization(connection_atom_dict,
    functional_group,
    initial_placement,
    molcif):
    """
    Adds the connecting atom of the functional group.

    Parameters
    ----------
    connection_atom_dict : dict
        For each functional group, indicates the element in the functional group that connects to the carbon atom; then indicates remaining elements for multiatomic functional groups.
    functional_group : str
        The functional group to use for MOF functionalization.
    initial_placement : numpy.ndarray of numpy.float64
        The Cartesian coordinates of the connecting atom of the functional group.
        Shape is (3,).
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the cif file to be functionalized.

    Returns
    -------
    molcif : molSimplify.Classes.mol3D.mol3D
        The modified cell of the cif file. Same object that was passed in.
    atom_types_to_add : list of str
        The chemical symbols of the atoms added. When leaving this function, length is 1.
    additions_to_cart : numpy.ndarray of numpy.float64
        The Cartesian coordinates of the atoms added during functionalization. When leaving this function, shape is (1, 3).

    """
    # The first entry for a functional group is the element that bonds to the carbon.
    connecting_symbol = connection_atom_dict[functional_group][0]
    molcif.addAtom(atom3D(connecting_symbol, initial_placement))

    # Start the bookkeeping of what has been added to the cell.
    additions_to_cart = np.array([initial_placement])
    atom_types_to_add = [connecting_symbol]
    return molcif, atom_types_to_add, additions_to_cart
|
|
845
|
+
|
|
846
|
+
def multiatomic_functionalization(connection_atom_dict,
    bond_length_dict,
    bond_angle_dict,
    bond_rotation_dict,
    functional_group,
    directional_unit_vector,
    norm_cp,
    initial_placement,
    additions_to_cart,
    atom_types_to_add,
    molcif,
    additional_atom_offset=0):
    """
    Adds functional group atoms that are not the connecting atom.
    Adds these atoms to the connecting atom.

    Parameters
    ----------
    connection_atom_dict : dict
        For each functional group, indicates the element in the functional group that connects to the carbon atom; then indicates remaining elements for multiatomic functional groups.
    bond_length_dict : dict
        For each functional group (as applicable), indicates the bond length of the connecting atom on the functional group to the other atoms in the functional group, in angstroms.
    bond_angle_dict : dict
        For each functional group (as applicable), indicates how far off a straight line the non-connecting functional group atoms are,
        relative to a directional unit vector that goes through the connecting carbon and the connecting atom on the functional group.
        For example, for CF3, this is the C-C-F angle, where the first C is the carbon being functionalized and the second C is part of the CF3 functional group.
    bond_rotation_dict : dict
        For each functional group (as applicable), either 0 (no further atoms to place) or
        [rotation angle in degrees, number of additional atoms to place by rotation]. E.g. for CH3, the hydrogens are 120 degrees rotated apart.
    functional_group : str
        The functional group to use for MOF functionalization.
    directional_unit_vector : numpy.ndarray of numpy.float64
        Normalized sum of the two vectors from the two not-to-be-replaced neighbor atoms to the atom to be functionalized.
        Shape is (3,).
    norm_cp : numpy.ndarray of numpy.float64
        Normalized cross product of the two vectors from the two not-to-be-replaced neighbor atoms to the atom to be functionalized.
        Shape is (3,).
    initial_placement : numpy.ndarray of numpy.float64
        The Cartesian coordinates of the connecting atom of the functional group.
        Shape is (3,).
    additions_to_cart : numpy.ndarray of numpy.float64
        The Cartesian coordinates of the atoms added during functionalization. When entering this function, shape is (1, 3).
    atom_types_to_add : list of str
        The chemical symbols of the atoms added. When entering this function, length is 1. Appended to in place.
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the MOF to be functionalized.
    additional_atom_offset : float
        Extent to which to rotate the placement of depth 2 functional group atoms. Give in degrees.
        Useful for preventing atomic overlap / unintended bonds.

    Returns
    -------
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the functionalized MOF.
    atom_types_to_add : list of str
        The chemical symbols of the atoms added. These are the atoms in the functional group.
    additions_to_cart : numpy.ndarray of numpy.float64
        The Cartesian coordinates of the atoms added during functionalization. Shape is (number of atoms in functional group, 3).

    """
    deg2rad = 2*np.pi/360
    bond_length = bond_length_dict[functional_group]
    bonded_symbol = connection_atom_dict[functional_group][1]

    # Tilt the placement direction away from the directional unit vector, within the plane perpendicular to norm_cp,
    # by 180 degrees minus the bond angle from the dictionary.
    tilt = (180-bond_angle_dict[functional_group])*deg2rad
    along_axis = np.cos(tilt)*directional_unit_vector*bond_length
    off_axis = np.sin(tilt)*np.cross(norm_cp, directional_unit_vector*bond_length)
    bonded_placement = along_axis + off_axis  # Shape is (3,), as is the shape of initial_placement.

    # Position of the first non-connecting atom, spun about the directional unit vector
    # by additional_atom_offset if requested.
    final_placement = initial_placement + bonded_placement
    final_placement = np.array(PointRotateAxis(directional_unit_vector.tolist(),
                                               initial_placement.tolist(),
                                               final_placement.tolist(),
                                               additional_atom_offset*deg2rad))

    # This is where one of the hydrogens in CH3 is added, for example.
    molcif.addAtom(atom3D(bonded_symbol, final_placement))
    additions_to_cart = np.concatenate((additions_to_cart, np.array([final_placement])))
    atom_types_to_add.append(bonded_symbol)

    rotation_spec = bond_rotation_dict[functional_group]
    if rotation_spec != 0:
        rotation_angle, num_rotations = rotation_spec[0], rotation_spec[1]
        rotated_atom3Ds = []
        # This is where the two extra H's in CH3 are added, for example.
        # Each copy is the first non-connecting atom rotated about the directional unit vector.
        for rotation_number in range(1, num_rotations + 1):
            rotate_by = rotation_angle*rotation_number*deg2rad
            rotated_coords = PointRotateAxis(directional_unit_vector.tolist(),
                                             initial_placement.tolist(),
                                             final_placement.tolist(),
                                             rotate_by)
            rotated_atom3Ds.append(atom3D(bonded_symbol, rotated_coords))
            additions_to_cart = np.concatenate((additions_to_cart, np.array([rotated_coords])))
            atom_types_to_add.append(bonded_symbol)

        # The rotated atoms go into the cell only once all of them have been built.
        for rotated_atom in rotated_atom3Ds:
            molcif.addAtom(rotated_atom)

    return molcif, atom_types_to_add, additions_to_cart
|
|
938
|
+
|
|
939
|
+
def make_networkx_graph(adj_matrix):
    """
    Makes a networkx graph of the bonds of the atoms in the linker specified by adj_matrix.

    Parameters
    ----------
    adj_matrix : scipy.sparse.csr.csr_matrix
        The atom connections of a linker subgraph. Indicates what atoms are bonded to what.
        A dense matrix is accepted as well.

    Returns
    -------
    G : networkx.classes.graph.Graph
        The networkx graph of the bonds of the atoms in the linker.
        Only atoms that take part in at least one bond show up as nodes.

    """
    dense_adjacency = adj_matrix.todense() if scipy.sparse.issparse(adj_matrix) else adj_matrix

    # An entry equal to 1 indicates a bond. Anything else is treated as no bond.
    bonded_rows, bonded_cols = np.where(np.array(dense_adjacency) == 1)

    G = nx.Graph()
    G.add_edges_from(zip(bonded_rows.tolist(), bonded_cols.tolist()))
    return G
|
|
961
|
+
|
|
962
|
+
def get_linkers(molcif, adj_matrix, allatomtypes):
    """
    Returns information on the linkers in the provided MOF.
    Similar to the code in molSimplify.Informatics.MOF.MOF_descriptors.get_MOF_descriptors. Specifically, step 1: metallic part

    Parameters
    ----------
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the cif file being analyzed.
    adj_matrix : scipy.sparse.csr.csr_matrix
        1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).
    allatomtypes : list of str
        The atom types of the cif file, indicated by chemical symbols like 'O' and 'Cu'. Length is the number of atoms.
        Not used by this function at present.

    Returns
    -------
    linker_list : list of lists of ints
        Each inner list is its own separate linker. The ints are the global atom indices of that linker. Length is # of linkers.
    linker_subgraphlist : list of scipy.sparse.csr.csr_matrix
        The atom connections in the linker subgraph. Length is # of linkers.

    """
    # Consider all metals, not only transition metals.
    metal_indices = molcif.findMetal(transition_metals_only=False)

    # The SBU atoms are the metals, plus every atom bonded to a metal.
    SBUlist = set(metal_indices)
    for metal in metal_indices:
        SBUlist.update(molcif.getBondedAtomsSmart(metal))

    # Atoms to strip out before looking for linkers. All metals are removed.
    removelist = set(metal_indices)

    # Also remove any SBU atom all of whose bonded atoms are metals or hydrogens.
    # An atom with no bonded atoms at all satisfies this too, since all() of nothing is True.
    for atom in SBUlist:
        only_metal_or_h = True
        for val in molcif.getBondedAtomsSmart(atom):
            bonded = molcif.getAtom(val)
            if not (bonded.ismetal() or bonded.symbol().upper() == 'H'):
                only_metal_or_h = False
                break
        if only_metal_or_h:
            removelist.add(atom)

    # Every atom index from 0 to the number of atoms - 1, minus the removed ones.
    linkers = set(range(adj_matrix.shape[0])) - removelist

    # Copies are passed along, so this function's own sets are not modified by the helper.
    linker_list, linker_subgraphlist = get_closed_subgraph(linkers.copy(), removelist.copy(), adj_matrix)
    return linker_list, linker_subgraphlist
|
|
1003
|
+
|
|
1004
|
+
def connected_atoms_from_adjmat(adj_matrix, index, allatomtypes):
    """
    Finds the atoms connected to the atom with the index `index`.
    Works with sparse matrices (and numpy.matrix), as well as with dense 2D numpy arrays.

    Parameters
    ----------
    adj_matrix : scipy.sparse.csr.csr_matrix or numpy.ndarray
        1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).
    index : int
        The index of the atom for which the connected atoms will be found.
    allatomtypes : list of str
        The atom types of the cif file, indicated by chemical symbols like 'O' and 'Cu'. Length is the number of atoms.

    Returns
    -------
    connected_atom_list : numpy.ndarray of numpy.int32
        The indices of the atoms connected to the atom of interest.
    connected_atom_types : list of str
        The chemical symbols of the atoms connected to the atom of interest. Order is the same as connected_atom_list.

    """
    # Slicing one row out of a sparse matrix (or numpy.matrix) keeps it 2D, so nonzero gives (rows, columns);
    # slicing a dense ndarray gives a 1D row, so nonzero gives (columns,).
    # Either way, the last entry holds the column indices, i.e. the atoms bonded to `index`.
    connected_atom_list = np.nonzero(adj_matrix[index, :])[-1]
    connected_atom_types = [allatomtypes[j] for j in connected_atom_list]

    return connected_atom_list, connected_atom_types
|
|
1030
|
+
|
|
1031
|
+
def apply_monatomic_functionalization(molcif, allatomtypes, atom_to_replace, functional_group, functionalization_counter):
    """
    Deprecated way of executing monatomic functionalization.
    Does not take into account the different bond lengths of different functional groups, as is done in vector_preparation's calculation of initial_placement.

    Parameters
    ----------
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell being functionalized. The atom at atom_to_replace is mutated in place.
    allatomtypes : list of str
        The chemical symbols of the atoms in the cell. The entry at atom_to_replace is overwritten in place.
    atom_to_replace : int
        The index of the atom that is swapped for the functional group.
    functional_group : str
        The chemical symbol of the monatomic functional group.
    functionalization_counter : int
        The number of functionalizations still to do, counting this one.

    Returns
    -------
    molcif : molSimplify.Classes.mol3D.mol3D
        The same cell object, after the mutation.
    allatomtypes : list of str
        The same list, after the update.
    functionalization_counter : int
        The incoming counter reduced by one.
    functionalized : bool
        True if the reduced counter is zero, False otherwise.

    """
    # Replace the atom in the cell, then keep the list of symbols in sync with it.
    target_atom = molcif.getAtom(atom_to_replace)
    target_atom.mutate(functional_group)
    allatomtypes[atom_to_replace] = functional_group

    remaining = functionalization_counter - 1
    functionalized = bool(remaining == 0)
    return molcif, allatomtypes, remaining, functionalized
|
|
1045
|
+
|
|
1046
|
+
def check_support(functional_group):
    """
    Raises a ValueError if the functional_group is not in the pre-defined list of supported functional groups.

    Parameters
    ----------
    functional_group : str
        Chemical formula for the functional group.

    Returns
    -------
    dict_approach : bool
        Indicates whether the functional group is added using the dictionary approach.
        If not, functional group is added using some template xyz about its structure, through a merge of mol3D objects.

    Raises
    ------
    ValueError
        If functional_group is in neither list of supported functional groups.

    """
    # Functional groups that are placed using the geometry dictionaries.
    dictionary_groups = ['F', 'Cl', 'Br', 'I', 'CH3', 'CN', 'NH2', 'NO2', 'CF3', 'OH', 'SH']

    # Functional groups that go more than two atoms deep, or are not uniform in bond length at every atom depth.
    # These are added through a mol3D merge instead.
    mol3D_merge_groups = ['OCF3', 'SO3H', 'OCH3']

    dict_approach = functional_group in dictionary_groups
    if not dict_approach and functional_group not in mol3D_merge_groups:
        raise ValueError('Unsupported functional group requested.')
    return dict_approach
|
|
1071
|
+
|
|
1072
|
+
def functionalize_MOF_at_indices(cif_file, path2write, functional_group, func_indices, additional_atom_offset=0):
    """
    Functionalizes the provided MOF at the requested atoms and writes the result to a cif file.

    Each index in func_indices must point at a carbon atom that has exactly three bonded
    neighbors (sp2), at least one of which is a hydrogen. The first hydrogen neighbor found
    is the one replaced by the functional group.

    Parameters
    ----------
    cif_file : str
        The path to the cif file to be functionalized.
    path2write : str
        The folder path where the cif of the functionalized MOF will be written.
        It is concatenated directly with 'cif/', so it should end with a path separator.
    functional_group : str
        The functional group to use for MOF functionalization.
    func_indices : list of int
        The indices of the atoms at which to functionalize. Zero-indexed.
    additional_atom_offset : float or list of float
        Extent to which to rotate the placement of depth 2 functional group atoms. Give in degrees.
        Useful for preventing atomic overlap / unintended bonds.
        If list, must be the length of func_indices. Any non-list value is repeated for every index.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the functional group is unsupported, or if an atom in func_indices is not a carbon,
        has no hydrogen neighbor, or does not have exactly three neighbors.

    """

    # A single offset applies to every functionalization site.
    if not isinstance(additional_atom_offset, list):
        additional_atom_offset = [additional_atom_offset] * len(func_indices)

    if not check_support(functional_group):
        # The requested functional group is more than two atoms deep, or has differing
        # bond lengths/atom identities at a given depth. A different function treats these.
        functionalize_MOF_at_indices_mol3D_merge(cif_file, path2write, functional_group, func_indices, additional_atom_offset)
        return

    ### Start of repeat code (in common with functionalize_MOF) ###
    base_mof_name = os.path.basename(cif_file)
    base_mof_name = base_mof_name[:-4] if base_mof_name.endswith('.cif') else base_mof_name

    # Read the cif file and make the cell for fractional coordinates
    cpar, allatomtypes, fcoords = readcif(cif_file)
    molcif, cell_vector, _alpha, _beta, _gamma = import_from_cif(cif_file, True)
    cell_v = np.array(cell_vector)
    cart_coords = fractional2cart(fcoords, cell_v)
    distance_mat = compute_distance_matrix3(cell_v, cart_coords)
    adj_matrix, _ = compute_adj_matrix(distance_mat, allatomtypes)
    molcif.graph = adj_matrix.todense()
    ### End of repeat code ###

    # Break the structure down into linkers, reusing what was developed for MOF featurization.
    linker_list, linker_subgraphlist = get_linkers(molcif, adj_matrix, allatomtypes)

    checkedlist = set()  # Atoms already checked for functionalization. Not important here.
    delete_list = []  # The hydrogens to delete once all sites have been treated.
    extra_atom_coords = []
    extra_atom_types = []
    functionalized_atoms = []  # Not important here.

    total_sites = len(func_indices)
    for site_number, func_index in enumerate(func_indices):
        print(f'On {site_number+1} out of {total_sites}')
        atom_to_functionalize = allatomtypes[func_index]

        # Atoms that are connected to the atom at index func_index.
        connected_atom_list, connected_atom_types = connected_atoms_from_adjmat(adj_matrix, func_index, allatomtypes)

        # Guard clauses: only an sp2 carbon carrying a hydrogen is a valid site.
        if atom_to_functionalize != 'C':
            raise ValueError('Invalid atom to functionalize: not a carbon atom.')
        if 'H' not in connected_atom_types:
            raise ValueError('Invalid atom to functionalize: no hydrogen neighbor to replace.')
        if len(connected_atom_types) != 3:
            raise ValueError('Invalid atom to functionalize: not an sp2 carbon atom.')

        # Identify the linker that holds the atom to functionalize.
        linker_to_analyze, linker_to_analyze_index, _ = linker_identification(linker_list, func_index, checkedlist)

        linker_atom_types, linker_graph, linker_cart_coords = analyze_linker(
            cart_coords,
            linker_to_analyze,
            allatomtypes,
            linker_subgraphlist,
            linker_to_analyze_index,
            cell_v,
        )

        # Functionalization. Only the first hydrogen neighbor is replaced; the guard above
        # guarantees that one exists.
        hydrogen_position = next(k for k, connected_atom in enumerate(connected_atom_types) if connected_atom == 'H')

        functionalization_counter = 1
        (molcif, functionalization_counter, functionalized, delete_list,
         extra_atom_coords, extra_atom_types, functionalized_atoms) = first_functionalization(
            molcif,
            allatomtypes,
            func_index,
            connected_atom_list,
            hydrogen_position,
            functional_group,
            linker_cart_coords,
            linker_to_analyze,
            linker_atom_types,
            linker_graph,
            functionalization_counter,
            delete_list,
            extra_atom_coords,
            extra_atom_types,
            functionalized_atoms,
            additional_atom_offset=additional_atom_offset[site_number]
        )

    # Apply delete_list and extra_atom_types to build the final atom types and coordinates.
    # Deleting atoms (hydrogens that are replaced by functional groups)
    new_coord_list, final_atom_types = atom_deletion(cart_coords, allatomtypes, delete_list)

    # Adding atoms (the atoms in the functional groups)
    allatomtypes, fcoords = atom_addition(extra_atom_types, final_atom_types, new_coord_list, extra_atom_coords, cell_v)

    # Check that none of the functional group atoms are too close to other atoms in the CIF.
    # If they are, the code will interpret atoms to be bonded that should not be.
    post_functionalization_overlap_and_bonding_check(cell_v, allatomtypes, fcoords, extra_atom_types)

    # Write the cif.
    cif_folder = f'{path2write}cif/'
    mkdir_if_absent(cif_folder)
    write_cif(f'{cif_folder}functionalized_{base_mof_name}_{functional_group}_index.cif', cpar, fcoords, allatomtypes)
|
|
1209
|
+
|
|
1210
|
+
|
|
1211
|
+
def functionalize_MOF_at_indices_mol3D_merge(cif_file, path2write, functional_group, func_indices, additional_atom_offset):
    """
    Functionalizes the provided MOF by merging in template functional groups, then writes a cif file.

    Handles the more challenging functional groups that functionalize_MOF_at_indices delegates.
    For each index, a functionalized BDC template (read from the molSimplify folder
    monofunctionalized_BDC) is translated and rotated so that its functionalized carbon and
    that carbon's two carbon neighbors line up with the matching atoms of the MOF. Only the
    functional group atoms of the aligned template are kept and merged into the MOF.

    Parameters
    ----------
    cif_file : str
        The path to the cif file to be functionalized.
    path2write : str
        The folder path where the cif of the functionalized MOF will be written.
        It is concatenated directly with 'cif/', so it should end with a path separator.
    functional_group : str
        The functional group to use for MOF functionalization.
    func_indices : list of int
        The indices of the atoms at which to functionalize. Zero-indexed.
    additional_atom_offset : list of float
        Extent to which to rotate the placement of depth 2 functional group atoms. Give in degrees.
        Useful for preventing atomic overlap / unintended bonds.
        Must be the length of func_indices.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If an atom in func_indices is not a carbon, has no hydrogen neighbor, does not have
        exactly three neighbors, or does not have exactly two carbon neighbors.

    """

    ### Start of repeat code (in common with functionalize_MOF_at_indices) ###
    base_mof_name = os.path.basename(cif_file)
    base_mof_name = base_mof_name[:-4] if base_mof_name.endswith('.cif') else base_mof_name

    # Read the cif file and make the cell for fractional coordinates
    cpar, allatomtypes, fcoords = readcif(cif_file)
    molcif, cell_vector, _alpha, _beta, _gamma = import_from_cif(cif_file, True)
    cell_v = np.array(cell_vector)
    cart_coords = fractional2cart(fcoords, cell_v)
    distance_mat = compute_distance_matrix3(cell_v, cart_coords)
    adj_matrix, _ = compute_adj_matrix(distance_mat, allatomtypes)
    molcif.graph = adj_matrix.todense()
    ### End of repeat code ###

    ### Section with the functional group template ###

    # The template is a whole BDC linker with the requested functional group on it.
    template_path = str(resource_files("molSimplify").joinpath(f"Informatics/MOF/monofunctionalized_BDC/{functional_group}.xyz"))
    functional_group_template = mol3D()
    functional_group_template.readfromxyz(template_path)

    # The important indices of the template. fg stands for functional group.
    fg_anchor_index, fg_fg_indices, fg_main_carbon_index, fg_carbon_neighbor_indices = INDEX_INFO[functional_group]

    ### Begin functionalization process ###

    H_indices_to_delete = []  # The hydrogen atoms replaced with functional groups.
    func_groups = []  # The functional groups (mol3D objects) to merge onto the cif.

    total_sites = len(func_indices)
    for site_number, func_index in enumerate(func_indices):  # Loop over all indices to be functionalized.
        print(f'On {site_number+1} out of {total_sites}')
        atom_to_functionalize = allatomtypes[func_index]

        # Atoms that are connected to the atom at index func_index.
        connected_atom_list, connected_atom_types = connected_atoms_from_adjmat(adj_matrix, func_index, allatomtypes)

        # Guard clauses: the site must be a carbon with two C neighbors and one H neighbor.
        if atom_to_functionalize != 'C':
            raise ValueError('Invalid atom to functionalize: not a carbon atom.')
        if 'H' not in connected_atom_types:
            raise ValueError('Invalid atom to functionalize: no hydrogen neighbor to replace.')
        if len(connected_atom_types) != 3:
            raise ValueError('Invalid atom to functionalize: not an sp2 carbon atom.')

        # Sort the neighbors into hydrogens (to delete later) and carbons (used for alignment).
        H_indices_to_delete.extend(
            connected_atom_list[k] for k, symbol in enumerate(connected_atom_types) if symbol == 'H'
        )
        carbon_neighbor_indices = [
            connected_atom_list[k] for k, symbol in enumerate(connected_atom_types) if symbol == 'C'
        ]
        if len(carbon_neighbor_indices) != 2:
            raise ValueError(f"Unexpected number of carbon neighbors {len(carbon_neighbor_indices)}.")

        # Work on copies so that neither the template nor the MOF is disturbed by the alignment.
        functional_group_clone = mol3D()
        functional_group_clone.copymol3D(functional_group_template)
        molcif_clone = mol3D()
        molcif_clone.copymol3D(molcif)

        # Shift the two neighbor carbons by cell vectors until they are closest to the carbon
        # to be functionalized, and store the shifted Cartesian positions in the MOF copy.
        for neighbor_index in carbon_neighbor_indices:
            image_shift = compute_image_flag(cell_v, fcoords[func_index], fcoords[neighbor_index])
            shifted_cart = fractional2cart(fcoords[neighbor_index]+image_shift, cell_v)
            molcif_clone.getAtoms()[neighbor_index].setcoords(shifted_cart)
        molcif_clone.getAtoms()[func_index].setcoords(cart_coords[func_index])

        # Translation that makes the two main carbons overlap.
        mof_main_carbon = np.array(molcif_clone.getAtom(func_index).coords())
        template_main_carbon = np.array(functional_group_template.getAtom(fg_main_carbon_index).coords())
        translation_vector = mof_main_carbon - template_main_carbon

        # Rotation that best aligns the template carbons with the MOF carbons.
        rotation_vector = scipy.optimize.fmin(
            alignment_objective,
            np.zeros(3),
            args=(molcif_clone, func_index, carbon_neighbor_indices, functional_group_template,
                  fg_main_carbon_index, fg_carbon_neighbor_indices, translation_vector),
        )
        x_rotation, y_rotation, z_rotation = rotation_vector[0], rotation_vector[1], rotation_vector[2]

        # Apply the translation, then the x, y, and z rotations about the main carbon.
        functional_group_clone.translate(translation_vector)
        main_carbon_coordinate = functional_group_clone.getAtom(fg_main_carbon_index).coords()
        for axis, angle in (([1,0,0], x_rotation), ([0,1,0], y_rotation), ([0,0,1], z_rotation)):
            functional_group_clone = rotate_around_axis(functional_group_clone, main_carbon_coordinate, axis, angle)

        # Account for additional_atom_offset by rotating about the vector between the
        # functionalized carbon and the anchor atom of the functional group
        # (e.g. the C in a -CH3 functional group).
        anchor_coordinate = functional_group_clone.getAtom(fg_anchor_index).coords()
        direction_vector = np.array(anchor_coordinate) - np.array(main_carbon_coordinate)
        functional_group_clone = rotate_around_axis(functional_group_clone, main_carbon_coordinate, direction_vector, additional_atom_offset[site_number])

        # Keep only the functional group atoms of the aligned template.
        clone_indices_to_remove = [
            idx for idx in range(functional_group_clone.getNumAtoms()) if idx not in fg_fg_indices
        ]
        functional_group_clone.deleteatoms(clone_indices_to_remove)
        func_groups.append(functional_group_clone)

    # Combining the mol3D objects. Each functional group is added to the end of molcif (index-wise).
    for new_fg in func_groups:
        molcif = molcif.combine(new_fg, dirty=True)

    # Delete hydrogen atoms on the functionalized carbons.
    molcif.deleteatoms(H_indices_to_delete)

    # Getting the fractional coordinates and the atom types.
    fcoords = frac_coord(molcif.coordsvect(), cell_v)
    allatomtypes = molcif.symvect()

    # Write the cif.
    cif_folder = f'{path2write}cif/'
    mkdir_if_absent(cif_folder)
    write_cif(f'{cif_folder}functionalized_{base_mof_name}_{functional_group}_index.cif', cpar, fcoords, allatomtypes)
|
|
1381
|
+
|
|
1382
|
+
|
|
1383
|
+
def alignment_objective(rotation_vector, molcif_clone, MOF_main_carbon_index, MOF_carbon_neighbor_indices,
    functional_group_template, fg_main_carbon_index, fg_carbon_neighbor_indices, translation_vector):
    """
    The objective function that is minimized to find the optimal x, y, and z rotation angles.

    A copy of the functional group BDC template is translated by translation_vector and then
    rotated about the x, y, and z axes through its main carbon. The template itself is not modified.

    Parameters
    ----------
    rotation_vector : numpy.ndarray
        The vector by which to rotate the functional group BDC template.
        Shape is (3,).
    molcif_clone : mol3D
        Structure information on the MOF.
    MOF_main_carbon_index : int
        The index of the carbon to be functionalized in molcif_clone.
    MOF_carbon_neighbor_indices : list of int
        The two indices of the two carbon atoms bonded to the main carbon in molcif_clone.
    functional_group_template : mol3D
        Structure information on the functional group BDC template.
    fg_main_carbon_index : int
        The index of the carbon that is functionalized in functional_group_template.
    fg_carbon_neighbor_indices : list of int
        The two indices of the two carbon atoms bonded to the main carbon atom in functional_group_template.
    translation_vector : numpy.ndarray
        The vector by which to translate the functional group BDC template.
        Shape is (3,).

    Returns
    -------
    objective_function : float
        The sum of the distances between each of the three carbons of interest.
        Compares the carbons in the template BDC with the functional group, and the carbons in the MOF.
        The main carbon is the carbon at which functionalization occurs. The neighbors are bonded to that main carbon.

    """

    # Unpacking
    x_rotation, y_rotation, z_rotation = rotation_vector[0], rotation_vector[1], rotation_vector[2]

    # Work on a copy so the template can be reused for every objective evaluation.
    moved_template = mol3D()
    moved_template.copymol3D(functional_group_template)
    moved_template.translate(translation_vector)

    # Rotate through axes passing through the main carbon, i.e. the carbon to be functionalized.
    pivot = moved_template.getAtom(fg_main_carbon_index).coords()
    for axis, angle in (([1,0,0], x_rotation), ([0,1,0], y_rotation), ([0,0,1], z_rotation)):
        moved_template = rotate_around_axis(moved_template, pivot, axis, angle)

    # NOTE: small degree of flexibility here. Carbon 1 of MOF_carbon_neighbor_indices could be
    # aligned to either carbon 1 or carbon 2 of fg_carbon_neighbor_indices. Carbon 1 is used.
    matched_indices = (
        (MOF_main_carbon_index, fg_main_carbon_index),
        (MOF_carbon_neighbor_indices[0], fg_carbon_neighbor_indices[0]),
        (MOF_carbon_neighbor_indices[1], fg_carbon_neighbor_indices[1]),
    )
    separations = [
        distance(molcif_clone.getAtom(mof_index).coords(), moved_template.getAtom(fg_index).coords())
        for mof_index, fg_index in matched_indices
    ]

    # Want to minimize this.
    return separations[0] + separations[1] + separations[2]
|
|
1445
|
+
|
|
1446
|
+
|
|
1447
|
+
def post_functionalization_overlap_and_bonding_check(cell_v, allatomtypes, fcoords, extra_atom_types):
    """
    Prints the number of interpreted bonds of each functional group atom that was introduced.

    Useful to make sure the functional group atoms are not too close to other atoms.
    The adjacency matrix is computed with handle_overlap=False, so overlapping atoms are
    expected to surface as an error from compute_adj_matrix rather than being handled.

    Parameters
    ----------
    cell_v : numpy.ndarray of numpy.float64
        Each row corresponds to one of the cell vectors. Shape is (3, 3).
    allatomtypes : list of str
        The atom types of the MOF, indicated by chemical symbols like 'O' and 'Cu'. Length is the number of atoms.
    fcoords : numpy.ndarray of numpy.float64
        The fractional positions of the crystal atoms. Shape is (number of atoms, 3).
    extra_atom_types : list of list of str
        The chemical symbols of the atoms added through functional groups. Each inner list is a functional group.

    Returns
    -------
    None

    """

    cart_coords = fractional2cart(fcoords, cell_v)
    distance_mat = compute_distance_matrix3(cell_v, cart_coords)
    # Will throw an error if atoms are overlapping after functionalization.
    sparse_adjacency, _ = compute_adj_matrix(distance_mat, allatomtypes, handle_overlap=False)
    # Converting from numpy.matrix to numpy.array
    adj_matrix = np.squeeze(np.asarray(sparse_adjacency.todense()))

    # Functional group atoms are added to the end of the atom type list and fractional
    # coordinate array (see function atom_addition), so the last rows of the adjacency
    # matrix are the ones that correspond to the added atoms.
    added_symbols = [symbol for group in extra_atom_types for symbol in group]

    # Walk the added atoms from last to first, together with the matrix rows counted from the end.
    for rows_from_end, symbol in enumerate(added_symbols[::-1], start=1):
        print(f'Number of bonds to functional group atom {symbol} is {np.sum(adj_matrix[-rows_from_end])}')
|
|
1482
|
+
|
|
1483
|
+
def atom_deletion(cart_coords, allatomtypes, delete_list):
    """
    Makes new coordinate and atom lists that disregard the undesired hydrogens.

    Deletion is by non-inclusion: every atom whose index is not in delete_list is copied over,
    in the original order. Indices in delete_list that do not correspond to an atom are ignored.

    Parameters
    ----------
    cart_coords : numpy.ndarray of numpy.float64
        The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).
    allatomtypes : list of str
        The atom types of the MOF, indicated by chemical symbols like 'O' and 'Cu'. Length is the number of atoms.
    delete_list : list of numpy.int32
        The indices of atoms that are deleted because they are replaced by functional groups.

    Returns
    -------
    new_coord_list : numpy.ndarray
        The updated Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).
        If no atoms remain, the array has zero rows.
    final_atom_types : list of str
        The updated atom types of the MOF, indicated by chemical symbols like 'O' and 'Cu'. Length is the number of atoms.

    Raises
    ------
    ValueError
        If an index in delete_list points at an atom that is not a hydrogen.
        As the code is implemented right now, only hydrogens should be being replaced.

    """
    print('Initial shape', cart_coords.shape, len(allatomtypes))

    # A set gives constant-time membership checks instead of scanning delete_list for every atom.
    indices_to_delete = set(delete_list)

    kept_indices = []
    for atom_index in range(cart_coords.shape[0]):  # Going from zero through number of atoms - 1
        if atom_index in indices_to_delete:
            if allatomtypes[atom_index] != 'H':
                raise ValueError(
                    f'Atom {atom_index} ({allatomtypes[atom_index]}) is marked for deletion, '
                    'but only hydrogen atoms may be replaced by functional groups.'
                )
            continue
        kept_indices.append(atom_index)

    # One indexing operation copies all surviving rows at once, rather than concatenating
    # row by row. The explicit integer dtype keeps this valid when no atoms survive.
    new_coord_list = cart_coords[np.asarray(kept_indices, dtype=int)]
    final_atom_types = [allatomtypes[atom_index] for atom_index in kept_indices]

    print('Shape after deletions', new_coord_list.shape, len(final_atom_types))  # (shape is (number of atoms, 3))

    return new_coord_list, final_atom_types
|
|
1522
|
+
|
|
1523
|
+
def atom_addition(extra_atom_types, final_atom_types, new_coord_list, extra_atom_coords, cell_v):
    """
    Appends the functional group atoms to the atom types and coordinates of the MOF.

    The functional groups are appended one after another, in the order they appear in
    extra_atom_types. Note that final_atom_types is extended in place and is the same
    list object that is returned.

    Parameters
    ----------
    extra_atom_types : list of list of str
        The chemical symbols of the atoms added through functional groups. Each inner list is a functional group.
    final_atom_types : list of str
        The atom types of the MOF, indicated by chemical symbols like 'O' and 'Cu'. Length is the number of atoms.
    new_coord_list : numpy.ndarray
        The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).
    extra_atom_coords : list of numpy.ndarray of numpy.float64
        The Cartesian coordinates of the atoms added during functionalization.
        Each item of the list is a new functional group.
        Shape of each numpy.ndarray is (number of atoms in functional group, 3).
    cell_v : numpy.ndarray of numpy.float64
        Each row corresponds to one of the cell vectors. Shape is (3, 3).

    Returns
    -------
    allatomtypes : list of str
        The atom types of the MOF, indicated by chemical symbols like 'O' and 'Cu'. Length is the number of atoms.
    fcoords : numpy.ndarray of numpy.float64
        The fractional positions of the crystal atoms. Shape is (number of atoms, 3).

    """
    for group_number, group_symbols in enumerate(extra_atom_types):  # Atoms added upon functionalization
        print('new_fg',group_symbols)
        # Adding to the chemical symbols list.
        final_atom_types.extend(group_symbols)
        # Adding to the coordinates array.
        group_coords = np.array(extra_atom_coords[group_number])
        new_coord_list = np.concatenate((new_coord_list, group_coords), axis=0)

    print('Shape after deletions and inclusion of functional group atoms', new_coord_list.shape, len(final_atom_types))

    return final_atom_types, frac_coord(new_coord_list, cell_v)
|
|
1560
|
+
|
|
1561
|
+
def DS_remover(file_list):
    """
    Filters out every entry that contains 'DS_Store' (e.g. .DS_Store) from a list of files.

    Parameters
    ----------
    file_list : list of str
        A list of files.

    Returns
    -------
    file_list : list of str
        A new list holding the remaining files, in their original order.

    """
    kept_files = []
    for file_name in file_list:
        if 'DS_Store' in file_name:
            continue
        kept_files.append(file_name)
    return kept_files
|
|
1578
|
+
|
|
1579
|
+
def mkdir_if_absent(folder_path):
    """
    Makes a folder at folder_path if nothing exists at that path yet.

    Missing parent folders are made as well. If something already exists at folder_path,
    nothing is done.

    Parameters
    ----------
    folder_path : str
        The folder path to check, and potentially at which to make a folder.

    Returns
    -------
    None

    """
    if not os.path.exists(folder_path):
        # makedirs also makes missing parent folders. exist_ok=True keeps this from failing
        # if the folder gets made by something else between the check above and this call.
        os.makedirs(folder_path, exist_ok=True)
|
|
1595
|
+
|
|
1596
|
+
### End of functions ###
|
|
1597
|
+
|
|
1598
|
+
def _strip_cif_extension(file_name):
    # Removes one trailing '.cif' from file_name; names without that suffix are returned unchanged.
    # str.strip('.cif') is not suitable here: it strips any of the characters '.', 'c', 'i', 'f' from both ends.
    suffix = '.cif'
    if file_name.endswith(suffix):
        return file_name[:-len(suffix)]
    return file_name


def main():
    """
    Runs an example functionalization workflow for the MOF named 'UiO66'.

    Every file listed in the folder 'UiO66/' is passed to get_primitive, with an output path in 'UiO66_p1/'.
    Every file then listed in 'UiO66_p1/' is passed to functionalize_MOF for each functional group and for
    1 and 2 functionalizations, with outputs directed to subfolders of 'UiO66_functionalized/'.
    A functional group subfolder that already exists is skipped.
    Finally, functionalize_MOF_at_indices is called twice with the output folder 'UiO66_index_func/'.

    Parameters
    ----------
        None

    Returns
    -------
        None

    """
    ### Example below ###

    # Functional groups to use
    func_group = ['F','Cl','Br','I','CH3','CN','NH2','NO2','CF3','OH','SH']

    ##### Below is just Aditya's file structure for the paths, can be changed.

    mofname = 'UiO66'
    # Defining folder names
    base_database_path = str(mofname)+'/'
    base_database_path_primitive = str(mofname)+'_p1/'
    base_write_path = str(mofname)+'_functionalized/'

    # The way the code is currently set up, the only folder required prior to running this script is a folder of the name `mofname` with the CIF file of the name `mofname` in it.
    # Functionalization should occur on the primitive cell.
    unmodified_cifs = DS_remover(os.listdir(base_database_path)) # The list of unmodified cifs

    # Made once, before the loop, so that both folders exist even when there are no unmodified cifs.
    mkdir_if_absent(base_database_path_primitive)
    mkdir_if_absent(base_write_path)

    for primMOF in unmodified_cifs:
        primMOF = _strip_cif_extension(primMOF)
        get_primitive(base_database_path+primMOF+'.cif', base_database_path_primitive+primMOF+'.cif')

    primitive_cifs = DS_remover(os.listdir(base_database_path_primitive))
    if not primitive_cifs:
        # Without this check, the index-based step below would fail on the undefined name MOF.
        print(f'No CIF files found in {base_database_path_primitive}; nothing to functionalize.')
        return

    num_list = [1, 2]
    path = 3
    for MOF in primitive_cifs:
        MOF = _strip_cif_extension(MOF)
        for num_func in num_list:
            super_func_folder = f'{base_write_path}{MOF}_{num_func}_functionalization/'
            mkdir_if_absent(super_func_folder)

            for func in func_group:
                func_folder = f'{base_write_path}{MOF}_{num_func}_functionalization/{func}/'
                if os.path.exists(func_folder):
                    continue # This functional group folder already exists, so it is skipped.
                os.mkdir(func_folder) # Making the folder if it doesn't exist yet.
                functionalize_MOF(f'{base_database_path_primitive}{MOF}.cif', func_folder,
                    path_between_functionalizations=path, functionalization_limit=num_func, functional_group=func)

    # Functionalize by index
    # NOTE(review): assumed to run once, after the loops, on the last MOF processed above - confirm against the original indentation.
    index_func_folder = f'{mofname}_index_func/'
    mkdir_if_absent(index_func_folder)
    functionalize_MOF_at_indices(f'{base_database_path_primitive}{MOF}.cif', f'{index_func_folder}', 'F', [57])
    functionalize_MOF_at_indices(f'{base_database_path_primitive}{MOF}.cif', f'{index_func_folder}', 'F', [57, 65])
|
|
1644
|
+
|
|
1645
|
+
### End of example ###
|
|
1646
|
+
|
|
1647
|
+
# Run the example workflow only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
|