molSimplify 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +224 -0
- molSimplify/Classes/__init__.py +6 -0
- molSimplify/Classes/atom3D.py +235 -0
- molSimplify/Classes/dft_obs.py +130 -0
- molSimplify/Classes/globalvars.py +827 -0
- molSimplify/Classes/helpers.py +161 -0
- molSimplify/Classes/ligand.py +2330 -0
- molSimplify/Classes/mGUI.py +2493 -0
- molSimplify/Classes/mWidgets.py +438 -0
- molSimplify/Classes/miniGUI.py +41 -0
- molSimplify/Classes/mol2D.py +260 -0
- molSimplify/Classes/mol3D.py +5846 -0
- molSimplify/Classes/monomer3D.py +253 -0
- molSimplify/Classes/partialcharges.py +226 -0
- molSimplify/Classes/protein3D.py +1178 -0
- molSimplify/Classes/rundiag.py +151 -0
- molSimplify/Data/ML.dat +212 -0
- molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
- molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
- molSimplify/Data/MLS_angle_for_click.dat +8 -0
- molSimplify/Data/MLS_angle_for_inter.dat +23 -0
- molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
- molSimplify/Data/MLS_angle_for_intra.dat +10 -0
- molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
- molSimplify/Data/MLS_angle_for_oa.dat +18 -0
- molSimplify/Data/ML_FSR_for_inter.dat +112 -0
- molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
- molSimplify/Data/ML_bond_for_cat.dat +8 -0
- molSimplify/Data/ML_bond_for_click.dat +8 -0
- molSimplify/Data/ML_bond_for_inter.dat +48 -0
- molSimplify/Data/ML_bond_for_inter2.dat +48 -0
- molSimplify/Data/ML_bond_for_intra.dat +10 -0
- molSimplify/Data/ML_bond_for_intra2.dat +6 -0
- molSimplify/Data/ML_bond_for_oa.dat +18 -0
- molSimplify/Data/bp1.dat +21 -0
- molSimplify/Data/li.dat +3 -0
- molSimplify/Data/no.dat +2 -0
- molSimplify/Data/oct.dat +7 -0
- molSimplify/Data/pbp.dat +8 -0
- molSimplify/Data/spy.dat +6 -0
- molSimplify/Data/sqap.dat +9 -0
- molSimplify/Data/sqp.dat +5 -0
- molSimplify/Data/tbp.dat +6 -0
- molSimplify/Data/tdhd.dat +9 -0
- molSimplify/Data/thd.dat +5 -0
- molSimplify/Data/tpl.dat +4 -0
- molSimplify/Data/tpr.dat +7 -0
- molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
- molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
- molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
- molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
- molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
- molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
- molSimplify/Informatics/MOF/__init__.py +0 -0
- molSimplify/Informatics/MOF/atomic.py +267 -0
- molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
- molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
- molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
- molSimplify/Informatics/Mol2Parser.py +46 -0
- molSimplify/Informatics/RACassemble.py +408 -0
- molSimplify/Informatics/__init__.py +0 -0
- molSimplify/Informatics/active_learning/__init__.py +0 -0
- molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
- molSimplify/Informatics/autocorrelation.py +1930 -0
- molSimplify/Informatics/clean_autocorrelation.py +778 -0
- molSimplify/Informatics/coulomb_analyze.py +67 -0
- molSimplify/Informatics/decoration_manager.py +193 -0
- molSimplify/Informatics/geo_analyze.py +88 -0
- molSimplify/Informatics/geometrics.py +56 -0
- molSimplify/Informatics/graph_analyze.py +163 -0
- molSimplify/Informatics/graph_racs.py +288 -0
- molSimplify/Informatics/jupyter_vis.py +172 -0
- molSimplify/Informatics/lacRACAssemble.py +2192 -0
- molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
- molSimplify/Informatics/misc_descriptors.py +198 -0
- molSimplify/Informatics/organic_fingerprints.py +61 -0
- molSimplify/Informatics/partialcharges.py +345 -0
- molSimplify/Informatics/protein/activesite.py +53 -0
- molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
- molSimplify/Informatics/rac155_geo.py +48 -0
- molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
- molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
- molSimplify/Ligands/12crown4.mol +62 -0
- molSimplify/Ligands/Antipyrine.mol +58 -0
- molSimplify/Ligands/BPAbipy.mol +106 -0
- molSimplify/Ligands/Hpyrrole.mol +26 -0
- molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
- molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
- molSimplify/Ligands/NMe2_-1.xyz +11 -0
- molSimplify/Ligands/PCy3.mol +111 -0
- molSimplify/Ligands/PMe3.xyz +15 -0
- molSimplify/Ligands/PPh3.mol +76 -0
- molSimplify/Ligands/Propyphenazone.mol +77 -0
- molSimplify/Ligands/acac.mol +33 -0
- molSimplify/Ligands/acacen.mol +76 -0
- molSimplify/Ligands/acetate.smi +1 -0
- molSimplify/Ligands/acetate.xyz +9 -0
- molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
- molSimplify/Ligands/acetonitrile.mol +17 -0
- molSimplify/Ligands/alanine.mol +30 -0
- molSimplify/Ligands/alphabetizer.py +21 -0
- molSimplify/Ligands/amine.mol +11 -0
- molSimplify/Ligands/ammonia.mol +12 -0
- molSimplify/Ligands/arginine.mol +58 -0
- molSimplify/Ligands/asparagine.mol +38 -0
- molSimplify/Ligands/aspartic_acid.mol +35 -0
- molSimplify/Ligands/azide.mol +11 -0
- molSimplify/Ligands/benzene.mol +28 -0
- molSimplify/Ligands/benzene_pi.mol +30 -0
- molSimplify/Ligands/benzenedithiol.mol +30 -0
- molSimplify/Ligands/benzenethiol.mol +30 -0
- molSimplify/Ligands/benzylisocy.mol +38 -0
- molSimplify/Ligands/bidiazine.mol +42 -0
- molSimplify/Ligands/bidiazole.mol +38 -0
- molSimplify/Ligands/bifuran.mol +38 -0
- molSimplify/Ligands/bihydrodiazine.mol +58 -0
- molSimplify/Ligands/bihydrodiazole.mol +46 -0
- molSimplify/Ligands/bihydrooxazine.mol +54 -0
- molSimplify/Ligands/bihydrooxazole.mol +42 -0
- molSimplify/Ligands/bihydrothiazine.mol +54 -0
- molSimplify/Ligands/bihydrothiazole.mol +42 -0
- molSimplify/Ligands/biimidazole.mol +38 -0
- molSimplify/Ligands/bioxazole.mol +34 -0
- molSimplify/Ligands/bipy.mol +46 -0
- molSimplify/Ligands/bipyrazine.xyz +20 -0
- molSimplify/Ligands/bipyrimidine.mol +42 -0
- molSimplify/Ligands/bipyrrole.mol +42 -0
- molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
- molSimplify/Ligands/bithiazole.mol +34 -0
- molSimplify/Ligands/bromide.mol +7 -0
- molSimplify/Ligands/bromide.smi +1 -0
- molSimplify/Ligands/c2.mol +9 -0
- molSimplify/Ligands/caprolactone.mol +41 -0
- molSimplify/Ligands/carbonyl.mol +8 -0
- molSimplify/Ligands/carboxyl.mol +13 -0
- molSimplify/Ligands/cat.mol +30 -0
- molSimplify/Ligands/chloride.mol +7 -0
- molSimplify/Ligands/chloride.smi +1 -0
- molSimplify/Ligands/chloropyridine.mol +27 -0
- molSimplify/Ligands/co2.mol +10 -0
- molSimplify/Ligands/corrolazine.mol +72 -0
- molSimplify/Ligands/cs.mol +8 -0
- molSimplify/Ligands/cyanate.xyz +5 -0
- molSimplify/Ligands/cyanide.mol +9 -0
- molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
- molSimplify/Ligands/cyanopyridine.mol +29 -0
- molSimplify/Ligands/cyclam.mol +81 -0
- molSimplify/Ligands/cyclen.mol +69 -0
- molSimplify/Ligands/cyclopentadienyl.mol +26 -0
- molSimplify/Ligands/cysteine.mol +32 -0
- molSimplify/Ligands/diaminomethyl.mol +19 -0
- molSimplify/Ligands/diazine.mol +25 -0
- molSimplify/Ligands/diazole.mol +23 -0
- molSimplify/Ligands/dicyanamide.mol +15 -0
- molSimplify/Ligands/dihydrofuran.mol +27 -0
- molSimplify/Ligands/dmap.xyz +35 -0
- molSimplify/Ligands/dmf.mol +28 -0
- molSimplify/Ligands/dmi.mol +41 -0
- molSimplify/Ligands/dmpe.mol +52 -0
- molSimplify/Ligands/dpmu.mol +47 -0
- molSimplify/Ligands/dppe.mol +112 -0
- molSimplify/Ligands/edta.mol +69 -0
- molSimplify/Ligands/en.mol +28 -0
- molSimplify/Ligands/ethanethiol.mol +21 -0
- molSimplify/Ligands/ethanolamine.mol +26 -0
- molSimplify/Ligands/ethbipy.mol +70 -0
- molSimplify/Ligands/ethyl.mol +19 -0
- molSimplify/Ligands/ethylamine.mol +24 -0
- molSimplify/Ligands/ethylene.mol +16 -0
- molSimplify/Ligands/ethylesteracac.mol +57 -0
- molSimplify/Ligands/fluoride.mol +7 -0
- molSimplify/Ligands/fluoride.smi +1 -0
- molSimplify/Ligands/formaldehyde.mol +12 -0
- molSimplify/Ligands/formamidate.xyz +8 -0
- molSimplify/Ligands/formate.xyz +6 -0
- molSimplify/Ligands/furan.mol +23 -0
- molSimplify/Ligands/glutamic_acid.mol +42 -0
- molSimplify/Ligands/glutamine.mol +44 -0
- molSimplify/Ligands/glycinate.mol +23 -0
- molSimplify/Ligands/glycine.mol +24 -0
- molSimplify/Ligands/h2s.mol +10 -0
- molSimplify/Ligands/helium.mol +6 -0
- molSimplify/Ligands/histidine.mol +45 -0
- molSimplify/Ligands/hmpa.mol +62 -0
- molSimplify/Ligands/hs-.mol +9 -0
- molSimplify/Ligands/hydride.mol +7 -0
- molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
- molSimplify/Ligands/hydrocyanide.mol +10 -0
- molSimplify/Ligands/hydrodiazine.mol +33 -0
- molSimplify/Ligands/hydrodiazole.mol +27 -0
- molSimplify/Ligands/hydrogensulfide.mol +10 -0
- molSimplify/Ligands/hydroisocyanide.mol +11 -0
- molSimplify/Ligands/hydrooxazine.mol +31 -0
- molSimplify/Ligands/hydrooxazole.mol +25 -0
- molSimplify/Ligands/hydrothiazine.mol +31 -0
- molSimplify/Ligands/hydrothiazole.mol +25 -0
- molSimplify/Ligands/hydroxyl.mol +9 -0
- molSimplify/Ligands/imidazole.mol +23 -0
- molSimplify/Ligands/imidazolidinone.mol +29 -0
- molSimplify/Ligands/imine.mol +13 -0
- molSimplify/Ligands/iminodiacetic.mol +33 -0
- molSimplify/Ligands/iodide.mol +7 -0
- molSimplify/Ligands/iodobenzene.xyz +14 -0
- molSimplify/Ligands/isoleucine.mol +48 -0
- molSimplify/Ligands/isothiocyanate.mol +11 -0
- molSimplify/Ligands/leucine.mol +48 -0
- molSimplify/Ligands/ligands.dict +257 -0
- molSimplify/Ligands/lysine.mol +54 -0
- molSimplify/Ligands/mebenzenedithiol.mol +36 -0
- molSimplify/Ligands/mebim_py.xyz +29 -0
- molSimplify/Ligands/mebim_pz.xyz +28 -0
- molSimplify/Ligands/mebipy.mol +58 -0
- molSimplify/Ligands/mecat.mol +36 -0
- molSimplify/Ligands/methanal.mol +11 -0
- molSimplify/Ligands/methanethiol.mol +15 -0
- molSimplify/Ligands/methanol.mol +16 -0
- molSimplify/Ligands/methionine.mol +44 -0
- molSimplify/Ligands/methyl.mol +13 -0
- molSimplify/Ligands/methylacetylide.xyz +8 -0
- molSimplify/Ligands/methylamine.mol +19 -0
- molSimplify/Ligands/methylazide.xyz +9 -0
- molSimplify/Ligands/methylisocy.mol +17 -0
- molSimplify/Ligands/methylpyridine.mol +33 -0
- molSimplify/Ligands/n2.mol +8 -0
- molSimplify/Ligands/n4py.xyz +51 -0
- molSimplify/Ligands/nch.mol +10 -0
- molSimplify/Ligands/nco-.mol +11 -0
- molSimplify/Ligands/nethanolamine.mol +26 -0
- molSimplify/Ligands/nitrate.mol +14 -0
- molSimplify/Ligands/nitrite.mol +11 -0
- molSimplify/Ligands/nitro.mol +11 -0
- molSimplify/Ligands/nitrobipy.mol +54 -0
- molSimplify/Ligands/nitroso.mol +8 -0
- molSimplify/Ligands/nme3.mol +30 -0
- molSimplify/Ligands/no-.mol +10 -0
- molSimplify/Ligands/no2-.mol +11 -0
- molSimplify/Ligands/noxygen.mol +8 -0
- molSimplify/Ligands/ns-.mol +10 -0
- molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
- molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
- molSimplify/Ligands/o2-.mol +9 -0
- molSimplify/Ligands/o2.xyz +4 -0
- molSimplify/Ligands/och2.mol +12 -0
- molSimplify/Ligands/oethanolamine.mol +26 -0
- molSimplify/Ligands/ome2.mol +22 -0
- molSimplify/Ligands/ooh.xyz +5 -0
- molSimplify/Ligands/oxalate.mol +17 -0
- molSimplify/Ligands/oxalate.smi +1 -0
- molSimplify/Ligands/oxygen.mol +7 -0
- molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
- molSimplify/Ligands/ph2-.mol +11 -0
- molSimplify/Ligands/ph3.mol +12 -0
- molSimplify/Ligands/phen.mol +51 -0
- molSimplify/Ligands/phenacac.mol +63 -0
- molSimplify/Ligands/phenalalanine.mol +51 -0
- molSimplify/Ligands/phendione.mol +51 -0
- molSimplify/Ligands/phenphen.mol +75 -0
- molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
- molSimplify/Ligands/phenylcyc.mol +99 -0
- molSimplify/Ligands/phenylenediamine.mol +37 -0
- molSimplify/Ligands/phenylisocy.mol +32 -0
- molSimplify/Ligands/phosacidbipy.mol +66 -0
- molSimplify/Ligands/phosphine.mol +13 -0
- molSimplify/Ligands/phosphorine.mol +27 -0
- molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
- molSimplify/Ligands/phthalocyanine.mol +126 -0
- molSimplify/Ligands/pme3o.mol +32 -0
- molSimplify/Ligands/porphyrin.mol +82 -0
- molSimplify/Ligands/pph3o.mol +77 -0
- molSimplify/Ligands/proline.mol +39 -0
- molSimplify/Ligands/propdiol.mol +21 -0
- molSimplify/Ligands/propylene.mol +23 -0
- molSimplify/Ligands/pyridine.mol +27 -0
- molSimplify/Ligands/pyrimidone.mol +27 -0
- molSimplify/Ligands/pyrrole.mol +24 -0
- molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
- molSimplify/Ligands/s2-.mol +9 -0
- molSimplify/Ligands/salen.mol +75 -0
- molSimplify/Ligands/salphen.mol +84 -0
- molSimplify/Ligands/serine.mol +32 -0
- molSimplify/Ligands/simple_ligands.dict +14 -0
- molSimplify/Ligands/sulfacidbipy.mol +63 -0
- molSimplify/Ligands/tbucat.mol +54 -0
- molSimplify/Ligands/tbuphisocy.mol +56 -0
- molSimplify/Ligands/tbutylcyclen.mol +166 -0
- molSimplify/Ligands/tbutylisocy.mol +35 -0
- molSimplify/Ligands/tbutylthiol.mol +33 -0
- molSimplify/Ligands/tcnoet.mol +43 -0
- molSimplify/Ligands/tcnoetOH.mol +45 -0
- molSimplify/Ligands/terpy.mol +65 -0
- molSimplify/Ligands/tetrahydrofuran.mol +31 -0
- molSimplify/Ligands/thiane.mol +37 -0
- molSimplify/Ligands/thiazole.mol +21 -0
- molSimplify/Ligands/thiocyanate.mol +11 -0
- molSimplify/Ligands/thiol.mol +9 -0
- molSimplify/Ligands/thiophene.mol +23 -0
- molSimplify/Ligands/thiopyridine.mol +29 -0
- molSimplify/Ligands/threonine.mol +38 -0
- molSimplify/Ligands/tpp.mol +165 -0
- molSimplify/Ligands/tricyanomethyl.mol +19 -0
- molSimplify/Ligands/trifluoromethyl.mol +13 -0
- molSimplify/Ligands/tryptophan.mol +60 -0
- molSimplify/Ligands/tyrosine.mol +53 -0
- molSimplify/Ligands/uthiol.mol +11 -0
- molSimplify/Ligands/uthiolme2.mol +23 -0
- molSimplify/Ligands/valine.mol +42 -0
- molSimplify/Ligands/water.mol +10 -0
- molSimplify/Ligands/x.mol +6 -0
- molSimplify/Scripts/__init__.py +0 -0
- molSimplify/Scripts/addtodb.py +308 -0
- molSimplify/Scripts/cellbuilder.py +1592 -0
- molSimplify/Scripts/cellbuilder_tools.py +701 -0
- molSimplify/Scripts/chains.py +342 -0
- molSimplify/Scripts/convert_2to3.py +23 -0
- molSimplify/Scripts/dbinteract.py +631 -0
- molSimplify/Scripts/distgeom.py +617 -0
- molSimplify/Scripts/findcorrelations.py +287 -0
- molSimplify/Scripts/generator.py +267 -0
- molSimplify/Scripts/geometry.py +1224 -0
- molSimplify/Scripts/grabguivars.py +845 -0
- molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
- molSimplify/Scripts/inparse.py +1673 -0
- molSimplify/Scripts/io.py +1149 -0
- molSimplify/Scripts/isomers.py +415 -0
- molSimplify/Scripts/jobgen.py +247 -0
- molSimplify/Scripts/krr_prep.py +1262 -0
- molSimplify/Scripts/molSimplify_io.py +18 -0
- molSimplify/Scripts/molden2psi4wfn.py +166 -0
- molSimplify/Scripts/namegen.py +32 -0
- molSimplify/Scripts/nn_prep.py +561 -0
- molSimplify/Scripts/oct_check_mols.py +782 -0
- molSimplify/Scripts/periodic_QE.py +97 -0
- molSimplify/Scripts/postmold.py +304 -0
- molSimplify/Scripts/postmwfn.py +709 -0
- molSimplify/Scripts/postparse.py +488 -0
- molSimplify/Scripts/postproc.py +139 -0
- molSimplify/Scripts/qcgen.py +1450 -0
- molSimplify/Scripts/rmsd.py +489 -0
- molSimplify/Scripts/rungen.py +670 -0
- molSimplify/Scripts/structgen.py +3040 -0
- molSimplify/Scripts/tf_nn_prep.py +894 -0
- molSimplify/Scripts/tsgen.py +295 -0
- molSimplify/Scripts/uq_calibration.py +69 -0
- molSimplify/__init__.py +0 -0
- molSimplify/__main__.py +197 -0
- molSimplify/icons/chemdb.png +0 -0
- molSimplify/icons/hjklogo.png +0 -0
- molSimplify/icons/icon.png +0 -0
- molSimplify/icons/logo.png +0 -0
- molSimplify/icons/logo_old.png +0 -0
- molSimplify/icons/petachem.png +0 -0
- molSimplify/icons/petachem2.png +0 -0
- molSimplify/icons/petachem_full.png +0 -0
- molSimplify/icons/pythonlogo.png +0 -0
- molSimplify/icons/sge copy.png +0 -0
- molSimplify/icons/sge.png +0 -0
- molSimplify/icons/slurm.png +0 -0
- molSimplify/icons/wft1.png +0 -0
- molSimplify/icons/wft2.png +0 -0
- molSimplify/icons/wft3.png +0 -0
- molSimplify/ml/__init__.py +0 -0
- molSimplify/ml/kernels.py +36 -0
- molSimplify/ml/layers.py +29 -0
- molSimplify/molscontrol/__init__.py +14 -0
- molSimplify/molscontrol/_version.py +521 -0
- molSimplify/molscontrol/clf_tools.py +144 -0
- molSimplify/molscontrol/data/README.md +21 -0
- molSimplify/molscontrol/data/look_and_say.dat +15 -0
- molSimplify/molscontrol/dynamic_classifier.py +514 -0
- molSimplify/molscontrol/io_tools.py +363 -0
- molSimplify/molscontrol/molscontrol.py +49 -0
- molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
- molSimplify/molscontrol/terachem/terachem_input +22 -0
- molSimplify/python_krr/X_train_TS.csv +535 -0
- molSimplify/python_krr/__init__.py +0 -0
- molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
- molSimplify/python_krr/hat2_feature_names.csv +1 -0
- molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
- molSimplify/python_krr/hat_X_mean_std.csv +6 -0
- molSimplify/python_krr/hat_feature_names.csv +1 -0
- molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
- molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
- molSimplify/python_krr/hat_y_mean_std.csv +2 -0
- molSimplify/python_krr/sklearn_models.py +34 -0
- molSimplify/python_krr/y_train_TS.csv +535 -0
- molSimplify/python_nn/ANN.py +198 -0
- molSimplify/python_nn/__init__.py +0 -0
- molSimplify/python_nn/clf_analysis_tool.py +125 -0
- molSimplify/python_nn/dictionary_toolbox.py +49 -0
- molSimplify/python_nn/ensemble_test.py +309 -0
- molSimplify/python_nn/hs_center.csv +26 -0
- molSimplify/python_nn/hs_scale.csv +26 -0
- molSimplify/python_nn/ls_center.csv +26 -0
- molSimplify/python_nn/ls_scale.csv +26 -0
- molSimplify/python_nn/ms_hs_b1.csv +50 -0
- molSimplify/python_nn/ms_hs_b2.csv +50 -0
- molSimplify/python_nn/ms_hs_b3.csv +1 -0
- molSimplify/python_nn/ms_hs_w1.csv +50 -0
- molSimplify/python_nn/ms_hs_w2.csv +50 -0
- molSimplify/python_nn/ms_hs_w3.csv +1 -0
- molSimplify/python_nn/ms_ls_b1.csv +50 -0
- molSimplify/python_nn/ms_ls_b2.csv +50 -0
- molSimplify/python_nn/ms_ls_b3.csv +1 -0
- molSimplify/python_nn/ms_ls_w1.csv +50 -0
- molSimplify/python_nn/ms_ls_w2.csv +50 -0
- molSimplify/python_nn/ms_ls_w3.csv +1 -0
- molSimplify/python_nn/ms_slope_b1.csv +50 -0
- molSimplify/python_nn/ms_slope_b2.csv +50 -0
- molSimplify/python_nn/ms_slope_b3.csv +1 -0
- molSimplify/python_nn/ms_slope_w1.csv +50 -0
- molSimplify/python_nn/ms_slope_w2.csv +50 -0
- molSimplify/python_nn/ms_slope_w3.csv +1 -0
- molSimplify/python_nn/ms_split_b1.csv +50 -0
- molSimplify/python_nn/ms_split_b2.csv +50 -0
- molSimplify/python_nn/ms_split_b3.csv +1 -0
- molSimplify/python_nn/ms_split_w1.csv +50 -0
- molSimplify/python_nn/ms_split_w2.csv +50 -0
- molSimplify/python_nn/ms_split_w3.csv +1 -0
- molSimplify/python_nn/slope_center.csv +25 -0
- molSimplify/python_nn/slope_scale.csv +25 -0
- molSimplify/python_nn/split_center.csv +26 -0
- molSimplify/python_nn/split_scale.csv +26 -0
- molSimplify/python_nn/tf_ANN.py +762 -0
- molSimplify/python_nn/train_data.csv +1211 -0
- molSimplify/tf_nn/__init__.py +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
- molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/gap_model.json +1 -0
- molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
- molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/homo_model.json +126 -0
- molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/split/split_model.h5 +0 -0
- molSimplify/tf_nn/split/split_model.json +1 -0
- molSimplify/tf_nn/split/split_vars.csv +155 -0
- molSimplify/tf_nn/split/split_x.csv +1902 -0
- molSimplify/tf_nn/split/split_y.csv +1902 -0
- molSimplify/tf_nn/split/train_names.csv +1901 -0
- molSimplify/utils/__init__.py +0 -0
- molSimplify/utils/decorators.py +16 -0
- molSimplify/utils/metaclasses.py +12 -0
- molSimplify/utils/tensorflow.py +23 -0
- molSimplify/utils/timer.py +16 -0
- molSimplify-1.7.4.dist-info/LICENSE +674 -0
- molSimplify-1.7.4.dist-info/METADATA +821 -0
- molSimplify-1.7.4.dist-info/RECORD +651 -0
- molSimplify-1.7.4.dist-info/WHEEL +5 -0
- molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
- molSimplify-1.7.4.dist-info/top_level.txt +4 -0
- tests/generateTests.py +122 -0
- tests/helperFuncs.py +658 -0
- tests/informatics/test_MOF_descriptors.py +128 -0
- tests/informatics/test_active_learning.py +113 -0
- tests/informatics/test_coulomb_analyze.py +24 -0
- tests/informatics/test_graph_racs.py +193 -0
- tests/ml/test_kernels.py +20 -0
- tests/ml/test_layers.py +47 -0
- tests/runtest.py +10 -0
- tests/test_Mol2D.py +128 -0
- tests/test_basic_imports.py +62 -0
- tests/test_bidentate.py +25 -0
- tests/test_cli.py +20 -0
- tests/test_distgeom.py +106 -0
- tests/test_example_1.py +29 -0
- tests/test_example_3.py +31 -0
- tests/test_example_5.py +43 -0
- tests/test_example_7.py +28 -0
- tests/test_example_8.py +15 -0
- tests/test_example_tbp.py +15 -0
- tests/test_ff_xtb.py +111 -0
- tests/test_geocheck_oct.py +26 -0
- tests/test_geocheck_one_empty.py +15 -0
- tests/test_geometry.py +44 -0
- tests/test_inparse.py +76 -0
- tests/test_io.py +84 -0
- tests/test_jobgen.py +84 -0
- tests/test_joption_pythonic.py +27 -0
- tests/test_ligand_assign.py +58 -0
- tests/test_ligand_assign_consistent.py +60 -0
- tests/test_ligand_class.py +26 -0
- tests/test_ligand_from_mol_file.py +35 -0
- tests/test_ligands.py +86 -0
- tests/test_mol3D.py +337 -0
- tests/test_molcas_caspt2.py +15 -0
- tests/test_molcas_casscf.py +15 -0
- tests/test_old_ANNs.py +68 -0
- tests/test_orca_ccsdt.py +15 -0
- tests/test_orca_dft.py +15 -0
- tests/test_qcgen.py +50 -0
- tests/test_racs.py +124 -0
- tests/test_rmsd.py +68 -0
- tests/test_structgen_functions.py +198 -0
- tests/test_tetrahedral.py +29 -0
- tests/test_tutorial_10_part_one.py +16 -0
- tests/test_tutorial_10_part_two.py +15 -0
- tests/test_tutorial_2.py +11 -0
- tests/test_tutorial_3.py +15 -0
- tests/test_tutorial_4.py +57 -0
- tests/test_tutorial_6.py +10 -0
- tests/test_tutorial_8.py +29 -0
- tests/test_tutorial_9_part_one.py +15 -0
- tests/test_tutorial_9_part_two.py +15 -0
- tests/test_tutorial_qm9_part_one.py +6 -0
- tests/testresources/refs/racs/generate_references.py +85 -0
- workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
- workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
- workflows/NandyJACSAu2022/fragment_classes.py +586 -0
- workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import argparse
|
|
5
|
+
from sklearn.linear_model import LinearRegression
|
|
6
|
+
from sklearn.model_selection import LeaveOneOut
|
|
7
|
+
|
|
8
|
+
'''
|
|
9
|
+
This script takes in a path (absolute, or relative to the current working directory) to a CSV file that has
|
|
10
|
+
complexes labeled, as well as exchange fractions. It then
|
|
11
|
+
takes those values and determines if the behavior is linear or
|
|
12
|
+
not. If so, it calculates the sensitivity. If not, then it
|
|
13
|
+
gives a reason for not computing it and logs that reason.
|
|
14
|
+
|
|
15
|
+
The script relies on raw data with one column labeled "complex_no_HFX"
|
|
16
|
+
and another labeled "alpha". The former contains the name with the ligand
|
|
17
|
+
field. The latter contains the HFX value.
|
|
18
|
+
|
|
19
|
+
If given no arguments, the function will just measure the sensitivity
|
|
20
|
+
of the spin splitting energies with an LOOCV cutoff of 5 kcal/mol,
|
|
21
|
+
requiring at least 4 points, and prioritizing lines with R2 of 0.99.
|
|
22
|
+
'''
|
|
23
|
+
|
|
24
|
+
def _kept_records(name, X, y, prop, R2, reg):
    """Return one 'kept' row per (alpha, property) point of an accepted line."""
    # reg.coef_ is an array, not a scalar; squeeze it first because calling
    # float() on an array with ndim > 0 is deprecated in recent NumPy.
    slope = float(np.squeeze(reg.coef_))
    return [{'complex_no_HFX': name, 'alpha': alpha[0], str(prop): prop_val[0],
             'R2': R2, 'sensitivity': slope}
            for alpha, prop_val in zip(X, y)]


def _whole_removed_records(name, X, y, prop, reason, R2):
    """Return one 'whole' elimination row per point of a discarded ligand field."""
    return [{'complex_no_HFX': name, 'alpha': alpha[0], str(prop): prop_val[0],
             'reason': reason, 'elim_type': 'whole', 'R2': R2}
            for alpha, prop_val in zip(X, y)]


def measure_sensitivity(path_to_csv, path_to_write=False, prop='SSE', R2_cutoff=0.99, CV_tolerance=5, num_points=4):
    """
    Fit property-vs-alpha lines per ligand field and write kept/discarded CSVs.

    The CSV is grouped by ``complex_no_HFX``. For every group a linear fit of
    ``prop`` against ``alpha`` is attempted; groups are either kept (with the
    slope stored as ``sensitivity``) or logged as removed, after optional
    outlier removal via ``CV_check`` and ``slope_sign_check``.

    Parameters
    ----------
    path_to_csv : str
        Path to the raw CSV. Relative paths are resolved against the current
        working directory. The file must contain the columns
        ``complex_no_HFX``, ``alpha`` and the column named by ``prop``.
    path_to_write : str or False
        Prefix for the output files. If False, the CSV base name (with
        ``.csv`` removed) in the current working directory is used.
    prop : str
        Name of the property column.
    R2_cutoff : float
        Minimum R2 for a line to be accepted as linear.
    CV_tolerance : float
        Leave-one-out prediction error above which a point is removed.
    num_points : int
        Minimum number of points a line must retain.

    Returns
    -------
    None
        Results are written to ``<prefix>_kept_grouped.csv``,
        ``<prefix>_elim_grouped.csv``, ``<prefix>_kept.csv`` and
        ``<prefix>_discarded.csv``; summary counts are printed.
    """
    if not os.path.isabs(path_to_csv):
        path_to_csv = os.path.join(os.getcwd(), path_to_csv)
    if path_to_write is False:
        path_to_write = os.path.join(os.getcwd(), os.path.split(path_to_csv)[1].replace('.csv', ''))
    raw_data = pd.read_csv(path_to_csv)
    raw_data = raw_data.sort_values(by=['complex_no_HFX', 'alpha'])

    prop_col = str(prop)
    hfx_grid = [0, 5, 10, 15, 20, 25, 30]
    cv_kwargs = dict(prop=prop, CV_tolerance=CV_tolerance, R2_cutoff=R2_cutoff, num_points=num_points)

    # Two logs are compiled while looping over the unique ligand fields:
    # points that end up in a fitted line, and points that are eliminated.
    # Eliminations are tagged 'whole' (entire ligand field dropped) or
    # 'point' (single point removed before measuring the sensitivity).
    kept_dict_list = []
    removed_dict_list = []
    for name, unique_complex in raw_data.groupby('complex_no_HFX'):
        alpha_vals = np.array(unique_complex['alpha'].tolist()).reshape(-1, 1)
        property_vals = np.array(unique_complex[prop_col].tolist()).reshape(-1, 1)

        # 1) Not enough points to start with: discard the whole ligand field.
        if int(unique_complex.shape[0]) < num_points:
            removed_dict_list += _whole_removed_records(
                name, alpha_vals, property_vals, prop, 'not_enough_points_to_start', None)
            continue

        # 2) The raw data is already linear enough: keep it untouched.
        R2, reg = measure_R2(alpha_vals, property_vals)
        if R2 >= R2_cutoff:
            kept_dict_list += _kept_records(name, alpha_vals, property_vals, prop, R2, reg)
            continue

        # 3) Try to rescue the line by removing a single off-line point.
        kept_points_X, kept_points_y, CV_removed_list = CV_check(
            alpha_vals, property_vals, name=name, **cv_kwargs)
        new_R2, new_reg = measure_R2(kept_points_X, kept_points_y)
        if new_R2 >= R2_cutoff:
            kept_dict_list += _kept_records(name, kept_points_X, kept_points_y, prop, new_R2, new_reg)
            removed_dict_list += CV_removed_list
            continue

        # 4) Repeat the outlier check. Because `previous` is refreshed right
        #    after each CV_check call, this loop performs at most one more pass.
        previous = len(property_vals)
        while len(kept_points_X) >= num_points:
            now = len(kept_points_X)
            if now == previous:
                break
            kept_points_X, kept_points_y, new_removed_list = CV_check(
                kept_points_X, kept_points_y, name=name, **cv_kwargs)
            previous = len(kept_points_X)
            CV_removed_list += new_removed_list

        if len(kept_points_X) < num_points:
            removed_dict_list += _whole_removed_records(
                name, alpha_vals, property_vals, prop, 'CV_resulted_in_not_enough_points', R2)
            continue

        # 5) Re-check R2 on the points that survived the outlier removal.
        #    (Fitting the original, unfiltered points here would simply
        #    reproduce the R2 that already failed in step 2.)
        R2_repeat, reg_repeat = measure_R2(kept_points_X, kept_points_y)
        if R2_repeat >= R2_cutoff:
            kept_dict_list += _kept_records(name, kept_points_X, kept_points_y, prop, R2_repeat, reg_repeat)
            removed_dict_list += CV_removed_list
            continue

        # 6) Still not linear: look for a slope sign change that can be cut off.
        kept_points_X, kept_points_y, slope_removed = slope_sign_check(
            kept_points_X, kept_points_y, name=name, prop=prop, num_points=num_points)
        if len(kept_points_X) < num_points:
            final_R2, _ = measure_R2(alpha_vals, property_vals)
            removed_dict_list += _whole_removed_records(
                name, alpha_vals, property_vals, prop, 'failed_sign_change_slope_check', final_R2)
            continue

        kept_R2, kept_reg = measure_R2(kept_points_X, kept_points_y)
        if kept_R2 >= R2_cutoff:
            kept_dict_list += _kept_records(name, kept_points_X, kept_points_y, prop, kept_R2, kept_reg)
            # NOTE(review): points removed by CV_check in steps 3-4 are not
            # logged on this path, only the slope-check removals are. Confirm
            # whether that is intended.
            removed_dict_list += slope_removed
            continue

        # 7) Last resort: one more round of outlier removal; the result is
        #    kept regardless of its R2.
        kept_points_X, kept_points_y, CV_removed_list = CV_check(
            kept_points_X, kept_points_y, name=name, **cv_kwargs)
        backup_X = kept_points_X[:]
        backup_y = kept_points_y[:]
        previous = 10000
        while len(kept_points_X) >= num_points:
            now = len(kept_points_X)
            if now == previous:
                break
            kept_points_X, kept_points_y, new_removed_list = CV_check(
                kept_points_X, kept_points_y, name=name, **cv_kwargs)
            previous = len(kept_points_X)
            CV_removed_list += new_removed_list
        if len(kept_points_X) < num_points:
            # Too many points were stripped; fall back to the state after the
            # first CV_check of this step.
            kept_points_X = backup_X
            kept_points_y = backup_y
        else:
            removed_dict_list += CV_removed_list
        R2, reg = measure_R2(kept_points_X, kept_points_y)
        kept_dict_list += _kept_records(name, kept_points_X, kept_points_y, prop, R2, reg)

    # Passing `columns=` keeps the frames well-formed even when a log is empty
    # (e.g. nothing was removed), where selecting columns afterwards would fail.
    kept_data = pd.DataFrame(
        kept_dict_list, columns=['complex_no_HFX', 'alpha', prop_col, 'R2', 'sensitivity'])
    kept_data['symmetry'] = kept_data['complex_no_HFX'].apply(symmetry_class)
    kept_data = kept_data.sort_values(by=['R2', 'complex_no_HFX', 'alpha'])
    group_dict_list = []
    for name, group in kept_data.groupby('complex_no_HFX'):
        alphas = group['alpha'].tolist()
        energies = group[prop_col].tolist()
        group_dict = {'complex': name}
        for val in hfx_grid:
            group_dict[val] = energies[alphas.index(val)] if val in alphas else np.nan
        # symmetry_class falls back to 'unknown', so a name matching none of
        # the patterns cannot inherit the label of the previous group.
        group_dict['symmetry'] = symmetry_class(name)
        group_dict['sensitivity'] = group['sensitivity'].values[0]*100
        group_dict['R2'] = group['R2'].values[0]
        group_dict_list.append(group_dict)
    grouped_df = pd.DataFrame(
        group_dict_list, columns=['complex'] + hfx_grid + ['R2', 'sensitivity', 'symmetry'])
    grouped_df.to_csv(path_to_write+'_kept_grouped.csv', index=False)

    thrown_data = pd.DataFrame(
        removed_dict_list, columns=['complex_no_HFX', 'alpha', prop_col, 'reason', 'elim_type', 'R2'])
    thrown_data['symmetry'] = thrown_data['complex_no_HFX'].apply(symmetry_class)
    thrown_data = thrown_data.sort_values(by=['complex_no_HFX', 'alpha'])
    group_dict_list = []
    for name, group in thrown_data.groupby('complex_no_HFX'):
        alphas = group['alpha'].tolist()
        energies = group[prop_col].tolist()
        reasons = group['reason'].tolist()
        group_dict = {'complex': name}
        for val in hfx_grid:
            if val not in alphas:
                group_dict[val] = np.nan
                group_dict[str(val)+'_elim'] = np.nan
            else:
                idx = alphas.index(val)
                group_dict[val] = energies[idx]
                group_dict[str(val)+'_elim'] = reasons[idx]
        group_dict['symmetry'] = symmetry_class(name)
        group_dict['R2'] = group['R2'].values[0]
        group_dict_list.append(group_dict)
    grouped_df = pd.DataFrame(
        group_dict_list,
        columns=['complex'] + hfx_grid + [str(val)+'_elim' for val in hfx_grid] + ['symmetry', 'R2'])
    grouped_df.to_csv(path_to_write+'_elim_grouped.csv', index=False)

    kept_data['combined'] = kept_data['complex_no_HFX']+'_'+kept_data['alpha'].astype(str)
    thrown_data['combined'] = thrown_data['complex_no_HFX']+'_'+thrown_data['alpha'].astype(str)
    # No points that are thrown away should also be kept.
    print('sanity check', set(kept_data['combined']).intersection(set(thrown_data['combined'])))
    # Report how many ligand fields there were to start with.
    print(str(len(set(raw_data['complex_no_HFX'])))+' POSSIBLE sensitivities.')
    # Check how many ligand fields end up being kept.
    print(str(len(set(kept_data['complex_no_HFX'])))+' FINAL calculated sensitivities.')
    # Check how many whole lines are thrown out.
    whole = thrown_data[thrown_data['elim_type'] == 'whole']
    point = thrown_data[thrown_data['elim_type'] == 'point']
    print('ELIMINATED '+str(len(set(whole['complex_no_HFX'])))+' WHOLE ligand fields.')
    print('SAVED '+str(len(set(point['complex_no_HFX'])))+' ligand fields by eliminating a point or two.')
    # Check how many ligand fields have something thrown out.
    print(str(len(set(thrown_data['complex_no_HFX'])))+' ligand fields with something removed.')

    # Write all the data to CSVs.
    kept_data.to_csv(path_to_write+'_kept.csv')
    thrown_data.to_csv(path_to_write+'_discarded.csv')
|
|
222
|
+
|
|
223
|
+
def measure_R2(X, y):
    """Fit an ordinary least-squares line to (X, y).

    Returns the tuple ``(R2, model)``: the score of the fitted
    ``LinearRegression`` on the same data it was trained on, and the fitted
    model itself.
    """
    model = LinearRegression().fit(X, y)
    return model.score(X, y), model
|
|
228
|
+
|
|
229
|
+
def symmetry_class(complex_name):
    """Classify a complex name by comparing four of its '_'-separated fields.

    Fields 2, 4, 6 and 7 of ``complex_name.split('_')`` are compared:

    * fields 2 and 4 differ                      -> 'cis'
    * fields 2, 4, 6 and 7 all equal             -> 'homoleptic'
    * fields 2, 4, 6 equal, field 7 different    -> '5+1'
    * fields 2 == 4, 6 == 7, and 2 differs       -> 'trans'
    * anything else                              -> 'unknown'

    An IndexError is raised when a field that has to be inspected is missing.
    """
    fields = complex_name.split('_')
    lig_a, lig_b = fields[2], fields[4]
    if lig_a != lig_b:
        return 'cis'

    lig_c, lig_d = fields[6], fields[7]
    if lig_a == lig_c:
        return 'homoleptic' if lig_a == lig_d else '5+1'
    if lig_a != lig_d and lig_c == lig_d:
        return 'trans'
    return 'unknown'
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def CV_check(X, y, name, prop, CV_tolerance, R2_cutoff, num_points):
    """
    Leave-one-out check that removes at most one point from a line.

    Each point is held out in turn and a line is fitted to the rest. The
    first held-out point that satisfies either criterion below is removed and
    the function returns immediately:

    * the remaining points reach ``R2_cutoff`` and there are at least
      ``num_points`` of them, or
    * the fitted line mispredicts the held-out point by more than
      ``CV_tolerance``.

    Parameters
    ----------
    X, y : numpy.ndarray
        Column arrays of alpha values and property values; they are indexed
        with the integer index arrays produced by ``LeaveOneOut().split``.
    name : str
        Ligand-field identifier stored in the removal record.
    prop : str
        Property name used as key in the removal record.
    CV_tolerance : float
        Maximum tolerated absolute leave-one-out prediction error.
    R2_cutoff : float
        R2 the remaining points must reach for the first criterion.
    num_points : int
        Minimum number of remaining points for the first criterion.

    Returns
    -------
    kept_points_X, kept_points_y : numpy.ndarray
        The data without the removed point, or ``X`` and ``y`` unchanged when
        no point qualified.
    removed_dict_list : list of dict
        A single removal record, or an empty list when nothing was removed.
    """
    for train_index, test_index in LeaveOneOut().split(X):
        train_X, test_X = X[train_index], X[test_index]
        train_y, test_y = y[train_index], y[test_index]
        # Fit the line without the held-out point and score it on those points.
        R2, reg = measure_R2(train_X, train_y)
        if (R2 >= R2_cutoff) and len(train_X) >= num_points:
            reason = 'eliminating_point_led_to_R2_pass'
        elif abs(test_y - reg.predict(test_X)) > CV_tolerance:
            reason = 'point_had_LOOCV_greater_than_cutoff'
        else:
            continue
        removed = {'complex_no_HFX': name,
                   'alpha': int(np.squeeze(test_X)),
                   str(prop): float(np.squeeze(test_y)),
                   'reason': reason,
                   'elim_type': 'point',
                   'R2': R2}
        return train_X, train_y, [removed]
    # No held-out point met either criterion: hand the data back untouched.
    return X, y, []
|
|
269
|
+
|
|
270
|
+
def slope_sign_check(X, y, name, prop, num_points):
    """
    Look for a sign change in the point-to-point slopes and cut it off.

    Slopes are computed between every pair of consecutive points. Depending
    on how many slopes are negative/non-negative and where the sign changes
    sit, the function either gives up (returning empty kept lists) or keeps
    only the points on one side of a sign change.

    Parameters
    ----------
    X, y : sequence
        Alpha values and property values; sliced with ``X[i:i+2]`` and passed
        to ``LinearRegression.fit``.
        # assumes column arrays ordered by alpha - TODO confirm against caller
    name : str
        Ligand-field identifier stored in the removal records.
    prop : str
        Property name used as key in the removal records.
    num_points : int
        Minimum number of points, used in the give-up conditions.

    Returns
    -------
    kept_points_X, kept_points_y
        Points that remain. Empty lists when the give-up condition is met;
        ``X`` and ``y`` unchanged when no point was eliminated.
    removed_dict_list : list of dict
        One 'point' record per eliminated point.
    """
    kept_points_X = []
    kept_points_y = []
    elim_points_X = []
    elim_points_y = []
    num_slopes = len(X)-1
    coef_list = []
    removed_dict_list = []
    # Slope of the straight line through each pair of neighbouring points.
    for i in range(num_slopes):
        reg = LinearRegression()
        temp_X = X[i:i+2]
        temp_y = y[i:i+2]
        reg.fit(temp_X,temp_y)
        coef_list.append(float(np.squeeze(reg.coef_)))
    # Count negative and non-negative slopes (zero counts as non-negative).
    neg_count = len(list(filter(lambda x: (x < 0), coef_list)))
    pos_count = len(list(filter(lambda x: (x >= 0), coef_list)))
    # signchange[k] is 1 when slope k has a different sign than slope k-1.
    # np.roll wraps around, so entry 0 (compared with the last slope) is reset.
    signchange = ((np.roll(np.sign(coef_list), 1) - np.sign(coef_list)) != 0).astype(int)
    signchange[0] = 0
    # Positions of the sign changes as a fraction of the number of slopes.
    signchange_list = np.where(signchange==1)[0]/float(len(signchange))
    # Number of sign changes in the first and in the second half of the slopes.
    split_sign = np.array_split(signchange, 2)
    num_changes_first = np.sum(split_sign[0])
    num_changes_second = np.sum(split_sign[1])
    # sign_flag: fractional position of the first sign change, 0 if there is none.
    if len(signchange_list) == 0:
        sign_flag = 0
    else:
        sign_flag = signchange_list[0]
    diff_points = abs(neg_count-pos_count)
    remove_counter = 0  # never read below
    # Give-up conditions: equally many negative and non-negative slopes, too
    # few points, too many minority-sign slopes to leave num_points points,
    # a first sign change between 40% and 60% of the way along the line, or
    # sign changes in both halves.
    if ((neg_count == pos_count) or (diff_points>=1 and len(X)<num_points) or
        ((len(X)-num_points-min(neg_count,pos_count)-1)<0 and (not min(neg_count,pos_count)<=1)) or ((sign_flag>0.4) and (sign_flag<0.6)) or (num_changes_first>0 and num_changes_second>0)):
        # elim_points_X is still empty here, so this loop adds nothing and the
        # kept lists are returned empty.
        for j, val in enumerate(elim_points_X):
            removed_dict_list.append({'complex_no_HFX':name,'alpha':int(np.squeeze(val)), str(prop):float(np.squeeze(elim_points_y[j])),'reason':'identified_slope_sign_change','elim_type':'point'})
        return kept_points_X, kept_points_y, removed_dict_list
    else:
        # Walk over neighbouring slope pairs; at a sign change keep the points
        # after it (change in the first half) or before it (second half).
        # A later sign change overwrites the split chosen by an earlier one.
        for i in range(len(coef_list)-1):
            frac = float(i) / len(coef_list)
            if np.sign(coef_list[i]) != np.sign(coef_list[i+1]):
                if frac < 0.5:
                    kept_points_X = X[i+1:]
                    kept_points_y = y[i+1:]
                    elim_points_X = X[0:i+1]
                    elim_points_y = y[0:i+1]
                elif frac >= 0.5:
                    kept_points_X = X[0:i+1]
                    kept_points_y = y[0:i+1]
                    elim_points_X = X[i+1:]
                    elim_points_y = y[i+1:]
        # NOTE(review): the bookkeeping below is placed after the loop, i.e. it
        # logs the final split once - confirm against the upstream file.
        if len(elim_points_X)>0:
            for j, val in enumerate(elim_points_X):
                removed_dict_list.append({'complex_no_HFX':name,'alpha':int(np.squeeze(val)), str(prop):float(np.squeeze(elim_points_y[j])),'reason':'identified_slope_sign_change_that_can_be_fixed','elim_type':'point', 'R2':np.nan})
        # Nothing was eliminated: hand back the input unchanged.
        if (len(elim_points_X) == 0) or isinstance(elim_points_X,bool):
            kept_points_X = X
            kept_points_y = y
        return kept_points_X, kept_points_y, removed_dict_list
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def main():
    """Parse the command-line options and run ``measure_sensitivity``.

    Options: ``--data`` (required), ``--writepath``, ``--prop``, ``--R2``,
    ``--cutoff`` and ``--num_points``. The parsed namespace is printed before
    the analysis starts.
    """
    parser = argparse.ArgumentParser(description='Script to process some sensitivity data.')
    parser.add_argument('--data', dest='path_to_csv', action='store', type=str, required=True,
                        help='Path to CSV containing raw data.')
    # measure_sensitivity falls back to the CSV base name in the current
    # working directory when no write path is given.
    parser.add_argument('--writepath', dest='path_to_write', action='store', type=str, default=False,
                        help='Path prefix to dump processed data. Defaults to the CSV base name '
                             'in the current working directory.')
    parser.add_argument('--prop', dest='prop', action='store', type=str, default='SSE',
                        help='Name for the property as in the CSV. Should be "SSE" for spin splitting.')
    parser.add_argument('--R2', dest='R2_cutoff', action='store', type=float, default=0.99,
                        help='R2 check cutoff value for linearity. Default is 0.99.')
    # Parsed as float so fractional tolerances (e.g. 2.5) are accepted;
    # integer input such as "5" still works.
    parser.add_argument('--cutoff', dest='CV_tolerance', action='store', type=float, default=5,
                        help='Heuristic cutoff for eliminating outliers. Defaults to 5 for SSE.')
    parser.add_argument('--num_points', dest='num_points', action='store', type=int, default=4,
                        help='Minimum number of points to form the HFX line. Defaults to 4.')
    args = parser.parse_args()
    print(args)
    measure_sensitivity(args.path_to_csv, args.path_to_write, args.prop, args.R2_cutoff, args.CV_tolerance, args.num_points)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
from molSimplify.Scripts.cellbuilder_tools import import_from_cif
|
|
2
|
+
from molSimplify.Informatics.MOF.PBC_functions import (
|
|
3
|
+
compute_adj_matrix,
|
|
4
|
+
compute_distance_matrix3,
|
|
5
|
+
frac_coord,
|
|
6
|
+
fractional2cart,
|
|
7
|
+
readcif,
|
|
8
|
+
XYZ_connected,
|
|
9
|
+
write_cif,
|
|
10
|
+
)
|
|
11
|
+
from molSimplify.Informatics.MOF.MOF_functionalizer import get_linkers
|
|
12
|
+
import numpy as np
|
|
13
|
+
import os
|
|
14
|
+
|
|
15
|
+
def rotate_around_axis(axis, r, p, t):
    """
    Rotate the point ``p`` by angle ``t`` about the line through ``r`` along ``axis``.

    The rotation is evaluated with Rodrigues' formula on the vector from
    ``r`` to ``p``. The sense of rotation is the same for every axis
    direction: for ``axis = +z`` and ``t = pi/2`` the point ``(1, 0, 0)`` is
    sent to ``(0, -1, 0)``. Reversing ``axis`` reverses the sense, also when
    the axis lies exactly along x.

    Parameters
    ----------
        axis : The rotation axis vector (3 components, need not be normalised)
        r : The reference point of the axis vector
        p : The point to rotate
        t : Rotation angle in radians

    Returns
    -------
        new : numpy.ndarray of shape (3,) with the rotated coordinates
    """
    unit_axis = np.asarray(axis, dtype=float)
    unit_axis = unit_axis / np.linalg.norm(unit_axis)  # normalize axis vector
    origin = np.asarray(r, dtype=float)[:3]
    # Work with the position of p relative to the reference point.
    rel = np.asarray(p, dtype=float)[:3] - origin

    cos_t = np.cos(t)
    sin_t = np.sin(t)
    # Rodrigues' rotation formula; the minus sign on the cross-product term
    # gives the sense of rotation described in the docstring.
    rotated = (rel * cos_t
               - np.cross(unit_axis, rel) * sin_t
               + unit_axis * np.dot(unit_axis, rel) * (1.0 - cos_t))
    return rotated + origin
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def linker_rotation(molcif, fcoords, linker, rot_angle):
    """
    Rotate the non-anchoring atoms of one linker about its own axis.

    Atoms bonded to Zr, the linker atoms bonded to those, and the linker atoms
    bonded to the latter are kept in place. The first two atoms found in that
    last shell define the rotation axis; every other linker atom is rotated
    about it with ``rotate_around_axis``.

    Besides its arguments, the function reads the module-level names
    ``cell_v`` and ``allatomtypes``.

    Parameters
    ----------
        molcif : molSimplify.Classes.mol3D.mol3D
            The cell of the cif file being analyzed.
        fcoords : numpy.ndarray
            The fractional coordinates of the atoms.
        linker : list of numpy.int32
            The indices of the atoms in the linker.
        rot_angle : float
            Desired angle of rotation.

    Returns
    -------
        frac_new_linker : numpy.ndarray
            fractional coordinates of new linker atoms
    """
    # Cartesian coordinates of the cell after passing through XYZ_connected.
    connected_fcoords = XYZ_connected(cell_v, fractional2cart(fcoords, cell_v), np.array(molcif.graph))
    cart_coords = fractional2cart(connected_fcoords, cell_v)
    # NOTE(review): this slice is never read afterwards.
    linker_coord_original = cart_coords[linker]

    # Shell 0: linker atoms with at least one Zr neighbour.
    zr_bonded = [
        atom for atom in linker
        if 'Zr' in [allatomtypes[nb] for nb in molcif.getBondedAtomsSmart(atom)]
    ]
    fixed_atoms = list(zr_bonded)

    def next_shell(anchors):
        """Fix and return the not-yet-fixed linker atoms bonded to ``anchors``."""
        shell = []
        for atom in linker:
            neighbours = molcif.getBondedAtomsSmart(atom)
            if atom in fixed_atoms:
                continue
            if any(nb in anchors for nb in neighbours):
                fixed_atoms.append(atom)
                shell.append(atom)
        return shell

    # Shell 1: atoms bonded to the Zr-bonded atoms.
    # Shell 2: atoms bonded to shell 1; these define the rotation axis.
    first_shell = next_shell(zr_bonded)
    axis_atoms = next_shell(first_shell)
    C_axis = [cart_coords[atom] for atom in axis_atoms]

    # Axis runs from the first to the second axis atom.
    rot_axis = np.array(C_axis[1] - C_axis[0])

    # Fixed atoms keep their coordinates; the rest are rotated.
    new_linker = [
        cart_coords[atom] if atom in fixed_atoms
        else rotate_around_axis(rot_axis, C_axis[0], np.ndarray.tolist(cart_coords[atom]), rot_angle)
        for atom in linker
    ]

    # Change back to fractional coordinates
    return frac_coord(new_linker, cell_v)
|
|
132
|
+
|
|
133
|
+
### End of functions ###
|
|
134
|
+
|
|
135
|
+
# Driver script: for every functional group, read the functionalized CIF,
# rotate all linkers through a series of angles and write one CIF per angle.

# Functional groups to use
func_group = ['Br','CF3','CH3','CN','COOH','Cl','F','I','NH2','NO2','OH','SH']

for elem in func_group:
    cif_file=f'functionalized_UiO66_{elem}_1.cif' # Functionalized .CIF file name goes here
    path2write = str(elem)+'/'  # input is read from, and output written below, '<group>/'
    # (from PBC_functions) reads the cif; unpacked as cell parameters, atom types and fractional coordinates
    # NOTE: allatomtypes is also read as a global by linker_rotation
    cpar, allatomtypes, fcoords = readcif(path2write+cif_file)
    # obtains mol3D
    molcif,cell_vector, alpha, beta, gamma = import_from_cif(path2write+cif_file, True)
    # NOTE: cell_v is also read as a global by linker_rotation
    cell_v = np.array(cell_vector)
    cart_coords = fractional2cart(fcoords,cell_v)
    distance_mat = compute_distance_matrix3(cell_v,cart_coords) # distance matrix of all atoms
    adj_matrix, _ = compute_adj_matrix(distance_mat,allatomtypes) # adjacency matrix (graph) derived from the distance matrix and atom types
    molcif.graph = adj_matrix.todense() # dense form of adjacency matrix / graph is saved to molcif object


    # list of linkers
    linker_list, linker_subgraphlist = get_linkers(molcif, adj_matrix, allatomtypes)
    # keep only linkers with at least two atoms
    linker_bdc_list = []
    for linker_num, linker in enumerate(linker_list):
        if len(linker) < 2:
            continue
        else:
            linker_bdc_list.append(linker)
    print(cpar)

    # fractional coordinates of the retained linkers (not used further below)
    linker_coords = [fcoords[val,:] for val in linker_bdc_list]

    coords_new = fcoords.copy()
    rot_angle_degree = np.linspace(0, 360, 25) # Define rotation angles: 25 values from 0 to 360 degrees inclusive
    rot_angle_list = rot_angle_degree/180*np.pi # same angles in radians

    # Rotation of all of the linkers
    for i, rot_angle in enumerate(rot_angle_list):
        for linker_num, linker in enumerate(linker_bdc_list):
            # each rotation starts from the unrotated fcoords
            new_linker=linker_rotation(molcif, fcoords, linker, rot_angle)
            coords_new[linker_bdc_list[linker_num],:] = new_linker # overwrite this linker's rows
        # one output directory per angle, named by the integer degree value
        path_directory = str(path2write)+str(int(rot_angle_degree[i]))
        if not os.path.exists(path_directory):
            os.mkdir(path_directory)
        write_cif(f'{path_directory}/modified_{elem}_{int(rot_angle_degree[i])}.cif', cpar, coords_new, allatomtypes)
    print(str(elem) + " done")
|