PyPI - molSimplify - Versions diffs - 1.7.4__py3-none-any.whl - Mend

molSimplify 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (651) hide show

docs/source/conf.py +224 -0
molSimplify/Classes/__init__.py +6 -0
molSimplify/Classes/atom3D.py +235 -0
molSimplify/Classes/dft_obs.py +130 -0
molSimplify/Classes/globalvars.py +827 -0
molSimplify/Classes/helpers.py +161 -0
molSimplify/Classes/ligand.py +2330 -0
molSimplify/Classes/mGUI.py +2493 -0
molSimplify/Classes/mWidgets.py +438 -0
molSimplify/Classes/miniGUI.py +41 -0
molSimplify/Classes/mol2D.py +260 -0
molSimplify/Classes/mol3D.py +5846 -0
molSimplify/Classes/monomer3D.py +253 -0
molSimplify/Classes/partialcharges.py +226 -0
molSimplify/Classes/protein3D.py +1178 -0
molSimplify/Classes/rundiag.py +151 -0
molSimplify/Data/ML.dat +212 -0
molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
molSimplify/Data/MLS_angle_for_click.dat +8 -0
molSimplify/Data/MLS_angle_for_inter.dat +23 -0
molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
molSimplify/Data/MLS_angle_for_intra.dat +10 -0
molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
molSimplify/Data/MLS_angle_for_oa.dat +18 -0
molSimplify/Data/ML_FSR_for_inter.dat +112 -0
molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
molSimplify/Data/ML_bond_for_cat.dat +8 -0
molSimplify/Data/ML_bond_for_click.dat +8 -0
molSimplify/Data/ML_bond_for_inter.dat +48 -0
molSimplify/Data/ML_bond_for_inter2.dat +48 -0
molSimplify/Data/ML_bond_for_intra.dat +10 -0
molSimplify/Data/ML_bond_for_intra2.dat +6 -0
molSimplify/Data/ML_bond_for_oa.dat +18 -0
molSimplify/Data/bp1.dat +21 -0
molSimplify/Data/li.dat +3 -0
molSimplify/Data/no.dat +2 -0
molSimplify/Data/oct.dat +7 -0
molSimplify/Data/pbp.dat +8 -0
molSimplify/Data/spy.dat +6 -0
molSimplify/Data/sqap.dat +9 -0
molSimplify/Data/sqp.dat +5 -0
molSimplify/Data/tbp.dat +6 -0
molSimplify/Data/tdhd.dat +9 -0
molSimplify/Data/thd.dat +5 -0
molSimplify/Data/tpl.dat +4 -0
molSimplify/Data/tpr.dat +7 -0
molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
molSimplify/Informatics/MOF/__init__.py +0 -0
molSimplify/Informatics/MOF/atomic.py +267 -0
molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
molSimplify/Informatics/Mol2Parser.py +46 -0
molSimplify/Informatics/RACassemble.py +408 -0
molSimplify/Informatics/__init__.py +0 -0
molSimplify/Informatics/active_learning/__init__.py +0 -0
molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
molSimplify/Informatics/autocorrelation.py +1930 -0
molSimplify/Informatics/clean_autocorrelation.py +778 -0
molSimplify/Informatics/coulomb_analyze.py +67 -0
molSimplify/Informatics/decoration_manager.py +193 -0
molSimplify/Informatics/geo_analyze.py +88 -0
molSimplify/Informatics/geometrics.py +56 -0
molSimplify/Informatics/graph_analyze.py +163 -0
molSimplify/Informatics/graph_racs.py +288 -0
molSimplify/Informatics/jupyter_vis.py +172 -0
molSimplify/Informatics/lacRACAssemble.py +2192 -0
molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
molSimplify/Informatics/misc_descriptors.py +198 -0
molSimplify/Informatics/organic_fingerprints.py +61 -0
molSimplify/Informatics/partialcharges.py +345 -0
molSimplify/Informatics/protein/activesite.py +53 -0
molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
molSimplify/Informatics/rac155_geo.py +48 -0
molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
molSimplify/Ligands/12crown4.mol +62 -0
molSimplify/Ligands/Antipyrine.mol +58 -0
molSimplify/Ligands/BPAbipy.mol +106 -0
molSimplify/Ligands/Hpyrrole.mol +26 -0
molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
molSimplify/Ligands/NMe2_-1.xyz +11 -0
molSimplify/Ligands/PCy3.mol +111 -0
molSimplify/Ligands/PMe3.xyz +15 -0
molSimplify/Ligands/PPh3.mol +76 -0
molSimplify/Ligands/Propyphenazone.mol +77 -0
molSimplify/Ligands/acac.mol +33 -0
molSimplify/Ligands/acacen.mol +76 -0
molSimplify/Ligands/acetate.smi +1 -0
molSimplify/Ligands/acetate.xyz +9 -0
molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
molSimplify/Ligands/acetonitrile.mol +17 -0
molSimplify/Ligands/alanine.mol +30 -0
molSimplify/Ligands/alphabetizer.py +21 -0
molSimplify/Ligands/amine.mol +11 -0
molSimplify/Ligands/ammonia.mol +12 -0
molSimplify/Ligands/arginine.mol +58 -0
molSimplify/Ligands/asparagine.mol +38 -0
molSimplify/Ligands/aspartic_acid.mol +35 -0
molSimplify/Ligands/azide.mol +11 -0
molSimplify/Ligands/benzene.mol +28 -0
molSimplify/Ligands/benzene_pi.mol +30 -0
molSimplify/Ligands/benzenedithiol.mol +30 -0
molSimplify/Ligands/benzenethiol.mol +30 -0
molSimplify/Ligands/benzylisocy.mol +38 -0
molSimplify/Ligands/bidiazine.mol +42 -0
molSimplify/Ligands/bidiazole.mol +38 -0
molSimplify/Ligands/bifuran.mol +38 -0
molSimplify/Ligands/bihydrodiazine.mol +58 -0
molSimplify/Ligands/bihydrodiazole.mol +46 -0
molSimplify/Ligands/bihydrooxazine.mol +54 -0
molSimplify/Ligands/bihydrooxazole.mol +42 -0
molSimplify/Ligands/bihydrothiazine.mol +54 -0
molSimplify/Ligands/bihydrothiazole.mol +42 -0
molSimplify/Ligands/biimidazole.mol +38 -0
molSimplify/Ligands/bioxazole.mol +34 -0
molSimplify/Ligands/bipy.mol +46 -0
molSimplify/Ligands/bipyrazine.xyz +20 -0
molSimplify/Ligands/bipyrimidine.mol +42 -0
molSimplify/Ligands/bipyrrole.mol +42 -0
molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
molSimplify/Ligands/bithiazole.mol +34 -0
molSimplify/Ligands/bromide.mol +7 -0
molSimplify/Ligands/bromide.smi +1 -0
molSimplify/Ligands/c2.mol +9 -0
molSimplify/Ligands/caprolactone.mol +41 -0
molSimplify/Ligands/carbonyl.mol +8 -0
molSimplify/Ligands/carboxyl.mol +13 -0
molSimplify/Ligands/cat.mol +30 -0
molSimplify/Ligands/chloride.mol +7 -0
molSimplify/Ligands/chloride.smi +1 -0
molSimplify/Ligands/chloropyridine.mol +27 -0
molSimplify/Ligands/co2.mol +10 -0
molSimplify/Ligands/corrolazine.mol +72 -0
molSimplify/Ligands/cs.mol +8 -0
molSimplify/Ligands/cyanate.xyz +5 -0
molSimplify/Ligands/cyanide.mol +9 -0
molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
molSimplify/Ligands/cyanopyridine.mol +29 -0
molSimplify/Ligands/cyclam.mol +81 -0
molSimplify/Ligands/cyclen.mol +69 -0
molSimplify/Ligands/cyclopentadienyl.mol +26 -0
molSimplify/Ligands/cysteine.mol +32 -0
molSimplify/Ligands/diaminomethyl.mol +19 -0
molSimplify/Ligands/diazine.mol +25 -0
molSimplify/Ligands/diazole.mol +23 -0
molSimplify/Ligands/dicyanamide.mol +15 -0
molSimplify/Ligands/dihydrofuran.mol +27 -0
molSimplify/Ligands/dmap.xyz +35 -0
molSimplify/Ligands/dmf.mol +28 -0
molSimplify/Ligands/dmi.mol +41 -0
molSimplify/Ligands/dmpe.mol +52 -0
molSimplify/Ligands/dpmu.mol +47 -0
molSimplify/Ligands/dppe.mol +112 -0
molSimplify/Ligands/edta.mol +69 -0
molSimplify/Ligands/en.mol +28 -0
molSimplify/Ligands/ethanethiol.mol +21 -0
molSimplify/Ligands/ethanolamine.mol +26 -0
molSimplify/Ligands/ethbipy.mol +70 -0
molSimplify/Ligands/ethyl.mol +19 -0
molSimplify/Ligands/ethylamine.mol +24 -0
molSimplify/Ligands/ethylene.mol +16 -0
molSimplify/Ligands/ethylesteracac.mol +57 -0
molSimplify/Ligands/fluoride.mol +7 -0
molSimplify/Ligands/fluoride.smi +1 -0
molSimplify/Ligands/formaldehyde.mol +12 -0
molSimplify/Ligands/formamidate.xyz +8 -0
molSimplify/Ligands/formate.xyz +6 -0
molSimplify/Ligands/furan.mol +23 -0
molSimplify/Ligands/glutamic_acid.mol +42 -0
molSimplify/Ligands/glutamine.mol +44 -0
molSimplify/Ligands/glycinate.mol +23 -0
molSimplify/Ligands/glycine.mol +24 -0
molSimplify/Ligands/h2s.mol +10 -0
molSimplify/Ligands/helium.mol +6 -0
molSimplify/Ligands/histidine.mol +45 -0
molSimplify/Ligands/hmpa.mol +62 -0
molSimplify/Ligands/hs-.mol +9 -0
molSimplify/Ligands/hydride.mol +7 -0
molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
molSimplify/Ligands/hydrocyanide.mol +10 -0
molSimplify/Ligands/hydrodiazine.mol +33 -0
molSimplify/Ligands/hydrodiazole.mol +27 -0
molSimplify/Ligands/hydrogensulfide.mol +10 -0
molSimplify/Ligands/hydroisocyanide.mol +11 -0
molSimplify/Ligands/hydrooxazine.mol +31 -0
molSimplify/Ligands/hydrooxazole.mol +25 -0
molSimplify/Ligands/hydrothiazine.mol +31 -0
molSimplify/Ligands/hydrothiazole.mol +25 -0
molSimplify/Ligands/hydroxyl.mol +9 -0
molSimplify/Ligands/imidazole.mol +23 -0
molSimplify/Ligands/imidazolidinone.mol +29 -0
molSimplify/Ligands/imine.mol +13 -0
molSimplify/Ligands/iminodiacetic.mol +33 -0
molSimplify/Ligands/iodide.mol +7 -0
molSimplify/Ligands/iodobenzene.xyz +14 -0
molSimplify/Ligands/isoleucine.mol +48 -0
molSimplify/Ligands/isothiocyanate.mol +11 -0
molSimplify/Ligands/leucine.mol +48 -0
molSimplify/Ligands/ligands.dict +257 -0
molSimplify/Ligands/lysine.mol +54 -0
molSimplify/Ligands/mebenzenedithiol.mol +36 -0
molSimplify/Ligands/mebim_py.xyz +29 -0
molSimplify/Ligands/mebim_pz.xyz +28 -0
molSimplify/Ligands/mebipy.mol +58 -0
molSimplify/Ligands/mecat.mol +36 -0
molSimplify/Ligands/methanal.mol +11 -0
molSimplify/Ligands/methanethiol.mol +15 -0
molSimplify/Ligands/methanol.mol +16 -0
molSimplify/Ligands/methionine.mol +44 -0
molSimplify/Ligands/methyl.mol +13 -0
molSimplify/Ligands/methylacetylide.xyz +8 -0
molSimplify/Ligands/methylamine.mol +19 -0
molSimplify/Ligands/methylazide.xyz +9 -0
molSimplify/Ligands/methylisocy.mol +17 -0
molSimplify/Ligands/methylpyridine.mol +33 -0
molSimplify/Ligands/n2.mol +8 -0
molSimplify/Ligands/n4py.xyz +51 -0
molSimplify/Ligands/nch.mol +10 -0
molSimplify/Ligands/nco-.mol +11 -0
molSimplify/Ligands/nethanolamine.mol +26 -0
molSimplify/Ligands/nitrate.mol +14 -0
molSimplify/Ligands/nitrite.mol +11 -0
molSimplify/Ligands/nitro.mol +11 -0
molSimplify/Ligands/nitrobipy.mol +54 -0
molSimplify/Ligands/nitroso.mol +8 -0
molSimplify/Ligands/nme3.mol +30 -0
molSimplify/Ligands/no-.mol +10 -0
molSimplify/Ligands/no2-.mol +11 -0
molSimplify/Ligands/noxygen.mol +8 -0
molSimplify/Ligands/ns-.mol +10 -0
molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
molSimplify/Ligands/o2-.mol +9 -0
molSimplify/Ligands/o2.xyz +4 -0
molSimplify/Ligands/och2.mol +12 -0
molSimplify/Ligands/oethanolamine.mol +26 -0
molSimplify/Ligands/ome2.mol +22 -0
molSimplify/Ligands/ooh.xyz +5 -0
molSimplify/Ligands/oxalate.mol +17 -0
molSimplify/Ligands/oxalate.smi +1 -0
molSimplify/Ligands/oxygen.mol +7 -0
molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
molSimplify/Ligands/ph2-.mol +11 -0
molSimplify/Ligands/ph3.mol +12 -0
molSimplify/Ligands/phen.mol +51 -0
molSimplify/Ligands/phenacac.mol +63 -0
molSimplify/Ligands/phenalalanine.mol +51 -0
molSimplify/Ligands/phendione.mol +51 -0
molSimplify/Ligands/phenphen.mol +75 -0
molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
molSimplify/Ligands/phenylcyc.mol +99 -0
molSimplify/Ligands/phenylenediamine.mol +37 -0
molSimplify/Ligands/phenylisocy.mol +32 -0
molSimplify/Ligands/phosacidbipy.mol +66 -0
molSimplify/Ligands/phosphine.mol +13 -0
molSimplify/Ligands/phosphorine.mol +27 -0
molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
molSimplify/Ligands/phthalocyanine.mol +126 -0
molSimplify/Ligands/pme3o.mol +32 -0
molSimplify/Ligands/porphyrin.mol +82 -0
molSimplify/Ligands/pph3o.mol +77 -0
molSimplify/Ligands/proline.mol +39 -0
molSimplify/Ligands/propdiol.mol +21 -0
molSimplify/Ligands/propylene.mol +23 -0
molSimplify/Ligands/pyridine.mol +27 -0
molSimplify/Ligands/pyrimidone.mol +27 -0
molSimplify/Ligands/pyrrole.mol +24 -0
molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
molSimplify/Ligands/s2-.mol +9 -0
molSimplify/Ligands/salen.mol +75 -0
molSimplify/Ligands/salphen.mol +84 -0
molSimplify/Ligands/serine.mol +32 -0
molSimplify/Ligands/simple_ligands.dict +14 -0
molSimplify/Ligands/sulfacidbipy.mol +63 -0
molSimplify/Ligands/tbucat.mol +54 -0
molSimplify/Ligands/tbuphisocy.mol +56 -0
molSimplify/Ligands/tbutylcyclen.mol +166 -0
molSimplify/Ligands/tbutylisocy.mol +35 -0
molSimplify/Ligands/tbutylthiol.mol +33 -0
molSimplify/Ligands/tcnoet.mol +43 -0
molSimplify/Ligands/tcnoetOH.mol +45 -0
molSimplify/Ligands/terpy.mol +65 -0
molSimplify/Ligands/tetrahydrofuran.mol +31 -0
molSimplify/Ligands/thiane.mol +37 -0
molSimplify/Ligands/thiazole.mol +21 -0
molSimplify/Ligands/thiocyanate.mol +11 -0
molSimplify/Ligands/thiol.mol +9 -0
molSimplify/Ligands/thiophene.mol +23 -0
molSimplify/Ligands/thiopyridine.mol +29 -0
molSimplify/Ligands/threonine.mol +38 -0
molSimplify/Ligands/tpp.mol +165 -0
molSimplify/Ligands/tricyanomethyl.mol +19 -0
molSimplify/Ligands/trifluoromethyl.mol +13 -0
molSimplify/Ligands/tryptophan.mol +60 -0
molSimplify/Ligands/tyrosine.mol +53 -0
molSimplify/Ligands/uthiol.mol +11 -0
molSimplify/Ligands/uthiolme2.mol +23 -0
molSimplify/Ligands/valine.mol +42 -0
molSimplify/Ligands/water.mol +10 -0
molSimplify/Ligands/x.mol +6 -0
molSimplify/Scripts/__init__.py +0 -0
molSimplify/Scripts/addtodb.py +308 -0
molSimplify/Scripts/cellbuilder.py +1592 -0
molSimplify/Scripts/cellbuilder_tools.py +701 -0
molSimplify/Scripts/chains.py +342 -0
molSimplify/Scripts/convert_2to3.py +23 -0
molSimplify/Scripts/dbinteract.py +631 -0
molSimplify/Scripts/distgeom.py +617 -0
molSimplify/Scripts/findcorrelations.py +287 -0
molSimplify/Scripts/generator.py +267 -0
molSimplify/Scripts/geometry.py +1224 -0
molSimplify/Scripts/grabguivars.py +845 -0
molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
molSimplify/Scripts/inparse.py +1673 -0
molSimplify/Scripts/io.py +1149 -0
molSimplify/Scripts/isomers.py +415 -0
molSimplify/Scripts/jobgen.py +247 -0
molSimplify/Scripts/krr_prep.py +1262 -0
molSimplify/Scripts/molSimplify_io.py +18 -0
molSimplify/Scripts/molden2psi4wfn.py +166 -0
molSimplify/Scripts/namegen.py +32 -0
molSimplify/Scripts/nn_prep.py +561 -0
molSimplify/Scripts/oct_check_mols.py +782 -0
molSimplify/Scripts/periodic_QE.py +97 -0
molSimplify/Scripts/postmold.py +304 -0
molSimplify/Scripts/postmwfn.py +709 -0
molSimplify/Scripts/postparse.py +488 -0
molSimplify/Scripts/postproc.py +139 -0
molSimplify/Scripts/qcgen.py +1450 -0
molSimplify/Scripts/rmsd.py +489 -0
molSimplify/Scripts/rungen.py +670 -0
molSimplify/Scripts/structgen.py +3040 -0
molSimplify/Scripts/tf_nn_prep.py +894 -0
molSimplify/Scripts/tsgen.py +295 -0
molSimplify/Scripts/uq_calibration.py +69 -0
molSimplify/__init__.py +0 -0
molSimplify/__main__.py +197 -0
molSimplify/icons/chemdb.png +0 -0
molSimplify/icons/hjklogo.png +0 -0
molSimplify/icons/icon.png +0 -0
molSimplify/icons/logo.png +0 -0
molSimplify/icons/logo_old.png +0 -0
molSimplify/icons/petachem.png +0 -0
molSimplify/icons/petachem2.png +0 -0
molSimplify/icons/petachem_full.png +0 -0
molSimplify/icons/pythonlogo.png +0 -0
molSimplify/icons/sge copy.png +0 -0
molSimplify/icons/sge.png +0 -0
molSimplify/icons/slurm.png +0 -0
molSimplify/icons/wft1.png +0 -0
molSimplify/icons/wft2.png +0 -0
molSimplify/icons/wft3.png +0 -0
molSimplify/ml/__init__.py +0 -0
molSimplify/ml/kernels.py +36 -0
molSimplify/ml/layers.py +29 -0
molSimplify/molscontrol/__init__.py +14 -0
molSimplify/molscontrol/_version.py +521 -0
molSimplify/molscontrol/clf_tools.py +144 -0
molSimplify/molscontrol/data/README.md +21 -0
molSimplify/molscontrol/data/look_and_say.dat +15 -0
molSimplify/molscontrol/dynamic_classifier.py +514 -0
molSimplify/molscontrol/io_tools.py +363 -0
molSimplify/molscontrol/molscontrol.py +49 -0
molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
molSimplify/molscontrol/terachem/terachem_input +22 -0
molSimplify/python_krr/X_train_TS.csv +535 -0
molSimplify/python_krr/__init__.py +0 -0
molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
molSimplify/python_krr/hat2_feature_names.csv +1 -0
molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
molSimplify/python_krr/hat_X_mean_std.csv +6 -0
molSimplify/python_krr/hat_feature_names.csv +1 -0
molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
molSimplify/python_krr/hat_y_mean_std.csv +2 -0
molSimplify/python_krr/sklearn_models.py +34 -0
molSimplify/python_krr/y_train_TS.csv +535 -0
molSimplify/python_nn/ANN.py +198 -0
molSimplify/python_nn/__init__.py +0 -0
molSimplify/python_nn/clf_analysis_tool.py +125 -0
molSimplify/python_nn/dictionary_toolbox.py +49 -0
molSimplify/python_nn/ensemble_test.py +309 -0
molSimplify/python_nn/hs_center.csv +26 -0
molSimplify/python_nn/hs_scale.csv +26 -0
molSimplify/python_nn/ls_center.csv +26 -0
molSimplify/python_nn/ls_scale.csv +26 -0
molSimplify/python_nn/ms_hs_b1.csv +50 -0
molSimplify/python_nn/ms_hs_b2.csv +50 -0
molSimplify/python_nn/ms_hs_b3.csv +1 -0
molSimplify/python_nn/ms_hs_w1.csv +50 -0
molSimplify/python_nn/ms_hs_w2.csv +50 -0
molSimplify/python_nn/ms_hs_w3.csv +1 -0
molSimplify/python_nn/ms_ls_b1.csv +50 -0
molSimplify/python_nn/ms_ls_b2.csv +50 -0
molSimplify/python_nn/ms_ls_b3.csv +1 -0
molSimplify/python_nn/ms_ls_w1.csv +50 -0
molSimplify/python_nn/ms_ls_w2.csv +50 -0
molSimplify/python_nn/ms_ls_w3.csv +1 -0
molSimplify/python_nn/ms_slope_b1.csv +50 -0
molSimplify/python_nn/ms_slope_b2.csv +50 -0
molSimplify/python_nn/ms_slope_b3.csv +1 -0
molSimplify/python_nn/ms_slope_w1.csv +50 -0
molSimplify/python_nn/ms_slope_w2.csv +50 -0
molSimplify/python_nn/ms_slope_w3.csv +1 -0
molSimplify/python_nn/ms_split_b1.csv +50 -0
molSimplify/python_nn/ms_split_b2.csv +50 -0
molSimplify/python_nn/ms_split_b3.csv +1 -0
molSimplify/python_nn/ms_split_w1.csv +50 -0
molSimplify/python_nn/ms_split_w2.csv +50 -0
molSimplify/python_nn/ms_split_w3.csv +1 -0
molSimplify/python_nn/slope_center.csv +25 -0
molSimplify/python_nn/slope_scale.csv +25 -0
molSimplify/python_nn/split_center.csv +26 -0
molSimplify/python_nn/split_scale.csv +26 -0
molSimplify/python_nn/tf_ANN.py +762 -0
molSimplify/python_nn/train_data.csv +1211 -0
molSimplify/tf_nn/__init__.py +0 -0
molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
molSimplify/tf_nn/homolumo/gap_model.json +1 -0
molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
molSimplify/tf_nn/homolumo/homo_model.json +126 -0
molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
molSimplify/tf_nn/split/split_model.h5 +0 -0
molSimplify/tf_nn/split/split_model.json +1 -0
molSimplify/tf_nn/split/split_vars.csv +155 -0
molSimplify/tf_nn/split/split_x.csv +1902 -0
molSimplify/tf_nn/split/split_y.csv +1902 -0
molSimplify/tf_nn/split/train_names.csv +1901 -0
molSimplify/utils/__init__.py +0 -0
molSimplify/utils/decorators.py +16 -0
molSimplify/utils/metaclasses.py +12 -0
molSimplify/utils/tensorflow.py +23 -0
molSimplify/utils/timer.py +16 -0
molSimplify-1.7.4.dist-info/LICENSE +674 -0
molSimplify-1.7.4.dist-info/METADATA +821 -0
molSimplify-1.7.4.dist-info/RECORD +651 -0
molSimplify-1.7.4.dist-info/WHEEL +5 -0
molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
molSimplify-1.7.4.dist-info/top_level.txt +4 -0
tests/generateTests.py +122 -0
tests/helperFuncs.py +658 -0
tests/informatics/test_MOF_descriptors.py +128 -0
tests/informatics/test_active_learning.py +113 -0
tests/informatics/test_coulomb_analyze.py +24 -0
tests/informatics/test_graph_racs.py +193 -0
tests/ml/test_kernels.py +20 -0
tests/ml/test_layers.py +47 -0
tests/runtest.py +10 -0
tests/test_Mol2D.py +128 -0
tests/test_basic_imports.py +62 -0
tests/test_bidentate.py +25 -0
tests/test_cli.py +20 -0
tests/test_distgeom.py +106 -0
tests/test_example_1.py +29 -0
tests/test_example_3.py +31 -0
tests/test_example_5.py +43 -0
tests/test_example_7.py +28 -0
tests/test_example_8.py +15 -0
tests/test_example_tbp.py +15 -0
tests/test_ff_xtb.py +111 -0
tests/test_geocheck_oct.py +26 -0
tests/test_geocheck_one_empty.py +15 -0
tests/test_geometry.py +44 -0
tests/test_inparse.py +76 -0
tests/test_io.py +84 -0
tests/test_jobgen.py +84 -0
tests/test_joption_pythonic.py +27 -0
tests/test_ligand_assign.py +58 -0
tests/test_ligand_assign_consistent.py +60 -0
tests/test_ligand_class.py +26 -0
tests/test_ligand_from_mol_file.py +35 -0
tests/test_ligands.py +86 -0
tests/test_mol3D.py +337 -0
tests/test_molcas_caspt2.py +15 -0
tests/test_molcas_casscf.py +15 -0
tests/test_old_ANNs.py +68 -0
tests/test_orca_ccsdt.py +15 -0
tests/test_orca_dft.py +15 -0
tests/test_qcgen.py +50 -0
tests/test_racs.py +124 -0
tests/test_rmsd.py +68 -0
tests/test_structgen_functions.py +198 -0
tests/test_tetrahedral.py +29 -0
tests/test_tutorial_10_part_one.py +16 -0
tests/test_tutorial_10_part_two.py +15 -0
tests/test_tutorial_2.py +11 -0
tests/test_tutorial_3.py +15 -0
tests/test_tutorial_4.py +57 -0
tests/test_tutorial_6.py +10 -0
tests/test_tutorial_8.py +29 -0
tests/test_tutorial_9_part_one.py +15 -0
tests/test_tutorial_9_part_two.py +15 -0
tests/test_tutorial_qm9_part_one.py +6 -0
tests/testresources/refs/racs/generate_references.py +85 -0
workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
workflows/NandyJACSAu2022/fragment_classes.py +586 -0
workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0

molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py ADDED Viewed

@@ -0,0 +1,443 @@
+import os
+import numpy as np
+import pandas as pd
+import argparse
+from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import LeaveOneOut
+'''
+This script takes in an absolute path to a CSV file that has
+complexes labeled, as well as exchange fractions. It then
+takes those values and determines if the behavior is linear or
+not. If the behavior is linear, it calculates the sensitivity. If not,
+then it gives a reason for not computing it and logs that reason.
+The script relies on raw data with one column labeled "name", which
+identifies each complex (ligand field), plus one energy column per
+reaction step and HFX percentage labeled "<step>_<alpha>", where <step>
+is one of oxo, hat, reb, or rel and <alpha> is one of 0, 5, 10, 15, 20,
+25, or 30.
+With only the required --data argument, the script measures the
+sensitivity of these reaction energies with an LOOCV cutoff of
+5 kcal/mol, requiring at least 4 points, and prioritizing lines with
+R2 of 0.99.
+'''
+def measure_sensitivity(path_to_csv, path_to_write=False, R2_cutoff=0.99, CV_tolerance=5, num_points=4):
+    """Filter each complex's energy-vs-HFX points and tabulate linear sensitivities.
+
+    For every reaction tag in ('oxo', 'hat', 'reb', 'rel') and every row of
+    the input CSV, the non-NaN energies found in the columns
+    '<tag>_<alpha>' (alpha in 0, 5, ..., 30) are fitted against alpha. Rows
+    are kept, pruned point by point, or discarded as a whole according to
+    the checks below, and the outcome is written to four CSV files per tag.
+
+    Parameters:
+        path_to_csv: Path to the raw-data CSV. A path that does not start
+            with '/' is taken relative to the current working directory.
+        path_to_write: Output location. When False, a default is derived
+            from the CSV file name. NOTE(review): the value is never read
+            after being computed; all files are written under '<tag>/'
+            relative paths instead.
+        R2_cutoff: R2 threshold at or above which a fitted line is accepted
+            without further pruning.
+        CV_tolerance: Threshold handed to CV_check for the leave-one-out
+            prediction error of a single point.
+        num_points: Minimum number of points a line must retain.
+
+    Returns:
+        None. Results are written to '<tag>/kept_grouped_<tag>.csv',
+        '<tag>/elim_grouped_<tag>.csv', '<tag>/<tag>_kept.csv' and
+        '<tag>/<tag>_discarded.csv'. The '<tag>/' directories are not
+        created by this function.
+    """
+    if path_to_csv[0] != '/':
+        path_to_csv = os.getcwd()+'/'+path_to_csv
+    if path_to_write is False:
+        path_to_write = os.getcwd()+'/'+os.path.split(path_to_csv)[1].replace('.csv', '')
+    raw_data = pd.read_csv(path_to_csv)
+    # Each row is treated as one ligand field, identified by its 'name' value.
+    raw_data = raw_data.sort_values(by=['name'])
+    # Any column whose label contains one of the four reaction tags is cast to float.
+    energy_columns = [val for val in raw_data.columns.values if 'oxo' in val or 'hat' in val or 'reb' in val or 'rel' in val]
+    raw_data[energy_columns] = raw_data[energy_columns].astype(float)
+    # ## This loops over unique ligand fields. Here, we keep track of things
+    # ## by compiling two lists. One is data that is kept and turned into a
+    # ## sensitivity. The other is any point that is eliminated. We log eliminations
+    # ## into two categories. The first is 'whole', which means that the whole
+    # ## ligand field is eliminated. The second is 'point', which means a single
+    # ## point was removed from the data point before measuring sensitivity.
+    for rxn_energy in ['oxo', 'hat', 'reb', 'rel']:
+        kept_dict_list = []
+        removed_dict_list = []
+        # NOTE(review): flag is assigned here but never read in this function.
+        flag = False
+        for i, row in raw_data.iterrows():
+            # Collect the (alpha, energy) pairs that are present (not NaN) for this row.
+            kept_alpha_values = []
+            kept_rxn_energies = []
+            for alpha_val in [0, 5, 10, 15, 20, 25, 30]:
+                if not np.isnan(row[rxn_energy+'_'+str(alpha_val)]):
+                    kept_alpha_values.append(alpha_val)
+                    kept_rxn_energies.append(row[rxn_energy+'_'+str(alpha_val)])
+            # R2 stays None for rows that are rejected before any fit is made.
+            R2 = None
+            # Both series are stored as column vectors of shape (n, 1).
+            kept_alpha_values = np.squeeze(np.array(kept_alpha_values)).reshape(-1, 1)
+            kept_rxn_energies = np.squeeze(np.array(kept_rxn_energies)).reshape(-1, 1)
+            print(len(kept_alpha_values), kept_alpha_values)
+            # ### First, we check if there are enough points. If not, we discard.
+            if len(kept_alpha_values) < num_points:
+                for alpha, prop_val in zip(kept_alpha_values, kept_rxn_energies):
+                    print(kept_alpha_values, kept_rxn_energies)
+                    removed_dict_list.append({'name': row['name'],
+                                              'alpha': alpha[0],
+                                              str(rxn_energy): prop_val[0],
+                                              'reason': 'not_enough_points_to_start',
+                                              'elim_type': 'whole', 'R2': R2})
+                continue
+            # #### Next, we fit a line through the data points and check its R2 #####
+            R2, reg = measure_R2(kept_alpha_values.reshape(-1, 1), kept_rxn_energies.reshape(-1, 1))
+            print('===R2', R2)
+            # #### If the R2 value is above the cutoff, we keep the data and do not process further #####
+            if R2 >= R2_cutoff:
+                for alpha, prop_val in zip(kept_alpha_values, kept_rxn_energies):
+                    print(alpha, prop_val)
+                    temp_dict = {'name': row['name'],
+                                 'alpha': alpha[0],
+                                 str(rxn_energy): prop_val[0],
+                                 'R2': R2,
+                                 'sensitivity': float(reg.coef_)}
+                    kept_dict_list.append(temp_dict)
+                continue
+            else:
+                # #### Next, we check for any points lying off of the line that can fix the line by removal of that point.
+                # #### This check checks to see whether the removal of a single point results in the R2 test
+                # #### being passed, or whether that point exceeds a heuristic cutoff.
+                kept_points_X, kept_points_y, R2_removed_list, new_R2, new_reg = R2_upon_elimination(kept_alpha_values, kept_rxn_energies, name=row['name'], prop=rxn_energy, R2_cutoff=R2_cutoff, num_points=num_points)
+                if new_R2 >= R2_cutoff:
+                    # If removal of the point leads to the R2 test being passed, we stop processing.
+                    for alpha, prop_val in zip(kept_points_X, kept_points_y):
+                        kept_dict_list.append({'name': row['name'],
+                                               'alpha': alpha[0],
+                                               str(rxn_energy): prop_val[0],
+                                               'R2': new_R2,
+                                               'sensitivity': float(new_reg.coef_)})
+                    removed_dict_list += R2_removed_list
+                    continue
+                # No single removal rescued the fit: try dropping one LOOCV outlier from the full set.
+                kept_points_X, kept_points_y, CV_removed_list = CV_check(kept_alpha_values, kept_rxn_energies, name=row['name'], prop=rxn_energy, CV_tolerance=CV_tolerance, num_points=num_points)
+                previous = len(kept_rxn_energies)
+                # Alternate the R2 rescue and the LOOCV pruning until the line passes,
+                # the point count stops changing, or too few points remain.
+                while len(kept_points_X) >= num_points:
+                    now = len(kept_points_X)
+                    if now == previous:
+                        break
+                    kept_points_X, kept_points_y, R2_removed_list, new_R2, new_reg = R2_upon_elimination(kept_points_X, kept_points_y, name=row['name'], prop=rxn_energy, R2_cutoff=R2_cutoff, num_points=num_points)
+                    if new_R2 >= R2_cutoff:
+                        break
+                    print("Before:", kept_points_X, kept_points_y)
+                    kept_points_X, kept_points_y, new_removed_list = CV_check(kept_points_X, kept_points_y, name=row['name'], prop=rxn_energy, CV_tolerance=CV_tolerance, num_points=num_points)
+                    print("After:", kept_points_X, kept_points_y)
+                    previous = len(kept_points_X)
+                    CV_removed_list += new_removed_list
+                    CV_removed_list += R2_removed_list
+                # #### Next, we make sure the removal of the point allows us to have enough points. If not, we discard.
+                if len(kept_points_X) < num_points:
+                    for alpha, prop_val in zip(kept_alpha_values, kept_rxn_energies):
+                        removed_dict_list.append({'name': row['name'],
+                                                  'alpha': alpha[0],
+                                                  str(rxn_energy): prop_val[0],
+                                                  'reason': 'CV_resulted_in_not_enough_points',
+                                                  'elim_type': 'whole',
+                                                  'R2': R2})
+                    continue
+                else:
+                    # #### Next, we check the R2 again to see if the new points result in a better R2.
+                    if new_R2 >= R2_cutoff:
+                        for alpha, prop_val in zip(kept_points_X, kept_points_y):
+                            print(alpha, prop_val)
+                            kept_dict_list.append({'name': row['name'],
+                                                   'alpha': alpha[0],
+                                                   str(rxn_energy): prop_val[0],
+                                                   'R2': new_R2,
+                                                   'sensitivity': float(new_reg.coef_)})
+                        removed_dict_list += CV_removed_list
+                        continue
+                    else:
+                        if rxn_energy == 'oxo' or rxn_energy == 'hat' or rxn_energy == 'reb':
+                            # #### If it does not meet the R2 check, we check the sign of the slopes.
+                            kept_points_X, kept_points_y, slope_removed = slope_sign_check(kept_points_X, kept_points_y, name=row['name'], prop=rxn_energy, num_points=num_points)
+                            if len(kept_points_X) < num_points:
+                                # The whole line is rejected; the R2 reported is that of the original, unpruned points.
+                                final_R2, reg_final = measure_R2(kept_alpha_values.reshape(-1, 1), kept_rxn_energies.reshape(-1, 1))
+                                for alpha, prop_val in zip(kept_alpha_values, kept_rxn_energies):
+                                    removed_dict_list.append({'name': row['name'],
+                                                              'alpha': alpha[0],
+                                                              str(rxn_energy): prop_val[0],
+                                                              'reason': 'failed_sign_change_slope_check',
+                                                              'elim_type': 'whole',
+                                                              'R2': final_R2})
+                                continue
+                            else:
+                                # If we have enough points, we check the R2, and then repeat the outlier check if the line can be saved.
+                                kept_R2, kept_reg = measure_R2(kept_points_X.reshape(-1, 1), kept_points_y.reshape(-1, 1))
+                                if kept_R2 >= R2_cutoff:
+                                    for alpha, prop_val in zip(kept_points_X, kept_points_y):
+                                        kept_dict_list.append({'name': row['name'],
+                                                               'alpha': alpha[0],
+                                                               str(rxn_energy): prop_val[0],
+                                                               'R2': kept_R2,
+                                                               'sensitivity': float(kept_reg.coef_)})
+                                    removed_dict_list += slope_removed
+                                    continue
+                                else:
+                                    kept_points_X, kept_points_y, R2_removed_list, new_R2, new_reg = R2_upon_elimination(kept_points_X, kept_points_y, name=row['name'], prop=rxn_energy, R2_cutoff=R2_cutoff, num_points=num_points)
+                                    if new_R2 >= R2_cutoff:
+                                        # If removal of the point leads to the R2 test being passed, we stop processing.
+                                        for alpha, prop_val in zip(kept_points_X, kept_points_y):
+                                            kept_dict_list.append({'name': row['name'],
+                                                                   'alpha': alpha[0],
+                                                                   str(rxn_energy): prop_val[0],
+                                                                   'R2': new_R2,
+                                                                   'sensitivity': float(new_reg.coef_)})
+                                        removed_dict_list += R2_removed_list
+                                        continue
+                                    kept_points_X, kept_points_y, CV_removed_list = CV_check(kept_points_X, kept_points_y, name=row['name'], prop=rxn_energy, CV_tolerance=CV_tolerance, num_points=num_points)
+                                    # Remember the points as they stand after this CV_check; they are
+                                    # restored below if further pruning leaves fewer than num_points.
+                                    backup_X = kept_points_X[:]
+                                    backup_y = kept_points_y[:]
+                                    # Larger than any possible point count, so the first pass never stops on 'now == previous'.
+                                    previous = 10000
+                                    while len(kept_points_X) >= num_points:
+                                        now = len(kept_points_X)
+                                        if now == previous:
+                                            break
+                                        kept_points_X, kept_points_y, R2_removed_list, new_R2, new_reg = R2_upon_elimination(kept_points_X, kept_points_y, name=row['name'], prop=rxn_energy, R2_cutoff=R2_cutoff, num_points=num_points)
+                                        if new_R2 >= R2_cutoff:
+                                            break
+                                        kept_points_X, kept_points_y, new_removed_list = CV_check(kept_points_X, kept_points_y, name=row['name'], prop=rxn_energy, CV_tolerance=CV_tolerance, num_points=num_points)
+                                        previous = len(kept_points_X)
+                                        CV_removed_list += new_removed_list
+                                        CV_removed_list += R2_removed_list
+                                    if len(kept_points_X) < num_points:
+                                        kept_points_X = backup_X
+                                        kept_points_y = backup_y
+                                    else:
+                                        removed_dict_list += CV_removed_list
+                                    # The line is kept on this path whatever its final R2 is.
+                                    # NOTE(review): slope_removed is not added to removed_dict_list here.
+                                    R2, reg = measure_R2(kept_points_X.reshape(-1, 1), kept_points_y.reshape(-1, 1))
+                                    for alpha, prop_val in zip(kept_points_X, kept_points_y):
+                                        temp_dict = {'name': row['name'],
+                                                     'alpha': alpha[0],
+                                                     str(rxn_energy): prop_val[0],
+                                                     'R2': R2,
+                                                     'sensitivity': float(reg.coef_)}
+                                        kept_dict_list.append(temp_dict)
+                                    continue
+                        elif rxn_energy == 'rel':  # For release step, we do not do a slope-sign check since the senstivities are centered around zero
+                            # #### If it does not meet the R2 check, we keep the data anyway.
+                            # NOTE(review): CV_removed_list is not added to removed_dict_list on this path.
+                            R2, reg = measure_R2(kept_points_X.reshape(-1, 1), kept_points_y.reshape(-1, 1))
+                            for alpha, prop_val in zip(kept_points_X, kept_points_y):
+                                temp_dict = {'name': row['name'],
+                                             'alpha': alpha[0],
+                                             str(rxn_energy): prop_val[0],
+                                             'R2': R2,
+                                             'sensitivity': float(reg.coef_)}
+                                kept_dict_list.append(temp_dict)
+                            continue
+        # ### Now we write all of our processed data to a dataframe.
+        kept_data = pd.DataFrame(kept_dict_list)
+        print(kept_data)
+        kept_data = kept_data[['name', 'alpha', str(rxn_energy), 'R2', 'sensitivity']]
+        kept_data = kept_data.sort_values(by=['R2', 'name', 'alpha'])
+        # Pivot the kept points into one row per complex with one column per alpha value.
+        group_dict_list = []
+        for i, group in kept_data.groupby('name'):
+            alphas = group['alpha'].tolist()
+            energies = group[str(rxn_energy)].tolist()
+            group_dict = {}
+            group_dict['complex'] = i
+            for j, val in enumerate([0, 5, 10, 15, 20, 25, 30]):
+                if val not in alphas:
+                    group_dict[val] = np.nan
+                else:
+                    idx = alphas.index(val)
+                    group_dict[val] = energies[idx]
+            # The fitted slope is scaled by 100 in the grouped table only.
+            group_dict['sensitivity'] = group['sensitivity'].values[0]*100
+            group_dict['R2'] = group['R2'].values[0]
+            group_dict_list.append(group_dict)
+        grouped_df = pd.DataFrame(group_dict_list)
+        grouped_df = grouped_df[['complex', 0, 5, 10, 15, 20, 25, 30, 'R2', 'sensitivity']]
+        grouped_df.to_csv(rxn_energy+'/kept_grouped_'+str(rxn_energy)+'.csv', index=False)
+        thrown_data = pd.DataFrame(removed_dict_list)
+        thrown_data = thrown_data[['name', 'alpha', str(rxn_energy), 'reason', 'elim_type', 'R2']]
+        thrown_data = thrown_data.sort_values(by=['name', 'alpha'])
+        # Same pivot for the discarded points, with the elimination reason stored per alpha value.
+        group_dict_list = []
+        for i, group in thrown_data.groupby('name'):
+            alphas = group['alpha'].tolist()
+            energies = group[str(rxn_energy)].tolist()
+            reasons = group['reason'].tolist()
+            group_dict = {}
+            group_dict['complex'] = i
+            for j, val in enumerate([0, 5, 10, 15, 20, 25, 30]):
+                if val not in alphas:
+                    group_dict[val] = np.nan
+                    group_dict[str(val)+'_elim'] = np.nan
+                else:
+                    idx = alphas.index(val)
+                    group_dict[val] = energies[idx]
+                    group_dict[str(val)+'_elim'] = reasons[idx]
+            group_dict['R2'] = group['R2'].values[0]
+            group_dict_list.append(group_dict)
+        grouped_df = pd.DataFrame(group_dict_list)
+        grouped_df = grouped_df[['complex', 0, 5, 10, 15, 20, 25, 30, '0_elim', '5_elim', '10_elim', '15_elim', '20_elim', '25_elim', '30_elim', 'R2']]
+        grouped_df.to_csv(rxn_energy+'/'+'elim_grouped_'+str(rxn_energy)+'.csv', index=False)
+        # '<name>_<alpha>' keys are used to compare the kept and discarded sets.
+        kept_data['combined'] = kept_data['name']+'_'+kept_data['alpha'].astype(str)
+        print(thrown_data[['name', 'alpha']])
+        thrown_data['combined'] = thrown_data['name']+'_'+thrown_data['alpha'].astype(str)
+        # ### No points that are thrown away should also be kept.
+        print('sanity check', set(kept_data['combined']).intersection(set(thrown_data['combined'])))
+        # ### Report how many ligand fields there were to start with.
+        print(str(len(set(raw_data['name'])))+' POSSIBLE sensitivities.')
+        # ### Check how many ligand fields end up being kept.
+        print(str(len(set(kept_data['name'])))+' FINAL calculated sensitivities.')
+        # ### Check how many whole lines are thrown out.
+        whole = thrown_data[thrown_data['elim_type'] == 'whole']
+        point = thrown_data[thrown_data['elim_type'] == 'point']
+        print('ELIMINATED '+str(len(set(whole['name'])))+' WHOLE ligand fields.')
+        print('SAVED '+str(len(set(point['name'])))+' ligand fields by eliminating a point or two.')
+        # ### Check how many ligand fields have something thrown out.
+        print(str(len(set(thrown_data['name'])))+' ligand fields with something removed.')
+        # ### Write all the data to CSVs.
+        kept_data.to_csv(rxn_energy+'/'+(rxn_energy)+'_kept.csv')
+        thrown_data.to_csv(rxn_energy+'/'+(rxn_energy)+'_discarded.csv')
+def measure_R2(X, y):
+    """Fit an ordinary least-squares line to (X, y) and score it on the same data.
+
+    Returns:
+        A tuple (R2, reg): the coefficient of determination of the fit on
+        (X, y) and the fitted LinearRegression instance.
+    """
+    model = LinearRegression().fit(X, y)
+    return model.score(X, y), model
+def CV_check(X, y, name, prop, CV_tolerance, num_points):
+    """Drop the first point whose leave-one-out prediction error exceeds the tolerance.
+
+    Each point is held out in turn, a line is fitted to the remaining points,
+    and the held-out value is predicted. As soon as one absolute prediction
+    error is larger than CV_tolerance, that point is removed and the search
+    stops; later points are not examined.
+
+    Returns:
+        A tuple (kept_X, kept_y, removed): the retained points and a list
+        holding at most one record describing the removed point. When no
+        point exceeds the tolerance, X and y are returned unchanged with an
+        empty list.
+    """
+    fallback_X, fallback_y = False, False
+    dropped = []
+    # #### Perform LOOCV on the data with cutoffs provided #####
+    print(X, y, "X,y received")
+    for fit_idx, held_idx in LeaveOneOut().split(X):
+        print("Train and test indices: " + str(fit_idx) + "," + str(held_idx))
+        fit_X, fit_y = X[fit_idx], y[fit_idx]
+        held_X, held_y = X[held_idx], y[held_idx]
+        # The R2 of the reduced fit is only recorded in the removal entry; it drives no decision.
+        R2, model = measure_R2(fit_X.reshape(-1, 1), fit_y.reshape(-1, 1))
+        residual = held_y - model.predict(held_X.reshape(-1, 1))
+        if abs(residual) > CV_tolerance:
+            print("greater than CV cutoff")
+            record = {'name': name,
+                      'alpha': int(np.squeeze(held_X)),
+                      str(prop): float(np.squeeze(held_y)),
+                      'reason': 'point_had_LOOCV_greater_than_cutoff',
+                      'elim_type': 'point',
+                      'R2': R2}
+            dropped.append(record)
+            return fit_X, fit_y, dropped
+        # Nothing rejected so far: the full input is the current answer.
+        fallback_X, fallback_y = X, y
+    return fallback_X, fallback_y, dropped
+def R2_upon_elimination(X, y, name, prop, R2_cutoff, num_points):
+    """Look for a single point whose removal lifts the linear fit to the R2 cutoff.
+
+    Each point is left out in turn and a line is fitted to the rest. The
+    first reduced set that reaches R2_cutoff while still holding at least
+    num_points points is accepted; later candidates are not examined.
+
+    Parameters:
+        X, y: Arrays of predictor and response values, indexed along axis 0.
+        name: Identifier stored in the removal record. A value that int()
+            accepts is stored (and printed) as an int; anything else is
+            stored unchanged.
+        prop: Label used as the key of the removed response value.
+        R2_cutoff: R2 a reduced fit must reach to be accepted.
+        num_points: Minimum size of an accepted reduced set.
+
+    Returns:
+        A tuple (kept_X, kept_y, removed, R2, reg). On success these are the
+        reduced points, a one-element list describing the removed point, and
+        the R2 and regressor of the reduced fit. Otherwise they are X, y, an
+        empty list, and the R2 and regressor of the fit to all points.
+    """
+    full_R2, full_reg = measure_R2(X.reshape(-1, 1), y.reshape(-1, 1))
+    # #### Perform LOOCV on the data with cutoffs provided #####
+    print(X, y, "X,y received")
+    for train_index, test_index in LeaveOneOut().split(X):
+        print("Train and test indices: " + str(train_index) + "," + str(test_index))
+        train_X, test_X = X[train_index], X[test_index]
+        train_y, test_y = y[train_index], y[test_index]
+        # #### Fit the training data with a model and check its R2 #####
+        R2, reg = measure_R2(train_X.reshape(-1, 1), train_y.reshape(-1, 1))
+        print(R2, train_X, train_y)
+        if R2 < R2_cutoff or len(train_X) < num_points:
+            continue
+        # #### Eliminating this single point makes the fit pass, so keep that change.
+        print("In the 'if' statement")
+        try:
+            name = int(name)
+        except (TypeError, ValueError, OverflowError):
+            # Non-numeric identifiers are recorded as given. Only the errors
+            # int() raises are caught, so KeyboardInterrupt and SystemExit
+            # are no longer swallowed here.
+            pass
+        else:
+            print(name)
+        removed = [{'name': name,
+                    'alpha': int(np.squeeze(test_X)),
+                    str(prop): float(np.squeeze(test_y)),
+                    'reason': 'eliminating_point_led_to_R2_pass',
+                    'elim_type': 'point',
+                    'R2': R2}]
+        return train_X, train_y, removed, R2, reg
+    # No single removal was enough: report the untouched data and the original fit.
+    return X, y, [], full_R2, full_reg
+def slope_sign_check(X, y, name, prop, num_points):
+    """Trim one end of a line when the slope between neighbouring points changes sign.
+
+    The slope of every pair of consecutive points is computed. Depending on
+    how many slopes are negative or non-negative and where the sign changes
+    fall, the line is either rejected outright or cut at a sign change,
+    keeping the points after an early change or before a late one.
+
+    Parameters:
+        X, y: Arrays of predictor and response values, indexed along axis 0.
+        name: Identifier stored in each removal record.
+        prop: Label used as the key of the removed response values.
+        num_points: Minimum number of points the caller requires; used in
+            the rejection criteria.
+
+    Returns:
+        A tuple (kept_X, kept_y, removed). When the line is rejected all
+        three are empty lists. Otherwise kept_X/kept_y are the retained
+        points (X and y themselves if nothing was cut) and removed lists one
+        record per trimmed point.
+    """
+    # Slope of each consecutive pair of points, taken from a two-point linear fit.
+    coef_list = []
+    for i in range(len(X)-1):
+        reg = LinearRegression()
+        reg.fit(X[i:i+2].reshape(-1, 1), y[i:i+2].reshape(-1, 1))
+        coef_list.append(float(np.squeeze(reg.coef_)))
+    if not coef_list:
+        # Fewer than two points: there is no slope to inspect. Negative and
+        # non-negative counts are both zero, which the criteria below treat
+        # as a rejection, so reject instead of failing on signchange[0].
+        return [], [], []
+    neg_count = sum(1 for coef in coef_list if coef < 0)
+    pos_count = sum(1 for coef in coef_list if coef >= 0)
+    # signchange[k] is 1 when slope k has a different sign than slope k-1.
+    signs = np.sign(coef_list)
+    signchange = ((np.roll(signs, 1) - signs) != 0).astype(int)
+    # np.roll wraps around, so position 0 compares against the last slope; ignore it.
+    signchange[0] = 0
+    # Positions of the sign changes as a fraction of the number of slopes.
+    signchange_list = np.where(signchange == 1)[0]/float(len(signchange))
+    first_half, second_half = np.array_split(signchange, 2)
+    num_changes_first = np.sum(first_half)
+    num_changes_second = np.sum(second_half)
+    sign_flag = signchange_list[0] if len(signchange_list) else 0
+    diff_points = abs(neg_count-pos_count)
+    minority = min(neg_count, pos_count)
+    if ((neg_count == pos_count) or (diff_points >= 1 and len(X) < num_points) or
+            ((len(X)-num_points-minority-1) < 0 and (not minority <= 1)) or
+            ((sign_flag > 0.4) and (sign_flag < 0.6)) or
+            (num_changes_first > 0 and num_changes_second > 0)):
+        # The line cannot be repaired by trimming: reject it as a whole.
+        return [], [], []
+    kept_points_X, kept_points_y = X, y
+    elim_points_X, elim_points_y = [], []
+    for i in range(len(coef_list)-1):
+        if np.sign(coef_list[i]) != np.sign(coef_list[i+1]):
+            # If several sign changes exist, the last one visited decides the cut.
+            if float(i) / len(coef_list) < 0.5:
+                kept_points_X, kept_points_y = X[i+1:], y[i+1:]
+                elim_points_X, elim_points_y = X[0:i+1], y[0:i+1]
+            else:
+                kept_points_X, kept_points_y = X[0:i+1], y[0:i+1]
+                elim_points_X, elim_points_y = X[i+1:], y[i+1:]
+    removed_dict_list = [{'name': name,
+                          'alpha': int(np.squeeze(x_val)),
+                          str(prop): float(np.squeeze(y_val)),
+                          'reason': 'identified_slope_sign_change_that_can_be_fixed',
+                          'elim_type': 'point',
+                          'R2': np.nan}
+                         for x_val, y_val in zip(elim_points_X, elim_points_y)]
+    return kept_points_X, kept_points_y, removed_dict_list
+def main():
+    """Parse the command-line options and run measure_sensitivity with them.
+
+    Options: --data (required), --writepath, --R2, --cutoff and
+    --num_points. The parsed namespace is printed before processing starts.
+    """
+    parser = argparse.ArgumentParser(description='Script to process some sensitivity data.')
+    parser.add_argument('--data', dest='path_to_csv', type=str, required=True,
+                        help='Path to CSV containing raw data.')
+    parser.add_argument('--writepath', dest='path_to_write', type=str, default=False,
+                        help='Path to dump processed data. Defaults to dumping in script directory.')
+    parser.add_argument('--R2', dest='R2_cutoff', type=float, default=0.99,
+                        help='R2 check cutoff value for linearity. Default is 0.99.')
+    # Parsed as float so that fractional tolerances (e.g. 2.5) are accepted;
+    # whole-number input such as "5" still works.
+    parser.add_argument('--cutoff', dest='CV_tolerance', type=float, default=5,
+                        help='Heuristic cutoff for eliminating outliers. Defaults to 5 for SSE.')
+    parser.add_argument('--num_points', dest='num_points', type=int, default=4,
+                        help='Minimum number of points to form the HFX line. Defaults to 4.')
+    args = parser.parse_args()
+    print(args)
+    measure_sensitivity(args.path_to_csv, args.path_to_write, args.R2_cutoff, args.CV_tolerance, args.num_points)
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == "__main__":
+    main()