molSimplify 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +224 -0
- molSimplify/Classes/__init__.py +6 -0
- molSimplify/Classes/atom3D.py +235 -0
- molSimplify/Classes/dft_obs.py +130 -0
- molSimplify/Classes/globalvars.py +827 -0
- molSimplify/Classes/helpers.py +161 -0
- molSimplify/Classes/ligand.py +2330 -0
- molSimplify/Classes/mGUI.py +2493 -0
- molSimplify/Classes/mWidgets.py +438 -0
- molSimplify/Classes/miniGUI.py +41 -0
- molSimplify/Classes/mol2D.py +260 -0
- molSimplify/Classes/mol3D.py +5846 -0
- molSimplify/Classes/monomer3D.py +253 -0
- molSimplify/Classes/partialcharges.py +226 -0
- molSimplify/Classes/protein3D.py +1178 -0
- molSimplify/Classes/rundiag.py +151 -0
- molSimplify/Data/ML.dat +212 -0
- molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
- molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
- molSimplify/Data/MLS_angle_for_click.dat +8 -0
- molSimplify/Data/MLS_angle_for_inter.dat +23 -0
- molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
- molSimplify/Data/MLS_angle_for_intra.dat +10 -0
- molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
- molSimplify/Data/MLS_angle_for_oa.dat +18 -0
- molSimplify/Data/ML_FSR_for_inter.dat +112 -0
- molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
- molSimplify/Data/ML_bond_for_cat.dat +8 -0
- molSimplify/Data/ML_bond_for_click.dat +8 -0
- molSimplify/Data/ML_bond_for_inter.dat +48 -0
- molSimplify/Data/ML_bond_for_inter2.dat +48 -0
- molSimplify/Data/ML_bond_for_intra.dat +10 -0
- molSimplify/Data/ML_bond_for_intra2.dat +6 -0
- molSimplify/Data/ML_bond_for_oa.dat +18 -0
- molSimplify/Data/bp1.dat +21 -0
- molSimplify/Data/li.dat +3 -0
- molSimplify/Data/no.dat +2 -0
- molSimplify/Data/oct.dat +7 -0
- molSimplify/Data/pbp.dat +8 -0
- molSimplify/Data/spy.dat +6 -0
- molSimplify/Data/sqap.dat +9 -0
- molSimplify/Data/sqp.dat +5 -0
- molSimplify/Data/tbp.dat +6 -0
- molSimplify/Data/tdhd.dat +9 -0
- molSimplify/Data/thd.dat +5 -0
- molSimplify/Data/tpl.dat +4 -0
- molSimplify/Data/tpr.dat +7 -0
- molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
- molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
- molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
- molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
- molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
- molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
- molSimplify/Informatics/MOF/__init__.py +0 -0
- molSimplify/Informatics/MOF/atomic.py +267 -0
- molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
- molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
- molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
- molSimplify/Informatics/Mol2Parser.py +46 -0
- molSimplify/Informatics/RACassemble.py +408 -0
- molSimplify/Informatics/__init__.py +0 -0
- molSimplify/Informatics/active_learning/__init__.py +0 -0
- molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
- molSimplify/Informatics/autocorrelation.py +1930 -0
- molSimplify/Informatics/clean_autocorrelation.py +778 -0
- molSimplify/Informatics/coulomb_analyze.py +67 -0
- molSimplify/Informatics/decoration_manager.py +193 -0
- molSimplify/Informatics/geo_analyze.py +88 -0
- molSimplify/Informatics/geometrics.py +56 -0
- molSimplify/Informatics/graph_analyze.py +163 -0
- molSimplify/Informatics/graph_racs.py +288 -0
- molSimplify/Informatics/jupyter_vis.py +172 -0
- molSimplify/Informatics/lacRACAssemble.py +2192 -0
- molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
- molSimplify/Informatics/misc_descriptors.py +198 -0
- molSimplify/Informatics/organic_fingerprints.py +61 -0
- molSimplify/Informatics/partialcharges.py +345 -0
- molSimplify/Informatics/protein/activesite.py +53 -0
- molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
- molSimplify/Informatics/rac155_geo.py +48 -0
- molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
- molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
- molSimplify/Ligands/12crown4.mol +62 -0
- molSimplify/Ligands/Antipyrine.mol +58 -0
- molSimplify/Ligands/BPAbipy.mol +106 -0
- molSimplify/Ligands/Hpyrrole.mol +26 -0
- molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
- molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
- molSimplify/Ligands/NMe2_-1.xyz +11 -0
- molSimplify/Ligands/PCy3.mol +111 -0
- molSimplify/Ligands/PMe3.xyz +15 -0
- molSimplify/Ligands/PPh3.mol +76 -0
- molSimplify/Ligands/Propyphenazone.mol +77 -0
- molSimplify/Ligands/acac.mol +33 -0
- molSimplify/Ligands/acacen.mol +76 -0
- molSimplify/Ligands/acetate.smi +1 -0
- molSimplify/Ligands/acetate.xyz +9 -0
- molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
- molSimplify/Ligands/acetonitrile.mol +17 -0
- molSimplify/Ligands/alanine.mol +30 -0
- molSimplify/Ligands/alphabetizer.py +21 -0
- molSimplify/Ligands/amine.mol +11 -0
- molSimplify/Ligands/ammonia.mol +12 -0
- molSimplify/Ligands/arginine.mol +58 -0
- molSimplify/Ligands/asparagine.mol +38 -0
- molSimplify/Ligands/aspartic_acid.mol +35 -0
- molSimplify/Ligands/azide.mol +11 -0
- molSimplify/Ligands/benzene.mol +28 -0
- molSimplify/Ligands/benzene_pi.mol +30 -0
- molSimplify/Ligands/benzenedithiol.mol +30 -0
- molSimplify/Ligands/benzenethiol.mol +30 -0
- molSimplify/Ligands/benzylisocy.mol +38 -0
- molSimplify/Ligands/bidiazine.mol +42 -0
- molSimplify/Ligands/bidiazole.mol +38 -0
- molSimplify/Ligands/bifuran.mol +38 -0
- molSimplify/Ligands/bihydrodiazine.mol +58 -0
- molSimplify/Ligands/bihydrodiazole.mol +46 -0
- molSimplify/Ligands/bihydrooxazine.mol +54 -0
- molSimplify/Ligands/bihydrooxazole.mol +42 -0
- molSimplify/Ligands/bihydrothiazine.mol +54 -0
- molSimplify/Ligands/bihydrothiazole.mol +42 -0
- molSimplify/Ligands/biimidazole.mol +38 -0
- molSimplify/Ligands/bioxazole.mol +34 -0
- molSimplify/Ligands/bipy.mol +46 -0
- molSimplify/Ligands/bipyrazine.xyz +20 -0
- molSimplify/Ligands/bipyrimidine.mol +42 -0
- molSimplify/Ligands/bipyrrole.mol +42 -0
- molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
- molSimplify/Ligands/bithiazole.mol +34 -0
- molSimplify/Ligands/bromide.mol +7 -0
- molSimplify/Ligands/bromide.smi +1 -0
- molSimplify/Ligands/c2.mol +9 -0
- molSimplify/Ligands/caprolactone.mol +41 -0
- molSimplify/Ligands/carbonyl.mol +8 -0
- molSimplify/Ligands/carboxyl.mol +13 -0
- molSimplify/Ligands/cat.mol +30 -0
- molSimplify/Ligands/chloride.mol +7 -0
- molSimplify/Ligands/chloride.smi +1 -0
- molSimplify/Ligands/chloropyridine.mol +27 -0
- molSimplify/Ligands/co2.mol +10 -0
- molSimplify/Ligands/corrolazine.mol +72 -0
- molSimplify/Ligands/cs.mol +8 -0
- molSimplify/Ligands/cyanate.xyz +5 -0
- molSimplify/Ligands/cyanide.mol +9 -0
- molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
- molSimplify/Ligands/cyanopyridine.mol +29 -0
- molSimplify/Ligands/cyclam.mol +81 -0
- molSimplify/Ligands/cyclen.mol +69 -0
- molSimplify/Ligands/cyclopentadienyl.mol +26 -0
- molSimplify/Ligands/cysteine.mol +32 -0
- molSimplify/Ligands/diaminomethyl.mol +19 -0
- molSimplify/Ligands/diazine.mol +25 -0
- molSimplify/Ligands/diazole.mol +23 -0
- molSimplify/Ligands/dicyanamide.mol +15 -0
- molSimplify/Ligands/dihydrofuran.mol +27 -0
- molSimplify/Ligands/dmap.xyz +35 -0
- molSimplify/Ligands/dmf.mol +28 -0
- molSimplify/Ligands/dmi.mol +41 -0
- molSimplify/Ligands/dmpe.mol +52 -0
- molSimplify/Ligands/dpmu.mol +47 -0
- molSimplify/Ligands/dppe.mol +112 -0
- molSimplify/Ligands/edta.mol +69 -0
- molSimplify/Ligands/en.mol +28 -0
- molSimplify/Ligands/ethanethiol.mol +21 -0
- molSimplify/Ligands/ethanolamine.mol +26 -0
- molSimplify/Ligands/ethbipy.mol +70 -0
- molSimplify/Ligands/ethyl.mol +19 -0
- molSimplify/Ligands/ethylamine.mol +24 -0
- molSimplify/Ligands/ethylene.mol +16 -0
- molSimplify/Ligands/ethylesteracac.mol +57 -0
- molSimplify/Ligands/fluoride.mol +7 -0
- molSimplify/Ligands/fluoride.smi +1 -0
- molSimplify/Ligands/formaldehyde.mol +12 -0
- molSimplify/Ligands/formamidate.xyz +8 -0
- molSimplify/Ligands/formate.xyz +6 -0
- molSimplify/Ligands/furan.mol +23 -0
- molSimplify/Ligands/glutamic_acid.mol +42 -0
- molSimplify/Ligands/glutamine.mol +44 -0
- molSimplify/Ligands/glycinate.mol +23 -0
- molSimplify/Ligands/glycine.mol +24 -0
- molSimplify/Ligands/h2s.mol +10 -0
- molSimplify/Ligands/helium.mol +6 -0
- molSimplify/Ligands/histidine.mol +45 -0
- molSimplify/Ligands/hmpa.mol +62 -0
- molSimplify/Ligands/hs-.mol +9 -0
- molSimplify/Ligands/hydride.mol +7 -0
- molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
- molSimplify/Ligands/hydrocyanide.mol +10 -0
- molSimplify/Ligands/hydrodiazine.mol +33 -0
- molSimplify/Ligands/hydrodiazole.mol +27 -0
- molSimplify/Ligands/hydrogensulfide.mol +10 -0
- molSimplify/Ligands/hydroisocyanide.mol +11 -0
- molSimplify/Ligands/hydrooxazine.mol +31 -0
- molSimplify/Ligands/hydrooxazole.mol +25 -0
- molSimplify/Ligands/hydrothiazine.mol +31 -0
- molSimplify/Ligands/hydrothiazole.mol +25 -0
- molSimplify/Ligands/hydroxyl.mol +9 -0
- molSimplify/Ligands/imidazole.mol +23 -0
- molSimplify/Ligands/imidazolidinone.mol +29 -0
- molSimplify/Ligands/imine.mol +13 -0
- molSimplify/Ligands/iminodiacetic.mol +33 -0
- molSimplify/Ligands/iodide.mol +7 -0
- molSimplify/Ligands/iodobenzene.xyz +14 -0
- molSimplify/Ligands/isoleucine.mol +48 -0
- molSimplify/Ligands/isothiocyanate.mol +11 -0
- molSimplify/Ligands/leucine.mol +48 -0
- molSimplify/Ligands/ligands.dict +257 -0
- molSimplify/Ligands/lysine.mol +54 -0
- molSimplify/Ligands/mebenzenedithiol.mol +36 -0
- molSimplify/Ligands/mebim_py.xyz +29 -0
- molSimplify/Ligands/mebim_pz.xyz +28 -0
- molSimplify/Ligands/mebipy.mol +58 -0
- molSimplify/Ligands/mecat.mol +36 -0
- molSimplify/Ligands/methanal.mol +11 -0
- molSimplify/Ligands/methanethiol.mol +15 -0
- molSimplify/Ligands/methanol.mol +16 -0
- molSimplify/Ligands/methionine.mol +44 -0
- molSimplify/Ligands/methyl.mol +13 -0
- molSimplify/Ligands/methylacetylide.xyz +8 -0
- molSimplify/Ligands/methylamine.mol +19 -0
- molSimplify/Ligands/methylazide.xyz +9 -0
- molSimplify/Ligands/methylisocy.mol +17 -0
- molSimplify/Ligands/methylpyridine.mol +33 -0
- molSimplify/Ligands/n2.mol +8 -0
- molSimplify/Ligands/n4py.xyz +51 -0
- molSimplify/Ligands/nch.mol +10 -0
- molSimplify/Ligands/nco-.mol +11 -0
- molSimplify/Ligands/nethanolamine.mol +26 -0
- molSimplify/Ligands/nitrate.mol +14 -0
- molSimplify/Ligands/nitrite.mol +11 -0
- molSimplify/Ligands/nitro.mol +11 -0
- molSimplify/Ligands/nitrobipy.mol +54 -0
- molSimplify/Ligands/nitroso.mol +8 -0
- molSimplify/Ligands/nme3.mol +30 -0
- molSimplify/Ligands/no-.mol +10 -0
- molSimplify/Ligands/no2-.mol +11 -0
- molSimplify/Ligands/noxygen.mol +8 -0
- molSimplify/Ligands/ns-.mol +10 -0
- molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
- molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
- molSimplify/Ligands/o2-.mol +9 -0
- molSimplify/Ligands/o2.xyz +4 -0
- molSimplify/Ligands/och2.mol +12 -0
- molSimplify/Ligands/oethanolamine.mol +26 -0
- molSimplify/Ligands/ome2.mol +22 -0
- molSimplify/Ligands/ooh.xyz +5 -0
- molSimplify/Ligands/oxalate.mol +17 -0
- molSimplify/Ligands/oxalate.smi +1 -0
- molSimplify/Ligands/oxygen.mol +7 -0
- molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
- molSimplify/Ligands/ph2-.mol +11 -0
- molSimplify/Ligands/ph3.mol +12 -0
- molSimplify/Ligands/phen.mol +51 -0
- molSimplify/Ligands/phenacac.mol +63 -0
- molSimplify/Ligands/phenalalanine.mol +51 -0
- molSimplify/Ligands/phendione.mol +51 -0
- molSimplify/Ligands/phenphen.mol +75 -0
- molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
- molSimplify/Ligands/phenylcyc.mol +99 -0
- molSimplify/Ligands/phenylenediamine.mol +37 -0
- molSimplify/Ligands/phenylisocy.mol +32 -0
- molSimplify/Ligands/phosacidbipy.mol +66 -0
- molSimplify/Ligands/phosphine.mol +13 -0
- molSimplify/Ligands/phosphorine.mol +27 -0
- molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
- molSimplify/Ligands/phthalocyanine.mol +126 -0
- molSimplify/Ligands/pme3o.mol +32 -0
- molSimplify/Ligands/porphyrin.mol +82 -0
- molSimplify/Ligands/pph3o.mol +77 -0
- molSimplify/Ligands/proline.mol +39 -0
- molSimplify/Ligands/propdiol.mol +21 -0
- molSimplify/Ligands/propylene.mol +23 -0
- molSimplify/Ligands/pyridine.mol +27 -0
- molSimplify/Ligands/pyrimidone.mol +27 -0
- molSimplify/Ligands/pyrrole.mol +24 -0
- molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
- molSimplify/Ligands/s2-.mol +9 -0
- molSimplify/Ligands/salen.mol +75 -0
- molSimplify/Ligands/salphen.mol +84 -0
- molSimplify/Ligands/serine.mol +32 -0
- molSimplify/Ligands/simple_ligands.dict +14 -0
- molSimplify/Ligands/sulfacidbipy.mol +63 -0
- molSimplify/Ligands/tbucat.mol +54 -0
- molSimplify/Ligands/tbuphisocy.mol +56 -0
- molSimplify/Ligands/tbutylcyclen.mol +166 -0
- molSimplify/Ligands/tbutylisocy.mol +35 -0
- molSimplify/Ligands/tbutylthiol.mol +33 -0
- molSimplify/Ligands/tcnoet.mol +43 -0
- molSimplify/Ligands/tcnoetOH.mol +45 -0
- molSimplify/Ligands/terpy.mol +65 -0
- molSimplify/Ligands/tetrahydrofuran.mol +31 -0
- molSimplify/Ligands/thiane.mol +37 -0
- molSimplify/Ligands/thiazole.mol +21 -0
- molSimplify/Ligands/thiocyanate.mol +11 -0
- molSimplify/Ligands/thiol.mol +9 -0
- molSimplify/Ligands/thiophene.mol +23 -0
- molSimplify/Ligands/thiopyridine.mol +29 -0
- molSimplify/Ligands/threonine.mol +38 -0
- molSimplify/Ligands/tpp.mol +165 -0
- molSimplify/Ligands/tricyanomethyl.mol +19 -0
- molSimplify/Ligands/trifluoromethyl.mol +13 -0
- molSimplify/Ligands/tryptophan.mol +60 -0
- molSimplify/Ligands/tyrosine.mol +53 -0
- molSimplify/Ligands/uthiol.mol +11 -0
- molSimplify/Ligands/uthiolme2.mol +23 -0
- molSimplify/Ligands/valine.mol +42 -0
- molSimplify/Ligands/water.mol +10 -0
- molSimplify/Ligands/x.mol +6 -0
- molSimplify/Scripts/__init__.py +0 -0
- molSimplify/Scripts/addtodb.py +308 -0
- molSimplify/Scripts/cellbuilder.py +1592 -0
- molSimplify/Scripts/cellbuilder_tools.py +701 -0
- molSimplify/Scripts/chains.py +342 -0
- molSimplify/Scripts/convert_2to3.py +23 -0
- molSimplify/Scripts/dbinteract.py +631 -0
- molSimplify/Scripts/distgeom.py +617 -0
- molSimplify/Scripts/findcorrelations.py +287 -0
- molSimplify/Scripts/generator.py +267 -0
- molSimplify/Scripts/geometry.py +1224 -0
- molSimplify/Scripts/grabguivars.py +845 -0
- molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
- molSimplify/Scripts/inparse.py +1673 -0
- molSimplify/Scripts/io.py +1149 -0
- molSimplify/Scripts/isomers.py +415 -0
- molSimplify/Scripts/jobgen.py +247 -0
- molSimplify/Scripts/krr_prep.py +1262 -0
- molSimplify/Scripts/molSimplify_io.py +18 -0
- molSimplify/Scripts/molden2psi4wfn.py +166 -0
- molSimplify/Scripts/namegen.py +32 -0
- molSimplify/Scripts/nn_prep.py +561 -0
- molSimplify/Scripts/oct_check_mols.py +782 -0
- molSimplify/Scripts/periodic_QE.py +97 -0
- molSimplify/Scripts/postmold.py +304 -0
- molSimplify/Scripts/postmwfn.py +709 -0
- molSimplify/Scripts/postparse.py +488 -0
- molSimplify/Scripts/postproc.py +139 -0
- molSimplify/Scripts/qcgen.py +1450 -0
- molSimplify/Scripts/rmsd.py +489 -0
- molSimplify/Scripts/rungen.py +670 -0
- molSimplify/Scripts/structgen.py +3040 -0
- molSimplify/Scripts/tf_nn_prep.py +894 -0
- molSimplify/Scripts/tsgen.py +295 -0
- molSimplify/Scripts/uq_calibration.py +69 -0
- molSimplify/__init__.py +0 -0
- molSimplify/__main__.py +197 -0
- molSimplify/icons/chemdb.png +0 -0
- molSimplify/icons/hjklogo.png +0 -0
- molSimplify/icons/icon.png +0 -0
- molSimplify/icons/logo.png +0 -0
- molSimplify/icons/logo_old.png +0 -0
- molSimplify/icons/petachem.png +0 -0
- molSimplify/icons/petachem2.png +0 -0
- molSimplify/icons/petachem_full.png +0 -0
- molSimplify/icons/pythonlogo.png +0 -0
- molSimplify/icons/sge copy.png +0 -0
- molSimplify/icons/sge.png +0 -0
- molSimplify/icons/slurm.png +0 -0
- molSimplify/icons/wft1.png +0 -0
- molSimplify/icons/wft2.png +0 -0
- molSimplify/icons/wft3.png +0 -0
- molSimplify/ml/__init__.py +0 -0
- molSimplify/ml/kernels.py +36 -0
- molSimplify/ml/layers.py +29 -0
- molSimplify/molscontrol/__init__.py +14 -0
- molSimplify/molscontrol/_version.py +521 -0
- molSimplify/molscontrol/clf_tools.py +144 -0
- molSimplify/molscontrol/data/README.md +21 -0
- molSimplify/molscontrol/data/look_and_say.dat +15 -0
- molSimplify/molscontrol/dynamic_classifier.py +514 -0
- molSimplify/molscontrol/io_tools.py +363 -0
- molSimplify/molscontrol/molscontrol.py +49 -0
- molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
- molSimplify/molscontrol/terachem/terachem_input +22 -0
- molSimplify/python_krr/X_train_TS.csv +535 -0
- molSimplify/python_krr/__init__.py +0 -0
- molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
- molSimplify/python_krr/hat2_feature_names.csv +1 -0
- molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
- molSimplify/python_krr/hat_X_mean_std.csv +6 -0
- molSimplify/python_krr/hat_feature_names.csv +1 -0
- molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
- molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
- molSimplify/python_krr/hat_y_mean_std.csv +2 -0
- molSimplify/python_krr/sklearn_models.py +34 -0
- molSimplify/python_krr/y_train_TS.csv +535 -0
- molSimplify/python_nn/ANN.py +198 -0
- molSimplify/python_nn/__init__.py +0 -0
- molSimplify/python_nn/clf_analysis_tool.py +125 -0
- molSimplify/python_nn/dictionary_toolbox.py +49 -0
- molSimplify/python_nn/ensemble_test.py +309 -0
- molSimplify/python_nn/hs_center.csv +26 -0
- molSimplify/python_nn/hs_scale.csv +26 -0
- molSimplify/python_nn/ls_center.csv +26 -0
- molSimplify/python_nn/ls_scale.csv +26 -0
- molSimplify/python_nn/ms_hs_b1.csv +50 -0
- molSimplify/python_nn/ms_hs_b2.csv +50 -0
- molSimplify/python_nn/ms_hs_b3.csv +1 -0
- molSimplify/python_nn/ms_hs_w1.csv +50 -0
- molSimplify/python_nn/ms_hs_w2.csv +50 -0
- molSimplify/python_nn/ms_hs_w3.csv +1 -0
- molSimplify/python_nn/ms_ls_b1.csv +50 -0
- molSimplify/python_nn/ms_ls_b2.csv +50 -0
- molSimplify/python_nn/ms_ls_b3.csv +1 -0
- molSimplify/python_nn/ms_ls_w1.csv +50 -0
- molSimplify/python_nn/ms_ls_w2.csv +50 -0
- molSimplify/python_nn/ms_ls_w3.csv +1 -0
- molSimplify/python_nn/ms_slope_b1.csv +50 -0
- molSimplify/python_nn/ms_slope_b2.csv +50 -0
- molSimplify/python_nn/ms_slope_b3.csv +1 -0
- molSimplify/python_nn/ms_slope_w1.csv +50 -0
- molSimplify/python_nn/ms_slope_w2.csv +50 -0
- molSimplify/python_nn/ms_slope_w3.csv +1 -0
- molSimplify/python_nn/ms_split_b1.csv +50 -0
- molSimplify/python_nn/ms_split_b2.csv +50 -0
- molSimplify/python_nn/ms_split_b3.csv +1 -0
- molSimplify/python_nn/ms_split_w1.csv +50 -0
- molSimplify/python_nn/ms_split_w2.csv +50 -0
- molSimplify/python_nn/ms_split_w3.csv +1 -0
- molSimplify/python_nn/slope_center.csv +25 -0
- molSimplify/python_nn/slope_scale.csv +25 -0
- molSimplify/python_nn/split_center.csv +26 -0
- molSimplify/python_nn/split_scale.csv +26 -0
- molSimplify/python_nn/tf_ANN.py +762 -0
- molSimplify/python_nn/train_data.csv +1211 -0
- molSimplify/tf_nn/__init__.py +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
- molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/gap_model.json +1 -0
- molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
- molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/homo_model.json +126 -0
- molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/split/split_model.h5 +0 -0
- molSimplify/tf_nn/split/split_model.json +1 -0
- molSimplify/tf_nn/split/split_vars.csv +155 -0
- molSimplify/tf_nn/split/split_x.csv +1902 -0
- molSimplify/tf_nn/split/split_y.csv +1902 -0
- molSimplify/tf_nn/split/train_names.csv +1901 -0
- molSimplify/utils/__init__.py +0 -0
- molSimplify/utils/decorators.py +16 -0
- molSimplify/utils/metaclasses.py +12 -0
- molSimplify/utils/tensorflow.py +23 -0
- molSimplify/utils/timer.py +16 -0
- molSimplify-1.7.4.dist-info/LICENSE +674 -0
- molSimplify-1.7.4.dist-info/METADATA +821 -0
- molSimplify-1.7.4.dist-info/RECORD +651 -0
- molSimplify-1.7.4.dist-info/WHEEL +5 -0
- molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
- molSimplify-1.7.4.dist-info/top_level.txt +4 -0
- tests/generateTests.py +122 -0
- tests/helperFuncs.py +658 -0
- tests/informatics/test_MOF_descriptors.py +128 -0
- tests/informatics/test_active_learning.py +113 -0
- tests/informatics/test_coulomb_analyze.py +24 -0
- tests/informatics/test_graph_racs.py +193 -0
- tests/ml/test_kernels.py +20 -0
- tests/ml/test_layers.py +47 -0
- tests/runtest.py +10 -0
- tests/test_Mol2D.py +128 -0
- tests/test_basic_imports.py +62 -0
- tests/test_bidentate.py +25 -0
- tests/test_cli.py +20 -0
- tests/test_distgeom.py +106 -0
- tests/test_example_1.py +29 -0
- tests/test_example_3.py +31 -0
- tests/test_example_5.py +43 -0
- tests/test_example_7.py +28 -0
- tests/test_example_8.py +15 -0
- tests/test_example_tbp.py +15 -0
- tests/test_ff_xtb.py +111 -0
- tests/test_geocheck_oct.py +26 -0
- tests/test_geocheck_one_empty.py +15 -0
- tests/test_geometry.py +44 -0
- tests/test_inparse.py +76 -0
- tests/test_io.py +84 -0
- tests/test_jobgen.py +84 -0
- tests/test_joption_pythonic.py +27 -0
- tests/test_ligand_assign.py +58 -0
- tests/test_ligand_assign_consistent.py +60 -0
- tests/test_ligand_class.py +26 -0
- tests/test_ligand_from_mol_file.py +35 -0
- tests/test_ligands.py +86 -0
- tests/test_mol3D.py +337 -0
- tests/test_molcas_caspt2.py +15 -0
- tests/test_molcas_casscf.py +15 -0
- tests/test_old_ANNs.py +68 -0
- tests/test_orca_ccsdt.py +15 -0
- tests/test_orca_dft.py +15 -0
- tests/test_qcgen.py +50 -0
- tests/test_racs.py +124 -0
- tests/test_rmsd.py +68 -0
- tests/test_structgen_functions.py +198 -0
- tests/test_tetrahedral.py +29 -0
- tests/test_tutorial_10_part_one.py +16 -0
- tests/test_tutorial_10_part_two.py +15 -0
- tests/test_tutorial_2.py +11 -0
- tests/test_tutorial_3.py +15 -0
- tests/test_tutorial_4.py +57 -0
- tests/test_tutorial_6.py +10 -0
- tests/test_tutorial_8.py +29 -0
- tests/test_tutorial_9_part_one.py +15 -0
- tests/test_tutorial_9_part_two.py +15 -0
- tests/test_tutorial_qm9_part_one.py +6 -0
- tests/testresources/refs/racs/generate_references.py +85 -0
- workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
- workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
- workflows/NandyJACSAu2022/fragment_classes.py +586 -0
- workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
|
@@ -0,0 +1,1262 @@
|
|
|
1
|
+
# @file krr_prep.py
|
|
2
|
+
# Helper routines for ANN integration
|
|
3
|
+
#
|
|
4
|
+
# Written by Nick Yang for HJK Group
|
|
5
|
+
#
|
|
6
|
+
# Dpt of Chemical Engineering, MIT
|
|
7
|
+
|
|
8
|
+
import copy
|
|
9
|
+
import csv
|
|
10
|
+
import os
|
|
11
|
+
import pickle
|
|
12
|
+
from math import exp
|
|
13
|
+
import matplotlib.pyplot as plt
|
|
14
|
+
import matplotlib.ticker as ticker
|
|
15
|
+
import pandas as pd
|
|
16
|
+
from importlib_resources import files as resource_files
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
|
|
21
|
+
from sklearn.kernel_ridge import KernelRidge
|
|
22
|
+
from sklearn.metrics import mean_absolute_error
|
|
23
|
+
from sklearn.model_selection import train_test_split, GridSearchCV, LeaveOneOut
|
|
24
|
+
from sklearn.multioutput import MultiOutputRegressor
|
|
25
|
+
|
|
26
|
+
from molSimplify.Classes.globalvars import (globalvars)
|
|
27
|
+
from molSimplify.Informatics.autocorrelation import (atom_only_autocorrelation,
|
|
28
|
+
atom_only_deltametric,
|
|
29
|
+
atom_only_ratiometric,
|
|
30
|
+
atom_only_summetric,
|
|
31
|
+
generate_atomonly_autocorrelations,
|
|
32
|
+
generate_atomonly_deltametrics)
|
|
33
|
+
from molSimplify.Informatics.partialcharges import (ffeatures)
|
|
34
|
+
|
|
35
|
+
# import matplotlib.pyplot as plt
|
|
36
|
+
# import matplotlib.ticker as ticker
|
|
37
|
+
from molSimplify.Scripts.geometry import (vecdiff,
|
|
38
|
+
distance,
|
|
39
|
+
vecangle)
|
|
40
|
+
|
|
41
|
+
# Tell NumPy to ignore floating-point divide-by-zero instead of warning.
# This changes NumPy's error state as a side effect of importing this module.
np.seterr(divide='ignore')

# NOTE(review): absolute path on the original author's machine; it will not
# exist elsewhere. Presumably a default input for the *_model_training
# functions in this file -- TODO confirm against callers.
csvf = '/Users/tzuhsiungyang/Dropbox (MIT)/Work at the Kulik group/ts_build/Data/xyzf_optts/selected_xyzfs/label_1distance_descs_atRACs.csv'
# Column positions; the names match the colnum_* parameters of the
# training functions defined in this file.
colnum_i_label = 1
colnum_j_label = 2
colnum_desc = 2
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def feature_prep(mol, idx):
    """Rank the metal-coordinating atoms of ``mol`` and order them around ``idx``.

    Each atom bonded to the metal gets a float priority built from its atomic
    number (repeated by bond order) and, after the decimal point, the atomic
    numbers of atoms further out, refined shell by shell while ties remain.
    The six coordinating atoms (the code hard-codes a coordination number of
    6) are then ordered as ``[idx0, idx1, idx2, idx3, idx4, idx5]`` where
    ``idx0`` is ``idx`` and ``idx5`` is the atom at the largest angle from it.

    @param mol molecule object providing convert2OBMol, populateBOMatrix,
        findMetal, getBondedAtoms, getBondedAtomsByCoordNo and getAtom
    @param idx position (0-5) of the reference coordinating atom in the list
        returned by ``mol.getBondedAtomsByCoordNo(metal, 6)``
    @return fpriority_list (priorities reordered by idx_list),
        fd_list (metal-ligand distances reordered by idx_list), idx_list
    """
    # getting bond-order matrix
    mol.convert2OBMol()
    BOMatrix = mol.populateBOMatrix()

    # metal indices, wrapped in a list to mirror the nested sidx_list layout
    fidx_list = [mol.findMetal()]
    # one single-element shell per atom bonded to a metal
    sidx_list = [[sidx]
                 for fidxs in fidx_list
                 for fidx in fidxs
                 for sidx in mol.getBondedAtoms(fidx)]

    # first-shell label: atomic number string repeated by the bond order
    # (a bond order of 0 is treated as 1), converted back to an int
    satno_list = []
    for fidxs in fidx_list:
        for fidx in fidxs:
            for sidxs in sidx_list:
                for sidx in sidxs:
                    BO = int(BOMatrix[fidx][sidx])
                    if BO == 0:
                        BO = 1
                    satno_list.append(int(BO * str(mol.getAtom(sidx).atno)))

    # refinement is only needed when some first-shell label is duplicated;
    # s_sel_list keeps the positions of the last duplicated label found
    exit_signal = True
    for satno in set(satno_list):
        if satno_list.count(satno) > 1:
            # compare by value: identity ('is') is unreliable for ints > 256
            s_sel_list = [i for i, atno in enumerate(satno_list)
                          if atno == satno]
            exit_signal = False

    # atoms already visited (start with the metals)
    ref_list = [fidx for fidxs in fidx_list for fidx in fidxs]

    # starting the loop
    tidx_list = [[] for _ in sidx_list]
    tatno_list = [[] for _ in sidx_list]

    # Priorities when no refinement runs; identical to what the loop below
    # yields for empty outer-shell strings. Without this the name was unbound
    # whenever all first-shell labels were distinct.
    fpriority_list = [float(str(satno) + '.') for satno in satno_list]

    while not exit_signal:
        # collect the next shell of not-yet-visited neighbours
        for i in s_sel_list:
            t_list = []
            for sidx in sidx_list[i]:
                for tidx in mol.getBondedAtoms(sidx):
                    if tidx not in ref_list:
                        t_list.append(tidx)
            tidx_list[i] = t_list
        # extend each selected atom's outer-shell string
        for i in s_sel_list:
            for sidx in sidx_list[i]:
                atno_list = tatno_list[i]
                ls = []
                for j in s_sel_list:
                    for tidx in tidx_list[j]:
                        BO = int(BOMatrix[sidx][tidx])
                        ls.append(BO * str(mol.getAtom(tidx).atno))
                # NOTE(review): the original called sorted(ls, reverse=True)
                # here and discarded the result, so ls was never reordered.
                # That order is kept because it determines the ligand
                # ordering returned to callers; confirm intent before sorting.
                atno_list.extend(ls)
                # updated per sidx so an empty shell leaves the string as is
                tatno_list[i] = [''.join(atno_list)]
        # the shell just explored becomes the current shell
        sidx_list = list(tidx_list)
        for i in s_sel_list:
            for sidx in sidx_list[i]:
                ref_list.append(sidx)
        test_list = [[] for _ in sidx_list]
        # get priorities: "<first-shell label>.<outer-shell string>"
        fpriority_list = []
        for i, satno in enumerate(satno_list):
            suffix = tatno_list[i][0] if tatno_list[i] else ''
            fpriority_list.append(float(str(satno) + '.' + suffix))
        # stop when nothing is left to explore or all six priorities differ
        if tidx_list == test_list or len(set(fpriority_list)) == 6:
            exit_signal = True

    # get distance
    sidx_list = mol.getBondedAtomsByCoordNo(fidx_list[0][0], 6)
    mcoord = mol.getAtom(fidx_list[0][0]).coords()
    vMLs = [vecdiff(mcoord, mol.getAtom(i).coords()) for i in sidx_list]
    rMLs = [distance(mcoord, mol.getAtom(i).coords()) for i in sidx_list]
    idx0 = idx
    vangs = [vecangle(vML, vMLs[idx0]) for vML in vMLs]
    idxes = list(range(6))
    # idx5: the atom at the largest angle from idx0
    idx5 = np.argsort(np.array(vangs))[-1]
    idx1_4 = copy.deepcopy(idxes)
    idx1_4.remove(idx0)
    idx1_4.remove(idx5)
    fprio1_4 = copy.deepcopy(fpriority_list)
    vMLs1_4 = copy.deepcopy(vMLs)
    rMLs1_4 = copy.deepcopy(rMLs)
    # pop the larger index first so the smaller one stays valid
    if idx0 > idx5:
        first, second = idx0, idx5
    else:
        first, second = idx5, idx0
    for seq in (fprio1_4, vMLs1_4, rMLs1_4):
        seq.pop(first)
        seq.pop(second)
    # get ax, eq, ax idxes (positions within the four remaining atoms)
    idx1_ = np.argsort(np.array(fprio1_4))[-1]
    vangs1_4 = [vecangle(vML, vMLs1_4[idx1_]) for vML in vMLs1_4]
    order1_4 = np.argsort(np.array(vangs1_4))
    idx2_ = order1_4[-1]
    idx3_ = order1_4[1]
    idx4_ = order1_4[2]
    fprio1 = fprio1_4[idx1_]
    fprio2 = fprio1_4[idx2_]
    fprio3 = fprio1_4[idx3_]
    fprio4 = fprio1_4[idx4_]
    fprio1_2 = sorted([fprio1, fprio2])
    fprio3_4 = sorted([fprio3, fprio4])
    if fprio3_4 > fprio1_2:
        # NOTE(review): only idx1_ is switched here; idx2_ keeps pointing at
        # the partner of the previous idx1_ -- confirm this is intended.
        if fprio3 > fprio4:
            idx1_ = idx3_
        else:
            idx1_ = idx4_
    idx1 = idx1_4[idx1_]
    idx2 = idx1_4[idx2_]
    idx3_4 = copy.deepcopy(idx1_4)
    fprio3_4 = copy.deepcopy(fprio1_4)
    idx3_4.remove(idx1)
    idx3_4.remove(idx2)
    # again pop the larger position first
    if idx1_ > idx2_:
        fprio3_4.pop(idx1_)
        fprio3_4.pop(idx2_)
    else:
        fprio3_4.pop(idx2_)
        fprio3_4.pop(idx1_)
    idx3 = idx3_4[np.argsort(np.array(fprio3_4))[-1]]
    idx3_4.remove(idx3)
    idx4 = idx3_4[0]
    idx_list = [idx0, idx1, idx2, idx3, idx4, idx5]
    fpriority_list = np.array(fpriority_list)[idx_list].tolist()
    fd_list = np.array(rMLs)[idx_list].tolist()

    return fpriority_list, fd_list, idx_list
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def normalize(data, mean, std):
    """Standardize ``data`` as ``(data - mean) / std``.

    Entries whose ``std`` is 0 are set to 0 instead of dividing by zero.
    All inputs are converted to float arrays first, so integer input no
    longer fails when the float quotient is written into an integer buffer.

    @param data array-like of values to standardize
    @param mean array-like (or scalar) broadcastable against data
    @param std array-like (or scalar) broadcastable against data
    @return float ndarray with the broadcast shape of the inputs
    """
    data = np.asarray(data, dtype=float)
    mean = np.asarray(mean, dtype=float)
    std = np.asarray(std, dtype=float)
    centered = data - mean
    # zero-filled output: positions masked out by `where` keep the value 0
    data_norm = np.zeros(np.broadcast(centered, std).shape, dtype=float)
    np.divide(centered, std, out=data_norm, where=std != 0)

    return data_norm
|
|
221
|
+
|
|
222
|
+
# train a kernel ridge regression (KRR) model from a given csv file
#  @param csvf the csv file containing headers (first row), data, and label
#  @param colnum_label the column number for the label column
#  @param colnum_desc the starting column number for the descriptor columns
#  @param alpha starting KRR alpha; the grid search runs while it equals 1
#  @param gamma starting RBF gamma; the grid search runs while it equals 1
#  @param threshold random-forest importance a descriptor must exceed to be kept
#  @return stat_dict, impt_dict, train_dict, test_dict, perm_dict, regr


def krr_model_training(csvf, colnum_label, colnum_desc, alpha=1, gamma=1, threshold=0.01):
    # read in desc and label
    with open(csvf, 'r') as f:
        fcsv = csv.reader(f)
        # the header line is taken from the file object itself, so the
        # reader below only yields the data rows
        headers = np.array(next(f, None).rstrip('\r\n').split(','))[colnum_desc:]
        lines = [line for line in fcsv]
    # keep only rows with the most common column count (drops ragged rows)
    lnums = [len(line) for line in lines]
    count = max(set(lnums), key=lnums.count)
    X = []
    y = []
    for line in lines:
        if len(line) == count:
            X.append([float(desc) for desc in line[colnum_desc:]])
            y.append(float(line[colnum_label]))
    X = np.array(X)
    y = np.array(y)
    # process desc and label
    mean_X = np.mean(X, axis=0)
    std_X = np.std(X, axis=0)
    mean_y = np.mean(y, axis=0)
    std_y = np.std(y, axis=0)
    X_norm = normalize(X, mean_X, std_X)
    y_norm = normalize(y, mean_y, std_y)
    # stats
    stat_dict = {
        'mean_X_dict': dict(zip(headers, mean_X)),
        'std_X_dict': dict(zip(headers, std_X)),
        'mean_y': mean_y,
        'std_y': std_y,
    }
    # split to train and test
    X_norm_train, X_norm_test, y_norm_train, y_norm_test = train_test_split(
        X_norm, y_norm, test_size=0.2, random_state=0)
    # feature selection by random-forest importance
    selector = RandomForestRegressor(random_state=0, n_estimators=100)
    selector.fit(X_norm_train, y_norm_train)
    X_norm_train_impts = selector.feature_importances_
    idxes = np.where(X_norm_train_impts > threshold)[0]
    print(len(idxes))
    importances = X_norm_train_impts[idxes]
    features_sel = headers[idxes]
    # importance
    impt_dict = dict(zip(features_sel, importances))
    X_norm_train_sel = X_norm_train.T[idxes].T
    X_norm_test_sel = X_norm_test.T[idxes].T
    # training with krr
    signal = True
    # krr parameters
    kernel = 'rbf'
    factor_lower = -4
    factor_higher = 4
    gamma_lower = gamma * exp(factor_lower)
    gamma_higher = gamma * exp(factor_higher)
    alpha_lower = alpha * exp(factor_lower)
    alpha_higher = alpha * exp(factor_higher)
    lin = 7
    # Integer midpoint of the grid. The original indexed with `lin / 2`,
    # which is a float under Python 3 and cannot index an array.
    mid = lin // 2
    # optimize hyperparameters: repeat the grid search, re-centring the grid
    # on the best point, until the optimum falls in the middle of the grid
    cycle_i = 0
    while gamma == 1 or alpha == 1 or not signal:
        gammas = np.linspace(gamma_lower, gamma_higher, lin)
        alphas = np.linspace(alpha_lower, alpha_higher, lin)
        tuned_parameters = [
            {'kernel': [kernel], 'gamma': gammas, 'alpha': alphas}]
        regr = GridSearchCV(KernelRidge(), tuned_parameters,
                            cv=5, scoring='neg_mean_absolute_error')
        regr.fit(X_norm_train_sel, y_norm_train)
        gamma = regr.best_params_['gamma']
        alpha = regr.best_params_['alpha']
        gamma_off_centre = gamma < gammas[mid - 1] or gamma > gammas[mid]
        alpha_off_centre = alpha < alphas[mid - 1] or alpha > alphas[mid]
        if gamma_off_centre or alpha_off_centre:
            signal = False
            # narrow the search window; reset it after more than 10 cycles
            factor_lower *= 0.8
            factor_higher *= 0.8
            if cycle_i > 10:
                factor_lower = -4
                factor_higher = 4
                cycle_i = 0
            gamma_lower = gamma * exp(factor_lower)
            gamma_higher = gamma * exp(factor_higher)
            alpha_lower = alpha * exp(factor_lower)
            alpha_higher = alpha * exp(factor_higher)
        else:
            signal = True
        cycle_i += 1
        print(('gamma is: ', gamma, '. alpha is: ', alpha))
    # final model
    regr = KernelRidge(kernel=kernel, alpha=alpha, gamma=gamma)
    regr.fit(X_norm_train_sel, y_norm_train)
    # predictions, mapped back to the original label scale
    y_norm_train_pred = regr.predict(X_norm_train_sel)
    y_train_pred = y_norm_train_pred * std_y + mean_y
    y_train_data = y_norm_train * std_y + mean_y
    y_norm_test_pred = regr.predict(X_norm_test_sel)
    y_test_pred = y_norm_test_pred * std_y + mean_y
    y_test_data = y_norm_test * std_y + mean_y
    # data
    train_dict = {
        'X_norm_sel_dict': dict(zip(features_sel, X_norm_train_sel.T)),
        'y_data': y_train_data,
        'y_pred': y_train_pred,
    }
    test_dict = {
        'X_norm_sel_dict': dict(zip(features_sel, X_norm_test_sel.T)),
        'y_data': y_test_data,
        'y_pred': y_test_pred,
    }
    # performance
    perm_dict = {
        'score_train': regr.score(X_norm_train_sel, y_norm_train),
        'score_test': regr.score(X_norm_test_sel, y_norm_test),
        'MAE_train': mean_absolute_error(y_train_data, y_train_pred),
        'MAE_test': mean_absolute_error(y_test_data, y_test_pred),
    }

    return stat_dict, impt_dict, train_dict, test_dict, perm_dict, regr
|
|
350
|
+
|
|
351
|
+
# train and evaluate KRR models with leave-one-out splits of a given csv file
#  @param csvf the csv file containing headers (first row), data, and label
#  @param colnum_label the column number for the label column
#  @param colnum_desc the starting column number for the descriptor columns
#  @param feature_names header names of the descriptors to use; when falsy,
#         descriptors are selected per fold by random-forest importance
#  @param alpha starting KRR alpha; the grid search runs while it equals 1
#  @param gamma starting RBF gamma; the grid search runs while it equals 1
#  @param threshold random-forest importance a descriptor must exceed to be kept
#  @return impt_dict (from the last fold, None when feature_names is given),
#          perm_dict (fold number -> test MAE), ys (per-fold dicts of y arrays)


def krr_model_training_loo(csvf, colnum_label, colnum_desc, feature_names=False, alpha=1, gamma=1, threshold=0.01):
    # read in desc and label
    with open(csvf, 'r') as f:
        fcsv = csv.reader(f)
        # the header line is taken from the file object itself, so the
        # reader below only yields the data rows
        headers = np.array(next(f, None).rstrip('\r\n').split(','))[colnum_desc:]
        lines = [line for line in fcsv]
    # keep only rows with the most common column count (drops ragged rows)
    lnums = [len(line) for line in lines]
    count = max(set(lnums), key=lnums.count)
    X = []
    y = []
    for line in lines:
        if len(line) == count:
            X.append([float(desc) for desc in line[colnum_desc:]])
            y.append(float(line[colnum_label]))
    X = np.array(X)
    y = np.array(y)
    # process desc and label
    mean_X = np.mean(X, axis=0)
    std_X = np.std(X, axis=0)
    mean_y = np.mean(y, axis=0)
    std_y = np.std(y, axis=0)
    X_norm = normalize(X, mean_X, std_X)
    y_norm = normalize(y, mean_y, std_y)
    # user-chosen descriptors do not depend on the fold: resolve them once
    fixed_idxes = None
    if feature_names:
        header_list = headers.tolist()
        fixed_idxes = [header_list.index(feature_name)
                       for feature_name in feature_names]
    # split to train and test
    loo = LeaveOneOut()
    total_i = len(X_norm)
    i = 0
    impt_dict = None
    # ys
    ys = []
    # MAEs
    MAEs_test = []
    MAEs_test_i = []
    for train_idx, test_idx in loo.split(X_norm):
        X_norm_train, X_norm_test = X_norm[train_idx], X_norm[test_idx]
        y_norm_train, y_norm_test = y_norm[train_idx], y_norm[test_idx]
        # feature selection
        if not feature_names:
            selector = RandomForestRegressor(random_state=0, n_estimators=100)
            selector.fit(X_norm_train, y_norm_train)
            X_norm_train_impts = selector.feature_importances_
            idxes = np.where(X_norm_train_impts > threshold)[0]
            print(len(idxes))
            importances = X_norm_train_impts[idxes]
            features_sel = headers[idxes]
            # importance
            impt_dict = dict(zip(features_sel, importances))
            print(sorted(impt_dict, key=impt_dict.get))
            print(impt_dict)
        else:
            idxes = fixed_idxes
            impt_dict = None
        X_norm_train_sel = X_norm_train.T[idxes].T
        X_norm_test_sel = X_norm_test.T[idxes].T
        # training with krr: the grid search is forced only while alpha or
        # gamma still equals 1; later folds reuse the tuned values
        signal = i == 0 or (alpha != 1 and gamma != 1)
        # krr parameters
        kernel = 'rbf'
        factor_lower = -4
        factor_higher = 4
        gamma_lower = gamma * exp(factor_lower)
        gamma_higher = gamma * exp(factor_higher)
        alpha_lower = alpha * exp(factor_lower)
        alpha_higher = alpha * exp(factor_higher)
        lin = 7
        # Integer midpoint of the grid. The original indexed with `lin / 2`,
        # which is a float under Python 3 and cannot index an array.
        mid = lin // 2
        # optimize hyperparameters
        cycle_i = 0
        while gamma == 1 or alpha == 1 or not signal:
            gammas = np.linspace(gamma_lower, gamma_higher, lin)
            alphas = np.linspace(alpha_lower, alpha_higher, lin)
            tuned_parameters = [
                {'kernel': [kernel], 'gamma': gammas, 'alpha': alphas}]
            regr = GridSearchCV(KernelRidge(), tuned_parameters,
                                cv=5, scoring='neg_mean_absolute_error')
            regr.fit(X_norm_train_sel, y_norm_train)
            gamma = regr.best_params_['gamma']
            alpha = regr.best_params_['alpha']
            gamma_off_centre = gamma < gammas[mid - 1] or gamma > gammas[mid]
            alpha_off_centre = alpha < alphas[mid - 1] or alpha > alphas[mid]
            if gamma_off_centre or alpha_off_centre:
                signal = False
                # narrow the search window; reset it after more than 10 cycles
                factor_lower *= 0.8
                factor_higher *= 0.8
                if cycle_i > 10:
                    factor_lower = -4
                    factor_higher = 4
                    cycle_i = 0
                gamma_lower = gamma * exp(factor_lower)
                gamma_higher = gamma * exp(factor_higher)
                alpha_lower = alpha * exp(factor_lower)
                alpha_higher = alpha * exp(factor_higher)
            else:
                signal = True
            cycle_i += 1
            print(('gamma is: ', gamma, '. alpha is: ', alpha))
        # final model
        regr = KernelRidge(kernel=kernel, alpha=alpha, gamma=gamma)
        regr.fit(X_norm_train_sel, y_norm_train)
        # predictions, mapped back to the original label scale
        y_norm_train_pred = regr.predict(X_norm_train_sel)
        y_train_pred = y_norm_train_pred * std_y + mean_y
        y_train_data = y_norm_train * std_y + mean_y
        y_norm_test_pred = regr.predict(X_norm_test_sel)
        y_test_pred = y_norm_test_pred * std_y + mean_y
        y_test_data = y_norm_test * std_y + mean_y
        # data (kept in a fold-local dict; the original rebound `y` here)
        ys.append({
            'y_train_data': y_train_data,
            'y_train_pred': y_train_pred,
            'y_test_data': y_test_data,
            'y_test_pred': y_test_pred,
        })
        # performance
        MAE_test = mean_absolute_error(y_test_data, y_test_pred)
        MAEs_test.append(MAE_test)
        MAEs_test_i.append(i)
        print(str(i) + '/' + str(total_i))
        i += 1
    perm_dict = dict(zip(MAEs_test_i, MAEs_test))

    return impt_dict, perm_dict, ys
|
|
505
|
+
|
|
506
|
+
# train a multi-output gradient boosting regressor (GBR) from a given csv file
#  @param csvf the csv file containing headers (first row), data, and label
#  @param colnum_i_label the starting column number for the label column
#  @param colnum_j_label the ending column number for the label column + 1
#  @param colnum_desc the starting column number for the descriptor columns
#  @return stat_dict, impt_dict, train_dict, test_dict, perm_dict, regr


def gbr_model_training(csvf, colnum_i_label, colnum_j_label, colnum_desc):
    # load header names and raw rows
    with open(csvf, 'r') as handle:
        reader = csv.reader(handle)
        # header comes straight from the file object; the reader then
        # continues with the data rows
        headers = np.array(next(handle, None).rstrip('\r\n').split(','))[colnum_desc:]
        rows = [row for row in reader]
    # rows whose width differs from the most common width are ignored
    widths = [len(row) for row in rows]
    count = max(set(widths), key=widths.count)
    X = []
    y = []
    for row in rows:
        if len(row) != count:
            continue
        descs = [float(desc) for desc in row[colnum_desc:]]
        labels = [float(label) for label in row[colnum_i_label:colnum_j_label]]
        X.append(descs)
        y.append(labels)
    X = np.array(X)
    y = np.array(y)
    # column-wise statistics of descriptors and labels
    mean_X = np.mean(X, axis=0)
    std_X = np.std(X, axis=0)
    mean_y = np.mean(y, axis=0)
    std_y = np.std(y, axis=0)
    stat_dict = {
        'mean_X_dict': dict(zip(headers, mean_X)),
        'std_X_dict': dict(zip(headers, std_X)),
        'mean_y': mean_y,
        'std_y': std_y,
    }
    X_norm = normalize(X, mean_X, std_X)
    y_norm = normalize(y, mean_y, std_y)
    # 80/20 train/test split with a fixed seed
    X_norm_train, X_norm_test, y_norm_train, y_norm_test = train_test_split(
        X_norm, y_norm, test_size=0.2, random_state=0)
    # descriptor importances come from a forest fit on the first label only
    selector = RandomForestRegressor(random_state=0, n_estimators=100)
    selector.fit(X_norm_train, y_norm_train.T[0])
    impts = selector.feature_importances_
    scores = []
    results = []
    # a single importance cut-off (1e-2) is scanned
    for threshold in np.logspace(-2, -2, 1):
        idxes = np.where(impts > threshold)[0]
        features_sel = headers[idxes]
        impt_dict = dict(zip(features_sel, impts[idxes]))
        X_norm_train_sel = X_norm_train.T[idxes].T
        X_norm_test_sel = X_norm_test.T[idxes].T
        # one gradient-boosting model per label column
        regr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
        regr.fit(X_norm_train_sel, y_norm_train)
        # predictions and reference labels on the original scale
        y_train_pred = regr.predict(X_norm_train_sel) * std_y + mean_y
        y_train_data = y_norm_train * std_y + mean_y
        y_test_pred = regr.predict(X_norm_test_sel) * std_y + mean_y
        y_test_data = y_norm_test * std_y + mean_y
        train_dict = {
            'X_norm_sel_dict': dict(zip(features_sel, X_norm_train_sel.T)),
            'y_data': y_train_data,
            'y_pred': y_train_pred,
        }
        test_dict = {
            'X_norm_sel_dict': dict(zip(features_sel, X_norm_test_sel.T)),
            'y_data': y_test_data,
            'y_pred': y_test_pred,
        }
        # performance
        score_train = regr.score(X_norm_train_sel, y_norm_train)
        score_test = regr.score(X_norm_test_sel, y_norm_test)
        MAE_train = mean_absolute_error(y_train_data, y_train_pred)
        MAE_test = mean_absolute_error(y_test_data, y_test_pred)
        perm_dict = {
            'score_train': score_train,
            'score_test': score_test,
            'MAE_train': MAE_train,
            'MAE_test': MAE_test,
        }
        scores.append(score_test)
        results.append([stat_dict, impt_dict, train_dict,
                        test_dict, perm_dict, regr])
    # report the run with the highest test score
    best = results[np.argsort(np.array(scores))[-1]]
    stat_dict, impt_dict, train_dict, test_dict, perm_dict, regr = best

    return stat_dict, impt_dict, train_dict, test_dict, perm_dict, regr
|
|
613
|
+
|
|
614
|
+
# predict labels using a given regr
#  @param core3D mol3D class of a molecule
#  @param spin the spin multiplicity of the core3D
#  @param train_dict the dictionary that contains the training data
#  @param stat_dict the dictionary that contains the statistics of the training data (e.g. mean, std)
#  @param impt_dict the dictionary that contains the important features
#  @param regr the regression model
#  @return bondl_dict (bonded atom index -> prediction), ds (Euclidean
#          distances to every training point for the last atom processed;
#          empty when no metal-bonded atom was found)


def ML_model_predict(core3D, spin, train_dict, stat_dict, impt_dict, regr):
    bondl_keys = []
    bondls = []
    # defined up front so the return below cannot hit an unbound name when
    # there is no metal or the metal has no bonded atoms
    ds = []
    # one-hot encoding of the spin multiplicity over six slots
    # NOTE(review): spin=0 would set the last slot via index -1 -- confirm
    # callers always pass 1..6.
    spin_ohe = [0] * 6
    spin_ohe[spin - 1] = 1
    mean_y = stat_dict['mean_y']
    std_y = stat_dict['std_y']
    mean_X_dict = stat_dict['mean_X_dict']
    std_X_dict = stat_dict['std_X_dict']
    midxes = core3D.findMetal()
    Xs_train = train_dict['X_norm_sel_dict']
    for midx in midxes:
        matno = core3D.getAtom(midx).atno
        fidxes = core3D.getBondedAtoms(midx)
        for fidx_i, fidx in enumerate(fidxes):
            fprio_list, fd_list, idx_list = feature_prep(core3D, fidx_i)
            # raw descriptor vector: metal atomic number, spin one-hot, then
            # the per-atom descriptors of each ligand atom in idx_list order
            descs = [matno]
            desc_names = ['matno_0']
            descs += spin_ohe
            for i in range(len(spin_ohe)):
                desc_names.append('spin' + str(i) + '_ohe')
            for idx_i, idx in enumerate(idx_list):
                fidx_ = fidxes[idx]
                descriptor_names, descriptors = get_descriptor_vector_for_atidx(
                    core3D, fidx_)
                for descriptor_name in descriptor_names:
                    desc_names.append(descriptor_name + '_' + str(idx_i))
                descs += descriptors
            desc_dict = dict(zip(desc_names, descs))
            # keep only the selected features, standardized with the training
            # statistics (a zero std maps the feature to 0)
            descs = []
            Xs_train_sel = []
            for key in list(impt_dict.keys()):
                centered = desc_dict[key] - mean_X_dict[key]
                desc = np.divide(centered, std_X_dict[key],
                                 out=np.zeros_like(centered),
                                 where=std_X_dict[key] != 0)
                descs.append(desc)
                Xs_train_sel.append(Xs_train[key].tolist())
            # distance from this sample to every training sample; the
            # training matrix is built once instead of once per sample
            desc_vec = np.array(descs)
            train_rows = np.array(Xs_train_sel).T
            ds = [np.linalg.norm(desc_vec - train_row)
                  for train_row in train_rows]
            bondl = regr.predict([descs]) * std_y + mean_y
            bondl_keys.append(fidx)
            bondls.append(bondl)
    bondl_dict = dict(zip(bondl_keys, bondls))

    return bondl_dict, ds
|
|
678
|
+
|
|
679
|
+
# predict labels using a given regr
|
|
680
|
+
# @param core3D mol3D class of a molecule
|
|
681
|
+
# @param spin the spin multiplicity of the core3D
|
|
682
|
+
# @param mligcatom the external atom index of the mlig
|
|
683
|
+
# @return bondl_dict, ds (a list of Euclidean distances)
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def krr_model_predict(core3D, spin, mligcatom):
    """Predict bond lengths for the atoms bonded to each metal of ``core3D``.

    Two pickled regressors (``hat_krr_model.pkl`` and ``hat2_krr_model.pkl``)
    are loaded from the ``python_krr`` data directory together with their CSV
    normalisation statistics and feature-name lists. For every atom bonded
    to a metal, a descriptor vector is assembled (metal atomic number, spin
    one-hot encoding, per-atom descriptors), normalised, and passed to the
    first regressor. The second regressor is evaluated only for the bonded
    atom whose index equals ``mligcatom``.

    Parameters
    ----------
    core3D : mol3D
        Molecule; must provide ``findMetal``, ``getAtom`` and
        ``getBondedAtoms``.
    spin : int
        Spin multiplicity; entry ``spin - 1`` of a length-6 one-hot vector is
        set to 1.
    mligcatom : int
        Atom index for which the second regressor is evaluated.

    Returns
    -------
    bondl_dict : dict
        Maps each metal-bonded atom index to the first regressor's
        de-normalised prediction.
    bondl2 : object or None
        De-normalised prediction of the second regressor for ``mligcatom``,
        or ``None`` if ``mligcatom`` was never encountered among the
        metal-bonded atoms.
    ds1 : list
        For the last metal processed, one list of Euclidean distances per
        bonded atom (empty if the molecule has no metal).
    ds2 : list or None
        Euclidean distances computed for ``mligcatom`` with the second
        regressor's training data, or ``None`` if it was never encountered.
    """
    bondl_keys = []
    bondls = []
    # Defaults so the final return is always well defined; previously these
    # names were unbound (UnboundLocalError) when no metal was found or when
    # mligcatom was not bonded to any metal.
    bondl2 = None
    ds1 = []
    ds2 = None
    spin_ohe = [0] * 6
    spin_ohe[spin - 1] = 1
    globs = globalvars()
    if globs.custom_path:  # a user-configured data directory takes priority
        fpath = str(globs.custom_path).rstrip('/') + "/python_krr"
    else:
        fpath = str(resource_files("molSimplify").joinpath("python_krr"))
    # load model
    # NOTE: pickle.load executes arbitrary code from the file; only point
    # custom_path at trusted data.
    f_model = fpath + '/hat_krr_model.pkl'
    with open(f_model, 'rb') as f:
        regr = pickle.load(f)
    Xs_train = regr.X_fit_
    # y stats: second CSV row holds mean and standard deviation
    f_stats = fpath + '/hat_y_mean_std.csv'
    with open(f_stats, 'r') as f:
        fcsv = csv.reader(f)
        for i, line in enumerate(fcsv):
            if i == 1:
                mean_y = float(line[0])
                std_y = float(line[1])
    # x stats: row 0 = feature names, row 1 = means, row 2 = standard deviations
    f_stats = fpath + '/hat_X_mean_std.csv'
    with open(f_stats, 'r') as f:
        fcsv = csv.reader(f)
        for i, line in enumerate(fcsv):
            if i == 0:
                feature_names = line
            if i == 1:
                mean_X = [float(ele) for ele in line]
            if i == 2:
                std_X = [float(ele) for ele in line]
    mean_X_dict = dict(list(zip(feature_names, mean_X)))
    std_X_dict = dict(list(zip(feature_names, std_X)))
    # load feature names (the last row of the file is used)
    f_stats = fpath + '/hat_feature_names.csv'
    with open(f_stats, 'r') as f:
        fcsv = csv.reader(f)
        for line in fcsv:
            keys = line
    # rOH
    # load model2
    f_model = fpath + '/hat2_krr_model.pkl'
    with open(f_model, 'rb') as f:
        regr2 = pickle.load(f)
    X2s_train = regr2.X_fit_
    # y2 stats
    f_stats = fpath + '/hat2_y_mean_std.csv'
    with open(f_stats, 'r') as f:
        fcsv = csv.reader(f)
        for i, line in enumerate(fcsv):
            if i == 1:
                mean_y2 = float(line[0])
                std_y2 = float(line[1])
    # x2 stats
    # NOTE(review): the second model is normalised with the first model's
    # X statistics (no separate hat2_X_mean_std.csv is read) - confirm that
    # this is intended.
    mean_X2_dict = dict(list(zip(feature_names, mean_X)))
    std_X2_dict = dict(list(zip(feature_names, std_X)))
    # load feature2 names (the last row of the file is used)
    f_stats = fpath + '/hat2_feature_names.csv'
    with open(f_stats, 'r') as f:
        fcsv = csv.reader(f)
        for line in fcsv:
            keys2 = line
    # get features
    midxes = core3D.findMetal()
    for midx in midxes:
        matno = core3D.getAtom(midx).atno
        fidxes = core3D.getBondedAtoms(midx)
        ds1 = []
        for fidx_i, fidx in enumerate(fidxes):
            # only the index ordering returned by feature_prep is used here
            _, _, idx_list = feature_prep(core3D, fidx_i)
            descs = []
            desc_names = []
            descs.append(matno)
            desc_names.append('matno_0')
            descs += spin_ohe
            for i in range(len(spin_ohe)):
                desc_names.append('spin' + str(i) + '_ohe')
            for idx_i, idx in enumerate(idx_list):
                fidx_ = fidxes[idx]
                descriptor_names, descriptors = get_descriptor_vector_for_atidx(
                    core3D, fidx_)
                for descriptor_name in descriptor_names:
                    desc_names.append(descriptor_name + '_' + str(idx_i))
                descs += descriptors
            desc_dict = dict(list(zip(desc_names, descs)))
            # keep only the features the first model was trained on, z-scored;
            # features with zero standard deviation are mapped to 0
            descs = []
            for key in keys:
                desc = np.divide((desc_dict[key] - mean_X_dict[key]), std_X_dict[key], out=np.zeros_like(
                    desc_dict[key] - mean_X_dict[key]), where=std_X_dict[key] != 0)
                descs.append(desc)
            # NOTE(review): the loop bound is the length of the first row of
            # X_fit_, while the index selects rows - confirm whether all
            # training rows were meant to be visited.
            ds = []
            for i in range(len(Xs_train[0])):
                d = np.linalg.norm(np.array(descs) - np.array(Xs_train)[i])
                ds.append(d)
            ds1.append(ds)
            bondl = regr.predict([descs]) * std_y + mean_y
            bondl_keys.append(fidx)
            bondls.append(bondl)
            if fidx == mligcatom:
                # same procedure with the second model's feature selection
                descs = []
                for key in keys2:
                    desc = np.divide((desc_dict[key] - mean_X2_dict[key]), std_X2_dict[key],
                                     out=np.zeros_like(desc_dict[key] - mean_X2_dict[key]), where=std_X2_dict[key] != 0)
                    descs.append(desc)
                ds2 = []
                for i in range(len(X2s_train[0])):
                    d2 = np.linalg.norm(
                        np.array(descs) - np.array(X2s_train)[i])
                    ds2.append(d2)
                bondl2 = regr2.predict([descs]) * std_y2 + mean_y2

    bondl_dict = dict(list(zip(bondl_keys, bondls)))

    return bondl_dict, bondl2, ds1, ds2
|
|
838
|
+
|
|
839
|
+
# ## predict labels using gradient boosting regressor (GBR) with a given csv file
|
|
840
|
+
# # @param csvf the csv file containing headers (first row), data, and label
|
|
841
|
+
# # @param colnum_i_label the starting column number for the label column
|
|
842
|
+
# # @param colnum_j_label the ending column number for the label column + 1
|
|
843
|
+
# # @param colnum_desc the starting column number for the descriptor columns
|
|
844
|
+
# # @return y_train_data, y_train_pred, y_test_data, y_test_pred, score
|
|
845
|
+
# def krr_model_predict(core3D, spin, stat_dict, impt_dict, regr):
|
|
846
|
+
# bondl_keys = []
|
|
847
|
+
# bondls = []
|
|
848
|
+
# spin_ohe = [0] * 6
|
|
849
|
+
# spin_ohe[spin - 1] = 1
|
|
850
|
+
# mean_y = stat_dict['mean_y']
|
|
851
|
+
# std_y = stat_dict['std_y']
|
|
852
|
+
# mean_X = stat_dict['mean_X']
|
|
853
|
+
# std_X = stat_dict['std_X']
|
|
854
|
+
# midxes = core3D.findMetal()
|
|
855
|
+
# for midx in midxes:
|
|
856
|
+
# matno = core3D.getAtom(midx).atno
|
|
857
|
+
# fidxes = core3D.getBondedAtoms(midx)
|
|
858
|
+
# for fidx_i, fidx in enumerate(fidxes):
|
|
859
|
+
# fprio_list, fd_list, idx_list = feature_prep(core3D, fidx_i)
|
|
860
|
+
# descs = []
|
|
861
|
+
# desc_names = []
|
|
862
|
+
# descs.append(matno)
|
|
863
|
+
# desc_names.append('matno_0')
|
|
864
|
+
# descs += spin_ohe
|
|
865
|
+
# for i in range(len(spin_ohe)):
|
|
866
|
+
# desc_names.append('spin' + str(i) + '_ohe')
|
|
867
|
+
# for idx_i, idx in enumerate(idx_list):
|
|
868
|
+
# fidx_ = fidxes[idx]
|
|
869
|
+
# descriptor_names, descriptors = get_descriptor_vector_for_atidx(core3D, fidx_)
|
|
870
|
+
# for descriptor_name in descriptor_names:
|
|
871
|
+
# desc_names.append(descriptor_name + '_' + str(idx_i))
|
|
872
|
+
# descs += descriptors
|
|
873
|
+
# normalize(descs, mean_X, std_X)
|
|
874
|
+
# desc_dict = dict(zip(desc_names, descs))
|
|
875
|
+
# descs = []
|
|
876
|
+
# for key in impt_dict.keys():
|
|
877
|
+
# desc = desc_dict[key]
|
|
878
|
+
# descs.append(desc)
|
|
879
|
+
# regr.fit()
|
|
880
|
+
# bondl = regr.predict([descs]) * std_y + mean_y
|
|
881
|
+
# bondl_keys.append(fidx)
|
|
882
|
+
# bondls.append(bondl)
|
|
883
|
+
# bondl_dict = dict(zip(bondl_keys, bondls))
|
|
884
|
+
#
|
|
885
|
+
# return bondl_dict
|
|
886
|
+
|
|
887
|
+
# wrapper to get KRR predictions for bondl_core3D, bondl_m3D, bondl_m3Dsub from a known mol3D using partial charges
|
|
888
|
+
# @param mol mol3D of the molecule
|
|
889
|
+
# @param charge charge of the molecule
|
|
890
|
+
# @return KRR-predicted bondl_core3D
|
|
891
|
+
# KRR accuracies for bondl_core3D: 98.2% (training score) and 47.6 (test score)
|
|
892
|
+
# KRR accuracies for bondl_m3D: 99.5% (training score) and 51.1 (test score)
|
|
893
|
+
|
|
894
|
+
|
|
895
|
+
def invoke_KRR_from_mol3d_dQ(mol, charge):
    """Return kernel-ridge predictions for ``bondl_core3D`` and ``bondl_m3D``.

    Features are obtained from ``ffeatures(mol, charge)``; entries 7..12 are
    z-scored with target-specific statistics and fed to an RBF
    ``KernelRidge`` model that is fitted on the packaged
    ``X_norm_train_TS.csv`` / ``y_norm_train_TS.csv`` data (or the copies
    under ``globalvars().custom_path`` when that is set).

    Parameters
    ----------
    mol : mol3D
        Molecule passed through to ``ffeatures``.
    charge : object
        Charge of the molecule, passed through to ``ffeatures``.

    Returns
    -------
    dict
        ``{'bondl_core3D': value, 'bondl_m3D': value}`` with the
        de-normalised predictions.
    """
    _, features = ffeatures(mol, charge)

    # Locate the training data: custom directory first, packaged files otherwise.
    globs = globalvars()
    if globs.custom_path:
        x_csv = str(globs.custom_path).rstrip('/') + "/python_krr/X_norm_train_TS.csv"
        y_csv = str(globs.custom_path).rstrip('/') + "/python_krr/y_norm_train_TS.csv"
    else:
        x_csv = resource_files("molSimplify.python_krr").joinpath("X_norm_train_TS.csv")
        y_csv = resource_files("molSimplify.python_krr").joinpath("y_norm_train_TS.csv")

    with open(x_csv, 'r') as handle:
        X_norm_train = np.array([[float(cell) for cell in row] for row in csv.reader(handle)])
    with open(y_csv, 'r') as handle:
        y_norm_train = np.array([[float(cell) for cell in row] for row in csv.reader(handle)])

    # Per-target settings:
    # (alpha, gamma, mean_y, std_y, mean_X, std_X)
    settings = {
        # KRR parameters for bondl_core3D
        'bondl_core3D': (
            0.1,
            4.6415888336127775,
            1.8556069976566096,
            0.08511267085380758,
            np.array([1.1886128903870394, 1.0746595698697274, 1.0089390403652372, 1.0051636435711488,
                      0.9639844597149281, 1.5924309727104378]),
            np.array([1.4887238067607071, 1.4391120341824508, 1.351343230273359, 1.302911028297482,
                      1.1511093513567663, 0.7366350688359029]),
        ),
        # KRR parameters for bondl_m3D
        'bondl_m3D': (
            0.015848931924611134,
            8.531678524172808,
            1.1429284052746633,
            0.04763054722349127,
            np.array(
                [-1.17136495, -1.09058534, -1.04062806, -1.01379334, -0.92612448, -1.30558513]),
            np.array(
                [1.36359461, 1.32785945, 1.26392399, 1.21494676, 1.0253893, 0.5940198]),
        ),
    }

    bondl_dict = {}
    for target, (alpha, gamma, mean_y, std_y, mean_X, std_X) in settings.items():
        # model initiation: normalise the query, fit, predict, de-normalise
        scaled_query = (np.array(features[7:13]) - mean_X) / std_X
        model = KernelRidge(kernel='rbf', alpha=alpha, gamma=gamma)
        model.fit(X_norm_train, y_norm_train)
        prediction = model.predict([scaled_query]) * std_y + mean_y
        bondl_dict[target] = prediction[0][0]

    return bondl_dict
|
|
969
|
+
|
|
970
|
+
# wrapper to get KRR predictions for bondl_core3D from a known mol3D using RAC-190
|
|
971
|
+
# @param mol mol3D of the molecule
|
|
972
|
+
# @param charge charge of the molecule
|
|
973
|
+
# @return KRR-predicted bondl_core3D
|
|
974
|
+
# KRR accuracies: 98.2% (training score) and 47.6 (test score)
|
|
975
|
+
|
|
976
|
+
|
|
977
|
+
def invoke_KRR_from_mol3d_RACs(mol, charge):
    """Return the kernel-ridge prediction of ``bondl_core3D`` for ``mol``.

    Features come from ``ffeatures(mol, charge)``; entries 7..12 are z-scored
    with fixed statistics and passed to an RBF ``KernelRidge`` model fitted
    on ``X_norm_train_TS.csv`` / ``y_norm_train_TS.csv``.

    The training files are looked up under ``globalvars().custom_path`` when
    it is set and in the packaged ``molSimplify.python_krr`` resources
    otherwise (the same lookup ``invoke_KRR_from_mol3d_dQ`` uses), instead of
    a developer-specific absolute path.

    Parameters
    ----------
    mol : mol3D
        Molecule passed through to ``ffeatures``.
    charge : object
        Charge of the molecule, passed through to ``ffeatures``.

    Returns
    -------
    object
        De-normalised predicted ``bondl_core3D`` (first element of the model
        output).
    """
    _, features = ffeatures(mol, charge)
    # Resolve the training data location portably.
    globs = globalvars()
    if globs.custom_path:  # a user-configured data directory takes priority
        X_norm_train_csv = str(globs.custom_path).rstrip(
            '/') + "/python_krr/X_norm_train_TS.csv"
        y_norm_train_csv = str(globs.custom_path).rstrip(
            '/') + "/python_krr/y_norm_train_TS.csv"
    else:
        X_norm_train_csv = str(resource_files(
            "molSimplify.python_krr").joinpath("X_norm_train_TS.csv"))
        y_norm_train_csv = str(resource_files(
            "molSimplify.python_krr").joinpath("y_norm_train_TS.csv"))
    X_norm_train = pd.read_csv(X_norm_train_csv, header=None)
    y_norm_train = pd.read_csv(y_norm_train_csv, header=None)
    # KRR hyper-parameters and normalisation statistics
    kernel = 'rbf'
    alpha = 0.1
    gamma = 4.6415888336127775
    mean_y_norm_train = 1.8556069976566096
    std_y_norm_train = 0.08511267085380758
    mean_X_norm_train = np.array([1.1886128903870394, 1.0746595698697274, 1.0089390403652372,
                                  1.0051636435711488, 0.9639844597149281, 1.5924309727104378])
    std_X_norm_train = np.array([1.4887238067607071, 1.4391120341824508, 1.351343230273359,
                                 1.302911028297482, 1.1511093513567663, 0.7366350688359029])
    # model initiation: normalise the query, fit, predict, de-normalise
    X_norm_test = np.array(features[7:13])
    X_norm_test = (X_norm_test - mean_X_norm_train) / std_X_norm_train
    model = KernelRidge(kernel=kernel, alpha=alpha, gamma=gamma)
    model.fit(X_norm_train, y_norm_train)
    y_norm_test = model.predict([X_norm_test])
    y_norm_test = y_norm_test * std_y_norm_train + mean_y_norm_train
    bondl_core3D = y_norm_test[0][0]

    return bondl_core3D
|
|
1013
|
+
|
|
1014
|
+
# Gets the RACs of a given atidx
|
|
1015
|
+
# @param mol mol3D of this molecule
|
|
1016
|
+
# @param atidx the index of the atom of concern
|
|
1017
|
+
# @return descriptor_names updated names
|
|
1018
|
+
# @return descriptors updated RACs
|
|
1019
|
+
|
|
1020
|
+
|
|
1021
|
+
def get_descriptor_vector_for_atidx(mol, atidx, depth=4, oct=False):
    """Collect autocorrelation and delta-metric descriptors for one atom.

    Parameters
    ----------
    mol : mol3D
        Molecule containing the atom.
    atidx : int
        Index of the atom of concern.
    depth : int, optional
        Forwarded to the descriptor generators.
    oct : bool, optional
        Forwarded to the descriptor generators.

    Returns
    -------
    descriptor_names : list
        Autocorrelation column names as produced by the generator, followed
        by delta-metric column names prefixed with ``'D_'``.
    descriptors : list
        Descriptor values in the same order as ``descriptor_names``.
    """
    # Plain autocorrelations keep their column names unchanged.
    ac_output = generate_atomonly_autocorrelations(
        mol, atidx, False, depth, oct)
    descriptor_names = [label for group in ac_output['colnames'] for label in group]
    descriptors = [value for block in ac_output['results'] for value in block.tolist()]

    # Delta metrics are tagged with a 'D_' prefix.
    delta_output = generate_atomonly_deltametrics(
        mol, atidx, False, depth, oct)
    descriptor_names.extend(
        'D_' + label for group in delta_output['colnames'] for label in group)
    for block in delta_output['results']:
        descriptors.extend(block.tolist())

    return descriptor_names, descriptors
|
|
1039
|
+
|
|
1040
|
+
|
|
1041
|
+
def generate_revised_atomonly_autocorrelations(mol, atomIdx, loud, depth=4, oct=True):
    """Compute atom-centred autocorrelations for five atomic properties.

    Parameters
    ----------
    mol : mol3D
        Molecule to analyse.
    atomIdx : int
        Index of the atom the autocorrelations start from.
    loud : bool
        Print-output flag; not used by this function.
    depth : int, optional
        Maximum depth; column names are generated for 0..depth.
    oct : bool, optional
        Forwarded to ``atom_only_autocorrelation``.

    Returns
    -------
    dict
        ``{'colnames': [...], 'results': [...]}`` with one entry per property
        (labels ``chi``, ``Z``, ``I``, ``T``, ``S``).
    """
    # (property name understood by the helper, short label used in column names)
    property_labels = (
        ('electronegativity', 'chi'),
        ('nuclear_charge', 'Z'),
        ('ident', 'I'),
        ('topology', 'T'),
        ('size', 'S'),
    )
    colnames = []
    result = []
    for prop, label in property_labels:
        values = atom_only_autocorrelation(
            mol, prop, depth, atomIdx, oct=oct)
        colnames.append([label + '-' + str(level) for level in range(depth + 1)])
        result.append(values)
    return {'colnames': colnames, 'results': result}
|
|
1066
|
+
|
|
1067
|
+
|
|
1068
|
+
def generate_atomonly_ratiometrics(mol, atomIdx, loud, depth=4, oct=True):
    """Compute atom-centred ratiometrics for every numerator/denominator pair.

    Parameters
    ----------
    mol : mol3D
        Molecule to analyse.
    atomIdx : int
        Index of the atom the metrics start from.
    loud : bool
        Print-output flag; not used by this function.
    depth : int, optional
        Maximum depth; column names are generated for 0..depth.
    oct : bool, optional
        Forwarded to ``atom_only_ratiometric``.

    Returns
    -------
    dict
        ``{'colnames': [...], 'results': [...]}`` with one entry per
        (numerator, denominator) pair, numerators varying slowest. Column
        names have the form ``<num label>-<den label>-<depth>``.
    """
    numerators = (
        ('electronegativity', 'chi'),
        ('nuclear_charge', 'Z'),
        ('ident', 'I'),
        ('topology', 'T'),
        ('size', 'S'),
    )
    denominators = (
        ('electronegativity', 'chi'),
        ('nuclear_charge', 'Z'),
        ('size', 'S'),
    )
    colnames = []
    result = []
    for num_prop, num_label in numerators:
        for den_prop, den_label in denominators:
            values = atom_only_ratiometric(
                mol, num_prop, den_prop, depth, atomIdx, oct=oct)
            colnames.append(
                [num_label + '-' + den_label + '-' + str(level)
                 for level in range(depth + 1)])
            result.append(values)
    return {'colnames': colnames, 'results': result}
|
|
1099
|
+
|
|
1100
|
+
|
|
1101
|
+
def generate_atomonly_summetrics(mol, atomIdx, loud, depth=4, oct=True):
    """Compute atom-centred sum-metrics for five atomic properties.

    Parameters
    ----------
    mol : mol3D
        Molecule to analyse.
    atomIdx : int
        Index of the atom the metrics start from.
    loud : bool
        Print-output flag; not used by this function.
    depth : int, optional
        Maximum depth; column names are generated for 0..depth.
    oct : bool, optional
        Forwarded to ``atom_only_summetric``.

    Returns
    -------
    dict
        ``{'colnames': [...], 'results': [...]}`` with one entry per property
        (labels ``chi``, ``Z``, ``I``, ``T``, ``S``).
    """
    # (property name understood by the helper, short label used in column names)
    property_labels = (
        ('electronegativity', 'chi'),
        ('nuclear_charge', 'Z'),
        ('ident', 'I'),
        ('topology', 'T'),
        ('size', 'S'),
    )
    colnames = []
    result = []
    for prop, label in property_labels:
        values = atom_only_summetric(
            mol, prop, depth, atomIdx, oct=oct)
        colnames.append([label + '-' + str(level) for level in range(depth + 1)])
        result.append(values)
    return {'colnames': colnames, 'results': result}
|
|
1126
|
+
|
|
1127
|
+
|
|
1128
|
+
def generate_revised_atomonly_deltametrics(mol, atomIdx, loud, depth=4, oct=True):
    """Compute atom-centred delta-metrics for five atomic properties.

    Parameters
    ----------
    mol : mol3D
        Molecule to analyse.
    atomIdx : int
        Index of the atom the metrics start from.
    loud : bool
        Print-output flag; not used by this function.
    depth : int, optional
        Maximum depth; column names are generated for 0..depth.
    oct : bool, optional
        Forwarded to ``atom_only_deltametric``.

    Returns
    -------
    dict
        ``{'colnames': [...], 'results': [...]}`` with one entry per property
        (labels ``chi``, ``Z``, ``I``, ``T``, ``S``).
    """
    # (property name understood by the helper, short label used in column names)
    property_labels = (
        ('electronegativity', 'chi'),
        ('nuclear_charge', 'Z'),
        ('ident', 'I'),
        ('topology', 'T'),
        ('size', 'S'),
    )
    colnames = []
    result = []
    for prop, label in property_labels:
        values = atom_only_deltametric(
            mol, prop, depth, atomIdx, oct=oct)
        colnames.append([label + '-' + str(level) for level in range(depth + 1)])
        result.append(values)
    return {'colnames': colnames, 'results': result}
|
|
1153
|
+
|
|
1154
|
+
# Gets the rRACs of a given atidx
|
|
1155
|
+
# @param mol mol3D of this molecule
|
|
1156
|
+
# @param atidx the index of the atom of concern
|
|
1157
|
+
# @return descriptor_names updated names
|
|
1158
|
+
# @return descriptors updated RACs
|
|
1159
|
+
|
|
1160
|
+
|
|
1161
|
+
def get_revised_descriptor_vector_for_atidx(mol, atidx, depth=4, oct=False):
    """Collect the revised descriptor set (A, R, S, D families) for one atom.

    Parameters
    ----------
    mol : mol3D
        Molecule containing the atom.
    atidx : int
        Index of the atom of concern.
    depth : int, optional
        Forwarded to the descriptor generators.
    oct : bool, optional
        Forwarded to the descriptor generators.

    Returns
    -------
    descriptor_names : list
        Column names prefixed by family: ``'A_'`` (autocorrelations),
        ``'R_'`` (ratiometrics), ``'S_'`` (sum-metrics), ``'D_'``
        (delta-metrics), in that order.
    descriptors : list
        Descriptor values in the same order as ``descriptor_names``.
    """
    # Families are evaluated in this fixed order; each contributes a name prefix.
    families = (
        ('A_', generate_revised_atomonly_autocorrelations),
        ('R_', generate_atomonly_ratiometrics),
        ('S_', generate_atomonly_summetrics),
        ('D_', generate_revised_atomonly_deltametrics),
    )
    descriptor_names = []
    descriptors = []
    for prefix, generator in families:
        outcome = generator(mol, atidx, False, depth, oct)
        for group in outcome['colnames']:
            descriptor_names.extend(prefix + label for label in group)
        for block in outcome['results']:
            descriptors.extend(block.tolist())

    return descriptor_names, descriptors
|
|
1194
|
+
|
|
1195
|
+
# commented out default_plot() as conda repo does not automatically conda install matplotlib
|
|
1196
|
+
|
|
1197
|
+
|
|
1198
|
+
def default_plot(x, y, name=False):
    """Draw a scatter plot of ``y`` against ``x`` with a dashed diagonal.

    Both axes share the same limits, derived from the minimum and maximum of
    ``x`` (formatted to one decimal and padded by 0.1), and both are labelled
    as distances in Angstrom.

    Parameters
    ----------
    x, y : array-like
        Data to plot; converted with ``np.array``.
    name : str or False, optional
        When truthy, the figure is saved as ``<cwd>/<name>.eps`` at 400 dpi.
    """
    # defs for plt
    xlabel = r'distance / ${\rm \AA}$'
    ylabel = r'distance / ${\rm \AA}$'
    font = {'family': 'sans-serif',
            'size': 22}
    # figure size
    plt.figure(figsize=(7, 6))
    # dealing with axes
    x = np.array(x)
    y = np.array(y)
    x_min = float(format(np.amin(x), '.1f')) - 0.1
    x_max = float(format(np.amax(x), '.1f')) + 0.1
    plt.xlim(x_min, x_max)
    # the y axis deliberately reuses the x limits so the diagonal spans the plot
    plt.ylim(x_min, x_max)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # dealing with ticks
    # Use the Axes that already carries the limits and labels; plt.axes()
    # would add a second Axes on current matplotlib versions.
    ax = plt.gca()
    ax.xaxis.set_major_locator(ticker.MultipleLocator(0.4))
    ax.xaxis.set_minor_locator(ticker.MultipleLocator(0.2))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(0.4))
    ax.yaxis.set_minor_locator(ticker.MultipleLocator(0.2))
    plt.tick_params(which='both', axis='both', direction='in',
                    bottom=True, top=True, right=True, left=True)
    # NOTE(review): these rcParams are assigned after the figure and its Axes
    # exist - confirm whether they are meant to style this figure or only
    # figures created later.
    plt.rcParams['axes.linewidth'] = 3
    plt.rcParams['xtick.major.size'] = 10
    plt.rcParams['xtick.major.width'] = 3
    plt.rcParams['ytick.major.size'] = 10
    plt.rcParams['ytick.major.width'] = 3
    plt.rcParams['xtick.minor.size'] = 5
    plt.rcParams['xtick.minor.width'] = 3
    plt.rcParams['ytick.minor.size'] = 5
    plt.rcParams['ytick.minor.width'] = 3
    plt.tight_layout()

    plt.rc('font', **font)
    plt.plot(x, y, 'o', markeredgecolor='k')
    # dashed y = x reference line
    plt.plot([x_min, x_max], [x_min, x_max], linestyle='dashed', color='k')
    if name:
        fpath = os.getcwd()
        plt.savefig(fpath + '/' + name + '.eps', dpi=400)
|
|
1259
|
+
|
|
1260
|
+
# # plt.imshow(data,interpolation='none')
|
|
1261
|
+
# # # plt.imshow(data,interpolation='nearest')
|
|
1262
|
+
# # plt.savefig('relative_energies_for_Fe-py4.eps',dpi=400)
|