molSimplify 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +224 -0
- molSimplify/Classes/__init__.py +6 -0
- molSimplify/Classes/atom3D.py +235 -0
- molSimplify/Classes/dft_obs.py +130 -0
- molSimplify/Classes/globalvars.py +827 -0
- molSimplify/Classes/helpers.py +161 -0
- molSimplify/Classes/ligand.py +2330 -0
- molSimplify/Classes/mGUI.py +2493 -0
- molSimplify/Classes/mWidgets.py +438 -0
- molSimplify/Classes/miniGUI.py +41 -0
- molSimplify/Classes/mol2D.py +260 -0
- molSimplify/Classes/mol3D.py +5846 -0
- molSimplify/Classes/monomer3D.py +253 -0
- molSimplify/Classes/partialcharges.py +226 -0
- molSimplify/Classes/protein3D.py +1178 -0
- molSimplify/Classes/rundiag.py +151 -0
- molSimplify/Data/ML.dat +212 -0
- molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
- molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
- molSimplify/Data/MLS_angle_for_click.dat +8 -0
- molSimplify/Data/MLS_angle_for_inter.dat +23 -0
- molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
- molSimplify/Data/MLS_angle_for_intra.dat +10 -0
- molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
- molSimplify/Data/MLS_angle_for_oa.dat +18 -0
- molSimplify/Data/ML_FSR_for_inter.dat +112 -0
- molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
- molSimplify/Data/ML_bond_for_cat.dat +8 -0
- molSimplify/Data/ML_bond_for_click.dat +8 -0
- molSimplify/Data/ML_bond_for_inter.dat +48 -0
- molSimplify/Data/ML_bond_for_inter2.dat +48 -0
- molSimplify/Data/ML_bond_for_intra.dat +10 -0
- molSimplify/Data/ML_bond_for_intra2.dat +6 -0
- molSimplify/Data/ML_bond_for_oa.dat +18 -0
- molSimplify/Data/bp1.dat +21 -0
- molSimplify/Data/li.dat +3 -0
- molSimplify/Data/no.dat +2 -0
- molSimplify/Data/oct.dat +7 -0
- molSimplify/Data/pbp.dat +8 -0
- molSimplify/Data/spy.dat +6 -0
- molSimplify/Data/sqap.dat +9 -0
- molSimplify/Data/sqp.dat +5 -0
- molSimplify/Data/tbp.dat +6 -0
- molSimplify/Data/tdhd.dat +9 -0
- molSimplify/Data/thd.dat +5 -0
- molSimplify/Data/tpl.dat +4 -0
- molSimplify/Data/tpr.dat +7 -0
- molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
- molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
- molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
- molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
- molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
- molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
- molSimplify/Informatics/MOF/__init__.py +0 -0
- molSimplify/Informatics/MOF/atomic.py +267 -0
- molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
- molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
- molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
- molSimplify/Informatics/Mol2Parser.py +46 -0
- molSimplify/Informatics/RACassemble.py +408 -0
- molSimplify/Informatics/__init__.py +0 -0
- molSimplify/Informatics/active_learning/__init__.py +0 -0
- molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
- molSimplify/Informatics/autocorrelation.py +1930 -0
- molSimplify/Informatics/clean_autocorrelation.py +778 -0
- molSimplify/Informatics/coulomb_analyze.py +67 -0
- molSimplify/Informatics/decoration_manager.py +193 -0
- molSimplify/Informatics/geo_analyze.py +88 -0
- molSimplify/Informatics/geometrics.py +56 -0
- molSimplify/Informatics/graph_analyze.py +163 -0
- molSimplify/Informatics/graph_racs.py +288 -0
- molSimplify/Informatics/jupyter_vis.py +172 -0
- molSimplify/Informatics/lacRACAssemble.py +2192 -0
- molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
- molSimplify/Informatics/misc_descriptors.py +198 -0
- molSimplify/Informatics/organic_fingerprints.py +61 -0
- molSimplify/Informatics/partialcharges.py +345 -0
- molSimplify/Informatics/protein/activesite.py +53 -0
- molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
- molSimplify/Informatics/rac155_geo.py +48 -0
- molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
- molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
- molSimplify/Ligands/12crown4.mol +62 -0
- molSimplify/Ligands/Antipyrine.mol +58 -0
- molSimplify/Ligands/BPAbipy.mol +106 -0
- molSimplify/Ligands/Hpyrrole.mol +26 -0
- molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
- molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
- molSimplify/Ligands/NMe2_-1.xyz +11 -0
- molSimplify/Ligands/PCy3.mol +111 -0
- molSimplify/Ligands/PMe3.xyz +15 -0
- molSimplify/Ligands/PPh3.mol +76 -0
- molSimplify/Ligands/Propyphenazone.mol +77 -0
- molSimplify/Ligands/acac.mol +33 -0
- molSimplify/Ligands/acacen.mol +76 -0
- molSimplify/Ligands/acetate.smi +1 -0
- molSimplify/Ligands/acetate.xyz +9 -0
- molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
- molSimplify/Ligands/acetonitrile.mol +17 -0
- molSimplify/Ligands/alanine.mol +30 -0
- molSimplify/Ligands/alphabetizer.py +21 -0
- molSimplify/Ligands/amine.mol +11 -0
- molSimplify/Ligands/ammonia.mol +12 -0
- molSimplify/Ligands/arginine.mol +58 -0
- molSimplify/Ligands/asparagine.mol +38 -0
- molSimplify/Ligands/aspartic_acid.mol +35 -0
- molSimplify/Ligands/azide.mol +11 -0
- molSimplify/Ligands/benzene.mol +28 -0
- molSimplify/Ligands/benzene_pi.mol +30 -0
- molSimplify/Ligands/benzenedithiol.mol +30 -0
- molSimplify/Ligands/benzenethiol.mol +30 -0
- molSimplify/Ligands/benzylisocy.mol +38 -0
- molSimplify/Ligands/bidiazine.mol +42 -0
- molSimplify/Ligands/bidiazole.mol +38 -0
- molSimplify/Ligands/bifuran.mol +38 -0
- molSimplify/Ligands/bihydrodiazine.mol +58 -0
- molSimplify/Ligands/bihydrodiazole.mol +46 -0
- molSimplify/Ligands/bihydrooxazine.mol +54 -0
- molSimplify/Ligands/bihydrooxazole.mol +42 -0
- molSimplify/Ligands/bihydrothiazine.mol +54 -0
- molSimplify/Ligands/bihydrothiazole.mol +42 -0
- molSimplify/Ligands/biimidazole.mol +38 -0
- molSimplify/Ligands/bioxazole.mol +34 -0
- molSimplify/Ligands/bipy.mol +46 -0
- molSimplify/Ligands/bipyrazine.xyz +20 -0
- molSimplify/Ligands/bipyrimidine.mol +42 -0
- molSimplify/Ligands/bipyrrole.mol +42 -0
- molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
- molSimplify/Ligands/bithiazole.mol +34 -0
- molSimplify/Ligands/bromide.mol +7 -0
- molSimplify/Ligands/bromide.smi +1 -0
- molSimplify/Ligands/c2.mol +9 -0
- molSimplify/Ligands/caprolactone.mol +41 -0
- molSimplify/Ligands/carbonyl.mol +8 -0
- molSimplify/Ligands/carboxyl.mol +13 -0
- molSimplify/Ligands/cat.mol +30 -0
- molSimplify/Ligands/chloride.mol +7 -0
- molSimplify/Ligands/chloride.smi +1 -0
- molSimplify/Ligands/chloropyridine.mol +27 -0
- molSimplify/Ligands/co2.mol +10 -0
- molSimplify/Ligands/corrolazine.mol +72 -0
- molSimplify/Ligands/cs.mol +8 -0
- molSimplify/Ligands/cyanate.xyz +5 -0
- molSimplify/Ligands/cyanide.mol +9 -0
- molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
- molSimplify/Ligands/cyanopyridine.mol +29 -0
- molSimplify/Ligands/cyclam.mol +81 -0
- molSimplify/Ligands/cyclen.mol +69 -0
- molSimplify/Ligands/cyclopentadienyl.mol +26 -0
- molSimplify/Ligands/cysteine.mol +32 -0
- molSimplify/Ligands/diaminomethyl.mol +19 -0
- molSimplify/Ligands/diazine.mol +25 -0
- molSimplify/Ligands/diazole.mol +23 -0
- molSimplify/Ligands/dicyanamide.mol +15 -0
- molSimplify/Ligands/dihydrofuran.mol +27 -0
- molSimplify/Ligands/dmap.xyz +35 -0
- molSimplify/Ligands/dmf.mol +28 -0
- molSimplify/Ligands/dmi.mol +41 -0
- molSimplify/Ligands/dmpe.mol +52 -0
- molSimplify/Ligands/dpmu.mol +47 -0
- molSimplify/Ligands/dppe.mol +112 -0
- molSimplify/Ligands/edta.mol +69 -0
- molSimplify/Ligands/en.mol +28 -0
- molSimplify/Ligands/ethanethiol.mol +21 -0
- molSimplify/Ligands/ethanolamine.mol +26 -0
- molSimplify/Ligands/ethbipy.mol +70 -0
- molSimplify/Ligands/ethyl.mol +19 -0
- molSimplify/Ligands/ethylamine.mol +24 -0
- molSimplify/Ligands/ethylene.mol +16 -0
- molSimplify/Ligands/ethylesteracac.mol +57 -0
- molSimplify/Ligands/fluoride.mol +7 -0
- molSimplify/Ligands/fluoride.smi +1 -0
- molSimplify/Ligands/formaldehyde.mol +12 -0
- molSimplify/Ligands/formamidate.xyz +8 -0
- molSimplify/Ligands/formate.xyz +6 -0
- molSimplify/Ligands/furan.mol +23 -0
- molSimplify/Ligands/glutamic_acid.mol +42 -0
- molSimplify/Ligands/glutamine.mol +44 -0
- molSimplify/Ligands/glycinate.mol +23 -0
- molSimplify/Ligands/glycine.mol +24 -0
- molSimplify/Ligands/h2s.mol +10 -0
- molSimplify/Ligands/helium.mol +6 -0
- molSimplify/Ligands/histidine.mol +45 -0
- molSimplify/Ligands/hmpa.mol +62 -0
- molSimplify/Ligands/hs-.mol +9 -0
- molSimplify/Ligands/hydride.mol +7 -0
- molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
- molSimplify/Ligands/hydrocyanide.mol +10 -0
- molSimplify/Ligands/hydrodiazine.mol +33 -0
- molSimplify/Ligands/hydrodiazole.mol +27 -0
- molSimplify/Ligands/hydrogensulfide.mol +10 -0
- molSimplify/Ligands/hydroisocyanide.mol +11 -0
- molSimplify/Ligands/hydrooxazine.mol +31 -0
- molSimplify/Ligands/hydrooxazole.mol +25 -0
- molSimplify/Ligands/hydrothiazine.mol +31 -0
- molSimplify/Ligands/hydrothiazole.mol +25 -0
- molSimplify/Ligands/hydroxyl.mol +9 -0
- molSimplify/Ligands/imidazole.mol +23 -0
- molSimplify/Ligands/imidazolidinone.mol +29 -0
- molSimplify/Ligands/imine.mol +13 -0
- molSimplify/Ligands/iminodiacetic.mol +33 -0
- molSimplify/Ligands/iodide.mol +7 -0
- molSimplify/Ligands/iodobenzene.xyz +14 -0
- molSimplify/Ligands/isoleucine.mol +48 -0
- molSimplify/Ligands/isothiocyanate.mol +11 -0
- molSimplify/Ligands/leucine.mol +48 -0
- molSimplify/Ligands/ligands.dict +257 -0
- molSimplify/Ligands/lysine.mol +54 -0
- molSimplify/Ligands/mebenzenedithiol.mol +36 -0
- molSimplify/Ligands/mebim_py.xyz +29 -0
- molSimplify/Ligands/mebim_pz.xyz +28 -0
- molSimplify/Ligands/mebipy.mol +58 -0
- molSimplify/Ligands/mecat.mol +36 -0
- molSimplify/Ligands/methanal.mol +11 -0
- molSimplify/Ligands/methanethiol.mol +15 -0
- molSimplify/Ligands/methanol.mol +16 -0
- molSimplify/Ligands/methionine.mol +44 -0
- molSimplify/Ligands/methyl.mol +13 -0
- molSimplify/Ligands/methylacetylide.xyz +8 -0
- molSimplify/Ligands/methylamine.mol +19 -0
- molSimplify/Ligands/methylazide.xyz +9 -0
- molSimplify/Ligands/methylisocy.mol +17 -0
- molSimplify/Ligands/methylpyridine.mol +33 -0
- molSimplify/Ligands/n2.mol +8 -0
- molSimplify/Ligands/n4py.xyz +51 -0
- molSimplify/Ligands/nch.mol +10 -0
- molSimplify/Ligands/nco-.mol +11 -0
- molSimplify/Ligands/nethanolamine.mol +26 -0
- molSimplify/Ligands/nitrate.mol +14 -0
- molSimplify/Ligands/nitrite.mol +11 -0
- molSimplify/Ligands/nitro.mol +11 -0
- molSimplify/Ligands/nitrobipy.mol +54 -0
- molSimplify/Ligands/nitroso.mol +8 -0
- molSimplify/Ligands/nme3.mol +30 -0
- molSimplify/Ligands/no-.mol +10 -0
- molSimplify/Ligands/no2-.mol +11 -0
- molSimplify/Ligands/noxygen.mol +8 -0
- molSimplify/Ligands/ns-.mol +10 -0
- molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
- molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
- molSimplify/Ligands/o2-.mol +9 -0
- molSimplify/Ligands/o2.xyz +4 -0
- molSimplify/Ligands/och2.mol +12 -0
- molSimplify/Ligands/oethanolamine.mol +26 -0
- molSimplify/Ligands/ome2.mol +22 -0
- molSimplify/Ligands/ooh.xyz +5 -0
- molSimplify/Ligands/oxalate.mol +17 -0
- molSimplify/Ligands/oxalate.smi +1 -0
- molSimplify/Ligands/oxygen.mol +7 -0
- molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
- molSimplify/Ligands/ph2-.mol +11 -0
- molSimplify/Ligands/ph3.mol +12 -0
- molSimplify/Ligands/phen.mol +51 -0
- molSimplify/Ligands/phenacac.mol +63 -0
- molSimplify/Ligands/phenalalanine.mol +51 -0
- molSimplify/Ligands/phendione.mol +51 -0
- molSimplify/Ligands/phenphen.mol +75 -0
- molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
- molSimplify/Ligands/phenylcyc.mol +99 -0
- molSimplify/Ligands/phenylenediamine.mol +37 -0
- molSimplify/Ligands/phenylisocy.mol +32 -0
- molSimplify/Ligands/phosacidbipy.mol +66 -0
- molSimplify/Ligands/phosphine.mol +13 -0
- molSimplify/Ligands/phosphorine.mol +27 -0
- molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
- molSimplify/Ligands/phthalocyanine.mol +126 -0
- molSimplify/Ligands/pme3o.mol +32 -0
- molSimplify/Ligands/porphyrin.mol +82 -0
- molSimplify/Ligands/pph3o.mol +77 -0
- molSimplify/Ligands/proline.mol +39 -0
- molSimplify/Ligands/propdiol.mol +21 -0
- molSimplify/Ligands/propylene.mol +23 -0
- molSimplify/Ligands/pyridine.mol +27 -0
- molSimplify/Ligands/pyrimidone.mol +27 -0
- molSimplify/Ligands/pyrrole.mol +24 -0
- molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
- molSimplify/Ligands/s2-.mol +9 -0
- molSimplify/Ligands/salen.mol +75 -0
- molSimplify/Ligands/salphen.mol +84 -0
- molSimplify/Ligands/serine.mol +32 -0
- molSimplify/Ligands/simple_ligands.dict +14 -0
- molSimplify/Ligands/sulfacidbipy.mol +63 -0
- molSimplify/Ligands/tbucat.mol +54 -0
- molSimplify/Ligands/tbuphisocy.mol +56 -0
- molSimplify/Ligands/tbutylcyclen.mol +166 -0
- molSimplify/Ligands/tbutylisocy.mol +35 -0
- molSimplify/Ligands/tbutylthiol.mol +33 -0
- molSimplify/Ligands/tcnoet.mol +43 -0
- molSimplify/Ligands/tcnoetOH.mol +45 -0
- molSimplify/Ligands/terpy.mol +65 -0
- molSimplify/Ligands/tetrahydrofuran.mol +31 -0
- molSimplify/Ligands/thiane.mol +37 -0
- molSimplify/Ligands/thiazole.mol +21 -0
- molSimplify/Ligands/thiocyanate.mol +11 -0
- molSimplify/Ligands/thiol.mol +9 -0
- molSimplify/Ligands/thiophene.mol +23 -0
- molSimplify/Ligands/thiopyridine.mol +29 -0
- molSimplify/Ligands/threonine.mol +38 -0
- molSimplify/Ligands/tpp.mol +165 -0
- molSimplify/Ligands/tricyanomethyl.mol +19 -0
- molSimplify/Ligands/trifluoromethyl.mol +13 -0
- molSimplify/Ligands/tryptophan.mol +60 -0
- molSimplify/Ligands/tyrosine.mol +53 -0
- molSimplify/Ligands/uthiol.mol +11 -0
- molSimplify/Ligands/uthiolme2.mol +23 -0
- molSimplify/Ligands/valine.mol +42 -0
- molSimplify/Ligands/water.mol +10 -0
- molSimplify/Ligands/x.mol +6 -0
- molSimplify/Scripts/__init__.py +0 -0
- molSimplify/Scripts/addtodb.py +308 -0
- molSimplify/Scripts/cellbuilder.py +1592 -0
- molSimplify/Scripts/cellbuilder_tools.py +701 -0
- molSimplify/Scripts/chains.py +342 -0
- molSimplify/Scripts/convert_2to3.py +23 -0
- molSimplify/Scripts/dbinteract.py +631 -0
- molSimplify/Scripts/distgeom.py +617 -0
- molSimplify/Scripts/findcorrelations.py +287 -0
- molSimplify/Scripts/generator.py +267 -0
- molSimplify/Scripts/geometry.py +1224 -0
- molSimplify/Scripts/grabguivars.py +845 -0
- molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
- molSimplify/Scripts/inparse.py +1673 -0
- molSimplify/Scripts/io.py +1149 -0
- molSimplify/Scripts/isomers.py +415 -0
- molSimplify/Scripts/jobgen.py +247 -0
- molSimplify/Scripts/krr_prep.py +1262 -0
- molSimplify/Scripts/molSimplify_io.py +18 -0
- molSimplify/Scripts/molden2psi4wfn.py +166 -0
- molSimplify/Scripts/namegen.py +32 -0
- molSimplify/Scripts/nn_prep.py +561 -0
- molSimplify/Scripts/oct_check_mols.py +782 -0
- molSimplify/Scripts/periodic_QE.py +97 -0
- molSimplify/Scripts/postmold.py +304 -0
- molSimplify/Scripts/postmwfn.py +709 -0
- molSimplify/Scripts/postparse.py +488 -0
- molSimplify/Scripts/postproc.py +139 -0
- molSimplify/Scripts/qcgen.py +1450 -0
- molSimplify/Scripts/rmsd.py +489 -0
- molSimplify/Scripts/rungen.py +670 -0
- molSimplify/Scripts/structgen.py +3040 -0
- molSimplify/Scripts/tf_nn_prep.py +894 -0
- molSimplify/Scripts/tsgen.py +295 -0
- molSimplify/Scripts/uq_calibration.py +69 -0
- molSimplify/__init__.py +0 -0
- molSimplify/__main__.py +197 -0
- molSimplify/icons/chemdb.png +0 -0
- molSimplify/icons/hjklogo.png +0 -0
- molSimplify/icons/icon.png +0 -0
- molSimplify/icons/logo.png +0 -0
- molSimplify/icons/logo_old.png +0 -0
- molSimplify/icons/petachem.png +0 -0
- molSimplify/icons/petachem2.png +0 -0
- molSimplify/icons/petachem_full.png +0 -0
- molSimplify/icons/pythonlogo.png +0 -0
- molSimplify/icons/sge copy.png +0 -0
- molSimplify/icons/sge.png +0 -0
- molSimplify/icons/slurm.png +0 -0
- molSimplify/icons/wft1.png +0 -0
- molSimplify/icons/wft2.png +0 -0
- molSimplify/icons/wft3.png +0 -0
- molSimplify/ml/__init__.py +0 -0
- molSimplify/ml/kernels.py +36 -0
- molSimplify/ml/layers.py +29 -0
- molSimplify/molscontrol/__init__.py +14 -0
- molSimplify/molscontrol/_version.py +521 -0
- molSimplify/molscontrol/clf_tools.py +144 -0
- molSimplify/molscontrol/data/README.md +21 -0
- molSimplify/molscontrol/data/look_and_say.dat +15 -0
- molSimplify/molscontrol/dynamic_classifier.py +514 -0
- molSimplify/molscontrol/io_tools.py +363 -0
- molSimplify/molscontrol/molscontrol.py +49 -0
- molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
- molSimplify/molscontrol/terachem/terachem_input +22 -0
- molSimplify/python_krr/X_train_TS.csv +535 -0
- molSimplify/python_krr/__init__.py +0 -0
- molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
- molSimplify/python_krr/hat2_feature_names.csv +1 -0
- molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
- molSimplify/python_krr/hat_X_mean_std.csv +6 -0
- molSimplify/python_krr/hat_feature_names.csv +1 -0
- molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
- molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
- molSimplify/python_krr/hat_y_mean_std.csv +2 -0
- molSimplify/python_krr/sklearn_models.py +34 -0
- molSimplify/python_krr/y_train_TS.csv +535 -0
- molSimplify/python_nn/ANN.py +198 -0
- molSimplify/python_nn/__init__.py +0 -0
- molSimplify/python_nn/clf_analysis_tool.py +125 -0
- molSimplify/python_nn/dictionary_toolbox.py +49 -0
- molSimplify/python_nn/ensemble_test.py +309 -0
- molSimplify/python_nn/hs_center.csv +26 -0
- molSimplify/python_nn/hs_scale.csv +26 -0
- molSimplify/python_nn/ls_center.csv +26 -0
- molSimplify/python_nn/ls_scale.csv +26 -0
- molSimplify/python_nn/ms_hs_b1.csv +50 -0
- molSimplify/python_nn/ms_hs_b2.csv +50 -0
- molSimplify/python_nn/ms_hs_b3.csv +1 -0
- molSimplify/python_nn/ms_hs_w1.csv +50 -0
- molSimplify/python_nn/ms_hs_w2.csv +50 -0
- molSimplify/python_nn/ms_hs_w3.csv +1 -0
- molSimplify/python_nn/ms_ls_b1.csv +50 -0
- molSimplify/python_nn/ms_ls_b2.csv +50 -0
- molSimplify/python_nn/ms_ls_b3.csv +1 -0
- molSimplify/python_nn/ms_ls_w1.csv +50 -0
- molSimplify/python_nn/ms_ls_w2.csv +50 -0
- molSimplify/python_nn/ms_ls_w3.csv +1 -0
- molSimplify/python_nn/ms_slope_b1.csv +50 -0
- molSimplify/python_nn/ms_slope_b2.csv +50 -0
- molSimplify/python_nn/ms_slope_b3.csv +1 -0
- molSimplify/python_nn/ms_slope_w1.csv +50 -0
- molSimplify/python_nn/ms_slope_w2.csv +50 -0
- molSimplify/python_nn/ms_slope_w3.csv +1 -0
- molSimplify/python_nn/ms_split_b1.csv +50 -0
- molSimplify/python_nn/ms_split_b2.csv +50 -0
- molSimplify/python_nn/ms_split_b3.csv +1 -0
- molSimplify/python_nn/ms_split_w1.csv +50 -0
- molSimplify/python_nn/ms_split_w2.csv +50 -0
- molSimplify/python_nn/ms_split_w3.csv +1 -0
- molSimplify/python_nn/slope_center.csv +25 -0
- molSimplify/python_nn/slope_scale.csv +25 -0
- molSimplify/python_nn/split_center.csv +26 -0
- molSimplify/python_nn/split_scale.csv +26 -0
- molSimplify/python_nn/tf_ANN.py +762 -0
- molSimplify/python_nn/train_data.csv +1211 -0
- molSimplify/tf_nn/__init__.py +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
- molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/gap_model.json +1 -0
- molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
- molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/homo_model.json +126 -0
- molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/split/split_model.h5 +0 -0
- molSimplify/tf_nn/split/split_model.json +1 -0
- molSimplify/tf_nn/split/split_vars.csv +155 -0
- molSimplify/tf_nn/split/split_x.csv +1902 -0
- molSimplify/tf_nn/split/split_y.csv +1902 -0
- molSimplify/tf_nn/split/train_names.csv +1901 -0
- molSimplify/utils/__init__.py +0 -0
- molSimplify/utils/decorators.py +16 -0
- molSimplify/utils/metaclasses.py +12 -0
- molSimplify/utils/tensorflow.py +23 -0
- molSimplify/utils/timer.py +16 -0
- molSimplify-1.7.4.dist-info/LICENSE +674 -0
- molSimplify-1.7.4.dist-info/METADATA +821 -0
- molSimplify-1.7.4.dist-info/RECORD +651 -0
- molSimplify-1.7.4.dist-info/WHEEL +5 -0
- molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
- molSimplify-1.7.4.dist-info/top_level.txt +4 -0
- tests/generateTests.py +122 -0
- tests/helperFuncs.py +658 -0
- tests/informatics/test_MOF_descriptors.py +128 -0
- tests/informatics/test_active_learning.py +113 -0
- tests/informatics/test_coulomb_analyze.py +24 -0
- tests/informatics/test_graph_racs.py +193 -0
- tests/ml/test_kernels.py +20 -0
- tests/ml/test_layers.py +47 -0
- tests/runtest.py +10 -0
- tests/test_Mol2D.py +128 -0
- tests/test_basic_imports.py +62 -0
- tests/test_bidentate.py +25 -0
- tests/test_cli.py +20 -0
- tests/test_distgeom.py +106 -0
- tests/test_example_1.py +29 -0
- tests/test_example_3.py +31 -0
- tests/test_example_5.py +43 -0
- tests/test_example_7.py +28 -0
- tests/test_example_8.py +15 -0
- tests/test_example_tbp.py +15 -0
- tests/test_ff_xtb.py +111 -0
- tests/test_geocheck_oct.py +26 -0
- tests/test_geocheck_one_empty.py +15 -0
- tests/test_geometry.py +44 -0
- tests/test_inparse.py +76 -0
- tests/test_io.py +84 -0
- tests/test_jobgen.py +84 -0
- tests/test_joption_pythonic.py +27 -0
- tests/test_ligand_assign.py +58 -0
- tests/test_ligand_assign_consistent.py +60 -0
- tests/test_ligand_class.py +26 -0
- tests/test_ligand_from_mol_file.py +35 -0
- tests/test_ligands.py +86 -0
- tests/test_mol3D.py +337 -0
- tests/test_molcas_caspt2.py +15 -0
- tests/test_molcas_casscf.py +15 -0
- tests/test_old_ANNs.py +68 -0
- tests/test_orca_ccsdt.py +15 -0
- tests/test_orca_dft.py +15 -0
- tests/test_qcgen.py +50 -0
- tests/test_racs.py +124 -0
- tests/test_rmsd.py +68 -0
- tests/test_structgen_functions.py +198 -0
- tests/test_tetrahedral.py +29 -0
- tests/test_tutorial_10_part_one.py +16 -0
- tests/test_tutorial_10_part_two.py +15 -0
- tests/test_tutorial_2.py +11 -0
- tests/test_tutorial_3.py +15 -0
- tests/test_tutorial_4.py +57 -0
- tests/test_tutorial_6.py +10 -0
- tests/test_tutorial_8.py +29 -0
- tests/test_tutorial_9_part_one.py +15 -0
- tests/test_tutorial_9_part_two.py +15 -0
- tests/test_tutorial_qm9_part_one.py +6 -0
- tests/testresources/refs/racs/generate_references.py +85 -0
- workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
- workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
- workflows/NandyJACSAu2022/fragment_classes.py +586 -0
- workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
|
@@ -0,0 +1,589 @@
|
|
|
1
|
+
from molSimplify.Scripts.cellbuilder_tools import *
|
|
2
|
+
from molSimplify.Classes.mol3D import mol3D
|
|
3
|
+
from molSimplify.Informatics.autocorrelation import *
|
|
4
|
+
from molSimplify.Informatics.misc_descriptors import *
|
|
5
|
+
from molSimplify.Informatics.graph_analyze import *
|
|
6
|
+
from molSimplify.Informatics.RACassemble import *
|
|
7
|
+
import os
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from scipy import sparse
|
|
11
|
+
import itertools
|
|
12
|
+
from molSimplify.Informatics.MOF.PBC_functions import *
|
|
13
|
+
|
|
14
|
+
#### NOTE: In addition to molSimplify's dependencies, this portion requires
|
|
15
|
+
#### pymatgen to be installed. The RACs are intended to be computed
|
|
16
|
+
#### on the primitive cell of the material. You can compute them
|
|
17
|
+
#### using the get_primitive function defined below if necessary.
|
|
18
|
+
|
|
19
|
+
# Example usage is given at the bottom of the script.
|
|
20
|
+
|
|
21
|
+
'''<<<< CODE TO COMPUTE PRIMITIVE UNIT CELLS >>>>'''
|
|
22
|
+
#########################################################################################
|
|
23
|
+
# This MOF RAC generator assumes that pymatgen is installed. #
|
|
24
|
+
# Pymatgen is used to get the primitive cell. #
|
|
25
|
+
#########################################################################################
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_primitive(datapath, writepath):
    """Write the primitive cell of the CIF at *datapath* to *writepath*.

    Args:
        datapath: path of the CIF file to read.
        writepath: path the primitive-cell CIF is written to.

    pymatgen is imported inside the function, so it is only required when
    this function is actually called.
    """
    from pymatgen.io.cif import CifParser
    # Only the first structure parsed from the CIF is used.
    s = CifParser(datapath, occupancy_tolerance=1).get_structures()[0]
    sprim = s.get_primitive_structure()
    # Pass format and filename by keyword: the positional order of these two
    # arguments is not the same in every pymatgen release.
    sprim.to(fmt="cif", filename=writepath)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
'''<<<< END OF CODE TO COMPUTE PRIMITIVE UNIT CELLS >>>>'''
|
|
36
|
+
|
|
37
|
+
#########################################################################################
|
|
38
|
+
# The RAC functions here average over the different SBUs or linkers present. This is #
|
|
39
|
+
# because one MOF could have multiple different linkers or multiple SBUs, and we need #
|
|
40
|
+
# the vector to be of constant dimension so we can correlate the output property. #
|
|
41
|
+
#########################################################################################
|
|
42
|
+
|
|
43
|
+
def identify_main_chain(temp_mol, link_list):
    """Return the node indices that make up the main chain of a linker graph.

    Args:
        temp_mol: object whose ``graph`` attribute is the adjacency matrix of
            the linker.
        link_list: node indices (rows of ``temp_mol.graph``) of the anchoring
            atoms.

    Returns:
        A list of node indices. With exactly one anchor, every node of the
        graph is returned. Otherwise the list holds the nodes on the shortest
        paths between every pair of anchors, extended with every minimum-basis
        cycle that shares at least one node with the chain found so far. The
        list may contain duplicates; callers in this file only test
        membership.
    """
    # from_numpy_matrix was removed in networkx 3.0; from_numpy_array is the
    # replacement. Fall back to the old name for releases that lack the new one.
    build_graph = getattr(nx, 'from_numpy_array', None) or nx.from_numpy_matrix
    G = build_graph(np.asarray(temp_mol.graph))
    if len(link_list) == 1:
        return list(G.nodes)

    # Nodes on the shortest path between every pair of anchors.
    paths = []
    for a, b in itertools.combinations(link_list, 2):
        paths.extend(nx.shortest_path(G, source=a, target=b))

    min_cycles = nx.minimum_cycle_basis(G)
    remaining_cycles = list(min_cycles)
    previous_count = 0
    # Keep sweeping over the cycles until a full sweep absorbs nothing new:
    # absorbing one ring can make a neighbouring (fused) ring touch the chain.
    while len(remaining_cycles) != previous_count:
        previous_count = len(remaining_cycles)
        for cycle in min_cycles:
            cycle_nodes = set(cycle)
            chain_nodes = set(paths)
            if cycle_nodes & chain_nodes and not cycle_nodes.issubset(chain_nodes):
                paths += cycle_nodes
                remaining_cycles.remove(cycle)
    return paths
|
|
78
|
+
|
|
79
|
+
def make_MOF_SBU_RACs(SBUlist, SBU_subgraph, molcif, depth, name,cell,anchoring_atoms, sbupath=False, connections_list=False, connections_subgraphlist=False):
    """Compute SBU RACs and linker-connecting (lc) RACs, averaged over entries.

    Args:
        SBUlist: sequence of SBUs, each an iterable of atom indices into
            ``molcif``.
        SBU_subgraph: per-SBU sparse adjacency matrices (``.todense()`` is
            called on each).
        molcif: mol3D of the whole structure. Atoms are copied out of it and it
            is passed to the multimetal RAC generators.
        depth: depth handed to every RAC generator.
        name: structure name; stored in the ``'name'`` column of the CSV rows
            and used to build the xyz file names.
        cell: cell passed to ``XYZ_connected`` and ``writeXYZandGraph``.
        anchoring_atoms: atom indices used as start atoms for the lc RACs.
        sbupath: directory for the SBU xyz files. ``sbu_descriptors.csv`` and
            ``lc_descriptors.csv`` are read from and written to its parent
            directory. When falsy, nothing is read or written.
        connections_list: atom-index lists of the organic fragments. It is
            iterated unconditionally, so it must be supplied despite the
            default.
        connections_subgraphlist: sparse adjacency matrices matching
            ``connections_list``.

    Returns:
        ``(names, averaged_SBU_descriptors, lc_names, averaged_lc_descriptors)``
        where the averages are ``np.mean(..., axis=0)`` over the collected
        descriptor rows.

    Raises:
        AssertionError: if an lc descriptor is neither a Python float nor a
            ``np.float64``.
    """
    descriptor_list = []
    lc_descriptor_list = []
    lc_names = []
    names = []
    # Start from empty frames so the row bookkeeping below also works when no
    # sbupath is given (these names were otherwise undefined in that case).
    sbu_descriptors = pd.DataFrame()
    lc_descriptors = pd.DataFrame()
    if sbupath:
        sbu_descriptor_path = os.path.dirname(sbupath)
        # An empty (zero-byte) CSV cannot be parsed, so only read non-empty files.
        if os.path.getsize(sbu_descriptor_path+'/sbu_descriptors.csv')>0:
            sbu_descriptors = pd.read_csv(sbu_descriptor_path+'/sbu_descriptors.csv')
        if os.path.getsize(sbu_descriptor_path+'/lc_descriptors.csv')>0:
            lc_descriptors = pd.read_csv(sbu_descriptor_path+'/lc_descriptors.csv')

    # Loop over all SBUs as identified by subgraphs. Then create the mol3Ds for each SBU.
    for i, SBU in enumerate(SBUlist):
        SBU_mol = mol3D()
        for val in SBU:
            SBU_mol.addAtom(molcif.getAtom(val))
        SBU_mol.graph = SBU_subgraph[i].todense()

        # For each linker connected to the SBU, find the lc atoms for the lc-RACs.
        for j, linker in enumerate(connections_list):
            if len(set(SBU).intersection(linker)) == 0:
                # No shared atom: this fragment is not connected to the SBU.
                continue
            descriptor_names = []
            descriptors = []
            # This builds a mol object for the linker, even though it is in
            # the SBU section. link_list holds positions within the linker.
            temp_mol = mol3D()
            link_list = []
            for jj, val2 in enumerate(linker):
                if val2 in anchoring_atoms:
                    link_list.append(jj)
                temp_mol.addAtom(molcif.getAtom(val2))
            temp_mol.graph = connections_subgraphlist[j].todense()

            # Generate all of the lc autocorrelations (from the connecting atoms).
            results_dictionary = generate_atomonly_autocorrelations(temp_mol, link_list, loud=False, depth=depth, oct=False, polarizability=False,Gval=True)
            descriptor_names, descriptors = append_descriptors(descriptor_names, descriptors,results_dictionary['colnames'],results_dictionary['results'],'lc','all')
            results_dictionary = generate_atomonly_deltametrics(temp_mol, link_list, loud=False, depth=depth, oct=False, polarizability=False,Gval=True)
            descriptor_names, descriptors = append_descriptors(descriptor_names, descriptors,results_dictionary['colnames'],results_dictionary['results'],'D_lc','all')

            # Functional atoms: every non-hydrogen atom that is not on the
            # main chain returned by identify_main_chain.
            main = identify_main_chain(temp_mol, link_list)
            functional_atoms = []
            for jj in range(len(temp_mol.graph)):
                if jj not in main and temp_mol.atoms[jj].sym != "H":
                    functional_atoms.append(jj)
            print(functional_atoms)

            if len(functional_atoms) > 0:
                results_dictionary = generate_atomonly_autocorrelations(temp_mol, functional_atoms , loud=False, depth=depth, oct=False, polarizability=False,Gval=True)
                descriptor_names, descriptors = append_descriptors(descriptor_names, descriptors,results_dictionary['colnames'],results_dictionary['results'],'func','all')
                results_dictionary = generate_atomonly_deltametrics(temp_mol, functional_atoms , loud=False, depth=depth, oct=False, polarizability=False,Gval=True)
                descriptor_names, descriptors = append_descriptors(descriptor_names, descriptors,results_dictionary['colnames'],results_dictionary['results'],'D_func','all')
            else:
                # No functional atoms: pad with zeros so the vector keeps a
                # constant length. The column names are reused from the D_lc
                # result computed just above.
                descriptor_names, descriptors = append_descriptors(descriptor_names, descriptors,results_dictionary['colnames'],list([np.zeros(int(6*(depth + 1)))]),'func','all')
                descriptor_names, descriptors = append_descriptors(descriptor_names, descriptors,results_dictionary['colnames'],list([np.zeros(int(6*(depth + 1)))]),'D_func','all')

            for val in descriptors:
                if not (type(val) == float or isinstance(val, np.float64)):
                    print('Mixed typing. Please convert to python float, and avoid np float')
                    raise AssertionError('Mixed typing creates issues. Please convert your typing.')

            # One CSV row per (SBU, connected linker) pair, tagged with the name.
            desc_dict = {key2: descriptors[kk] for kk, key2 in enumerate(descriptor_names)}
            desc_dict['name'] = name
            # DataFrame.append no longer exists in pandas >= 2.0; concat is equivalent.
            lc_descriptors = pd.concat([lc_descriptors, pd.DataFrame([desc_dict])], ignore_index=True)
            lc_descriptor_list.append(descriptors)
            if j == 0:
                lc_names = descriptor_names

        averaged_lc_descriptors = np.mean(np.array(lc_descriptor_list), axis=0)
        if sbupath:
            lc_descriptors.to_csv(sbu_descriptor_path+'/lc_descriptors.csv', index=False)

        descriptors = []
        descriptor_names = []
        SBU_mol_cart_coords = np.array([atom.coords() for atom in SBU_mol.atoms])
        SBU_mol_atom_labels = [atom.sym for atom in SBU_mol.atoms]
        SBU_mol_adj_mat = np.array(SBU_mol.graph)
        # Write the SBU geometry and graph to sbupath.
        # NOTE(review): the existence check looks for '<name><i>.xyz' while the
        # file written is '<name>_sbu_<i>.xyz', so the check presumably never
        # matches and the file is rewritten on every call -- confirm intent.
        if sbupath and not os.path.exists(sbupath+"/"+str(name)+str(i)+'.xyz'):
            xyzname = sbupath+"/"+str(name)+"_sbu_"+str(i)+".xyz"
            SBU_mol_fcoords_connected = XYZ_connected(cell , SBU_mol_cart_coords , SBU_mol_adj_mat )
            writeXYZandGraph(xyzname , SBU_mol_atom_labels , cell , SBU_mol_fcoords_connected,SBU_mol_adj_mat)

        # Generate all of the SBU based RACs (full scope, mc).
        results_dictionary = generate_full_complex_autocorrelations(SBU_mol,depth=depth,loud=False,flag_name=False,Gval=True)
        descriptor_names, descriptors = append_descriptors(descriptor_names, descriptors,results_dictionary['colnames'],results_dictionary['results'],'f','all')
        # The metal-centred RACs are generated from the whole structure
        # (molcif), not from the single-SBU mol.
        results_dictionary = generate_multimetal_autocorrelations(molcif,depth=depth,loud=False,Gval=True)
        descriptor_names, descriptors = append_descriptors(descriptor_names, descriptors, results_dictionary['colnames'],results_dictionary['results'],'mc','all')
        results_dictionary = generate_multimetal_deltametrics(molcif,depth=depth,loud=False,Gval=True)
        descriptor_names, descriptors = append_descriptors(descriptor_names, descriptors,results_dictionary['colnames'],results_dictionary['results'],'D_mc','all')

        desc_dict = {key: descriptors[ii] for ii, key in enumerate(descriptor_names)}
        desc_dict['name'] = name
        sbu_descriptors = pd.concat([sbu_descriptors, pd.DataFrame([desc_dict])], ignore_index=True)
        descriptor_list.append(descriptors)
        if i == 0:
            names = descriptor_names

    if sbupath:
        sbu_descriptors.to_csv(sbu_descriptor_path+'/sbu_descriptors.csv', index=False)
    averaged_SBU_descriptors = np.mean(np.array(descriptor_list), axis=0)
    return names, averaged_SBU_descriptors, lc_names, averaged_lc_descriptors
|
|
217
|
+
|
|
218
|
+
def make_MOF_linker_RACs(linkerlist, linker_subgraphlist, molcif, depth, name, cell, linkerpath=False):
    """Compute full-scope linker RACs, averaged over all linkers.

    Args:
        linkerlist: sequence of linkers, each an iterable of atom indices into
            ``molcif``.
        linker_subgraphlist: per-linker sparse adjacency matrices
            (``.todense()`` is called on each).
        molcif: mol3D of the whole structure; linker atoms are copied from it.
        depth: maximum depth of the autocorrelations.
        name: structure name; stored in the ``'name'`` column of the CSV rows
            and used to build the xyz file names.
        cell: cell passed to ``XYZ_connected`` and ``writeXYZandGraph``.
        linkerpath: directory for the linker xyz files.
            ``linker_descriptors.csv`` is read from and written to its parent
            directory. When falsy, nothing is read or written.

    Returns:
        ``(colnames, averaged_ligand_descriptors)``: the column names
        ``'f-lig-<label>-<d>'`` and ``np.mean(..., axis=0)`` over the
        per-linker descriptor rows.
    """
    allowed_strings = ['electronegativity', 'nuclear_charge', 'ident', 'topology', 'size','effective_nuclear_charge']
    labels_strings = ['chi', 'Z', 'I', 'T', 'S','Gval']
    # The column names depend only on depth, so build them once up front.
    colnames = ['f-lig-'+label + '-' + str(j)
                for label in labels_strings for j in range(0,depth+1)]

    descriptor_list = []
    # Start from an empty frame so the row bookkeeping below also works when
    # no linkerpath is given (the name was otherwise undefined in that case).
    linker_descriptors = pd.DataFrame()
    if linkerpath:
        linker_descriptor_path = os.path.dirname(linkerpath)
        # An empty (zero-byte) CSV cannot be parsed, so only read non-empty files.
        if os.path.getsize(linker_descriptor_path+'/linker_descriptors.csv')>0:
            linker_descriptors = pd.read_csv(linker_descriptor_path+'/linker_descriptors.csv')

    for i, linker in enumerate(linkerlist):
        linker_mol = mol3D()
        for val in linker:
            linker_mol.addAtom(molcif.getAtom(val))
        linker_mol.graph = linker_subgraphlist[i].todense()
        linker_mol_cart_coords=np.array([atom.coords() for atom in linker_mol.atoms])
        linker_mol_atom_labels=[atom.sym for atom in linker_mol.atoms]
        linker_mol_adj_mat = np.array(linker_mol.graph)
        # Write the linker geometry and graph to linkerpath.
        # NOTE(review): the existence check looks for '<name><i>.xyz' while the
        # file written is '<name>_linker_<i>.xyz', so the check presumably
        # never matches and the file is rewritten on every call -- confirm.
        if linkerpath and not os.path.exists(linkerpath+"/"+str(name)+str(i)+".xyz"):
            xyzname = linkerpath+"/"+str(name)+"_linker_"+str(i)+".xyz"
            linker_mol_fcoords_connected = XYZ_connected(cell, linker_mol_cart_coords, linker_mol_adj_mat)
            writeXYZandGraph(xyzname, linker_mol_atom_labels, cell, linker_mol_fcoords_connected, linker_mol_adj_mat)

        # One full autocorrelation per property, flattened into a single row
        # in the same property order as colnames.
        lig_full = []
        for properties in allowed_strings:
            lig_full.extend(full_autocorrelation(linker_mol, properties, depth))

        desc_dict = {key: lig_full[k] for k, key in enumerate(colnames)}
        desc_dict['name'] = name
        # DataFrame.append no longer exists in pandas >= 2.0; concat is equivalent.
        linker_descriptors = pd.concat([linker_descriptors, pd.DataFrame([desc_dict])], ignore_index=True)
        descriptor_list.append(lig_full)

    # The per-linker rows are dumped without averaging or summing so that the
    # user can post-process them; the averaged vector is what gets returned.
    if linkerpath:
        linker_descriptors.to_csv(linker_descriptor_path+'/linker_descriptors.csv', index=False)
    averaged_ligand_descriptors = np.mean(np.array(descriptor_list), axis=0)
    return colnames, averaged_ligand_descriptors
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def get_MOF_descriptors(data, depth, path=False, xyzpath = False):
    """Featurize one MOF CIF file into a flat RAC descriptor vector.

    The structure is split into SBUs, linkers and ligands from its bond graph.
    SBU, linker and linker-connecting RACs are then computed and concatenated.

    Args:
        data: path of the CIF file.
        depth: depth passed on to the RAC helpers.
        path: base directory for all outputs. The sub-directories ``ligands``,
            ``linkers``, ``sbus``, ``xyz`` and ``logs`` and the three
            descriptor CSV files are created inside it when missing.
        xyzpath: file name handed to ``writeXYZandGraph`` for the whole
            structure.

    Returns:
        ``(full_names, full_descriptors)``. On any failure both are ``[0]`` and
        a line is written to ``<path>/FailedStructures.log``.

    Raises:
        ValueError: if ``path`` is falsy.
    """
    # ``copy`` is not among this file's explicit imports, so import it here.
    import copy

    if not path:
        print('Need a directory to place all of the linker, SBU, and ligand objects. Exiting now.')
        raise ValueError('Base path must be specified in order to write descriptors.')
    if path.endswith('/'):
        path = path[:-1]
    for subdir in ('/ligands', '/linkers', '/sbus', '/xyz', '/logs'):
        if not os.path.isdir(path+subdir):
            os.mkdir(path+subdir)
    for csvname in ('/sbu_descriptors.csv', '/linker_descriptors.csv', '/lc_descriptors.csv'):
        if not os.path.exists(path+csvname):
            # Create an empty placeholder file.
            with open(path+csvname,'w'):
                pass
    ligandpath = path+'/ligands'
    linkerpath = path+'/linkers'
    sbupath = path+'/sbus'
    logpath = path+"/logs"

    # Input cif file and get the cell parameters and adjacency matrix. If
    # overlap, do not featurize. Simultaneously prepare mol3D class for MOF
    # for future RAC featurization (molcif).
    cpar, allatomtypes, fcoords = readcif(data)
    cell_v = mkcell(cpar)
    cart_coords = fractional2cart(fcoords,cell_v)
    # Remove the '.cif' suffix only. str.strip(".cif") would instead strip any
    # of the characters '.', 'c', 'i', 'f' from both ends of the name.
    name = os.path.basename(data)
    if name.endswith('.cif'):
        name = name[:-len('.cif')]
    if len(cart_coords) > 2000:
        print("Too large cif file, skipping it for now...")
        full_names = [0]
        full_descriptors = [0]
        tmpstr = "Failed to featurize %s: large primitive cell\n"%(name)
        write2file(path,"/FailedStructures.log",tmpstr)
        return full_names, full_descriptors
    distance_mat = compute_distance_matrix2(cell_v,cart_coords)
    try:
        adj_matrix, _ = compute_adj_matrix(distance_mat,allatomtypes)
    except NotImplementedError:
        full_names = [0]
        full_descriptors = [0]
        tmpstr = "Failed to featurize %s: atomic overlap\n"%(name)
        write2file(path,"/FailedStructures.log",tmpstr)
        return full_names, full_descriptors

    writeXYZandGraph(xyzpath, allatomtypes, cell_v, fcoords, adj_matrix.todense())
    molcif,_,_,_,_ = import_from_cif(data, True)
    molcif.graph = adj_matrix.todense()

    # Check number of connected components. If more than 1, the structure is
    # logged as interpenetrated. Fails if there is no metal in one of the
    # connected components (identified by the graph). This includes floating
    # solvent molecules.
    n_components, labels_components = sparse.csgraph.connected_components(csgraph=adj_matrix, directed=False, return_labels=True)
    metals = molcif.findMetal(transition_metals_only=False)
    metal_list = set(metals)
    if not len(metal_list) > 0:
        full_names = [0]
        full_descriptors = [0]
        tmpstr = "Failed to featurize %s: no metal found\n"%(name)
        write2file(path,"/FailedStructures.log",tmpstr)
        return full_names, full_descriptors

    for comp in range(n_components):
        inds_in_comp = [i for i in range(len(labels_components)) if labels_components[i]==comp]
        if not set(inds_in_comp) & metal_list:
            full_names = [0]
            full_descriptors = [0]
            tmpstr = "Failed to featurize %s: solvent molecules\n"%(name)
            write2file(path,"/FailedStructures.log",tmpstr)
            return full_names, full_descriptors

    if n_components > 1 :
        print("structure is interpenetrated")
        tmpstr = "%s found to be an interpenetrated structure\n"%(name)
        write2file(logpath,"/%s.log"%name,tmpstr)

    # step 1: metallic part
    #   removelist = metals (1) + atoms only connected to metals (2) + H connected to (1+2)
    #   SBUlist = removelist + 1st coordination shell of the metals
    # Logs the atom types of the connecting atoms to the metal in logpath.
    SBUlist = set()
    for metal in metals:
        SBUlist.add(metal)
    for metal in metals:
        SBUlist.update(set(molcif.getBondedAtomsSmart(metal)))
    removelist = set()
    for metal in metals:
        removelist.add(metal)
    for metal in removelist:
        bonded_atoms = set(molcif.getBondedAtomsSmart(metal))
        bonded_atoms_types = set([str(allatomtypes[at]) for at in set(molcif.getBondedAtomsSmart(metal))])
        cn = len(bonded_atoms)
        cn_atom = ",".join([at for at in bonded_atoms_types])
        tmpstr = "atom %i with type of %s found to have %i coordinates with atom types of %s\n"%(metal,allatomtypes[metal],cn,cn_atom)
        write2file(logpath,"/%s.log"%name,tmpstr)
    # Atoms whose every neighbour is a metal or a hydrogen belong to the SBU.
    for atom in SBUlist:
        if all((molcif.getAtom(val).ismetal() or molcif.getAtom(val).symbol().upper() == 'H') for val in molcif.getBondedAtomsSmart(atom)):
            removelist.add(atom)
    # Adding hydrogens connected to atoms which are only connected to metals.
    # In particular interstitial OH, like in UiO SBU.
    for atom in SBUlist:
        for val in molcif.getBondedAtomsSmart(atom):
            if molcif.getAtom(val).symbol().upper() == 'H':
                removelist.add(val)

    # At this point the remove list only holds metals and things ONLY
    # connected to metals or hydrogens. Thus the coordinating atoms are double
    # counted in the linker.
    #
    # step 2: organic part
    #   linkers are all atoms - the removelist (assuming no bond between
    #   organic linkers)
    allatoms = set(range(0, adj_matrix.shape[0]))
    linkers = allatoms - removelist
    linker_list, linker_subgraphlist = get_closed_subgraph(linkers.copy(), removelist.copy(), adj_matrix)
    # Keep an untouched copy of every organic fragment: ligands are popped
    # from linker_list below, but the lc RACs need all of them.
    connections_list = copy.deepcopy(linker_list)
    connections_subgraphlist = copy.deepcopy(linker_subgraphlist)

    # Find all anchoring atoms on linkers and ligands (lc identification).
    anc_atoms = set()
    for linker in linker_list:
        for atom_linker in linker:
            bonded2atom = np.nonzero(adj_matrix[atom_linker,:])[1]
            if set(bonded2atom) & metal_list:
                anc_atoms.add(atom_linker)

    # step 3: linker or ligand ?
    # checking to find the anchors and #SBUs that are connected to an organic part
    #   anchor <= 1 -> ligand
    #   anchor > 1 and #SBU > 1 -> linker
    #   else: walk over the linker graph and count #crossing PBC
    #       if #crossing is odd -> linker
    #       else -> ligand
    initial_SBU_list, _ = get_closed_subgraph(removelist.copy(), linkers.copy(), adj_matrix)
    templist = linker_list[:]
    long_ligands = False
    max_min_linker_length , min_max_linker_length = (0,100)
    # Iterate in reverse so popping entry ii never shifts an index still to come.
    for ii, atoms_list in reversed(list(enumerate(linker_list))): #Loop over all linker subgraphs
        linkeranchors_list = set()
        linkeranchors_atoms = set()
        sbuanchors_list = set()
        sbu_connect_list = set()
        # Here, we are trying to identify what is actually a linker and what
        # is a ligand. To do this, we check if something is connected to more
        # than one SBU. Set to handle cases where primitive cell is small,
        # ambiguous cases are recorded.
        for iii,atoms in enumerate(atoms_list): #loop over all atoms in a linker
            connected_atoms = np.nonzero(adj_matrix[atoms,:])[1]
            for kk, sbu_atoms_list in enumerate(initial_SBU_list): #loop over all SBU subgraphs
                for sbu_atoms in sbu_atoms_list: #Loop over SBU
                    if sbu_atoms in connected_atoms:
                        linkeranchors_list.add(iii)
                        linkeranchors_atoms.add(atoms)
                        sbuanchors_list.add(sbu_atoms)
                        sbu_connect_list.add(kk) #Add if unique SBUs
        min_length,max_length = linker_length(linker_subgraphlist[ii].todense(),linkeranchors_list)

        if len(linkeranchors_list) >=2 : # linker, and in one ambiguous case, could be a ligand.
            if len(sbu_connect_list) >= 2: #Something that connects two SBUs is certain to be a linker
                max_min_linker_length = max(min_length,max_min_linker_length)
                min_max_linker_length = min(max_length,min_max_linker_length)
                continue
            else:
                # check number of times we cross PBC :
                # TODO: we still can fail in multidentate ligands!
                linker_cart_coords = np.array([
                    at.coords() for at in [molcif.getAtom(val) for val in atoms_list]])
                linker_adjmat = np.array(linker_subgraphlist[ii].todense())
                pr_image_organic = ligand_detect(cell_v,linker_cart_coords,linker_adjmat,linkeranchors_list)
                sbu_temp = linkeranchors_atoms.copy()
                sbu_temp.update({val for val in initial_SBU_list[list(sbu_connect_list)[0]]})
                sbu_temp = list(sbu_temp)
                sbu_cart_coords = np.array([
                    at.coords() for at in [molcif.getAtom(val) for val in sbu_temp]])
                sbu_adjmat = slice_mat(adj_matrix.todense(),sbu_temp)
                pr_image_sbu = ligand_detect(cell_v,sbu_cart_coords,sbu_adjmat,set(range(len(linkeranchors_list))))
                if not (len(np.unique(pr_image_sbu, axis=0))==1 and len(np.unique(pr_image_organic, axis=0))==1): # linker
                    max_min_linker_length = max(min_length,max_min_linker_length)
                    min_max_linker_length = min(max_length,min_max_linker_length)
                    tmpstr = str(name)+','+' Anchors list: '+str(sbuanchors_list) \
                        +','+' SBU connectlist: '+str(sbu_connect_list)+' set to be linker\n'
                    write2file(ligandpath,"/ambiguous.txt",tmpstr)
                    continue
                else: # all anchoring atoms are in the same unitcell -> ligand
                    removelist.update(set(templist[ii])) # we also want to remove these ligands
                    SBUlist.update(set(templist[ii])) # we also want to remove these ligands
                    linker_list.pop(ii)
                    linker_subgraphlist.pop(ii)
                    tmpstr = str(name)+','+' Anchors list: '+str(sbuanchors_list) \
                        +','+' SBU connectlist: '+str(sbu_connect_list)+' set to be ligand\n'
                    write2file(ligandpath,"/ambiguous.txt",tmpstr)
                    tmpstr = str(name)+str(ii)+','+' Anchors list: '+ \
                        str(sbuanchors_list)+','+' SBU connectlist: '+str(sbu_connect_list)+'\n'
                    write2file(ligandpath,"/ligand.txt",tmpstr)
        else: #definite ligand
            write2file(logpath,"/%s.log"%name,"found ligand\n")
            removelist.update(set(templist[ii])) # we also want to remove these ligands
            SBUlist.update(set(templist[ii])) # we also want to remove these ligands
            linker_list.pop(ii)
            linker_subgraphlist.pop(ii)
            tmpstr = str(name)+','+' Anchors list: '+str(sbuanchors_list) \
                +','+' SBU connectlist: '+str(sbu_connect_list)+'\n'
            write2file(ligandpath,"/ligand.txt",tmpstr)

    tmpstr = str(name) + ", (min_max_linker_length,max_min_linker_length): " + \
        str(min_max_linker_length) + " , " +str(max_min_linker_length) + "\n"
    write2file(logpath,"/%s.log"%name,tmpstr)
    if min_max_linker_length < 3:
        write2file(linkerpath,"/short_ligands.txt",tmpstr)
    if min_max_linker_length > 2:
        # for N-C-C-N ligand ligand
        if max_min_linker_length == min_max_linker_length:
            long_ligands = True
        elif min_max_linker_length > 3:
            long_ligands = True

    # In the case of long linkers, add second coordination shell without
    # further checks. In the case of short linkers, start from metal and grow
    # outwards using the include_extra_shells function.
    linker_length_list = [len(linker_val) for linker_val in linker_list]
    if len(set(linker_length_list)) != 1:
        write2file(linkerpath,"/uneven.txt",str(name)+'\n')
    if not min_max_linker_length < 2: # treating the 2 atom ligands differently! Need caution
        if long_ligands:
            tmpstr = "\nStructure has LONG ligand\n\n"
            write2file(logpath,"/%s.log"%name,tmpstr)
            # First account for all of the carboxylic acid type linkers, add
            # in the carbons. Iterate over a copy because SBUlist grows.
            for zero_first_shell in SBUlist.copy():
                for val in molcif.getBondedAtomsSmart(zero_first_shell):
                    SBUlist.add(val)
        truncated_linkers = allatoms - SBUlist
        SBU_list, SBU_subgraphlist = get_closed_subgraph(SBUlist, truncated_linkers, adj_matrix)
        if not long_ligands:
            tmpstr = "\nStructure has SHORT ligand\n\n"
            write2file(logpath,"/%s.log"%name,tmpstr)
            SBU_list , SBU_subgraphlist = include_extra_shells(SBU_list,SBU_subgraphlist,molcif ,adj_matrix)
    else:
        tmpstr = "Structure %s has extreamly short ligands, check the outputs\n"%name
        write2file(ligandpath,"/ambiguous.txt",tmpstr)
        # NOTE(review): the same line is written to the log twice here; this
        # looks accidental but is kept so the log output is unchanged.
        tmpstr = "Structure has extreamly short ligands\n"
        write2file(logpath,"/%s.log"%name,tmpstr)
        tmpstr = "Structure has extreamly short ligands\n"
        write2file(logpath,"/%s.log"%name,tmpstr)
        truncated_linkers = allatoms - removelist
        SBU_list, SBU_subgraphlist = get_closed_subgraph(removelist, truncated_linkers, adj_matrix)
        SBU_list, SBU_subgraphlist = include_extra_shells(SBU_list,SBU_subgraphlist,molcif ,adj_matrix)
        SBU_list, SBU_subgraphlist = include_extra_shells(SBU_list,SBU_subgraphlist,molcif ,adj_matrix)

    # For the cases that have a linker subgraph, do the featurization.
    if len(linker_subgraphlist)>=1: #Featurize cases that did not fail
        try:
            descriptor_names, descriptors, lc_descriptor_names, lc_descriptors = make_MOF_SBU_RACs(SBU_list, SBU_subgraphlist, molcif, depth, name , cell_v,anc_atoms, sbupath, connections_list, connections_subgraphlist)
            lig_descriptor_names, lig_descriptors = make_MOF_linker_RACs(linker_list, linker_subgraphlist, molcif, depth, name, cell_v, linkerpath)
            full_names = descriptor_names+lig_descriptor_names+lc_descriptor_names #+ ECFP_names
            full_descriptors = list(descriptors)+list(lig_descriptors)+list(lc_descriptors)
            print(len(full_names),len(full_descriptors))
        except Exception:
            # Best effort: any featurization error marks the structure as
            # failed. KeyboardInterrupt/SystemExit are no longer swallowed.
            full_names = [0]
            full_descriptors = [0]
    else:
        # The separate ``== 1`` branch that used to sit here could never run,
        # because ``>= 1`` above already covers it.
        print('Failed to featurize this MOF.')
        full_names = [0]
        full_descriptors = [0]
    if (len(full_names) <= 1) and (len(full_descriptors) <= 1):
        tmpstr = "Failed to featurize %s\n"%(name)
        write2file(path,"/FailedStructures.log",tmpstr)
    return full_names, full_descriptors
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
##### Example of usage over a set of cif files.
|
|
576
|
+
# featurization_list = []
|
|
577
|
+
# import sys
|
|
578
|
+
# featurization_directory = sys.argv[1]
|
|
579
|
+
# for cif_file in os.listdir(featurization_directory+'/cif/'):
|
|
580
|
+
# #### This first part gets the primitive cells ####
|
|
581
|
+
# get_primitive(featurization_directory+'/cif/'+cif_file, featurization_directory+'/primitive/'+cif_file)
|
|
582
|
+
# full_names, full_descriptors = get_MOF_descriptors(featurization_directory+'/primitive/'+cif_file,3,path=featurization_directory+'/',
|
|
583
|
+
# xyzpath=featurization_directory+'/xyz/'+cif_file.replace('cif','xyz'))
|
|
584
|
+
# full_names.append('filename')
|
|
585
|
+
# full_descriptors.append(cif_file)
|
|
586
|
+
# featurization = dict(zip(full_names, full_descriptors))
|
|
587
|
+
# featurization_list.append(featurization)
|
|
588
|
+
# df = pd.DataFrame(featurization_list)
|
|
589
|
+
# df.to_csv('./full_featurization_frame.csv',index=False)
|