molSimplify 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +224 -0
- molSimplify/Classes/__init__.py +6 -0
- molSimplify/Classes/atom3D.py +235 -0
- molSimplify/Classes/dft_obs.py +130 -0
- molSimplify/Classes/globalvars.py +827 -0
- molSimplify/Classes/helpers.py +161 -0
- molSimplify/Classes/ligand.py +2330 -0
- molSimplify/Classes/mGUI.py +2493 -0
- molSimplify/Classes/mWidgets.py +438 -0
- molSimplify/Classes/miniGUI.py +41 -0
- molSimplify/Classes/mol2D.py +260 -0
- molSimplify/Classes/mol3D.py +5846 -0
- molSimplify/Classes/monomer3D.py +253 -0
- molSimplify/Classes/partialcharges.py +226 -0
- molSimplify/Classes/protein3D.py +1178 -0
- molSimplify/Classes/rundiag.py +151 -0
- molSimplify/Data/ML.dat +212 -0
- molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
- molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
- molSimplify/Data/MLS_angle_for_click.dat +8 -0
- molSimplify/Data/MLS_angle_for_inter.dat +23 -0
- molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
- molSimplify/Data/MLS_angle_for_intra.dat +10 -0
- molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
- molSimplify/Data/MLS_angle_for_oa.dat +18 -0
- molSimplify/Data/ML_FSR_for_inter.dat +112 -0
- molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
- molSimplify/Data/ML_bond_for_cat.dat +8 -0
- molSimplify/Data/ML_bond_for_click.dat +8 -0
- molSimplify/Data/ML_bond_for_inter.dat +48 -0
- molSimplify/Data/ML_bond_for_inter2.dat +48 -0
- molSimplify/Data/ML_bond_for_intra.dat +10 -0
- molSimplify/Data/ML_bond_for_intra2.dat +6 -0
- molSimplify/Data/ML_bond_for_oa.dat +18 -0
- molSimplify/Data/bp1.dat +21 -0
- molSimplify/Data/li.dat +3 -0
- molSimplify/Data/no.dat +2 -0
- molSimplify/Data/oct.dat +7 -0
- molSimplify/Data/pbp.dat +8 -0
- molSimplify/Data/spy.dat +6 -0
- molSimplify/Data/sqap.dat +9 -0
- molSimplify/Data/sqp.dat +5 -0
- molSimplify/Data/tbp.dat +6 -0
- molSimplify/Data/tdhd.dat +9 -0
- molSimplify/Data/thd.dat +5 -0
- molSimplify/Data/tpl.dat +4 -0
- molSimplify/Data/tpr.dat +7 -0
- molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
- molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
- molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
- molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
- molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
- molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
- molSimplify/Informatics/MOF/__init__.py +0 -0
- molSimplify/Informatics/MOF/atomic.py +267 -0
- molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
- molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
- molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
- molSimplify/Informatics/Mol2Parser.py +46 -0
- molSimplify/Informatics/RACassemble.py +408 -0
- molSimplify/Informatics/__init__.py +0 -0
- molSimplify/Informatics/active_learning/__init__.py +0 -0
- molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
- molSimplify/Informatics/autocorrelation.py +1930 -0
- molSimplify/Informatics/clean_autocorrelation.py +778 -0
- molSimplify/Informatics/coulomb_analyze.py +67 -0
- molSimplify/Informatics/decoration_manager.py +193 -0
- molSimplify/Informatics/geo_analyze.py +88 -0
- molSimplify/Informatics/geometrics.py +56 -0
- molSimplify/Informatics/graph_analyze.py +163 -0
- molSimplify/Informatics/graph_racs.py +288 -0
- molSimplify/Informatics/jupyter_vis.py +172 -0
- molSimplify/Informatics/lacRACAssemble.py +2192 -0
- molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
- molSimplify/Informatics/misc_descriptors.py +198 -0
- molSimplify/Informatics/organic_fingerprints.py +61 -0
- molSimplify/Informatics/partialcharges.py +345 -0
- molSimplify/Informatics/protein/activesite.py +53 -0
- molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
- molSimplify/Informatics/rac155_geo.py +48 -0
- molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
- molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
- molSimplify/Ligands/12crown4.mol +62 -0
- molSimplify/Ligands/Antipyrine.mol +58 -0
- molSimplify/Ligands/BPAbipy.mol +106 -0
- molSimplify/Ligands/Hpyrrole.mol +26 -0
- molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
- molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
- molSimplify/Ligands/NMe2_-1.xyz +11 -0
- molSimplify/Ligands/PCy3.mol +111 -0
- molSimplify/Ligands/PMe3.xyz +15 -0
- molSimplify/Ligands/PPh3.mol +76 -0
- molSimplify/Ligands/Propyphenazone.mol +77 -0
- molSimplify/Ligands/acac.mol +33 -0
- molSimplify/Ligands/acacen.mol +76 -0
- molSimplify/Ligands/acetate.smi +1 -0
- molSimplify/Ligands/acetate.xyz +9 -0
- molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
- molSimplify/Ligands/acetonitrile.mol +17 -0
- molSimplify/Ligands/alanine.mol +30 -0
- molSimplify/Ligands/alphabetizer.py +21 -0
- molSimplify/Ligands/amine.mol +11 -0
- molSimplify/Ligands/ammonia.mol +12 -0
- molSimplify/Ligands/arginine.mol +58 -0
- molSimplify/Ligands/asparagine.mol +38 -0
- molSimplify/Ligands/aspartic_acid.mol +35 -0
- molSimplify/Ligands/azide.mol +11 -0
- molSimplify/Ligands/benzene.mol +28 -0
- molSimplify/Ligands/benzene_pi.mol +30 -0
- molSimplify/Ligands/benzenedithiol.mol +30 -0
- molSimplify/Ligands/benzenethiol.mol +30 -0
- molSimplify/Ligands/benzylisocy.mol +38 -0
- molSimplify/Ligands/bidiazine.mol +42 -0
- molSimplify/Ligands/bidiazole.mol +38 -0
- molSimplify/Ligands/bifuran.mol +38 -0
- molSimplify/Ligands/bihydrodiazine.mol +58 -0
- molSimplify/Ligands/bihydrodiazole.mol +46 -0
- molSimplify/Ligands/bihydrooxazine.mol +54 -0
- molSimplify/Ligands/bihydrooxazole.mol +42 -0
- molSimplify/Ligands/bihydrothiazine.mol +54 -0
- molSimplify/Ligands/bihydrothiazole.mol +42 -0
- molSimplify/Ligands/biimidazole.mol +38 -0
- molSimplify/Ligands/bioxazole.mol +34 -0
- molSimplify/Ligands/bipy.mol +46 -0
- molSimplify/Ligands/bipyrazine.xyz +20 -0
- molSimplify/Ligands/bipyrimidine.mol +42 -0
- molSimplify/Ligands/bipyrrole.mol +42 -0
- molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
- molSimplify/Ligands/bithiazole.mol +34 -0
- molSimplify/Ligands/bromide.mol +7 -0
- molSimplify/Ligands/bromide.smi +1 -0
- molSimplify/Ligands/c2.mol +9 -0
- molSimplify/Ligands/caprolactone.mol +41 -0
- molSimplify/Ligands/carbonyl.mol +8 -0
- molSimplify/Ligands/carboxyl.mol +13 -0
- molSimplify/Ligands/cat.mol +30 -0
- molSimplify/Ligands/chloride.mol +7 -0
- molSimplify/Ligands/chloride.smi +1 -0
- molSimplify/Ligands/chloropyridine.mol +27 -0
- molSimplify/Ligands/co2.mol +10 -0
- molSimplify/Ligands/corrolazine.mol +72 -0
- molSimplify/Ligands/cs.mol +8 -0
- molSimplify/Ligands/cyanate.xyz +5 -0
- molSimplify/Ligands/cyanide.mol +9 -0
- molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
- molSimplify/Ligands/cyanopyridine.mol +29 -0
- molSimplify/Ligands/cyclam.mol +81 -0
- molSimplify/Ligands/cyclen.mol +69 -0
- molSimplify/Ligands/cyclopentadienyl.mol +26 -0
- molSimplify/Ligands/cysteine.mol +32 -0
- molSimplify/Ligands/diaminomethyl.mol +19 -0
- molSimplify/Ligands/diazine.mol +25 -0
- molSimplify/Ligands/diazole.mol +23 -0
- molSimplify/Ligands/dicyanamide.mol +15 -0
- molSimplify/Ligands/dihydrofuran.mol +27 -0
- molSimplify/Ligands/dmap.xyz +35 -0
- molSimplify/Ligands/dmf.mol +28 -0
- molSimplify/Ligands/dmi.mol +41 -0
- molSimplify/Ligands/dmpe.mol +52 -0
- molSimplify/Ligands/dpmu.mol +47 -0
- molSimplify/Ligands/dppe.mol +112 -0
- molSimplify/Ligands/edta.mol +69 -0
- molSimplify/Ligands/en.mol +28 -0
- molSimplify/Ligands/ethanethiol.mol +21 -0
- molSimplify/Ligands/ethanolamine.mol +26 -0
- molSimplify/Ligands/ethbipy.mol +70 -0
- molSimplify/Ligands/ethyl.mol +19 -0
- molSimplify/Ligands/ethylamine.mol +24 -0
- molSimplify/Ligands/ethylene.mol +16 -0
- molSimplify/Ligands/ethylesteracac.mol +57 -0
- molSimplify/Ligands/fluoride.mol +7 -0
- molSimplify/Ligands/fluoride.smi +1 -0
- molSimplify/Ligands/formaldehyde.mol +12 -0
- molSimplify/Ligands/formamidate.xyz +8 -0
- molSimplify/Ligands/formate.xyz +6 -0
- molSimplify/Ligands/furan.mol +23 -0
- molSimplify/Ligands/glutamic_acid.mol +42 -0
- molSimplify/Ligands/glutamine.mol +44 -0
- molSimplify/Ligands/glycinate.mol +23 -0
- molSimplify/Ligands/glycine.mol +24 -0
- molSimplify/Ligands/h2s.mol +10 -0
- molSimplify/Ligands/helium.mol +6 -0
- molSimplify/Ligands/histidine.mol +45 -0
- molSimplify/Ligands/hmpa.mol +62 -0
- molSimplify/Ligands/hs-.mol +9 -0
- molSimplify/Ligands/hydride.mol +7 -0
- molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
- molSimplify/Ligands/hydrocyanide.mol +10 -0
- molSimplify/Ligands/hydrodiazine.mol +33 -0
- molSimplify/Ligands/hydrodiazole.mol +27 -0
- molSimplify/Ligands/hydrogensulfide.mol +10 -0
- molSimplify/Ligands/hydroisocyanide.mol +11 -0
- molSimplify/Ligands/hydrooxazine.mol +31 -0
- molSimplify/Ligands/hydrooxazole.mol +25 -0
- molSimplify/Ligands/hydrothiazine.mol +31 -0
- molSimplify/Ligands/hydrothiazole.mol +25 -0
- molSimplify/Ligands/hydroxyl.mol +9 -0
- molSimplify/Ligands/imidazole.mol +23 -0
- molSimplify/Ligands/imidazolidinone.mol +29 -0
- molSimplify/Ligands/imine.mol +13 -0
- molSimplify/Ligands/iminodiacetic.mol +33 -0
- molSimplify/Ligands/iodide.mol +7 -0
- molSimplify/Ligands/iodobenzene.xyz +14 -0
- molSimplify/Ligands/isoleucine.mol +48 -0
- molSimplify/Ligands/isothiocyanate.mol +11 -0
- molSimplify/Ligands/leucine.mol +48 -0
- molSimplify/Ligands/ligands.dict +257 -0
- molSimplify/Ligands/lysine.mol +54 -0
- molSimplify/Ligands/mebenzenedithiol.mol +36 -0
- molSimplify/Ligands/mebim_py.xyz +29 -0
- molSimplify/Ligands/mebim_pz.xyz +28 -0
- molSimplify/Ligands/mebipy.mol +58 -0
- molSimplify/Ligands/mecat.mol +36 -0
- molSimplify/Ligands/methanal.mol +11 -0
- molSimplify/Ligands/methanethiol.mol +15 -0
- molSimplify/Ligands/methanol.mol +16 -0
- molSimplify/Ligands/methionine.mol +44 -0
- molSimplify/Ligands/methyl.mol +13 -0
- molSimplify/Ligands/methylacetylide.xyz +8 -0
- molSimplify/Ligands/methylamine.mol +19 -0
- molSimplify/Ligands/methylazide.xyz +9 -0
- molSimplify/Ligands/methylisocy.mol +17 -0
- molSimplify/Ligands/methylpyridine.mol +33 -0
- molSimplify/Ligands/n2.mol +8 -0
- molSimplify/Ligands/n4py.xyz +51 -0
- molSimplify/Ligands/nch.mol +10 -0
- molSimplify/Ligands/nco-.mol +11 -0
- molSimplify/Ligands/nethanolamine.mol +26 -0
- molSimplify/Ligands/nitrate.mol +14 -0
- molSimplify/Ligands/nitrite.mol +11 -0
- molSimplify/Ligands/nitro.mol +11 -0
- molSimplify/Ligands/nitrobipy.mol +54 -0
- molSimplify/Ligands/nitroso.mol +8 -0
- molSimplify/Ligands/nme3.mol +30 -0
- molSimplify/Ligands/no-.mol +10 -0
- molSimplify/Ligands/no2-.mol +11 -0
- molSimplify/Ligands/noxygen.mol +8 -0
- molSimplify/Ligands/ns-.mol +10 -0
- molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
- molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
- molSimplify/Ligands/o2-.mol +9 -0
- molSimplify/Ligands/o2.xyz +4 -0
- molSimplify/Ligands/och2.mol +12 -0
- molSimplify/Ligands/oethanolamine.mol +26 -0
- molSimplify/Ligands/ome2.mol +22 -0
- molSimplify/Ligands/ooh.xyz +5 -0
- molSimplify/Ligands/oxalate.mol +17 -0
- molSimplify/Ligands/oxalate.smi +1 -0
- molSimplify/Ligands/oxygen.mol +7 -0
- molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
- molSimplify/Ligands/ph2-.mol +11 -0
- molSimplify/Ligands/ph3.mol +12 -0
- molSimplify/Ligands/phen.mol +51 -0
- molSimplify/Ligands/phenacac.mol +63 -0
- molSimplify/Ligands/phenalalanine.mol +51 -0
- molSimplify/Ligands/phendione.mol +51 -0
- molSimplify/Ligands/phenphen.mol +75 -0
- molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
- molSimplify/Ligands/phenylcyc.mol +99 -0
- molSimplify/Ligands/phenylenediamine.mol +37 -0
- molSimplify/Ligands/phenylisocy.mol +32 -0
- molSimplify/Ligands/phosacidbipy.mol +66 -0
- molSimplify/Ligands/phosphine.mol +13 -0
- molSimplify/Ligands/phosphorine.mol +27 -0
- molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
- molSimplify/Ligands/phthalocyanine.mol +126 -0
- molSimplify/Ligands/pme3o.mol +32 -0
- molSimplify/Ligands/porphyrin.mol +82 -0
- molSimplify/Ligands/pph3o.mol +77 -0
- molSimplify/Ligands/proline.mol +39 -0
- molSimplify/Ligands/propdiol.mol +21 -0
- molSimplify/Ligands/propylene.mol +23 -0
- molSimplify/Ligands/pyridine.mol +27 -0
- molSimplify/Ligands/pyrimidone.mol +27 -0
- molSimplify/Ligands/pyrrole.mol +24 -0
- molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
- molSimplify/Ligands/s2-.mol +9 -0
- molSimplify/Ligands/salen.mol +75 -0
- molSimplify/Ligands/salphen.mol +84 -0
- molSimplify/Ligands/serine.mol +32 -0
- molSimplify/Ligands/simple_ligands.dict +14 -0
- molSimplify/Ligands/sulfacidbipy.mol +63 -0
- molSimplify/Ligands/tbucat.mol +54 -0
- molSimplify/Ligands/tbuphisocy.mol +56 -0
- molSimplify/Ligands/tbutylcyclen.mol +166 -0
- molSimplify/Ligands/tbutylisocy.mol +35 -0
- molSimplify/Ligands/tbutylthiol.mol +33 -0
- molSimplify/Ligands/tcnoet.mol +43 -0
- molSimplify/Ligands/tcnoetOH.mol +45 -0
- molSimplify/Ligands/terpy.mol +65 -0
- molSimplify/Ligands/tetrahydrofuran.mol +31 -0
- molSimplify/Ligands/thiane.mol +37 -0
- molSimplify/Ligands/thiazole.mol +21 -0
- molSimplify/Ligands/thiocyanate.mol +11 -0
- molSimplify/Ligands/thiol.mol +9 -0
- molSimplify/Ligands/thiophene.mol +23 -0
- molSimplify/Ligands/thiopyridine.mol +29 -0
- molSimplify/Ligands/threonine.mol +38 -0
- molSimplify/Ligands/tpp.mol +165 -0
- molSimplify/Ligands/tricyanomethyl.mol +19 -0
- molSimplify/Ligands/trifluoromethyl.mol +13 -0
- molSimplify/Ligands/tryptophan.mol +60 -0
- molSimplify/Ligands/tyrosine.mol +53 -0
- molSimplify/Ligands/uthiol.mol +11 -0
- molSimplify/Ligands/uthiolme2.mol +23 -0
- molSimplify/Ligands/valine.mol +42 -0
- molSimplify/Ligands/water.mol +10 -0
- molSimplify/Ligands/x.mol +6 -0
- molSimplify/Scripts/__init__.py +0 -0
- molSimplify/Scripts/addtodb.py +308 -0
- molSimplify/Scripts/cellbuilder.py +1592 -0
- molSimplify/Scripts/cellbuilder_tools.py +701 -0
- molSimplify/Scripts/chains.py +342 -0
- molSimplify/Scripts/convert_2to3.py +23 -0
- molSimplify/Scripts/dbinteract.py +631 -0
- molSimplify/Scripts/distgeom.py +617 -0
- molSimplify/Scripts/findcorrelations.py +287 -0
- molSimplify/Scripts/generator.py +267 -0
- molSimplify/Scripts/geometry.py +1224 -0
- molSimplify/Scripts/grabguivars.py +845 -0
- molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
- molSimplify/Scripts/inparse.py +1673 -0
- molSimplify/Scripts/io.py +1149 -0
- molSimplify/Scripts/isomers.py +415 -0
- molSimplify/Scripts/jobgen.py +247 -0
- molSimplify/Scripts/krr_prep.py +1262 -0
- molSimplify/Scripts/molSimplify_io.py +18 -0
- molSimplify/Scripts/molden2psi4wfn.py +166 -0
- molSimplify/Scripts/namegen.py +32 -0
- molSimplify/Scripts/nn_prep.py +561 -0
- molSimplify/Scripts/oct_check_mols.py +782 -0
- molSimplify/Scripts/periodic_QE.py +97 -0
- molSimplify/Scripts/postmold.py +304 -0
- molSimplify/Scripts/postmwfn.py +709 -0
- molSimplify/Scripts/postparse.py +488 -0
- molSimplify/Scripts/postproc.py +139 -0
- molSimplify/Scripts/qcgen.py +1450 -0
- molSimplify/Scripts/rmsd.py +489 -0
- molSimplify/Scripts/rungen.py +670 -0
- molSimplify/Scripts/structgen.py +3040 -0
- molSimplify/Scripts/tf_nn_prep.py +894 -0
- molSimplify/Scripts/tsgen.py +295 -0
- molSimplify/Scripts/uq_calibration.py +69 -0
- molSimplify/__init__.py +0 -0
- molSimplify/__main__.py +197 -0
- molSimplify/icons/chemdb.png +0 -0
- molSimplify/icons/hjklogo.png +0 -0
- molSimplify/icons/icon.png +0 -0
- molSimplify/icons/logo.png +0 -0
- molSimplify/icons/logo_old.png +0 -0
- molSimplify/icons/petachem.png +0 -0
- molSimplify/icons/petachem2.png +0 -0
- molSimplify/icons/petachem_full.png +0 -0
- molSimplify/icons/pythonlogo.png +0 -0
- molSimplify/icons/sge copy.png +0 -0
- molSimplify/icons/sge.png +0 -0
- molSimplify/icons/slurm.png +0 -0
- molSimplify/icons/wft1.png +0 -0
- molSimplify/icons/wft2.png +0 -0
- molSimplify/icons/wft3.png +0 -0
- molSimplify/ml/__init__.py +0 -0
- molSimplify/ml/kernels.py +36 -0
- molSimplify/ml/layers.py +29 -0
- molSimplify/molscontrol/__init__.py +14 -0
- molSimplify/molscontrol/_version.py +521 -0
- molSimplify/molscontrol/clf_tools.py +144 -0
- molSimplify/molscontrol/data/README.md +21 -0
- molSimplify/molscontrol/data/look_and_say.dat +15 -0
- molSimplify/molscontrol/dynamic_classifier.py +514 -0
- molSimplify/molscontrol/io_tools.py +363 -0
- molSimplify/molscontrol/molscontrol.py +49 -0
- molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
- molSimplify/molscontrol/terachem/terachem_input +22 -0
- molSimplify/python_krr/X_train_TS.csv +535 -0
- molSimplify/python_krr/__init__.py +0 -0
- molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
- molSimplify/python_krr/hat2_feature_names.csv +1 -0
- molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
- molSimplify/python_krr/hat_X_mean_std.csv +6 -0
- molSimplify/python_krr/hat_feature_names.csv +1 -0
- molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
- molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
- molSimplify/python_krr/hat_y_mean_std.csv +2 -0
- molSimplify/python_krr/sklearn_models.py +34 -0
- molSimplify/python_krr/y_train_TS.csv +535 -0
- molSimplify/python_nn/ANN.py +198 -0
- molSimplify/python_nn/__init__.py +0 -0
- molSimplify/python_nn/clf_analysis_tool.py +125 -0
- molSimplify/python_nn/dictionary_toolbox.py +49 -0
- molSimplify/python_nn/ensemble_test.py +309 -0
- molSimplify/python_nn/hs_center.csv +26 -0
- molSimplify/python_nn/hs_scale.csv +26 -0
- molSimplify/python_nn/ls_center.csv +26 -0
- molSimplify/python_nn/ls_scale.csv +26 -0
- molSimplify/python_nn/ms_hs_b1.csv +50 -0
- molSimplify/python_nn/ms_hs_b2.csv +50 -0
- molSimplify/python_nn/ms_hs_b3.csv +1 -0
- molSimplify/python_nn/ms_hs_w1.csv +50 -0
- molSimplify/python_nn/ms_hs_w2.csv +50 -0
- molSimplify/python_nn/ms_hs_w3.csv +1 -0
- molSimplify/python_nn/ms_ls_b1.csv +50 -0
- molSimplify/python_nn/ms_ls_b2.csv +50 -0
- molSimplify/python_nn/ms_ls_b3.csv +1 -0
- molSimplify/python_nn/ms_ls_w1.csv +50 -0
- molSimplify/python_nn/ms_ls_w2.csv +50 -0
- molSimplify/python_nn/ms_ls_w3.csv +1 -0
- molSimplify/python_nn/ms_slope_b1.csv +50 -0
- molSimplify/python_nn/ms_slope_b2.csv +50 -0
- molSimplify/python_nn/ms_slope_b3.csv +1 -0
- molSimplify/python_nn/ms_slope_w1.csv +50 -0
- molSimplify/python_nn/ms_slope_w2.csv +50 -0
- molSimplify/python_nn/ms_slope_w3.csv +1 -0
- molSimplify/python_nn/ms_split_b1.csv +50 -0
- molSimplify/python_nn/ms_split_b2.csv +50 -0
- molSimplify/python_nn/ms_split_b3.csv +1 -0
- molSimplify/python_nn/ms_split_w1.csv +50 -0
- molSimplify/python_nn/ms_split_w2.csv +50 -0
- molSimplify/python_nn/ms_split_w3.csv +1 -0
- molSimplify/python_nn/slope_center.csv +25 -0
- molSimplify/python_nn/slope_scale.csv +25 -0
- molSimplify/python_nn/split_center.csv +26 -0
- molSimplify/python_nn/split_scale.csv +26 -0
- molSimplify/python_nn/tf_ANN.py +762 -0
- molSimplify/python_nn/train_data.csv +1211 -0
- molSimplify/tf_nn/__init__.py +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
- molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/gap_model.json +1 -0
- molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
- molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/homo_model.json +126 -0
- molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/split/split_model.h5 +0 -0
- molSimplify/tf_nn/split/split_model.json +1 -0
- molSimplify/tf_nn/split/split_vars.csv +155 -0
- molSimplify/tf_nn/split/split_x.csv +1902 -0
- molSimplify/tf_nn/split/split_y.csv +1902 -0
- molSimplify/tf_nn/split/train_names.csv +1901 -0
- molSimplify/utils/__init__.py +0 -0
- molSimplify/utils/decorators.py +16 -0
- molSimplify/utils/metaclasses.py +12 -0
- molSimplify/utils/tensorflow.py +23 -0
- molSimplify/utils/timer.py +16 -0
- molSimplify-1.7.4.dist-info/LICENSE +674 -0
- molSimplify-1.7.4.dist-info/METADATA +821 -0
- molSimplify-1.7.4.dist-info/RECORD +651 -0
- molSimplify-1.7.4.dist-info/WHEEL +5 -0
- molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
- molSimplify-1.7.4.dist-info/top_level.txt +4 -0
- tests/generateTests.py +122 -0
- tests/helperFuncs.py +658 -0
- tests/informatics/test_MOF_descriptors.py +128 -0
- tests/informatics/test_active_learning.py +113 -0
- tests/informatics/test_coulomb_analyze.py +24 -0
- tests/informatics/test_graph_racs.py +193 -0
- tests/ml/test_kernels.py +20 -0
- tests/ml/test_layers.py +47 -0
- tests/runtest.py +10 -0
- tests/test_Mol2D.py +128 -0
- tests/test_basic_imports.py +62 -0
- tests/test_bidentate.py +25 -0
- tests/test_cli.py +20 -0
- tests/test_distgeom.py +106 -0
- tests/test_example_1.py +29 -0
- tests/test_example_3.py +31 -0
- tests/test_example_5.py +43 -0
- tests/test_example_7.py +28 -0
- tests/test_example_8.py +15 -0
- tests/test_example_tbp.py +15 -0
- tests/test_ff_xtb.py +111 -0
- tests/test_geocheck_oct.py +26 -0
- tests/test_geocheck_one_empty.py +15 -0
- tests/test_geometry.py +44 -0
- tests/test_inparse.py +76 -0
- tests/test_io.py +84 -0
- tests/test_jobgen.py +84 -0
- tests/test_joption_pythonic.py +27 -0
- tests/test_ligand_assign.py +58 -0
- tests/test_ligand_assign_consistent.py +60 -0
- tests/test_ligand_class.py +26 -0
- tests/test_ligand_from_mol_file.py +35 -0
- tests/test_ligands.py +86 -0
- tests/test_mol3D.py +337 -0
- tests/test_molcas_caspt2.py +15 -0
- tests/test_molcas_casscf.py +15 -0
- tests/test_old_ANNs.py +68 -0
- tests/test_orca_ccsdt.py +15 -0
- tests/test_orca_dft.py +15 -0
- tests/test_qcgen.py +50 -0
- tests/test_racs.py +124 -0
- tests/test_rmsd.py +68 -0
- tests/test_structgen_functions.py +198 -0
- tests/test_tetrahedral.py +29 -0
- tests/test_tutorial_10_part_one.py +16 -0
- tests/test_tutorial_10_part_two.py +15 -0
- tests/test_tutorial_2.py +11 -0
- tests/test_tutorial_3.py +15 -0
- tests/test_tutorial_4.py +57 -0
- tests/test_tutorial_6.py +10 -0
- tests/test_tutorial_8.py +29 -0
- tests/test_tutorial_9_part_one.py +15 -0
- tests/test_tutorial_9_part_two.py +15 -0
- tests/test_tutorial_qm9_part_one.py +6 -0
- tests/testresources/refs/racs/generate_references.py +85 -0
- workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
- workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
- workflows/NandyJACSAu2022/fragment_classes.py +586 -0
- workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
|
@@ -0,0 +1,895 @@
|
|
|
1
|
+
from molSimplify.Classes.mol3D import mol3D
|
|
2
|
+
from molSimplify.Classes.atom3D import atom3D
|
|
3
|
+
from molSimplify.Informatics.MOF.PBC_functions import (
|
|
4
|
+
compute_adj_matrix,
|
|
5
|
+
compute_distance_matrix3,
|
|
6
|
+
fractional2cart,
|
|
7
|
+
get_closed_subgraph,
|
|
8
|
+
include_extra_shells,
|
|
9
|
+
ligand_detect,
|
|
10
|
+
linker_length,
|
|
11
|
+
mkcell,
|
|
12
|
+
readcif,
|
|
13
|
+
returnXYZandGraph,
|
|
14
|
+
slice_mat,
|
|
15
|
+
write2file,
|
|
16
|
+
writeXYZandGraph,
|
|
17
|
+
XYZ_connected,
|
|
18
|
+
)
|
|
19
|
+
from molSimplify.Scripts.cellbuilder_tools import import_from_cif
|
|
20
|
+
import numpy as np
|
|
21
|
+
from scipy import sparse
|
|
22
|
+
import networkx as nx
|
|
23
|
+
import copy
|
|
24
|
+
import itertools
|
|
25
|
+
import os
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def periodic_checker(graph, coords):
    """
    Decides whether a bonded structure is periodic, judged by its longest bond.
    This does the same task as molSimplify.Informatics.MOF.MOF_descriptors.detect_1D_rod, but in a different way.

    Parameters
    ----------
    graph : numpy.matrix
        Adjacency matrix. Shape is (number of atoms, number of atoms).
    coords : list of list of float
        Cartesian coordinates of atoms. Length of the outer list is the number of atoms, while each inner list is length 3.

    Returns
    -------
    periodic : bool
        True if at least one pair of connected atoms is more than four angstroms apart.

    """
    from scipy.sparse import csgraph
    bonded_rows, bonded_cols = csgraph.csgraph_from_dense(graph).nonzero()
    # Connected atoms more than four angstroms apart are very likely offset by a cell vector.
    # A list (not a generator) is built so that every bond length is evaluated.
    too_long = [
        np.linalg.norm(np.array(coords[first]) - np.array(coords[second])) > 4
        for first, second in zip(bonded_rows, bonded_cols)
    ]
    return any(too_long)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def branch(molcif, main_paths, atoms_in_sbu, new_atoms=None):
    """
    Climbs out from a given atom and adds the atoms that are in the branch.
    This is important for getting all atoms in a branched functional group of a linker.

    Both ``atoms_in_sbu`` and ``new_atoms`` are extended in place with ``+=``, so lists
    handed in by the caller can be modified. The function calls itself until a pass
    leaves the length of ``atoms_in_sbu`` unchanged. The lists are not deduplicated
    on return, so they may contain repeated indices.

    Parameters
    ----------
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the cif file being analyzed.
    main_paths : list of int
        Indices of main path atoms (atoms that are part of a linker).
    atoms_in_sbu : list of numpy.int64
        Indices of atoms in the SBU.
    new_atoms : list of numpy.int64
        Indices of new atoms to be included. Defaults to an empty list when None.

    Returns
    -------
    new_atoms : list of numpy.int64
        Indices of new atoms to be included.
    atoms_in_sbu : list of numpy.int64
        Indices of atoms in the SBU.

    """
    if new_atoms is None:
        new_atoms = []
    # Snapshot used only to detect, by length, whether this pass added anything.
    original_atoms = atoms_in_sbu.copy()
    for atom in new_atoms:
        bonded_list = molcif.getBondedAtoms(atom)
        # Does this atom have a neighbor that is neither on a main path nor already collected?
        if (len(set(bonded_list)-set(main_paths)-set(atoms_in_sbu)) > 0):
            # NOTE(review): the three statements below are assumed to be nested inside this `if` - confirm.
            # In-place extension with all off-main-path neighbors; on the first hit this grows
            # the very list object the loop is iterating.
            new_atoms += list(set(bonded_list)-set(main_paths))
            # Deduplicate. This rebinds new_atoms to a fresh list, so the loop keeps walking the old object.
            new_atoms = list(set(new_atoms))
            # In-place extension of the list passed in as atoms_in_sbu; no deduplication happens here.
            atoms_in_sbu += new_atoms
    if len(original_atoms) == len(atoms_in_sbu):
        # Nothing was added during this pass: stop recursing.
        return new_atoms, atoms_in_sbu
    else:
        # Something was added: explore again starting from the enlarged lists.
        branch_atoms, branch_atoms_in_sbu = branch(molcif, main_paths, atoms_in_sbu, new_atoms)
        # NOTE(review): the recursive call may hand back these same list objects, in which case
        # `+=` appends a list to itself and duplicates its entries - confirm this is acceptable.
        new_atoms += branch_atoms
        atoms_in_sbu += branch_atoms_in_sbu
        return new_atoms, atoms_in_sbu
|
|
103
|
+
|
|
104
|
+
def identify_main_chain(temp_mol, link_list):
    """
    Identifies the atoms that are directly present from one
    connecting point to another. Identifies cases that can be functional groups.

    The main chain starts as the union of the shortest paths between every pair of
    anchoring atoms. Any ring of the minimum cycle basis that shares at least one atom
    with the main chain is then merged into it, repeatedly, until no ring adds anything.

    Parameters
    ----------
    temp_mol : molSimplify.Classes.mol3D.mol3D
        mol3D of a linker. Only its ``graph`` attribute (adjacency matrix) is read.
    link_list : list of int
        The indices of the anchoring atoms of the linker.

    Returns
    -------
    main : list of int
        Any atoms that lie on the path between two connection points. May contain
        repeated indices. If there is a single anchoring atom, all atoms of the linker.
    shortest : int
        The shortest path length (number of atoms) between two anchoring atoms.
        1 if there is a single anchoring atom.
    longest : int
        The longest path length (number of atoms) between two anchoring atoms.
        1 if there is a single anchoring atom.

    Raises
    ------
    ValueError
        If link_list is empty, since there is then no anchor pair to measure.

    """
    adjacency = temp_mol.graph
    if sparse.issparse(adjacency):
        adjacency = adjacency.toarray()
    # nx.from_numpy_matrix was removed in networkx 3.0; from_numpy_array is its replacement.
    G = nx.from_numpy_array(np.asarray(adjacency))
    if len(link_list) == 1:
        # A single anchor: every atom of the linker counts as main chain.
        return list(G.nodes), 1, 1

    # Shortest path between every pair of anchoring atoms.
    shorts = [
        list(nx.shortest_path(G, source=a, target=b))
        for a, b in itertools.combinations(link_list, 2)
    ]
    path_lengths = [len(short) for short in shorts]
    shortest, longest = min(path_lengths), max(path_lengths)

    main = list(itertools.chain(*shorts))
    on_main = set(main)
    min_cycles = nx.minimum_cycle_basis(G)  # gets all closed rings in graph
    # Absorb rings that touch the main chain but are not yet fully contained in it.
    # Absorbing one ring can make another ring touch the chain, so repeat until stable.
    grew = True
    while grew:
        grew = False
        for cycle in min_cycles:
            ring = set(cycle)
            if (on_main & ring) and not ring.issubset(on_main):
                main += ring
                on_main |= ring
                grew = True
    return main, shortest, longest
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def get_molcif_cycles_no_metal(molcif):
    """
    Makes the graph and gets all metal-free cycles in the graph.

    Parameters
    ----------
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the cif file being analyzed.

    Returns
    -------
    subcycle_list : list of list of int
        The individual subcycles that contain no metal atom. Each inner list is a subcycle.
    flat_subcycle_list : list of int
        Flattened list of subcycle atoms (indices). An atom shared by several
        subcycles appears once per subcycle.

    """
    adjacency = molcif.graph
    if sparse.issparse(adjacency):
        adjacency = adjacency.toarray()
    # nx.from_numpy_matrix was removed in networkx 3.0; from_numpy_array is its replacement.
    G = nx.from_numpy_array(np.asarray(adjacency))
    cycles = nx.minimum_cycle_basis(G)  # gets all closed rings in graph
    # Don't include any cycles with a metal in them.
    # This is necessary to not get malformed cycles.
    subcycle_list = [
        cycle for cycle in cycles
        if not any(molcif.getAtom(element).ismetal() for element in cycle)
    ]
    # Flatten list to contain all atoms in subcycles
    flat_subcycle_list = [item for sublist in subcycle_list for item in sublist]
    return subcycle_list, flat_subcycle_list
|
|
197
|
+
|
|
198
|
+
def breakdown_MOF(SBUlist, SBU_subgraph, molcif, name, cell, anchoring_atoms, sbupath=False, connections_list=False, connections_subgraphlist=False, linkerpath=False):
    """
    Writes SBU and linker XYZ files.

    The function works in two passes. The first pass loops over the SBUs: each SBU is
    grown with the rings, branches and hydrogens attached to it, and every main-path
    (linker) atom bonded to it is represented by a dummy 'X' atom. The second pass loops
    over the linkers: atoms claimed by an SBU are dropped, the SBU atoms directly bonded
    to the linker are kept, and the next SBU atoms out are represented by 'X' atoms.

    Output codes are as follows:
    2: There exist short (i.e. 2 atom) and longer linkers. We could not split the MOF apart consistently.
    4: The MOF contains a 1D rod, which cannot be easily reassembled into a new MOF.
    None: The MOF was split correctly

    Parameters
    ----------
    SBUlist : list of list of numpy.int64
        Each inner list is its own separate SBU. The ints are the atom indices of that SBU. Length is # of SBUs.
    SBU_subgraph : list of scipy.sparse.csr.csr_matrix
        The atom connections in the SBU subgraph. Length is # of SBUs. Not read by this function.
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the cif file being analyzed.
    name : str
        The name of the cif being analyzed.
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    anchoring_atoms : set of numpy.int64
        The indices of the anchoring atoms of the linkers.
    sbupath : str
        The path to which the SBU XYZ files will be written. If falsy, no SBU file is written.
    connections_list : list of list of int
        Each inner list is its own separate linker. The ints are the atom indices of that linker. Length is # of linkers.
    connections_subgraphlist : list of numpy.matrix
        The atom connections in the linker subgraph. Length is # of linkers.
    linkerpath : str
        The path to which the linker XYZ files will be written. If falsy, no linker file is written.

    Returns
    -------
    int or None
        One of the output codes listed above (2, 4 or None).

    """
    all_SBU_atoms = []

    # make the graph and get all cycles in the graph
    # return the flattened list of the subcycle atoms
    # The subcycle list contains all of the individual subcycles (if they need to be compared)
    subcycle_list, flat_subcycle_list = get_molcif_cycles_no_metal(molcif)

    # Loop over all SBUs as identified by subgraphs. Then create the mol3Ds for each SBU.
    for i, SBU in enumerate(SBUlist):
        # For a given SBU, make a list of main paths. This contains atoms that are part of the linker.
        main_paths = []
        # Per-linker bookkeeping; filled in below but not read again in this function.
        linker_length_dict = {}
        current_longest = 0
        for j, linker in enumerate(connections_list):
            # For each SBU and linker combo, make a mol3D and define the link list for that linker
            linker_mol = mol3D()
            # keep track of added atoms
            linker_added = []
            link_list = []
            linker_dict = {}
            for jj, val2 in enumerate(linker):
                # add anchoring atom to link list. val2 has molcif numbering
                linker_dict[jj] = val2
                if val2 in anchoring_atoms:
                    link_list.append(jj)
                # This builds a mol object for the linker --> even though it is in the SBU section.
                if val2 not in linker_added:
                    linker_mol.addAtom(molcif.getAtom(val2))
                    linker_added.append(val2)
            linker_mol.graph = connections_subgraphlist[j]
            # This identifies anything on the simple path from end to end
            main, _, longest = identify_main_chain(linker_mol, link_list)
            if longest > current_longest:
                current_longest = longest
            # Currently, main is in linker indices. Get them back in molcif indices.
            # This is the main chain for a given linker.
            main = [linker_dict[val] for val in main]
            main_paths.extend(main)
            min_length, max_length = linker_length(connections_subgraphlist[j], link_list)
            linker_length_j = max(min_length, max_length)
            # Identify the linker length and atoms in the linker by the linker number
            linker_length_dict[j] = {'length': linker_length_j, 'atoms': linker, 'longest': longest}
        if current_longest <= 2:
            # Only very short linkers were found: give up on this MOF.
            return 2
        # put all main path atoms into the main path list
        main_paths = list(set(main_paths))
        SBU_mol = mol3D()

        # This list keeps track of if an atom has been added to the SBU (molcif indices)
        SBU_added = []
        # Maps SBU_mol indices (keys) to molcif indices (values)
        SBU_dict = {}
        # For every ring overlapping the SBU, the atoms bonded to that ring from outside it.
        bonded_atoms_to_cycle = []
        # Pairs of (SBU_mol index of an X atom, molcif index of the SBU atom it is bonded to)
        tuple_list_sbu = []
        # Keep track of the molcif indices that should be the X atoms
        atoms_that_are_X = []
        X_checked_list = []
        # Candidate X atoms as (atom3D, molcif index, molcif index of bonded SBU atom).
        # These atoms are added to the end of the XYZ.
        X_atom3D_list = []
        for val in SBU:
            # make SBU mol, add new atom if never added before.
            if val not in SBU_added:
                SBU_mol.addAtom(molcif.getAtom(val))
                SBU_dict[SBU_mol.natoms-1] = val
                SBU_added.append(val)
            # Check if any of the atoms added to the SBU are part of a cycle. Checks overlap between first
            # two coordination shells and any rings in the SBU.
            # NOTE(review): this check and the branch search below are assumed to run for every
            # atom of the SBU, not only for newly added ones - confirm nesting.
            in_cycles = any([val in cycle for cycle in subcycle_list])
            if in_cycles:
                # Some atoms overlap with the cycles that are formally part of a linker.
                cycles_with_overlap = []
                for cycle in subcycle_list:
                    if val in cycle:
                        if cycle not in cycles_with_overlap:
                            cycles_with_overlap.append(cycle)
                            temp_bonded_list = []
                            for cycle_val in cycle:
                                # Neighbors of the ring atom that are outside the ring
                                temp_bonded = molcif.getBondedAtoms(cycle_val)
                                temp_bonded = list(set(temp_bonded)-set(cycle))
                                temp_bonded_list.extend(temp_bonded)
                                # Pull the whole ring into the SBU
                                if cycle_val not in SBU_added:
                                    SBU_mol.addAtom(molcif.getAtom(cycle_val))
                                    SBU_dict[SBU_mol.natoms-1] = cycle_val
                                    SBU_added.append(cycle_val)
                            bonded_atoms_to_cycle.append(temp_bonded_list)
            # Check how many atoms are branched. A copy of SBU_added is passed because
            # branch() extends the list it is given in place.
            additional_branched_atoms, _ = branch(molcif, main_paths, SBU_added.copy(), [val])
            for branched_atom in additional_branched_atoms:
                if branched_atom not in SBU_added:
                    SBU_mol.addAtom(molcif.getAtom(branched_atom))
                    SBU_dict[SBU_mol.natoms-1] = branched_atom
                    SBU_added.append(branched_atom)
        # NOTE(review): assumed to run once per SBU, after all of its atoms were visited - confirm nesting.
        if len(bonded_atoms_to_cycle) > 1:
            new_bonded_atoms_to_cycle = []
            # Don't let things that are part of another cycle be included here
            for bonded_atoms_to_indiv_cycle in bonded_atoms_to_cycle:
                new_bonded_atoms_to_cycle.append(list(set(bonded_atoms_to_indiv_cycle)-set(flat_subcycle_list)))
            # Atoms bonded to two different rings are added to the SBU.
            combos = itertools.combinations(new_bonded_atoms_to_cycle, 2)
            for comboval in combos:
                if comboval[0] == comboval[1]:
                    continue
                intersection = list(set(comboval[0]) & set(comboval[1]))
                if len(intersection) > 0:
                    for comboval_intersection in intersection:
                        if comboval_intersection not in SBU_added:
                            SBU_mol.addAtom(molcif.getAtom(comboval_intersection))
                            SBU_dict[SBU_mol.natoms-1] = comboval_intersection
                            SBU_added.append(comboval_intersection)
            # Atoms bonded to every overlapping ring are added as well.
            # NOTE(review): assumed to sit inside the `len(...) > 1` guard; set.intersection()
            # fails when bonded_atoms_to_cycle is empty - confirm nesting.
            intersection_atoms = list(set.intersection(*map(set, bonded_atoms_to_cycle)))
            for intersection_atom in intersection_atoms:
                if intersection_atom not in SBU_added:
                    SBU_mol.addAtom(molcif.getAtom(intersection_atom))
                    SBU_dict[SBU_mol.natoms-1] = intersection_atom
                    SBU_added.append(intersection_atom)
        # Iterate over a copy because SBU_added grows inside the loop.
        for SBU_added_atoms in SBU_added.copy():
            bonded_atoms = molcif.getBondedAtoms(SBU_added_atoms)
            for bonded_atom in bonded_atoms:
                # Hydrogens attached to SBU atoms belong to the SBU.
                if molcif.getAtom(bonded_atom).symbol() == 'H':
                    if bonded_atom not in SBU_added:
                        SBU_mol.addAtom(molcif.getAtom(bonded_atom))
                        SBU_dict[SBU_mol.natoms-1] = bonded_atom
                        SBU_added.append(bonded_atom)
                # A main-path neighbor that is not part of the SBU marks a connection point:
                # remember it once as a candidate X atom placed at that neighbor's position.
                if (bonded_atom in main_paths) and (not ((bonded_atom in SBU_added) or (bonded_atom in X_checked_list))):
                    temp_atom = molcif.getAtom(bonded_atom)
                    temp_atom_coords = temp_atom.coords()
                    new_atom = atom3D(Sym='X', xyz=temp_atom_coords.copy())
                    X_atom3D_list.append((new_atom, bonded_atom, SBU_added_atoms))
                    X_checked_list.append(bonded_atom)

        # Append the X atoms at the end of the SBU, skipping any that became real SBU atoms meanwhile.
        final_X_indices = []
        for X_atom in X_atom3D_list:
            if X_atom[1] in SBU_added:
                continue
            else:
                SBU_added.append(X_atom[1])
                SBU_mol.addAtom(X_atom[0])
                SBU_dict[SBU_mol.natoms-1] = X_atom[1]
                tuple_list_sbu.append((SBU_mol.natoms-1, X_atom[2]))
                final_X_indices.append(SBU_mol.natoms-1)
                atoms_that_are_X.append(X_atom[1])
        # Inverse mapping: molcif index -> SBU_mol index
        inv_SBU_dict = {v: k for k, v in SBU_dict.items()}
        # Adjacency of the SBU (X atoms included), in SBU_mol ordering
        tempgraph = molcif.graph[np.ix_(SBU_added, SBU_added)]
        SBU_mol.graph = tempgraph
        SBU_mol_cart_coords = np.array([atom.coords() for atom in SBU_mol.atoms])
        SBU_mol_atom_labels = [atom.sym for atom in SBU_mol.atoms]
        SBU_mol_adj_mat = np.array(SBU_mol.graph)

        SBU_mol_fcoords_connected = XYZ_connected(cell, SBU_mol_cart_coords, SBU_mol_adj_mat)
        # First call with no file name: only the coordinates are needed here.
        coord_list, molgraph = returnXYZandGraph(None, SBU_mol_atom_labels, cell, SBU_mol_fcoords_connected, SBU_mol_adj_mat)
        for r in range(SBU_mol.natoms):
            SBU_mol.getAtom(r).setcoords(coord_list[r])
        # Adjust the distance between each X atom and the SBU atom it is bonded to.
        for val in tuple_list_sbu:
            SBU_mol.BCM(val[0], inv_SBU_dict[val[1]], 0.75)
        # Parse the coordinates back out of the XYZ-style text (first two and last lines dropped).
        new_coords = [[float(val2) for val2 in val.split()[1:]] for val in SBU_mol.coords().split('\n')[2:-1]]
        is_periodic = periodic_checker(tempgraph, new_coords)
        # if is_periodic is true, the SBU is periodic in nature --> 1D rod.

        ###### WRITE THE SBU MOL TO THE PLACE
        if sbupath and not os.path.exists(sbupath+"/"+str(name)+str(i)+'.xyz'):
            if is_periodic:
                xyzname = sbupath+"/"+str(name)+"_sbu1Drod_"+str(i)+".xyz"
            else:
                xyzname = sbupath+"/"+str(name)+"_sbu_"+str(i)+".xyz"

            # The header lists the SBU_mol indices of the X atoms.
            if len(final_X_indices) > 0:
                X_string = ' '.join([str(val) for val in final_X_indices])
            else:
                X_string = ' '
            coord_list, molgraph = returnXYZandGraph(xyzname, SBU_mol_atom_labels, cell, SBU_mol_fcoords_connected, SBU_mol_adj_mat)
            SBU_mol.writexyz(xyzname, withgraph=True, specialheader=' '+X_string)
        all_SBU_atoms.extend(SBU_added)
        # Decide on the periodicity flag itself rather than on the file name: the file name is
        # unbound (or left over from an earlier SBU) whenever nothing was written above.
        if is_periodic:
            # if SBU is a 1D rod, end it here
            return 4

    atoms_to_be_deleted_from_linker = list(set(all_SBU_atoms))
    for i, linker in enumerate(connections_list):
        linker_mol = mol3D()
        # This list keeps track of if an atom has been added to the linker (molcif indices)
        linker_added = []
        # Maps linker_mol indices (keys) to molcif indices (values)
        linker_dict = {}
        # Pairs of (linker_mol index of an X atom, molcif index of the atom it is bonded to)
        tuple_list_linker = []
        # Keep track of the molcif indices that should be the X atoms
        atoms_that_are_X_linker = []
        X_checked_list_linker = []
        # Candidate X atoms as (atom3D, molcif index, molcif index of bonded atom).
        # These atoms are added to the end of the XYZ.
        X_atom3D_list_linker = []
        for val in linker.copy():
            # loop over atoms in linker
            if val not in atoms_to_be_deleted_from_linker:
                # if current atom should not be deleted (not claimed by an SBU), add it.
                linker_mol.addAtom(molcif.getAtom(val))
                linker_added.append(val)
                # keep mapping between linker and molcif
                linker_dict[linker_mol.natoms-1] = val
                # get all of the atoms bonded to the original atom
                for bonded_atom in molcif.getBondedAtoms(val):
                    # add the atom if it's in the SBU set
                    if (bonded_atom in all_SBU_atoms) and (bonded_atom not in linker_added):
                        linker_mol.addAtom(molcif.getAtom(bonded_atom))
                        linker_added.append(bonded_atom)
                        linker_dict[linker_mol.natoms-1] = bonded_atom
                        # SBU atoms one bond further out become candidate X atoms.
                        subatoms = molcif.getBondedAtoms(bonded_atom)
                        for subatom in subatoms:
                            if (subatom in atoms_to_be_deleted_from_linker) and (not ((subatom in linker_added) or (subatom in X_checked_list_linker))):
                                temp_atom_linker = molcif.getAtom(subatom)
                                temp_atom_coords_linker = temp_atom_linker.coords()
                                new_atom_linker = atom3D(Sym='X', xyz=temp_atom_coords_linker.copy())
                                X_atom3D_list_linker.append((new_atom_linker, subatom, bonded_atom))
                                # Record the candidate in the linker's own list so that the
                                # membership test above sees it on later iterations.
                                X_checked_list_linker.append(subatom)
        # Append the X atoms at the end of the linker, skipping any that became real linker atoms.
        final_X_indices_linker = []
        for X_atom_linker in X_atom3D_list_linker:
            if X_atom_linker[1] in linker_added:
                continue
            else:
                linker_added.append(X_atom_linker[1])
                linker_mol.addAtom(X_atom_linker[0])
                linker_dict[linker_mol.natoms-1] = X_atom_linker[1]
                tuple_list_linker.append((linker_mol.natoms-1, X_atom_linker[2]))
                final_X_indices_linker.append(linker_mol.natoms-1)
                atoms_that_are_X_linker.append(X_atom_linker[1])

        tempgraph = molcif.graph[np.ix_(linker_added, linker_added)]
        linker_added_no_X = list(set(linker_added)-set(atoms_that_are_X_linker))
        no_X_graph_linker = molcif.graph[np.ix_(linker_added_no_X, linker_added_no_X)]
        linker_mol.graph = tempgraph

        # make sure that the single graph is not multiple
        n_components, labels_components = sparse.csgraph.connected_components(csgraph=no_X_graph_linker)
        linker_mol_cart_coords = np.array([atom.coords() for atom in linker_mol.atoms])
        linker_mol_atom_labels = [atom.sym for atom in linker_mol.atoms]
        linker_mol_adj_mat = np.array(linker_mol.graph)
        # Inverse mapping: molcif index -> linker_mol index
        inv_linker_dict = {v: k for k, v in linker_dict.items()}
        heavy_atom_count = linker_mol.count_atoms()
        # Skip linkers that are empty, fall apart without their X atoms, or are too small.
        if (linker_mol.natoms == 0) or (n_components > 1) or (heavy_atom_count < 3):
            continue
        linker_mol_fcoords_connected = XYZ_connected(cell, linker_mol_cart_coords, linker_mol_adj_mat)
        # First call with no file name: only the coordinates are needed here.
        coord_list, molgraph = returnXYZandGraph(None, linker_mol_atom_labels, cell, linker_mol_fcoords_connected, linker_mol_adj_mat)
        for r in range(linker_mol.natoms):
            linker_mol.getAtom(r).setcoords(coord_list[r])
        # Adjust the distance between each X atom and the atom it is bonded to.
        for val in tuple_list_linker:
            linker_mol.BCM(val[0], inv_linker_dict[val[1]], 0.75)
        ###### WRITE THE LINKER MOL TO THE PLACE
        if linkerpath and not os.path.exists(linkerpath+"/"+str(name)+str(i)+".xyz"):
            xyzname = linkerpath+"/"+str(name)+"_linker_"+str(i)+".xyz"

            # The header lists the linker_mol indices of the X atoms.
            if len(final_X_indices_linker) > 0:
                X_string = ' '.join([str(val) for val in final_X_indices_linker])
            else:
                X_string = ' '
            coord_list, molgraph = returnXYZandGraph(xyzname, linker_mol_atom_labels, cell, linker_mol_fcoords_connected, linker_mol_adj_mat)
            linker_mol.writexyz(xyzname, withgraph=True, specialheader=' '+X_string)
    return None
|
|
501
|
+
|
|
502
|
+
def prepare_initial_SBU(molcif, allatomtypes, metal_list, logpath, name):
    """
    Builds the two atom sets that seed the SBU/linker split, and logs each metal's coordination.

    Parameters
    ----------
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the cif file being analyzed.
    allatomtypes : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    metal_list : set of int
        The indices of metal atoms in the mol3D. The value passed in is not used;
        the metals are looked up again from molcif.
    logpath : str
        The path to which log files are written.
    name : str
        The name of the cif being analyzed.

    Returns
    -------
    removelist : set of int
        The indices of atoms to remove: the metals, the SBU atoms bonded only to
        metals and/or hydrogens, and the hydrogens bonded to any SBU atom.
    SBUlist : set of numpy.int64
        The indices of atoms in SBUs: the metals plus their 1st coordination shell.

    """
    SBUlist = set()
    metal_list = set(molcif.findMetal(transition_metals_only=False))
    # The metals themselves are part of the SBU ...
    for metal in molcif.findMetal(transition_metals_only=False):
        SBUlist.update({metal})
    # ... and so is everything bonded to a metal.
    for metal in molcif.findMetal(transition_metals_only=False):
        SBUlist.update(set(molcif.getBondedAtomsSmart(metal)))

    # Remove all metals as part of the SBU
    removelist = set()
    for metal in molcif.findMetal(transition_metals_only=False):
        removelist.update({metal})

    # Log the coordination number and the neighbor element types of every metal.
    for metal in removelist:
        first_shell = set(molcif.getBondedAtomsSmart(metal))
        first_shell_types = {str(allatomtypes[at]) for at in set(molcif.getBondedAtomsSmart(metal))}
        coordination_number = len(first_shell)
        type_summary = ",".join(first_shell_types)
        log_line = "atom %i with type of %s found to have %i coordinates with atom types of %s\n" % (metal, allatomtypes[metal], coordination_number, type_summary)
        write2file(logpath, "/%s.log" % name, log_line)

    # SBU atoms whose every neighbor is a metal or a hydrogen are removed too.
    for atom in SBUlist:
        neighbors = molcif.getBondedAtomsSmart(atom)
        if all((molcif.getAtom(val).ismetal() or molcif.getAtom(val).symbol().upper() == 'H') for val in neighbors):
            removelist.update({atom})

    # adding hydrogens connected to atoms which are only connected to metals.
    # In particular interstitial OH, like in UiO SBU.
    for atom in SBUlist:
        for neighbor in molcif.getBondedAtomsSmart(atom):
            if molcif.getAtom(neighbor).symbol().upper() == 'H':
                removelist.update({neighbor})
    return removelist, SBUlist
|
|
549
|
+
|
|
550
|
+
def identify_lc_atoms(molcif, removelist, metal_list):
    """
    Returns linker information including the indices of atoms that anchor onto SBUs.

    Parameters
    ----------
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the cif file being analyzed.
    removelist : set of int
        The indices of atoms to remove, i.e. the SBU atoms.
    metal_list : set of int
        The indices of metal atoms in the mol3D.

    Returns
    -------
    anc_atoms : set of numpy.int64
        The indices of the anchoring atoms of the linkers, i.e. linker atoms bonded to a metal.
    linkers : set of int
        The indices of linkers (all atoms that are not in removelist).
    linker_list : list of list of int
        Each inner list is its own separate linker. The ints are the atom indices of that linker. Length is # of linkers.
    linker_subgraphlist : list of numpy.matrix
        The atom connections in the linker subgraph. Length is # of linkers.
    allatoms : set of int
        The indices of all of the atoms in the MOF.
    connections_list : list of list of int
        Deep copy of linker_list.
    connections_subgraphlist : list of numpy.matrix
        Deep copy of linker_subgraphlist.

    """
    allatoms = set(range(0, molcif.graph.shape[0]))
    # Anything that is in the remove list (SBU) is removed, leaving linkers
    linkers = allatoms - removelist
    # Use the atoms for linkers and the remove list, along with the original full unit cell graph
    # to make the linker subgraphs. Copies are passed so the sets built here stay untouched.
    linker_list, linker_subgraphlist = get_closed_subgraph(linkers.copy(), removelist.copy(), molcif.graph)
    # Independent copies, so that later edits to one pair of lists do not affect the other.
    connections_list = copy.deepcopy(linker_list)
    connections_subgraphlist = copy.deepcopy(linker_subgraphlist)

    # find all anchoring atoms on linkers and ligands (lc identification)
    anc_atoms = set()
    for linker in linker_list:
        for atom_linker in linker:
            # Column indices of the nonzero entries in this atom's row, i.e. its bonded atoms.
            # Taking element [1] relies on the row slice being two-dimensional.
            bonded2atom = np.nonzero(molcif.graph[atom_linker, :])[1]
            # An atom bonded to at least one metal is an anchoring atom.
            if set(bonded2atom) & metal_list:
                anc_atoms.add(atom_linker)
    # return the anchoring atoms, the atoms we leave as linkers
    return anc_atoms, linkers, linker_list, linker_subgraphlist, allatoms, connections_list, connections_subgraphlist
|
|
602
|
+
|
|
603
|
+
def identify_short_linkers(molcif, initial_SBU_list, initial_SBU_subgraphlist, removelist, linkers, linker_list, linker_subgraphlist, adj_matrix, SBUlist, logpath, linkerpath, name, cell_v):
    """
    Sort the organic fragments into linkers and ligands and judge linker length.

    Every fragment in ``linker_list`` is inspected for atoms bonded to SBU atoms
    (anchors). Fragments with fewer than two anchors are ligands. Fragments with
    at least two anchors that touch two or more SBUs are linkers. Fragments with
    at least two anchors on a single SBU are ambiguous and are resolved with
    ``ligand_detect`` on both the fragment and the SBU; the decision is recorded
    in ``ambiguous.txt``. Ligands are moved out of the linker lists and into
    ``removelist`` / ``SBUlist``.

    Note that ``removelist``, ``SBUlist``, ``linker_list`` and
    ``linker_subgraphlist`` are modified in place and the same objects are returned.

    Parameters
    ----------
        molcif : molSimplify.Classes.mol3D.mol3D
            The cell of the cif file being analyzed.
        initial_SBU_list : list of list of numpy.int32
            Each inner list is its own separate SBU. The ints are the atom indices of that SBU. Length is # of SBUs.
        initial_SBU_subgraphlist : list of scipy.sparse.csr.csr_matrix
            The atom connections in the SBU subgraph. Length is # of SBUs. Not used by this function.
        removelist : set of int
            The indices of atoms to remove. Updated in place with ligand atoms.
        linkers : set of int
            The indices of linkers. Not used by this function.
        linker_list : list of list of int
            Each inner list is its own separate linker. The ints are the atom indices of that linker. Length is # of linkers.
        linker_subgraphlist : list of numpy.matrix
            The atom connections in the linker subgraph. Length is # of linkers.
        adj_matrix : scipy.sparse.csr.csr_matrix
            Adjacency matrix. 1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).
        SBUlist : set of numpy.int64
            The indices of atoms in SBUs. Updated in place with ligand atoms.
        logpath : str
            The path to which log files are written.
        linkerpath : str
            Path of the folder to make TXT files in.
        name : str
            The name of the cif being analyzed.
        cell_v : numpy.ndarray
            The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).

    Returns
    -------
        min_max_linker_length : int
            The smallest of the maximum lengths reported by ``linker_length`` over
            all fragments kept as linkers. Stays at its starting value of 100 if
            no fragment is kept as a linker.
        long_ligands : bool
            True when ``min_max_linker_length`` is greater than 2 and either equals
            the largest minimum length or is greater than 3; False otherwise.
        SBUlist : set of numpy.int64
            The (updated) indices of atoms in SBUs.
        removelist : set of int
            The (updated) indices of atoms to remove.
        linker_list : list of list of int
            The fragments kept as linkers.
        linker_subgraphlist : list of numpy.matrix
            The subgraphs of the fragments kept as linkers.

    """
    log_file = "/%s.log" % name
    # Running extremes over the fragments kept as linkers: the largest of the
    # minimum lengths and the smallest of the maximum lengths.
    max_min_linker_length = 0
    min_max_linker_length = 100

    def _reassign_to_sbu(position, fragment_atoms):
        # A ligand is treated as part of the SBU: its atoms go into both atom
        # sets and the fragment is dropped from the two linker lists.
        removelist.update(set(fragment_atoms))
        SBUlist.update(set(fragment_atoms))
        linker_list.pop(position)
        linker_subgraphlist.pop(position)

    # Walk the fragments from last to first so that popping an entry never
    # shifts the position of a fragment that still has to be visited.
    for ii in range(len(linker_list) - 1, -1, -1):
        atoms_list = linker_list[ii]
        linkeranchors_list = set()   # positions (within the fragment) of anchor atoms
        linkeranchors_atoms = set()  # atom indices of the anchor atoms
        sbuanchors_list = set()      # SBU atoms bonded to the fragment
        sbu_connect_list = set()     # indices of the SBUs the fragment touches

        # Here, we are trying to identify what is actually a linker and what is a ligand.
        # To do this, we check if something is connected to more than one SBU. Set to
        # handle cases where primitive cell is small, ambiguous cases are recorded.
        for iii, atoms in enumerate(atoms_list):
            connected_atoms = np.nonzero(adj_matrix[atoms, :])[1]
            for kk, sbu_atoms_list in enumerate(initial_SBU_list):
                for sbu_atoms in sbu_atoms_list:
                    if sbu_atoms not in connected_atoms:
                        continue
                    linkeranchors_list.add(iii)
                    linkeranchors_atoms.add(atoms)
                    sbuanchors_list.add(sbu_atoms)
                    sbu_connect_list.add(kk)

        min_length, max_length = linker_length(linker_subgraphlist[ii], linkeranchors_list)
        anchor_report = ", Anchors list: %s, SBU connectlist: %s" % (sbuanchors_list, sbu_connect_list)

        if len(linkeranchors_list) < 2:
            # Fewer than two anchors: definite ligand.
            write2file(logpath, log_file, "found ligand\n")
            _reassign_to_sbu(ii, atoms_list)
            write2file(linkerpath, "/ligand.txt", str(name) + anchor_report + '\n')
            continue

        if len(sbu_connect_list) >= 2:
            # Something that connects two SBUs is certain to be a linker.
            max_min_linker_length = max(min_length, max_min_linker_length)
            min_max_linker_length = min(max_length, min_max_linker_length)
            continue

        # Ambiguous case: several anchors, all on one SBU.
        # check number of times we cross PBC :
        # TODO: we still can fail in multidentate ligands!
        linker_cart_coords = np.array([molcif.getAtom(val).coords() for val in atoms_list])
        linker_adjmat = np.array(linker_subgraphlist[ii])
        pr_image_organic = ligand_detect(cell_v, linker_cart_coords, linker_adjmat, linkeranchors_list)

        only_sbu = list(sbu_connect_list)[0]
        sbu_temp = linkeranchors_atoms.copy()
        sbu_temp.update(set(initial_SBU_list[only_sbu]))
        sbu_temp = list(sbu_temp)
        sbu_cart_coords = np.array([molcif.getAtom(val).coords() for val in sbu_temp])
        sbu_adjmat = slice_mat(adj_matrix.todense(), sbu_temp)
        pr_image_sbu = ligand_detect(cell_v, sbu_cart_coords, sbu_adjmat, set(range(len(linkeranchors_list))))

        single_sbu_image = len(np.unique(pr_image_sbu, axis=0)) == 1
        single_organic_image = len(np.unique(pr_image_organic, axis=0)) == 1
        if single_sbu_image and single_organic_image:
            # all anchoring atoms are in the same unitcell -> ligand
            _reassign_to_sbu(ii, atoms_list)
            write2file(linkerpath, "/ambiguous.txt", str(name) + anchor_report + ' set to be ligand\n')
            write2file(linkerpath, "/ligand.txt", str(name) + str(ii) + anchor_report + '\n')
        else:
            # linker
            max_min_linker_length = max(min_length, max_min_linker_length)
            min_max_linker_length = min(max_length, min_max_linker_length)
            write2file(linkerpath, "/ambiguous.txt", str(name) + anchor_report + ' set to be linker\n')

    summary = "%s, (min_max_linker_length,max_min_linker_length): %s , %s\n" % (
        name, min_max_linker_length, max_min_linker_length)
    write2file(logpath, log_file, summary)
    if min_max_linker_length < 3:
        write2file(linkerpath, "/short_ligands.txt", summary)

    # for N-C-C-N ligand ligand: a length of exactly 3 only counts as long when
    # the two running extremes agree.
    long_ligands = bool(
        min_max_linker_length > 2
        and (max_min_linker_length == min_max_linker_length or min_max_linker_length > 3)
    )
    return min_max_linker_length, long_ligands, SBUlist, removelist, linker_list, linker_subgraphlist
|
|
734
|
+
|
|
735
|
+
def make_MOF_fragments(data, path=False, xyzpath=False):
    """
    Breaks a MOF into fragments for use with pormake (in silico MOF construction).
    cif for MOF should have P1 symmetry.

    Output codes are as follows:
    2: There exist short (i.e. 2 atom) and longer linkers. We could not split the MOF apart consistently.
    3: The MOF consists only of very short 2 atom linkers.
    4: The MOF contains a 1D rod, which cannot be easily reassembled into a new MOF.
    None: The MOF was split correctly

    Codes 2 and 3 are returned directly by this function; any other code is
    whatever ``breakdown_MOF`` returns.

    When the structure cannot be processed at all (more than 2000 atoms,
    ``compute_adj_matrix`` raising ``NotImplementedError``, no metal found, or a
    connected component without a metal), a line is appended to
    ``FailedStructures.log`` in `path` and the tuple ``(None, None)`` is
    returned instead of a code.

    Parameters
    ----------
        data : str
            The path to the cif file for which SBUs and linkers will be identified.
        path : str
            The parent path to which output will be written. Will contain a folder for SBUs and another for linkers.
            The folders ``linkers``, ``sbus``, ``xyz`` and ``logs`` are created inside it if missing.
        xyzpath : str
            The path to which an xyz file and a net (connectivity) file of the MOF will be written.

    Returns
    -------
        return_code : int or None
            See function description for possible return codes and their meanings.

    Raises
    ------
        ValueError
            If `path` is not given (or is otherwise falsy).

    """
    if not path:
        print('Need a directory to place all of the linker and SBU objects. Exiting now.')
        raise ValueError('Base path must be specified in order to write descriptors.')
    if path.endswith('/'):
        path = path[:-1]
    for subfolder in ('/linkers', '/sbus', '/xyz', '/logs'):
        if not os.path.isdir(path + subfolder):
            os.mkdir(path + subfolder)
    linkerpath = path + '/linkers'
    sbupath = path + '/sbus'
    logpath = path + "/logs"

    # Input cif file and get the cell parameters and adjacency matrix. If overlap, do not featurize.
    # Simultaneously prepare mol3D class for MOF for future RAC featurization (molcif)
    cpar, allatomtypes, fcoords = readcif(data)
    cell_v = mkcell(cpar)
    cart_coords = fractional2cart(fcoords, cell_v)
    # Remove only a trailing ".cif". str.strip(".cif") would instead eat any of
    # the characters '.', 'c', 'i', 'f' from BOTH ends of the file name
    # (e.g. "fic_1.cif" -> "_1"), corrupting the names of all output files.
    name = os.path.basename(data)
    if name.endswith(".cif"):
        name = name[:-len(".cif")]
    if len(cart_coords) > 2000:
        print("cif file is too large, skipping it for now...")
        tmpstr = "Failed to featurize %s: large primitive cell\n" % (name)
        write2file(path, "/FailedStructures.log", tmpstr)
        return None, None
    distance_mat = compute_distance_matrix3(cell_v, cart_coords)
    try:
        adj_matrix, _ = compute_adj_matrix(distance_mat, allatomtypes)
    except NotImplementedError:
        tmpstr = "Failed to featurize %s: atomic overlap\n" % (name)
        write2file(path, "/FailedStructures.log", tmpstr)
        return None, None

    writeXYZandGraph(xyzpath, allatomtypes, cell_v, fcoords, adj_matrix.todense())
    molcif, _, _, _, _ = import_from_cif(data, True)
    molcif.graph = adj_matrix.todense()

    # check number of connected components.
    # if more than 1: it checks if the structure is interpenetrated. Fails if no metal in one of
    # the connected components (identified by the graph). This includes floating solvent molecules.
    n_components, labels_components = sparse.csgraph.connected_components(csgraph=adj_matrix, directed=False, return_labels=True)
    metal_list = set(molcif.findMetal(transition_metals_only=False))
    if not metal_list:
        tmpstr = "Failed to featurize %s: no metal found\n" % (name)
        write2file(path, "/FailedStructures.log", tmpstr)
        return None, None

    for comp in range(n_components):
        inds_in_comp = {idx for idx, label in enumerate(labels_components) if label == comp}
        if not inds_in_comp & metal_list:
            tmpstr = "Failed to featurize %s: solvent molecules\n" % (name)
            write2file(path, "/FailedStructures.log", tmpstr)
            return None, None

    if n_components > 1:
        print("structure is interpenetrated")
        tmpstr = "%s found to be an interpenetrated structure\n" % (name)
        write2file(logpath, "/%s.log" % name, tmpstr)

    # step 1: metallic part
    #     removelist = metals (1) + atoms only connected to metals (2) + H connected to (1+2)
    #     SBUlist = removelist + 1st coordination shell of the metals
    # Logs the atom types of the connecting atoms to the metal in logpath.
    removelist, SBUlist = prepare_initial_SBU(molcif, allatomtypes, metal_list, logpath, name)

    # At this point:
    # The remove list only removes metals and things ONLY connected to metals or hydrogens.
    # Thus the coordinating atoms are double counted in the linker.
    #
    # step 2: organic part
    #     linkers are all atoms - the removelist (assuming no bond between organic linkers)
    anc_atoms, linkers, linker_list, linker_subgraphlist, allatoms, connections_list, connections_subgraphlist = identify_lc_atoms(molcif, removelist, metal_list)

    # step 3: linker or ligand ?
    # checking to find the anchors and #SBUs that are connected to an organic part
    #     anchor <= 1 -> ligand
    #     anchor > 1 and #SBU > 1 -> linker
    #     else: walk over the linker graph and count #crossing PBC
    #         if #crossing is odd -> linker
    #         else -> ligand
    initial_SBU_list, initial_SBU_subgraphlist = get_closed_subgraph(removelist.copy(), linkers.copy(), adj_matrix)
    min_max_linker_length, long_ligands, SBUlist, removelist, linker_list, linker_subgraphlist = identify_short_linkers(molcif, initial_SBU_list, initial_SBU_subgraphlist, removelist, linkers, linker_list, linker_subgraphlist, adj_matrix, SBUlist, logpath, linkerpath, name, cell_v)

    # In the case of long linkers, add second coordination shell without further checks.
    # In the case of short linkers, start from metal and grow outwards using the
    # include_extra_shells function
    linker_length_list = [len(linker_val) for linker_val in linker_list]
    if len(set(linker_length_list)) != 1:
        write2file(linkerpath, "/uneven.txt", str(name) + '\n')
    if min_max_linker_length > 2:  # treating the 2 atom ligands differently! Need caution
        if long_ligands:
            tmpstr = "\nStructure has LONG LINKER\n\n"
            write2file(logpath, "/%s.log" % name, tmpstr)
            # First account for all of the carboxylic acid type linkers, add in the carbons.
            # Iterate over a snapshot because SBUlist grows inside the loop.
            for zero_first_shell in SBUlist.copy():
                SBUlist.update(molcif.getBondedAtomsSmart(zero_first_shell))
        truncated_linkers = allatoms - SBUlist
        SBU_list, SBU_subgraphlist = get_closed_subgraph(SBUlist, truncated_linkers, adj_matrix)
        if not long_ligands:
            tmpstr = "\nStructure has SHORT LINKER\n\n"
            write2file(logpath, "/%s.log" % name, tmpstr)
            # NOTE(review): the result is discarded by the return below; the call is
            # kept so any side effects or errors of include_extra_shells are unchanged.
            SBU_list, SBU_subgraphlist = include_extra_shells(SBU_list, SBU_subgraphlist, molcif, adj_matrix)
            print('=== SKIPPING DUE TO LINKER BEING TOO SHORT!')
            return 2
    else:
        tmpstr = "Structure %s has extremely short linkers, check the outputs\n" % name
        write2file(linkerpath, "/short.txt", tmpstr)
        tmpstr = "Structure has extremely short linkers\n"
        write2file(logpath, "/%s.log" % name, tmpstr)
        truncated_linkers = allatoms - removelist
        # NOTE(review): as above, these results are discarded by the return below.
        SBU_list, SBU_subgraphlist = get_closed_subgraph(removelist, truncated_linkers, adj_matrix)
        SBU_list, SBU_subgraphlist = include_extra_shells(SBU_list, SBU_subgraphlist, molcif, adj_matrix)
        SBU_list, SBU_subgraphlist = include_extra_shells(SBU_list, SBU_subgraphlist, molcif, adj_matrix)
        print('=== SKIPPING DUE TO LINKER BEING TOO SHORT!')
        return 3

    return_code = breakdown_MOF(SBU_list, SBU_subgraphlist, molcif, name, cell_v, anc_atoms, sbupath, connections_list, connections_subgraphlist, linkerpath)
    return return_code
|