molSimplify 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +224 -0
- molSimplify/Classes/__init__.py +6 -0
- molSimplify/Classes/atom3D.py +235 -0
- molSimplify/Classes/dft_obs.py +130 -0
- molSimplify/Classes/globalvars.py +827 -0
- molSimplify/Classes/helpers.py +161 -0
- molSimplify/Classes/ligand.py +2330 -0
- molSimplify/Classes/mGUI.py +2493 -0
- molSimplify/Classes/mWidgets.py +438 -0
- molSimplify/Classes/miniGUI.py +41 -0
- molSimplify/Classes/mol2D.py +260 -0
- molSimplify/Classes/mol3D.py +5846 -0
- molSimplify/Classes/monomer3D.py +253 -0
- molSimplify/Classes/partialcharges.py +226 -0
- molSimplify/Classes/protein3D.py +1178 -0
- molSimplify/Classes/rundiag.py +151 -0
- molSimplify/Data/ML.dat +212 -0
- molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
- molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
- molSimplify/Data/MLS_angle_for_click.dat +8 -0
- molSimplify/Data/MLS_angle_for_inter.dat +23 -0
- molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
- molSimplify/Data/MLS_angle_for_intra.dat +10 -0
- molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
- molSimplify/Data/MLS_angle_for_oa.dat +18 -0
- molSimplify/Data/ML_FSR_for_inter.dat +112 -0
- molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
- molSimplify/Data/ML_bond_for_cat.dat +8 -0
- molSimplify/Data/ML_bond_for_click.dat +8 -0
- molSimplify/Data/ML_bond_for_inter.dat +48 -0
- molSimplify/Data/ML_bond_for_inter2.dat +48 -0
- molSimplify/Data/ML_bond_for_intra.dat +10 -0
- molSimplify/Data/ML_bond_for_intra2.dat +6 -0
- molSimplify/Data/ML_bond_for_oa.dat +18 -0
- molSimplify/Data/bp1.dat +21 -0
- molSimplify/Data/li.dat +3 -0
- molSimplify/Data/no.dat +2 -0
- molSimplify/Data/oct.dat +7 -0
- molSimplify/Data/pbp.dat +8 -0
- molSimplify/Data/spy.dat +6 -0
- molSimplify/Data/sqap.dat +9 -0
- molSimplify/Data/sqp.dat +5 -0
- molSimplify/Data/tbp.dat +6 -0
- molSimplify/Data/tdhd.dat +9 -0
- molSimplify/Data/thd.dat +5 -0
- molSimplify/Data/tpl.dat +4 -0
- molSimplify/Data/tpr.dat +7 -0
- molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
- molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
- molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
- molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
- molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
- molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
- molSimplify/Informatics/MOF/__init__.py +0 -0
- molSimplify/Informatics/MOF/atomic.py +267 -0
- molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
- molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
- molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
- molSimplify/Informatics/Mol2Parser.py +46 -0
- molSimplify/Informatics/RACassemble.py +408 -0
- molSimplify/Informatics/__init__.py +0 -0
- molSimplify/Informatics/active_learning/__init__.py +0 -0
- molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
- molSimplify/Informatics/autocorrelation.py +1930 -0
- molSimplify/Informatics/clean_autocorrelation.py +778 -0
- molSimplify/Informatics/coulomb_analyze.py +67 -0
- molSimplify/Informatics/decoration_manager.py +193 -0
- molSimplify/Informatics/geo_analyze.py +88 -0
- molSimplify/Informatics/geometrics.py +56 -0
- molSimplify/Informatics/graph_analyze.py +163 -0
- molSimplify/Informatics/graph_racs.py +288 -0
- molSimplify/Informatics/jupyter_vis.py +172 -0
- molSimplify/Informatics/lacRACAssemble.py +2192 -0
- molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
- molSimplify/Informatics/misc_descriptors.py +198 -0
- molSimplify/Informatics/organic_fingerprints.py +61 -0
- molSimplify/Informatics/partialcharges.py +345 -0
- molSimplify/Informatics/protein/activesite.py +53 -0
- molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
- molSimplify/Informatics/rac155_geo.py +48 -0
- molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
- molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
- molSimplify/Ligands/12crown4.mol +62 -0
- molSimplify/Ligands/Antipyrine.mol +58 -0
- molSimplify/Ligands/BPAbipy.mol +106 -0
- molSimplify/Ligands/Hpyrrole.mol +26 -0
- molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
- molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
- molSimplify/Ligands/NMe2_-1.xyz +11 -0
- molSimplify/Ligands/PCy3.mol +111 -0
- molSimplify/Ligands/PMe3.xyz +15 -0
- molSimplify/Ligands/PPh3.mol +76 -0
- molSimplify/Ligands/Propyphenazone.mol +77 -0
- molSimplify/Ligands/acac.mol +33 -0
- molSimplify/Ligands/acacen.mol +76 -0
- molSimplify/Ligands/acetate.smi +1 -0
- molSimplify/Ligands/acetate.xyz +9 -0
- molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
- molSimplify/Ligands/acetonitrile.mol +17 -0
- molSimplify/Ligands/alanine.mol +30 -0
- molSimplify/Ligands/alphabetizer.py +21 -0
- molSimplify/Ligands/amine.mol +11 -0
- molSimplify/Ligands/ammonia.mol +12 -0
- molSimplify/Ligands/arginine.mol +58 -0
- molSimplify/Ligands/asparagine.mol +38 -0
- molSimplify/Ligands/aspartic_acid.mol +35 -0
- molSimplify/Ligands/azide.mol +11 -0
- molSimplify/Ligands/benzene.mol +28 -0
- molSimplify/Ligands/benzene_pi.mol +30 -0
- molSimplify/Ligands/benzenedithiol.mol +30 -0
- molSimplify/Ligands/benzenethiol.mol +30 -0
- molSimplify/Ligands/benzylisocy.mol +38 -0
- molSimplify/Ligands/bidiazine.mol +42 -0
- molSimplify/Ligands/bidiazole.mol +38 -0
- molSimplify/Ligands/bifuran.mol +38 -0
- molSimplify/Ligands/bihydrodiazine.mol +58 -0
- molSimplify/Ligands/bihydrodiazole.mol +46 -0
- molSimplify/Ligands/bihydrooxazine.mol +54 -0
- molSimplify/Ligands/bihydrooxazole.mol +42 -0
- molSimplify/Ligands/bihydrothiazine.mol +54 -0
- molSimplify/Ligands/bihydrothiazole.mol +42 -0
- molSimplify/Ligands/biimidazole.mol +38 -0
- molSimplify/Ligands/bioxazole.mol +34 -0
- molSimplify/Ligands/bipy.mol +46 -0
- molSimplify/Ligands/bipyrazine.xyz +20 -0
- molSimplify/Ligands/bipyrimidine.mol +42 -0
- molSimplify/Ligands/bipyrrole.mol +42 -0
- molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
- molSimplify/Ligands/bithiazole.mol +34 -0
- molSimplify/Ligands/bromide.mol +7 -0
- molSimplify/Ligands/bromide.smi +1 -0
- molSimplify/Ligands/c2.mol +9 -0
- molSimplify/Ligands/caprolactone.mol +41 -0
- molSimplify/Ligands/carbonyl.mol +8 -0
- molSimplify/Ligands/carboxyl.mol +13 -0
- molSimplify/Ligands/cat.mol +30 -0
- molSimplify/Ligands/chloride.mol +7 -0
- molSimplify/Ligands/chloride.smi +1 -0
- molSimplify/Ligands/chloropyridine.mol +27 -0
- molSimplify/Ligands/co2.mol +10 -0
- molSimplify/Ligands/corrolazine.mol +72 -0
- molSimplify/Ligands/cs.mol +8 -0
- molSimplify/Ligands/cyanate.xyz +5 -0
- molSimplify/Ligands/cyanide.mol +9 -0
- molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
- molSimplify/Ligands/cyanopyridine.mol +29 -0
- molSimplify/Ligands/cyclam.mol +81 -0
- molSimplify/Ligands/cyclen.mol +69 -0
- molSimplify/Ligands/cyclopentadienyl.mol +26 -0
- molSimplify/Ligands/cysteine.mol +32 -0
- molSimplify/Ligands/diaminomethyl.mol +19 -0
- molSimplify/Ligands/diazine.mol +25 -0
- molSimplify/Ligands/diazole.mol +23 -0
- molSimplify/Ligands/dicyanamide.mol +15 -0
- molSimplify/Ligands/dihydrofuran.mol +27 -0
- molSimplify/Ligands/dmap.xyz +35 -0
- molSimplify/Ligands/dmf.mol +28 -0
- molSimplify/Ligands/dmi.mol +41 -0
- molSimplify/Ligands/dmpe.mol +52 -0
- molSimplify/Ligands/dpmu.mol +47 -0
- molSimplify/Ligands/dppe.mol +112 -0
- molSimplify/Ligands/edta.mol +69 -0
- molSimplify/Ligands/en.mol +28 -0
- molSimplify/Ligands/ethanethiol.mol +21 -0
- molSimplify/Ligands/ethanolamine.mol +26 -0
- molSimplify/Ligands/ethbipy.mol +70 -0
- molSimplify/Ligands/ethyl.mol +19 -0
- molSimplify/Ligands/ethylamine.mol +24 -0
- molSimplify/Ligands/ethylene.mol +16 -0
- molSimplify/Ligands/ethylesteracac.mol +57 -0
- molSimplify/Ligands/fluoride.mol +7 -0
- molSimplify/Ligands/fluoride.smi +1 -0
- molSimplify/Ligands/formaldehyde.mol +12 -0
- molSimplify/Ligands/formamidate.xyz +8 -0
- molSimplify/Ligands/formate.xyz +6 -0
- molSimplify/Ligands/furan.mol +23 -0
- molSimplify/Ligands/glutamic_acid.mol +42 -0
- molSimplify/Ligands/glutamine.mol +44 -0
- molSimplify/Ligands/glycinate.mol +23 -0
- molSimplify/Ligands/glycine.mol +24 -0
- molSimplify/Ligands/h2s.mol +10 -0
- molSimplify/Ligands/helium.mol +6 -0
- molSimplify/Ligands/histidine.mol +45 -0
- molSimplify/Ligands/hmpa.mol +62 -0
- molSimplify/Ligands/hs-.mol +9 -0
- molSimplify/Ligands/hydride.mol +7 -0
- molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
- molSimplify/Ligands/hydrocyanide.mol +10 -0
- molSimplify/Ligands/hydrodiazine.mol +33 -0
- molSimplify/Ligands/hydrodiazole.mol +27 -0
- molSimplify/Ligands/hydrogensulfide.mol +10 -0
- molSimplify/Ligands/hydroisocyanide.mol +11 -0
- molSimplify/Ligands/hydrooxazine.mol +31 -0
- molSimplify/Ligands/hydrooxazole.mol +25 -0
- molSimplify/Ligands/hydrothiazine.mol +31 -0
- molSimplify/Ligands/hydrothiazole.mol +25 -0
- molSimplify/Ligands/hydroxyl.mol +9 -0
- molSimplify/Ligands/imidazole.mol +23 -0
- molSimplify/Ligands/imidazolidinone.mol +29 -0
- molSimplify/Ligands/imine.mol +13 -0
- molSimplify/Ligands/iminodiacetic.mol +33 -0
- molSimplify/Ligands/iodide.mol +7 -0
- molSimplify/Ligands/iodobenzene.xyz +14 -0
- molSimplify/Ligands/isoleucine.mol +48 -0
- molSimplify/Ligands/isothiocyanate.mol +11 -0
- molSimplify/Ligands/leucine.mol +48 -0
- molSimplify/Ligands/ligands.dict +257 -0
- molSimplify/Ligands/lysine.mol +54 -0
- molSimplify/Ligands/mebenzenedithiol.mol +36 -0
- molSimplify/Ligands/mebim_py.xyz +29 -0
- molSimplify/Ligands/mebim_pz.xyz +28 -0
- molSimplify/Ligands/mebipy.mol +58 -0
- molSimplify/Ligands/mecat.mol +36 -0
- molSimplify/Ligands/methanal.mol +11 -0
- molSimplify/Ligands/methanethiol.mol +15 -0
- molSimplify/Ligands/methanol.mol +16 -0
- molSimplify/Ligands/methionine.mol +44 -0
- molSimplify/Ligands/methyl.mol +13 -0
- molSimplify/Ligands/methylacetylide.xyz +8 -0
- molSimplify/Ligands/methylamine.mol +19 -0
- molSimplify/Ligands/methylazide.xyz +9 -0
- molSimplify/Ligands/methylisocy.mol +17 -0
- molSimplify/Ligands/methylpyridine.mol +33 -0
- molSimplify/Ligands/n2.mol +8 -0
- molSimplify/Ligands/n4py.xyz +51 -0
- molSimplify/Ligands/nch.mol +10 -0
- molSimplify/Ligands/nco-.mol +11 -0
- molSimplify/Ligands/nethanolamine.mol +26 -0
- molSimplify/Ligands/nitrate.mol +14 -0
- molSimplify/Ligands/nitrite.mol +11 -0
- molSimplify/Ligands/nitro.mol +11 -0
- molSimplify/Ligands/nitrobipy.mol +54 -0
- molSimplify/Ligands/nitroso.mol +8 -0
- molSimplify/Ligands/nme3.mol +30 -0
- molSimplify/Ligands/no-.mol +10 -0
- molSimplify/Ligands/no2-.mol +11 -0
- molSimplify/Ligands/noxygen.mol +8 -0
- molSimplify/Ligands/ns-.mol +10 -0
- molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
- molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
- molSimplify/Ligands/o2-.mol +9 -0
- molSimplify/Ligands/o2.xyz +4 -0
- molSimplify/Ligands/och2.mol +12 -0
- molSimplify/Ligands/oethanolamine.mol +26 -0
- molSimplify/Ligands/ome2.mol +22 -0
- molSimplify/Ligands/ooh.xyz +5 -0
- molSimplify/Ligands/oxalate.mol +17 -0
- molSimplify/Ligands/oxalate.smi +1 -0
- molSimplify/Ligands/oxygen.mol +7 -0
- molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
- molSimplify/Ligands/ph2-.mol +11 -0
- molSimplify/Ligands/ph3.mol +12 -0
- molSimplify/Ligands/phen.mol +51 -0
- molSimplify/Ligands/phenacac.mol +63 -0
- molSimplify/Ligands/phenalalanine.mol +51 -0
- molSimplify/Ligands/phendione.mol +51 -0
- molSimplify/Ligands/phenphen.mol +75 -0
- molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
- molSimplify/Ligands/phenylcyc.mol +99 -0
- molSimplify/Ligands/phenylenediamine.mol +37 -0
- molSimplify/Ligands/phenylisocy.mol +32 -0
- molSimplify/Ligands/phosacidbipy.mol +66 -0
- molSimplify/Ligands/phosphine.mol +13 -0
- molSimplify/Ligands/phosphorine.mol +27 -0
- molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
- molSimplify/Ligands/phthalocyanine.mol +126 -0
- molSimplify/Ligands/pme3o.mol +32 -0
- molSimplify/Ligands/porphyrin.mol +82 -0
- molSimplify/Ligands/pph3o.mol +77 -0
- molSimplify/Ligands/proline.mol +39 -0
- molSimplify/Ligands/propdiol.mol +21 -0
- molSimplify/Ligands/propylene.mol +23 -0
- molSimplify/Ligands/pyridine.mol +27 -0
- molSimplify/Ligands/pyrimidone.mol +27 -0
- molSimplify/Ligands/pyrrole.mol +24 -0
- molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
- molSimplify/Ligands/s2-.mol +9 -0
- molSimplify/Ligands/salen.mol +75 -0
- molSimplify/Ligands/salphen.mol +84 -0
- molSimplify/Ligands/serine.mol +32 -0
- molSimplify/Ligands/simple_ligands.dict +14 -0
- molSimplify/Ligands/sulfacidbipy.mol +63 -0
- molSimplify/Ligands/tbucat.mol +54 -0
- molSimplify/Ligands/tbuphisocy.mol +56 -0
- molSimplify/Ligands/tbutylcyclen.mol +166 -0
- molSimplify/Ligands/tbutylisocy.mol +35 -0
- molSimplify/Ligands/tbutylthiol.mol +33 -0
- molSimplify/Ligands/tcnoet.mol +43 -0
- molSimplify/Ligands/tcnoetOH.mol +45 -0
- molSimplify/Ligands/terpy.mol +65 -0
- molSimplify/Ligands/tetrahydrofuran.mol +31 -0
- molSimplify/Ligands/thiane.mol +37 -0
- molSimplify/Ligands/thiazole.mol +21 -0
- molSimplify/Ligands/thiocyanate.mol +11 -0
- molSimplify/Ligands/thiol.mol +9 -0
- molSimplify/Ligands/thiophene.mol +23 -0
- molSimplify/Ligands/thiopyridine.mol +29 -0
- molSimplify/Ligands/threonine.mol +38 -0
- molSimplify/Ligands/tpp.mol +165 -0
- molSimplify/Ligands/tricyanomethyl.mol +19 -0
- molSimplify/Ligands/trifluoromethyl.mol +13 -0
- molSimplify/Ligands/tryptophan.mol +60 -0
- molSimplify/Ligands/tyrosine.mol +53 -0
- molSimplify/Ligands/uthiol.mol +11 -0
- molSimplify/Ligands/uthiolme2.mol +23 -0
- molSimplify/Ligands/valine.mol +42 -0
- molSimplify/Ligands/water.mol +10 -0
- molSimplify/Ligands/x.mol +6 -0
- molSimplify/Scripts/__init__.py +0 -0
- molSimplify/Scripts/addtodb.py +308 -0
- molSimplify/Scripts/cellbuilder.py +1592 -0
- molSimplify/Scripts/cellbuilder_tools.py +701 -0
- molSimplify/Scripts/chains.py +342 -0
- molSimplify/Scripts/convert_2to3.py +23 -0
- molSimplify/Scripts/dbinteract.py +631 -0
- molSimplify/Scripts/distgeom.py +617 -0
- molSimplify/Scripts/findcorrelations.py +287 -0
- molSimplify/Scripts/generator.py +267 -0
- molSimplify/Scripts/geometry.py +1224 -0
- molSimplify/Scripts/grabguivars.py +845 -0
- molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
- molSimplify/Scripts/inparse.py +1673 -0
- molSimplify/Scripts/io.py +1149 -0
- molSimplify/Scripts/isomers.py +415 -0
- molSimplify/Scripts/jobgen.py +247 -0
- molSimplify/Scripts/krr_prep.py +1262 -0
- molSimplify/Scripts/molSimplify_io.py +18 -0
- molSimplify/Scripts/molden2psi4wfn.py +166 -0
- molSimplify/Scripts/namegen.py +32 -0
- molSimplify/Scripts/nn_prep.py +561 -0
- molSimplify/Scripts/oct_check_mols.py +782 -0
- molSimplify/Scripts/periodic_QE.py +97 -0
- molSimplify/Scripts/postmold.py +304 -0
- molSimplify/Scripts/postmwfn.py +709 -0
- molSimplify/Scripts/postparse.py +488 -0
- molSimplify/Scripts/postproc.py +139 -0
- molSimplify/Scripts/qcgen.py +1450 -0
- molSimplify/Scripts/rmsd.py +489 -0
- molSimplify/Scripts/rungen.py +670 -0
- molSimplify/Scripts/structgen.py +3040 -0
- molSimplify/Scripts/tf_nn_prep.py +894 -0
- molSimplify/Scripts/tsgen.py +295 -0
- molSimplify/Scripts/uq_calibration.py +69 -0
- molSimplify/__init__.py +0 -0
- molSimplify/__main__.py +197 -0
- molSimplify/icons/chemdb.png +0 -0
- molSimplify/icons/hjklogo.png +0 -0
- molSimplify/icons/icon.png +0 -0
- molSimplify/icons/logo.png +0 -0
- molSimplify/icons/logo_old.png +0 -0
- molSimplify/icons/petachem.png +0 -0
- molSimplify/icons/petachem2.png +0 -0
- molSimplify/icons/petachem_full.png +0 -0
- molSimplify/icons/pythonlogo.png +0 -0
- molSimplify/icons/sge copy.png +0 -0
- molSimplify/icons/sge.png +0 -0
- molSimplify/icons/slurm.png +0 -0
- molSimplify/icons/wft1.png +0 -0
- molSimplify/icons/wft2.png +0 -0
- molSimplify/icons/wft3.png +0 -0
- molSimplify/ml/__init__.py +0 -0
- molSimplify/ml/kernels.py +36 -0
- molSimplify/ml/layers.py +29 -0
- molSimplify/molscontrol/__init__.py +14 -0
- molSimplify/molscontrol/_version.py +521 -0
- molSimplify/molscontrol/clf_tools.py +144 -0
- molSimplify/molscontrol/data/README.md +21 -0
- molSimplify/molscontrol/data/look_and_say.dat +15 -0
- molSimplify/molscontrol/dynamic_classifier.py +514 -0
- molSimplify/molscontrol/io_tools.py +363 -0
- molSimplify/molscontrol/molscontrol.py +49 -0
- molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
- molSimplify/molscontrol/terachem/terachem_input +22 -0
- molSimplify/python_krr/X_train_TS.csv +535 -0
- molSimplify/python_krr/__init__.py +0 -0
- molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
- molSimplify/python_krr/hat2_feature_names.csv +1 -0
- molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
- molSimplify/python_krr/hat_X_mean_std.csv +6 -0
- molSimplify/python_krr/hat_feature_names.csv +1 -0
- molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
- molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
- molSimplify/python_krr/hat_y_mean_std.csv +2 -0
- molSimplify/python_krr/sklearn_models.py +34 -0
- molSimplify/python_krr/y_train_TS.csv +535 -0
- molSimplify/python_nn/ANN.py +198 -0
- molSimplify/python_nn/__init__.py +0 -0
- molSimplify/python_nn/clf_analysis_tool.py +125 -0
- molSimplify/python_nn/dictionary_toolbox.py +49 -0
- molSimplify/python_nn/ensemble_test.py +309 -0
- molSimplify/python_nn/hs_center.csv +26 -0
- molSimplify/python_nn/hs_scale.csv +26 -0
- molSimplify/python_nn/ls_center.csv +26 -0
- molSimplify/python_nn/ls_scale.csv +26 -0
- molSimplify/python_nn/ms_hs_b1.csv +50 -0
- molSimplify/python_nn/ms_hs_b2.csv +50 -0
- molSimplify/python_nn/ms_hs_b3.csv +1 -0
- molSimplify/python_nn/ms_hs_w1.csv +50 -0
- molSimplify/python_nn/ms_hs_w2.csv +50 -0
- molSimplify/python_nn/ms_hs_w3.csv +1 -0
- molSimplify/python_nn/ms_ls_b1.csv +50 -0
- molSimplify/python_nn/ms_ls_b2.csv +50 -0
- molSimplify/python_nn/ms_ls_b3.csv +1 -0
- molSimplify/python_nn/ms_ls_w1.csv +50 -0
- molSimplify/python_nn/ms_ls_w2.csv +50 -0
- molSimplify/python_nn/ms_ls_w3.csv +1 -0
- molSimplify/python_nn/ms_slope_b1.csv +50 -0
- molSimplify/python_nn/ms_slope_b2.csv +50 -0
- molSimplify/python_nn/ms_slope_b3.csv +1 -0
- molSimplify/python_nn/ms_slope_w1.csv +50 -0
- molSimplify/python_nn/ms_slope_w2.csv +50 -0
- molSimplify/python_nn/ms_slope_w3.csv +1 -0
- molSimplify/python_nn/ms_split_b1.csv +50 -0
- molSimplify/python_nn/ms_split_b2.csv +50 -0
- molSimplify/python_nn/ms_split_b3.csv +1 -0
- molSimplify/python_nn/ms_split_w1.csv +50 -0
- molSimplify/python_nn/ms_split_w2.csv +50 -0
- molSimplify/python_nn/ms_split_w3.csv +1 -0
- molSimplify/python_nn/slope_center.csv +25 -0
- molSimplify/python_nn/slope_scale.csv +25 -0
- molSimplify/python_nn/split_center.csv +26 -0
- molSimplify/python_nn/split_scale.csv +26 -0
- molSimplify/python_nn/tf_ANN.py +762 -0
- molSimplify/python_nn/train_data.csv +1211 -0
- molSimplify/tf_nn/__init__.py +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
- molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/gap_model.json +1 -0
- molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
- molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/homo_model.json +126 -0
- molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/split/split_model.h5 +0 -0
- molSimplify/tf_nn/split/split_model.json +1 -0
- molSimplify/tf_nn/split/split_vars.csv +155 -0
- molSimplify/tf_nn/split/split_x.csv +1902 -0
- molSimplify/tf_nn/split/split_y.csv +1902 -0
- molSimplify/tf_nn/split/train_names.csv +1901 -0
- molSimplify/utils/__init__.py +0 -0
- molSimplify/utils/decorators.py +16 -0
- molSimplify/utils/metaclasses.py +12 -0
- molSimplify/utils/tensorflow.py +23 -0
- molSimplify/utils/timer.py +16 -0
- molSimplify-1.7.4.dist-info/LICENSE +674 -0
- molSimplify-1.7.4.dist-info/METADATA +821 -0
- molSimplify-1.7.4.dist-info/RECORD +651 -0
- molSimplify-1.7.4.dist-info/WHEEL +5 -0
- molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
- molSimplify-1.7.4.dist-info/top_level.txt +4 -0
- tests/generateTests.py +122 -0
- tests/helperFuncs.py +658 -0
- tests/informatics/test_MOF_descriptors.py +128 -0
- tests/informatics/test_active_learning.py +113 -0
- tests/informatics/test_coulomb_analyze.py +24 -0
- tests/informatics/test_graph_racs.py +193 -0
- tests/ml/test_kernels.py +20 -0
- tests/ml/test_layers.py +47 -0
- tests/runtest.py +10 -0
- tests/test_Mol2D.py +128 -0
- tests/test_basic_imports.py +62 -0
- tests/test_bidentate.py +25 -0
- tests/test_cli.py +20 -0
- tests/test_distgeom.py +106 -0
- tests/test_example_1.py +29 -0
- tests/test_example_3.py +31 -0
- tests/test_example_5.py +43 -0
- tests/test_example_7.py +28 -0
- tests/test_example_8.py +15 -0
- tests/test_example_tbp.py +15 -0
- tests/test_ff_xtb.py +111 -0
- tests/test_geocheck_oct.py +26 -0
- tests/test_geocheck_one_empty.py +15 -0
- tests/test_geometry.py +44 -0
- tests/test_inparse.py +76 -0
- tests/test_io.py +84 -0
- tests/test_jobgen.py +84 -0
- tests/test_joption_pythonic.py +27 -0
- tests/test_ligand_assign.py +58 -0
- tests/test_ligand_assign_consistent.py +60 -0
- tests/test_ligand_class.py +26 -0
- tests/test_ligand_from_mol_file.py +35 -0
- tests/test_ligands.py +86 -0
- tests/test_mol3D.py +337 -0
- tests/test_molcas_caspt2.py +15 -0
- tests/test_molcas_casscf.py +15 -0
- tests/test_old_ANNs.py +68 -0
- tests/test_orca_ccsdt.py +15 -0
- tests/test_orca_dft.py +15 -0
- tests/test_qcgen.py +50 -0
- tests/test_racs.py +124 -0
- tests/test_rmsd.py +68 -0
- tests/test_structgen_functions.py +198 -0
- tests/test_tetrahedral.py +29 -0
- tests/test_tutorial_10_part_one.py +16 -0
- tests/test_tutorial_10_part_two.py +15 -0
- tests/test_tutorial_2.py +11 -0
- tests/test_tutorial_3.py +15 -0
- tests/test_tutorial_4.py +57 -0
- tests/test_tutorial_6.py +10 -0
- tests/test_tutorial_8.py +29 -0
- tests/test_tutorial_9_part_one.py +15 -0
- tests/test_tutorial_9_part_two.py +15 -0
- tests/test_tutorial_qm9_part_one.py +6 -0
- tests/testresources/refs/racs/generate_references.py +85 -0
- workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
- workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
- workflows/NandyJACSAu2022/fragment_classes.py +586 -0
- workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
|
@@ -0,0 +1,1347 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import itertools
|
|
3
|
+
import networkx as nx
|
|
4
|
+
from scipy.spatial import distance
|
|
5
|
+
from scipy import sparse
|
|
6
|
+
import copy
|
|
7
|
+
from molSimplify.Scripts.cellbuilder_tools import import_from_cif
|
|
8
|
+
from molSimplify.Informatics.MOF.atomic import (
|
|
9
|
+
COVALENT_RADII,
|
|
10
|
+
alkali,
|
|
11
|
+
lanthanides,
|
|
12
|
+
metals,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# PBC: periodic boundary conditions
|
|
16
|
+
|
|
17
|
+
deg2rad = np.pi/180.0  # conversion factor: multiply an angle in degrees by this to get radians
|
|
18
|
+
def _cif_number(token):
    """Convert a cif numeric token to float, dropping a trailing standard uncertainty such as ``(5)``."""
    # "0.1234(5)" means 0.1234 with an uncertainty of 5 in the last digit;
    # the digits in parentheses are not part of the value itself.
    return float(token.split('(', 1)[0])


def readcif(name):
    """
    Reads a cif file and returns information about its structure and composition.

    Parameters
    ----------
    name : str
        The path of the cif file to be read.

    Returns
    -------
    cpar : numpy.ndarray
        The parameters (i.e. lattice constants) of the MOF cell. Specifically, A, B, C, alpha, beta, and gamma. Shape is (6,).
    atomtypes : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    positions : numpy.ndarray
        The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).

    Raises
    ------
    ValueError
        If a cell parameter is missing, or if atom lines were found but one of the
        type symbol / fractional coordinate columns was not declared.

    """
    cell_tags = (
        "_cell_length_a",
        "_cell_length_b",
        "_cell_length_c",
        "_cell_angle_alpha",
        "_cell_angle_beta",
        "_cell_angle_gamma",
    )
    column_tags = (
        "_atom_site_type_symbol",
        "_atom_site_fract_x",
        "_atom_site_fract_y",
        "_atom_site_fract_z",
    )

    with open(name, 'r', errors='ignore') as fi:  # ignore takes care of unicode errors in some cifs
        lines = fi.readlines()

    cell_values = {}   # cell tag -> parsed value
    columns = {}       # atom-site tag -> column index within an atom line
    in_atom_block = False
    atom_props_count = 0
    atomlines = []
    for line in lines:
        line_stripped = line.strip()
        if line_stripped.startswith("#"):
            continue
        line_splitted = line.split()

        for tag in cell_tags:
            if line_stripped.startswith(tag):
                cell_values[tag] = _cif_number(line_splitted[1])
                break

        if line_stripped.startswith("_atom"):
            # Either tag may come first depending on the cif, so both open the block.
            if line_stripped in ("_atom_site_label", "_atom_site_type_symbol"):
                in_atom_block = True
            if line_stripped in column_tags:
                columns[line_stripped] = atom_props_count
            if in_atom_block:
                atom_props_count += 1  # Another atom property in the block we are interested in.
        elif in_atom_block:
            if len(line_splitted) == atom_props_count:
                atomlines.append(line)
            elif line == '\n':
                continue  # Allow for newlines between the _atom_ lines and the atom data lines.
            else:
                break  # We left the atom block; everything needed has been seen.

    missing_cell = [tag for tag in cell_tags if tag not in cell_values]
    if missing_cell:
        raise ValueError("cif file %s is missing: %s" % (name, ", ".join(missing_cell)))
    if atomlines:
        missing_columns = [tag for tag in column_tags if tag not in columns]
        if missing_columns:
            raise ValueError("cif file %s is missing: %s" % (name, ", ".join(missing_columns)))

    positions = []
    atomtypes = []
    for atomline in atomlines:
        fields = atomline.split()
        positions.append([
            _cif_number(fields[columns["_atom_site_fract_x"]]),
            _cif_number(fields[columns["_atom_site_fract_y"]]),
            _cif_number(fields[columns["_atom_site_fract_z"]]),
        ])
        atomtypes.append(fields[columns["_atom_site_type_symbol"]].strip("_").capitalize())

    cpar = np.array([cell_values[tag] for tag in cell_tags])
    positions = np.array(positions)
    return cpar, atomtypes, positions
|
|
129
|
+
|
|
130
|
+
def compute_image_flag(cell, fcoord1, fcoord2):
    """
    Finds the cell translation that brings fcoord2 as close as possible to fcoord1.

    Parameters
    ----------
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    fcoord1 : numpy.ndarray
        Fractional coordinates of atom 1. Shape is (3,).
    fcoord2 : numpy.ndarray
        Fractional coordinates of atom 2. Shape is (3,).

    Returns
    -------
    numpy.ndarray
        The cell shift of fcoord2 that places it nearest to fcoord1. Shape is (3,). Values are -1, 0, or 1.

    """
    # The 27 translations by -1, 0 or +1 cells along each cell vector.
    translations = np.array(list(itertools.product((-1, 0, 1), repeat=3)))
    # Cartesian position of every translated copy of atom 2.
    image_positions = np.array([np.dot(frac, cell) for frac in fcoord2 + translations])
    reference = np.dot(fcoord1, cell)
    separations = distance.cdist([reference], image_positions)[0]  # Euclidean distances
    # argmin picks the first of any equally close images.
    return translations[int(np.argmin(separations))]
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def linker_length(adjmat, anchors):
    """
    Computes the shortest and longest paths between anchors in a linker.

    Parameters
    ----------
    adjmat : numpy.matrix
        The atom connections in the linker subgraph.
    anchors : set of ints
        The indices of linker atoms that are bonded to SBUs.

    Returns
    -------
    (min_length,max_length) : tuple of ints
        min_length is the shortest path length between two anchors in a linker.
        max_length is the longest path length between two anchors in a linker.
        If there are fewer than two anchors, (1000, 0) is returned.

    """
    rows, cols = np.where(adjmat == 1)
    gr = nx.Graph()
    gr.add_edges_from(zip(rows.tolist(), cols.tolist()))

    # One shortest-path query per anchor pair; the number of bonds on the path.
    path_lengths = [
        nx.shortest_path_length(gr, i, j)
        for i, j in itertools.combinations(anchors, 2)
    ]
    if not path_lengths:
        # No anchor pair to measure: keep the historical placeholder values.
        return (1000, 0)
    return (min(path_lengths), max(path_lengths))
|
|
190
|
+
|
|
191
|
+
def slice_mat(mat, atoms):
    """
    Extract the square sub-matrix of mat restricted to the given atoms.

    Parameters
    ----------
    mat : numpy.matrix
        The adjacency matrix. Shape is (number of atoms, number of atoms).
    atoms : list of numpy.int32
        The indices of atoms that determine the matrix slice.

    Returns
    -------
    numpy.ndarray
        The rows and columns of mat selected by atoms. Shape is (len(atoms), len(atoms)).

    """
    selected = list(atoms)
    # np.ix_ builds the open mesh that picks the same indices on both axes.
    submatrix = mat[np.ix_(selected, selected)]
    return np.array(submatrix)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def ligand_detect(cell, cart_coords, adj_mat, anchorlist):
    """
    Calculates how to shift anchor atoms so that they are close to atoms bonded to them.

    The atoms are visited breadth-first starting from atom 0. Each newly reached atom is
    shifted by whole cell vectors so that it lies next to the atom it was reached from.

    Parameters
    ----------
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    cart_coords : numpy.ndarray
        Cartesian coordinates of the atoms in the linker or sbu. Shape is (number of atoms, 3).
    adj_mat : numpy.ndarray
        Adjacency matrix. 1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).
    anchorlist : set of ints
        The indices of the anchor atoms in the linker or sbu.

    Returns
    -------
    numpy.ndarray
        The cell shifts applied to the anchor atoms, listed in the order the anchors were
        reached by the traversal (not the order of anchorlist). Shape is (len(anchorlist), 3).

    Raises
    ------
    IndexError
        If not every atom can be reached from atom 0 through bonds.

    """
    invcell = np.linalg.inv(cell)
    fcoords = np.dot(cart_coords, invcell)  # fractional coordinates
    visit_order = [0]  # Atoms in the order they were reached; grows to include every atom.
    reached = {0}      # Same atoms as visit_order, kept as a set for fast membership tests.
    periodic_images = []
    if 0 in anchorlist:
        periodic_images.append(np.array([0, 0, 0]))  # The starting atom is never shifted.
    n_atoms = len(cart_coords)
    counter = 0
    while len(visit_order) < n_atoms:
        current_node = visit_order[counter]
        for j, v in enumerate(adj_mat[current_node]):
            if v == 1 and j not in reached:  # A bonded atom that has not been placed yet
                image_flag = compute_image_flag(cell, fcoords[current_node], fcoords[j])
                fcoords[j] += image_flag  # Shift by the number of cells found by compute_image_flag
                visit_order.append(j)
                reached.add(j)
                if j in anchorlist:
                    periodic_images.append(image_flag)
        counter += 1

    return np.array(periodic_images)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def XYZ_connected(cell, cart_coords, adj_mat):
    """
    Calculate fractional coordinates of atoms, shifted by cell vectors to make the coordinates of bonded atoms close to each other.

    Parameters
    ----------
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    cart_coords : numpy.ndarray
        Cartesian coordinates of the atoms in this component. Shape is (number of atoms, 3).
    adj_mat : numpy.ndarray
        Adjacency matrix. 1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).

    Returns
    -------
    fcoords : numpy.ndarray
        Fractional coordinates of the atoms in this component. Shape is (number of atoms, 3).

    Raises
    ------
    IndexError
        If the fallback runs out of connected components before every atom has been
        reached (this can happen when an atom has no bonds at all).

    """
    invcell = np.linalg.inv(cell)
    fcoords = np.dot(cart_coords, invcell)  # fractional coordinates
    connected_components = [0]  # Atoms in the order they were reached; grows as bonded atoms are found.
    reached = {0}               # Same atoms as connected_components, as a set for fast membership tests.
    counter = 0
    _, labels_components = sparse.csgraph.connected_components(csgraph=adj_mat, directed=False, return_labels=True)
    tested_index = 0   # Label of the connected component the fallback is currently walking through.
    index_counter = 0  # Position inside that component's list of atoms.
    while len(connected_components) < len(cart_coords):
        if counter < len(connected_components):
            current_node = connected_components[counter]
        else:
            # The queue of reached atoms is exhausted; fall back to stepping through the
            # atoms of each labelled component in turn.
            indices = [i for i, x in enumerate(labels_components) if x == tested_index]
            current_node = indices[index_counter]

            if index_counter == (len(indices)-1):
                tested_index += 1
                index_counter = 0
            else:
                index_counter += 1
        for j, v in enumerate(adj_mat[current_node]):
            if v == 1 and j not in reached:  # A bonded atom that has not been placed yet
                # Shift by the number of cells found by compute_image_flag
                fcoords[j] += compute_image_flag(cell, fcoords[current_node], fcoords[j])
                connected_components.append(j)
                reached.add(j)
        counter += 1
    return fcoords
|
|
306
|
+
|
|
307
|
+
def writeXYZfcoords(filename, atoms, cell, fcoords):
    """
    Write an XYZ file from fractional coordinates.

    Each fractional coordinate is converted to Cartesian with the cell vectors before
    being written with two decimal places.

    Parameters
    ----------
    filename : str
        The path to where the xyz of the MOF structure will be written.
    atoms : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    fcoords : numpy.ndarray
        The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).

    Returns
    -------
    None

    """
    with open(filename, "w") as xyz_file:
        # Atom count followed by an empty comment line.
        xyz_file.write("{:d}\n\n".format(len(atoms)))
        for idx, frac in enumerate(fcoords):
            xyz = np.dot(frac, cell)
            xyz_text = "{:10.2f} {:10.2f} {:10.2f}".format(xyz[0], xyz[1], xyz[2])
            xyz_file.write("{} {}\n".format(atoms[idx], xyz_text))
|
|
333
|
+
|
|
334
|
+
def writeXYZandGraph(filename, atoms, cell, fcoords, molgraph):
    """
    Write the xyz file for the MOF structure, as well as the net file containing the MOF's graph.

    The net file is written next to the xyz file: its path is filename with the last
    four characters replaced by ".net".

    Parameters
    ----------
    filename : str
        The path to where the xyz of the MOF structure will be written.
    atoms : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    fcoords : numpy.ndarray
        The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).
    molgraph : numpy.matrix or numpy.ndarray
        The adjacency matrix, which indicates which atoms are connected to which atoms. Shape is (number of atoms, number of atoms).

    Returns
    -------
    None

    """
    with open(filename, "w") as xyz_file:
        # First line: number of atoms in the cell; second line: empty comment.
        xyz_file.write("{:d}\n\n".format(len(atoms)))
        for idx, frac in enumerate(fcoords):
            xyz = np.dot(frac, cell)  # Fractional -> Cartesian
            xyz_text = "{:10.2f} {:10.2f} {:10.2f}".format(xyz[0], xyz[1], xyz[2])
            xyz_file.write("{} {}\n".format(atoms[idx], xyz_text))

    # The comma separated atom types become the header line of the net file.
    net_header = ",".join(atoms)
    net_path = filename[:-4] + ".net"
    np.savetxt(net_path, molgraph, fmt="%i", delimiter=",", header=net_header)
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def returnXYZandGraph(filename, atoms, cell, fcoords, molgraph):
    """
    Convert fractional coordinates to Cartesian and return them together with the graph.

    If filename is not None, the adjacency matrix is also saved to a net file whose
    path is filename with the last four characters replaced by ".net".

    Parameters
    ----------
    filename : str or None
        Path used to derive the net file path. If None, no file is written.
    atoms : list of str
        The atom types, indicated by periodic symbols like 'O' and 'Cu'. They form the header of the net file.
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    fcoords : numpy.ndarray
        The fractional positions of the atoms. Shape is (number of atoms, 3).
    molgraph : numpy.matrix or numpy.ndarray
        The adjacency matrix. Shape is (number of atoms, number of atoms).

    Returns
    -------
    coord_list : list of list of float
        The Cartesian coordinates [x, y, z] of each atom.
    molgraph : numpy.matrix or numpy.ndarray
        The adjacency matrix that was passed in, unchanged.

    """
    cartesian = (np.dot(frac, cell) for frac in fcoords)
    coord_list = [[xyz[0], xyz[1], xyz[2]] for xyz in cartesian]
    net_header = ",".join(atoms)
    if filename is not None:
        net_path = filename[:-4] + ".net"
        np.savetxt(net_path, molgraph, fmt="%i", delimiter=",", header=net_header)
    return coord_list, molgraph
|
|
398
|
+
|
|
399
|
+
def writeXYZcoords(filename, atoms, coords):
    """
    Write an XYZ file from Cartesian coordinates.

    Parameters
    ----------
    filename : str
        The path to where the xyz of the MOF structure will be written.
    atoms : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    coords : numpy.ndarray
        The Cartesian positions of the atoms of the cif file. Shape is (number of atoms, 3).

    Returns
    -------
    None

    """
    with open(filename, "w") as xyz_file:
        # Atom count followed by an empty comment line.
        xyz_file.write("{:d}\n\n".format(len(atoms)))
        for idx, xyz in enumerate(coords):
            xyz_text = "{:10.2f} {:10.2f} {:10.2f}".format(xyz[0], xyz[1], xyz[2])
            xyz_file.write("{} {}\n".format(atoms[idx], xyz_text))
    return None
|
|
423
|
+
|
|
424
|
+
def writeXYZcoords_withcomment(filename, atoms, coords, comment):
    """
    Write an XYZ file from Cartesian coordinates, with a comment on the second line.

    Parameters
    ----------
    filename : str
        The path to where the xyz of the MOF structure will be written.
    atoms : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    coords : numpy.ndarray
        The Cartesian positions of the atoms of the cif file. Shape is (number of atoms, 3).
    comment : str
        The comment to include in the XYZ file.

    Returns
    -------
    None

    """
    with open(filename, "w") as xyz_file:
        xyz_file.write("{:d}\n".format(len(atoms)))
        xyz_file.write("{}\n".format(comment))
        for idx, xyz in enumerate(coords):
            xyz_text = "{:10.2f} {:10.2f} {:10.2f}".format(xyz[0], xyz[1], xyz[2])
            xyz_file.write("{} {}\n".format(atoms[idx], xyz_text))
    return None
|
|
447
|
+
|
|
448
|
+
def write2file(pt, fn, st):
    """
    Appends the string st to a file.

    The file path is the plain concatenation pt + fn, so pt must already end with a
    path separator if it names a folder. Existing content of the file is kept.

    Parameters
    ----------
    pt : str
        Path of the folder to make a file in.
    fn : str
        Name of the file to write to.
    st : str
        What to write in the file.

    Returns
    -------
    None

    """
    target = pt + fn
    with open(target, "a") as handle:
        handle.write(st)
|
|
468
|
+
|
|
469
|
+
def write_cif(fname, cellprm, fcoords, atom_labels):
    """
    Writes a cif file with the provided parameters.

    The structure is written in space group P 1. The common name recorded in the file
    is fname without its ".cif" extension.

    Parameters
    ----------
    fname : str
        The path to the cif file to be written.
    cellprm : numpy.ndarray
        The parameters (i.e. lattice constants) of the MOF cell. Specifically, A, B, C, alpha, beta, and gamma. Shape is (6,).
    fcoords : numpy.ndarray
        The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).
    atom_labels : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.

    Returns
    -------
    None

    """
    # Remove only the extension. str.strip(".cif") would instead strip any of the
    # characters '.', 'c', 'i', 'f' from both ends (e.g. "fcc.cif" -> "").
    extension = ".cif"
    common_name = fname[:-len(extension)] if fname.endswith(extension) else fname

    with open(fname, 'w') as f_cif:
        f_cif.write("data_I\n")
        f_cif.write("_chemical_name_common \'%s\'\n" % (common_name))
        f_cif.write("_cell_length_a %8.05f\n" % (cellprm[0]))
        f_cif.write("_cell_length_b %8.05f\n" % (cellprm[1]))
        f_cif.write("_cell_length_c %8.05f\n" % (cellprm[2]))
        f_cif.write("_cell_angle_alpha %4.05f\n" % (cellprm[3]))
        f_cif.write("_cell_angle_beta %4.05f\n" % (cellprm[4]))
        f_cif.write("_cell_angle_gamma %4.05f\n" % (cellprm[5]))
        f_cif.write("_space_group_name_H-M_alt \'P 1\'\n\n\n")
        f_cif.write("loop_\n_space_group_symop_operation_xyz\n 'x, y, z' \n\n")
        f_cif.write("loop_\n")
        f_cif.write("_atom_site_label\n")
        f_cif.write("_atom_site_fract_x\n")
        f_cif.write("_atom_site_fract_y\n")
        f_cif.write("_atom_site_fract_z\n")
        f_cif.write("_atom_site_type_symbol\n")
        # The atom type is used both as the site label and as the type symbol.
        for i, atom in enumerate(atom_labels):
            f_cif.write("%-5s %8s %8s %8s %5s\n" % (atom, fcoords[i, 0], fcoords[i, 1], fcoords[i, 2], "%s" % (atom)))
|
|
508
|
+
|
|
509
|
+
def cell_to_cellpar(cell, radians=False):
    """
    Convert cell vectors to cell parameters (three lengths and three angles).

    Parameters
    ----------
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    radians : bool
        If True, the angles are returned in radians instead of degrees.

    Returns
    -------
    numpy.ndarray
        The lengths of the three cell vectors followed by the angles between the
        second and third, first and third, and first and second vectors. Shape is (6,).
        An angle is reported as 90 degrees when either of its vectors has (near) zero length.

    """
    lengths = [np.linalg.norm(vector) for vector in cell]
    angles = []
    # Vector pairs spanning each angle, in the order alpha, beta, gamma.
    for first, second in ((-1, -2), (0, -1), (1, 0)):
        norm_product = lengths[first] * lengths[second]
        if norm_product > 1e-16:
            cosine = np.dot(cell[first], cell[second]) / norm_product
            angles.append(180.0 / np.pi * np.arccos(cosine))
        else:
            angles.append(90.0)
    if radians:
        angles = [angle * np.pi / 180 for angle in angles]
    return np.array(lengths + angles)
|
|
547
|
+
|
|
548
|
+
def findPaths(G, u, n):
    """
    Finds paths between atom u and atoms n bonds away.

    Parameters
    ----------
    G : networkx.classes.graph.Graph
        networkx graph for the linker of interest.
    u : int
        The index of the anchor atom's index in the linker list of indices.
    n : int
        How many bonds away one functionalized atom should be from another.

    Returns
    -------
    paths : list of list of int
        Every path of n bonds that starts at u. Each inner list has n + 1 entries, the first being u.
        For n equal to zero this is [[u]].

    """
    if n == 0:
        return [[u]]
    paths = []
    for neighbor in G.neighbors(u):
        # Extend every shorter path that starts at this neighbor (recursive).
        for tail in findPaths(G, neighbor, n - 1):
            if u not in tail:  # Do not step back onto u.
                paths.append([u] + tail)
    # Example of paths: [[12, 3, 7, 6], [12, 3, 7, 14], [12, 4, 0, 14], [12, 4, 0, 15], [12, 4, 9, 5], [12, 4, 9, 11]]
    return paths
|
|
574
|
+
|
|
575
|
+
def fractional2cart(fcoords, cell):
    """
    Convert from fractional coordinates to Cartesian coordinates.

    Parameters
    ----------
    fcoords : numpy.ndarray
        The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).

    Returns
    -------
    numpy.ndarray
        The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).

    """
    # Each Cartesian position is the cell-vector combination weighted by the fractional coordinates.
    cart_coords = np.dot(fcoords, cell)
    return cart_coords
|
|
593
|
+
|
|
594
|
+
def frac_coord(coord, cell):
    """
    Convert from Cartesian coordinates to fractional coordinates.

    Parameters
    ----------
    coord : numpy.ndarray
        The Cartesian coordinates of the atoms of the cif file. Shape is (number of atoms, 3).
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).

    Returns
    -------
    numpy.ndarray
        The fractional positions of the crystal atoms. Shape is (number of atoms, 3).

    """
    # Multiplying by the inverse cell matrix undoes the fractional -> Cartesian mapping.
    inverse_cell = np.linalg.inv(cell)
    fcoords = np.dot(coord, inverse_cell)
    return fcoords
|
|
613
|
+
|
|
614
|
+
def compute_distance_matrix3(cell, cart_coords, num_cells=1):
    """
    Computes the minimum-image pairwise distances between all atom pairs in the crystal cell.

    Parameters
    ----------
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    cart_coords : numpy.ndarray
        The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).
    num_cells : int
        The number of crystal cells to put together for the evaluation of distances.

    Returns
    -------
    distance_matrix : numpy.ndarray
        The distance of each atom to each other atom. Shape is (number of atoms, number of atoms).

    """
    cell_vectors = np.asarray(cell)
    positions = np.asarray(cart_coords)

    offsets = np.arange(-num_cells, num_cells + 1, 1)  # [-1, 0, 1] if num_cells is 1
    # Every combination of offsets along the three cell vectors (27 rows if num_cells is 1).
    combos = np.array(np.meshgrid(offsets, offsets, offsets)).T.reshape(-1, 3)
    # Cartesian translation vector for each combination. Shape is (number of combinations, 3).
    shifts = np.sum(cell_vectors[np.newaxis] * combos[..., np.newaxis], axis=1)

    # Every atom in every translated cell. Shape is (number of atoms, number of combinations, 3).
    images = positions[:, np.newaxis] + shifts[np.newaxis]

    # Broadcasting (atoms, 1, 1, 3) against (1, atoms, combinations, 3) gives the
    # displacement from each atom to every image of each atom.
    displacements = positions[:, np.newaxis, np.newaxis] - images[np.newaxis]

    # Euclidean norm of each displacement. Shape is (number of atoms, number of atoms, number of combinations).
    image_distances = np.sqrt(np.sum(np.square(displacements), axis=-1))

    # Keep, for each atom pair, the distance to the closest image.
    return np.min(image_distances, axis=-1)
|
|
651
|
+
|
|
652
|
+
def position_nearest_atom(cell, cart_coords, index_of_interest, num_cells=1):
    """
    Finds the atom (in any neighboring cell image) nearest to the atom specified by index_of_interest.

    Parameters
    ----------
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    cart_coords : numpy.ndarray
        The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).
    index_of_interest : int
        The index of the atom to which we want to find the nearest atom's position.
    num_cells : int
        The number of crystal cells to put together for the evaluation of distances.

    Returns
    -------
    nearest_position : numpy.ndarray
        The Cartesian coordinates of the nearest atom, including its cell shift. Shape is (3,).
    nearest_index : numpy.int64
        The index of the nearest atom.
    shift_for_nearest_atom : numpy.ndarray
        The crystal cell shifts that position the nearest atom closest to the atom of interest. Shape is (3,). Will look something like [-1 0 -1] or [0 0 0] or etc.

    """
    pos = np.arange(-num_cells, num_cells+1, 1)  # [-1, 0, 1] if num_cells is 1
    combos = np.array(np.meshgrid(pos, pos, pos)).T.reshape(-1, 3)  # The 27 combinations of -1, 0, 1 if num_cells is 1
    # The possible shifts by the crystal cell vectors. Shape is (number of combinations, 3).
    shifts = np.sum(np.expand_dims(cell, axis=0)*np.expand_dims(combos, axis=-1), axis=1)
    # The shifted Cartesian coordinates. Shape is (number of atoms, number of combinations, 3).
    shifted = np.expand_dims(cart_coords, axis=1) + np.expand_dims(shifts, axis=0)

    # Displacement from the atom of interest, shape (1, 1, 3), to every image of every atom.
    dist = np.expand_dims(np.expand_dims(cart_coords[index_of_interest], axis=0), axis=0) - shifted

    # The standard distance formula. Shape is (number of atoms, number of combinations).
    dist = np.sqrt(np.sum(np.square(dist), axis=-1))

    # Exclude the atom of interest (and its own images) from the search.
    # np.inf is used because the np.Inf alias no longer exists in NumPy 2.0.
    dist[index_of_interest, :] = np.inf

    # Locate the smallest distance as (atom index, shift index).
    nearest_index, shift_index = np.unravel_index(np.argmin(dist), np.shape(dist))

    nearest_position = shifted[nearest_index, shift_index, :]
    shift_for_nearest_atom = combos[shift_index, :]

    return nearest_position, nearest_index, shift_for_nearest_atom
|
|
704
|
+
|
|
705
|
+
def make_graph_from_nodes_edges(nodes, edges, attribs):
    """
    Build an undirected networkx graph from nodes, edges, and one attribute per node.

    Parameters
    ----------
    nodes : iterable
        The node identifiers to add to the graph (presumably atom indices; confirm against callers).
    edges : iterable
        The edges of the graph, passed unchanged to `networkx.Graph.add_edges_from`.
    attribs : iterable
        One value per node, stored on the node under the attribute name `atomicNum`.
        `nodes` and `attribs` are paired positionally with `zip`, so if their lengths differ
        the surplus entries of the longer one are ignored.

    Returns
    -------
    gr : networkx.Graph
        The assembled graph.

    """
    gr = nx.Graph()
    # A plain loop rather than a list comprehension: add_node is called only for its side effect.
    for node, atomic_num in zip(nodes, attribs):
        gr.add_node(node, atomicNum=atomic_num)
    gr.add_edges_from(edges)
    return gr
|
|
733
|
+
|
|
734
|
+
def mkcell(cpar):
    """
    Convert lattice parameters into the three Cartesian cell vectors.

    The a vector is placed along the x axis and the b vector in the x-y plane; the c vector then
    follows from the three angles and its length.

    Parameters
    ----------
    cpar : numpy.ndarray
        The parameters (i.e. lattice constants) of the MOF cell. Specifically, A, B, C, alpha, beta, and gamma,
        with the angles in degrees. Shape is (6,).

    Returns
    -------
    vectors : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
        Row 0 is a, row 1 is b, row 2 is c.

    """
    a_len, b_len, c_len = cpar[:3]
    # The angles are supplied in degrees; deg2rad is the module-level conversion factor.
    alpha, beta, gamma = (angle * deg2rad for angle in cpar[3:])

    cos_gamma = np.cos(gamma)
    sin_gamma = np.sin(gamma)

    # See https://www.doitpoms.ac.uk/tlplib/crystallography3/parameters.php for the geometry.
    first = np.array([a_len, 0.0, 0.0])
    second = np.array([b_len * cos_gamma, b_len * sin_gamma, 0.0])

    # Components of c from the standard fractional-to-Cartesian conversion
    # (e.g. McKie & McKie, 'Essentials of Crystallography').
    c_x = c_len * np.cos(beta)
    c_y = c_len * (np.cos(alpha) - cos_gamma * np.cos(beta)) / sin_gamma
    # The z component is fixed by the length of c: c_x**2 + c_y**2 + c_z**2 = c_len**2
    c_z = (c_len**2 - c_x**2 - c_y**2)**0.5
    third = np.array([c_x, c_y, c_z])

    vectors = np.array([first, second, third])
    return vectors
|
|
759
|
+
|
|
760
|
+
def make_supercell(cell, atoms, fcoords, exp_coeff):
    """
    Replicate a cell along its three lattice vectors to build a supercell.

    Parameters
    ----------
    cell : numpy.ndarray
        The three cell vectors, one per row. Shape is (3,3).
    atoms : list
        One entry per atom of the original cell (e.g. the element symbols).
    fcoords : numpy.ndarray
        The fractional coordinates of the atoms in the original cell. Shape is (number of atoms, 3).
    exp_coeff : sequence of int
        The number of repetitions of the cell along each of the three cell vectors. Length is 3.

    Returns
    -------
    supercell : numpy.ndarray
        The cell vectors of the supercell: row n of `cell` multiplied by exp_coeff[n]. Shape is (3,3).
    superatoms : list
        The entries of `atoms`, repeated once for every replicated cell.
    superfcoords : numpy.ndarray
        The fractional coordinates of all atoms, expressed relative to the supercell.
        Shape is (number of atoms * number of replicated cells, 3).

    """
    supercell = np.multiply(cell.T, exp_coeff).T
    superatoms = []
    frac_rows = []
    for i in range(exp_coeff[0]):
        for j in range(exp_coeff[1]):
            for k in range(exp_coeff[2]):
                cell_offset = (i, j, k)
                for atom_idx, atom in enumerate(atoms):
                    frac = fcoords[atom_idx]
                    # Shrink the original fractional coordinate into one sub-cell, then move it
                    # to the sub-cell given by cell_offset.
                    frac_rows.append([
                        frac[axis] / exp_coeff[axis] + float(cell_offset[axis]) / exp_coeff[axis]
                        for axis in range(3)
                    ])
                    superatoms.append(atom)
    superfcoords = np.array(frac_rows)
    return supercell, superatoms, superfcoords
|
|
798
|
+
|
|
799
|
+
|
|
800
|
+
def _bond_scale_factor(elements):
    """
    Return the empirical factor that scales an interatomic distance before the bond test.

    The rules are applied in order and a later matching rule overrides an earlier one, so the
    order of the checks below is significant.

    Parameters
    ----------
    elements : set of str
        The element symbols of the two atoms in the pair (one entry if both atoms are the same element).

    Returns
    -------
    scale : float
        The factor by which the distance is multiplied before comparison with the summed covalent radii.

    """
    # `<` is the proper-subset test and `&` the intersection; an empty intersection is falsy.
    has_metal = bool(elements & metals)
    scale = 0.9
    if {"F"} < elements and has_metal:  # Fluorine paired with a metal.
        scale = 0.8
    if {"C"} < elements and has_metal:
        scale = 0.95
    if {"H"} < elements and has_metal and not (elements & alkali):
        scale = 0.75
    if {"O"} < elements and has_metal:
        scale = 0.85
    if {"N"} < elements and has_metal:
        scale = 0.82
    # fix for water particle recognition.
    if {"O", "H"} <= elements:
        scale = 0.8
    # very specific fix for Michelle's amine appended MOF
    if {"N", "H"} <= elements:
        scale = 0.67
    if {"Mg", "N"} <= elements:
        scale = 0.80
    if {"C", "H"} <= elements:
        scale = 0.80
    if {"K"} <= elements:
        scale = 0.95
    if lanthanides & elements:
        scale = 0.95
    if elements == {"C"}:
        scale = 0.85
    return scale


def compute_adj_matrix(distance_mat, allatomtypes, wiggle_room=1, handle_overlap=False):
    """
    Calculates what atoms are bonded to each other.

    Bonding is trickier in MOFs than in TM complexes due to metal-metal bonding, motivating the existence of this function
    even though a similar one exists in mol3D.

    Two atoms are considered bonded when their distance, multiplied by an element-dependent scale factor,
    is smaller than the sum of their covalent radii multiplied by `wiggle_room`.
    Pairs made of two different metals are skipped entirely (no bond is assigned and no overlap check is made).

    Parameters
    ----------
    distance_mat : numpy.ndarray
        The distance of each atom to each other atom. Shape is (number of atoms, number of atoms).
    allatomtypes : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    wiggle_room : float
        A multiplier that allows for more or less strict bond distance cutoffs.
    handle_overlap : bool
        If True, overlapping atoms are recorded and returned in `overlap_atoms`.
        If False, a NotImplementedError is raised as soon as an overlap is found.

    Returns
    -------
    sparse.csr_matrix(adj_matrix) : scipy.sparse.csr.csr_matrix
        Adjacency matrix. 1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).
    overlap_atoms : list
        Indices of atoms that overlap with any atom of a lower index. No duplicates, sorted in ascending order.

    Raises
    ------
    NotImplementedError
        If two atoms overlap and `handle_overlap` is False.

    """
    overlap_found = set()  # A set gives constant-time membership tests inside the pair loop.
    adj_matrix = np.zeros(distance_mat.shape)
    for i, e1 in enumerate(allatomtypes[:-1]):  # Iterating through all pairs of atoms with i < j.
        for j, e2 in enumerate(allatomtypes[i + 1:], start=i + 1):
            elements = {e1, e2}

            # Proper-subset test: both elements are metals (and `elements` is not the whole metals set).
            if elements < metals:  # FIXME no metal-metal bond allowed
                continue

            rad = COVALENT_RADII[e1] + COVALENT_RADII[e2]
            dist = distance_mat[i, j]

            # check for atomic overlap:
            overlap_cutoff = min(COVALENT_RADII[e1], COVALENT_RADII[e2])
            if dist < overlap_cutoff:
                print(f"Atomic overlap involving atom {i} and {j}! Zero-indexed.")
                print(f"dist is {dist} and the cutoff is {overlap_cutoff}")
                if handle_overlap:
                    # If atom i is itself already marked for removal, there is no need to also
                    # remove an atom that overlaps with atom i.
                    if i not in overlap_found:
                        overlap_found.add(j)
                else:
                    print('Overlapping atoms! Error')
                    raise NotImplementedError(
                        f"Atomic overlap between atoms {i} and {j} (zero-indexed); "
                        "pass handle_overlap=True to collect overlapping atoms instead."
                    )

            tempsf = _bond_scale_factor(elements)
            if dist * tempsf < rad * wiggle_room:
                # Entering this if statement means there is a bond between the two atoms.
                adj_matrix[i, j] = 1
                adj_matrix[j, i] = 1

    # Duplicates are already excluded by the set; sort so the returned order is deterministic.
    overlap_atoms = sorted(overlap_found)
    return sparse.csr_matrix(adj_matrix), overlap_atoms
|
|
889
|
+
|
|
890
|
+
|
|
891
|
+
|
|
892
|
+
def get_closed_subgraph(linkers, SBUlist, adj_matrix):
    """
    Split a set of atoms into its separate bonded fragments and return each fragment with its subgraph.

    The first argument is the part of the structure to split into fragments, the second is the part to
    exclude. To get the subgraphs of the linkers, pass the linkers first and the SBU atoms second;
    to get the subgraphs of the SBUs, swap the two.

    Parameters
    ----------
    linkers : set of int
        Indices corresponding to atoms in the linkers (or SBUs; see above) of the MOF. The part of the matrix to analyze.
    SBUlist : set of numpy.int64
        Indices corresponding to atoms in the SBUs (or linkers) of the MOF. The part of the matrix to ignore.
    adj_matrix : scipy.sparse.csr.csr_matrix
        Adjacency matrix. 1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).

    Returns
    -------
    linker_list : list of lists of ints
        Each inner list is its own separate linker (or SBU). The ints are the atom indices of that linker (or SBU). Length is # of linkers (or SBUs).
    linker_subgraphlist : list of scipy.sparse.csr.csr_matrix
        The atom connections in the linker (or SBU) subgraph. Length is # of linkers (or SBUs).

    """
    remaining = linkers.copy()  # Atoms not yet assigned to any fragment.
    linker_list = []
    linker_subgraphlist = []
    n_fragments_started = 0
    while len(remaining) > 0:
        # Each pass through this loop identifies one complete fragment.
        n_fragments_started += 1
        if n_fragments_started > 5000:
            # Safety cap: stop silently after 5000 fragments.
            break

        seed = next(iter(remaining))  # An atom of a fragment that has not been identified yet.
        fragment = {seed}  # Grows as bonded atoms are discovered.
        expanded = set()  # Atoms whose neighbours have already been collected.
        while len(expanded) <= len(fragment):
            neighbours = np.nonzero(adj_matrix[seed])[1]  # Atoms bonded to the atom `seed`.
            fragment.update(neighbours)
            fragment = fragment - SBUlist
            expanded.add(seed)
            for neighbour in neighbours:
                if neighbour not in SBUlist:
                    # Also pull in everything bonded to this (non-excluded) neighbour.
                    fragment.update(np.nonzero(adj_matrix[neighbour])[1])
            # Fragment atoms whose own neighbours still have to be collected.
            pending = fragment - expanded - SBUlist
            if len(pending) == 0:
                break
            seed = next(iter(pending))

        fragment = fragment - SBUlist
        remaining = remaining - fragment
        # Return both the fragment itself and the block of the adjacency matrix that belongs to it.
        members = list(fragment)
        linker_list.append(members)
        linker_subgraphlist.append(adj_matrix[np.ix_(members, members)])

    return linker_list, linker_subgraphlist
|
|
953
|
+
|
|
954
|
+
def include_extra_shells(SBUlists, subgraphlists, molcif, adjmat):
    """
    Grow every SBU by one shell: add all atoms bonded to an atom already in the SBU.

    Note that the inner lists of `SBUlists` are extended in place with the newly found atoms
    (duplicates included); the de-duplicated result is what is returned.

    Parameters
    ----------
    SBUlists : list of lists of ints
        Each inner list is its own separate SBU. The ints are the atom indices of that SBU. Length is # of SBUs.
    subgraphlists : list of scipy.sparse.csr.csr_matrix
        The atom connections in the SBU subgraph. Length is # of SBUs. Not used by this function.
    molcif : molSimplify.Classes.mol3D.mol3D
        The cell of the cif file being analyzed. Queried through `getBondedAtomsSmart`.
    adjmat : scipy.sparse.csr.csr_matrix
        1 represents a bond, 0 represents no bond. Shape is (number of atoms, number of atoms).

    Returns
    -------
    SBUs : list of lists of numpy.int64
        The expanded atom indices of each SBU.
    subgraphs : list of scipy.sparse.csr.csr_matrix
        The atom bonding information of the SBUs in the variable `SBUs`. Which atoms are bonded to which.

    """
    SBUs = []
    subgraphs = []
    for SBU in SBUlists:
        # Iterate over a snapshot, because SBU itself grows inside the loop.
        for shell_atom in tuple(SBU):
            # Include in the SBU every atom that is bonded to the SBU.
            SBU.extend(molcif.getBondedAtomsSmart(shell_atom))
        expanded = list(set(SBU))  # Going through a set drops duplicate atom indices.
        SBUs.append(expanded)
        subgraphs.append(adjmat[np.ix_(expanded, expanded)])

    return SBUs, subgraphs
|
|
989
|
+
|
|
990
|
+
def disorder_detector(name):
    """
    Reads a cif file and returns information on which atoms have fractional occupancy.

    Only the first atom-site block is read: parsing starts at the `_atom_site_label` or
    `_atom_site_type_symbol` tag and stops at the first following line whose number of
    whitespace-separated fields differs from the number of tags counted (blank lines included).

    Parameters
    ----------
    name : str
        The path of the cif file to be read.

    Returns
    -------
    disordered_atom_indices : list of ints
        The indices of atoms with fractional occupancies.
    disordered_atom_types : list of str
        The elemental symbols of atoms with fractional occupancies.
    disordered_atom_occupancies : list of floats
        The fractional occupancies of the atoms with fractional occupancies.

    Raises
    ------
    ValueError
        If a disordered atom is found but the atom block has no `_atom_site_type_symbol` column,
        so the element of the atom cannot be reported.

    """
    with open(name, 'r', errors='ignore') as fi:  # ignore takes care of unicode errors in some cifs
        cif_lines = fi.readlines()

    in_atom_block = False
    occupancy_index = None  # Column of the occupancy within an atom line, once known.
    type_index = None  # Column of the element symbol within an atom line, once known.
    atom_props_count = 0
    atomlines = []
    for line in cif_lines:
        line_stripped = line.strip()
        if line_stripped.startswith("#"):
            continue

        if line_stripped.startswith("_atom"):
            # Either tag can come first, depending on the cif, so both open the block.
            if line_stripped in ("_atom_site_label", "_atom_site_type_symbol"):
                in_atom_block = True
            if line_stripped == "_atom_site_type_symbol":
                type_index = atom_props_count
            elif line_stripped == "_atom_site_occupancy":
                occupancy_index = atom_props_count

            if in_atom_block:
                atom_props_count += 1  # Another atom property in the block we are interested in.

        elif in_atom_block:
            if len(line_stripped.split()) == atom_props_count:
                atomlines.append(line)
            else:
                # We left the block, so all desired information for all atoms has been seen.
                break

    disordered_atom_indices = []
    disordered_atom_types = []
    disordered_atom_occupancies = []

    # Truthiness test on purpose: both "no occupancy tag" (None) and an index of 0 mean that
    # no usable occupancy column is available, as in the original implementation.
    if occupancy_index:
        for idx, atom_line in enumerate(atomlines):  # Atom by atom.
            fields = atom_line.split()

            # Drop the uncertainty in parentheses, e.g. '0.5(2)', in order to convert to float.
            current_atom_occupancy = float(fields[occupancy_index].split('(')[0])

            if current_atom_occupancy != 1:  # Disordered atom
                if type_index is None:
                    raise ValueError(
                        f"{name}: atom {idx} has fractional occupancy but the cif has no "
                        "_atom_site_type_symbol column to read its element from."
                    )
                disordered_atom_indices.append(idx)
                disordered_atom_types.append(fields[type_index].strip("_"))
                disordered_atom_occupancies.append(current_atom_occupancy)

    return disordered_atom_indices, disordered_atom_types, disordered_atom_occupancies
|
|
1063
|
+
|
|
1064
|
+
def remove_duplicate_atoms(allatomtypes, fcoords):
    """
    Drop every atom whose fractional coordinates exactly match those of a lower index atom.

    Such duplicates appear after removing symmetry with Vesta. Symmetry removal helps the molSimplify code get connectivity right.
    The surviving atoms are returned in the order produced by `numpy.unique` (sorted by coordinate),
    not in their original order; atom types and coordinates stay aligned with each other.

    Parameters
    ----------
    allatomtypes : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    fcoords : numpy.ndarray
        The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).

    Returns
    -------
    allatomtypes_trim : list of str
        The atom types of the remaining atoms. Length is the number of unique positions.
    fcoords_trim : numpy.ndarray
        The fractional positions of the remaining atoms. Shape is (number of unique positions, 3).

    """
    # Unique coordinate rows, plus the index of the first atom found at each of those positions.
    unique_rows, first_seen = np.unique(fcoords, axis=0, return_index=True)

    # Look up the atom type that belongs to each surviving position.
    kept_types = []
    for atom_idx in first_seen:
        kept_types.append(allatomtypes[atom_idx])

    return kept_types, unique_rows
|
|
1093
|
+
|
|
1094
|
+
def remove_undesired_atoms(undesired_indices, allatomtypes, fcoords):
    """
    Takes a list of indices, and removes those elements from allatomtypes and fcoords.

    Parameters
    ----------
    undesired_indices : list
        The indices of the atoms to remove.
    allatomtypes : list of str
        The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms.
    fcoords : numpy.ndarray
        The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3).

    Returns
    -------
    allatomtypes_trim : list of str
        The atom types of the atoms that are kept, in their original order.
    fcoords_trim : numpy.ndarray
        The fractional positions of the atoms that are kept, in their original order.
        Shape is (number of kept atoms, 3).

    """
    # A set makes each membership test constant time; testing against the input list made the
    # filtering quadratic in the number of atoms.
    undesired = set(undesired_indices)
    desired_indices = [_i for _i in range(len(allatomtypes)) if _i not in undesired]  # The indices we want to keep.
    allatomtypes_trim = [allatomtypes[_i] for _i in desired_indices]
    fcoords_trim = fcoords[desired_indices]

    return allatomtypes_trim, fcoords_trim
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
def overlap_removal(cif_path, new_cif_path):
    """
    Read a cif file, drop atoms that overlap with another atom, and write the result to a new cif file.

    For a new CIF, recommended to remove symmetry (either with Vesta or with get_primitive), then run overlap_removal, then run solvent_removal.
    Atoms with exactly identical fractional coordinates are removed first; the remaining overlaps are
    those reported by compute_adj_matrix.

    Parameters
    ----------
    cif_path : str
        The path of the cif file to be read.
    new_cif_path : str
        The path to which the modified cif file will be written.

    Returns
    -------
    None

    """
    # Much of this code parallels that in the beginning of the MOF_descriptors.get_MOF_descriptors function

    # Load the cif, discard exact duplicates, and build the Cartesian description of the cell.
    cpar, allatomtypes, fcoords = readcif(cif_path)
    allatomtypes, fcoords = remove_duplicate_atoms(allatomtypes, fcoords)
    cell_v = mkcell(cpar)
    cart_coords = fractional2cart(fcoords, cell_v)

    # The cif is assumed to carry no bonding information, so distances are computed from scratch.
    # Only the overlap list is needed here; the adjacency matrix itself is discarded.
    distance_mat = compute_distance_matrix3(cell_v, cart_coords)
    _, overlap_atoms = compute_adj_matrix(distance_mat, allatomtypes, handle_overlap=True)

    # Dealing with the case of overlapping atoms.
    if overlap_atoms:
        print('Dealing with overlap')
        allatomtypes, fcoords = remove_undesired_atoms(overlap_atoms, allatomtypes, fcoords)

    # Writing the cif file
    write_cif(new_cif_path, cpar, fcoords, allatomtypes)
|
|
1164
|
+
|
|
1165
|
+
def solvent_removal(cif_path, new_cif_path, wiggle_room=1):
    """
    Read a cif file, drop floating solvent atoms, and write the result to a new cif file.

    Every connected component of the bond graph that contains no metal atom is treated as solvent.
    Assumes cif has P1 symmetry.

    Parameters
    ----------
    cif_path : str
        The path of the cif file to be read.
    new_cif_path : str
        The path to which the modified cif file will be written.
    wiggle_room : float
        A multiplier that allows for more or less strict bond distance cutoffs.
        Useful for some trouble CIFs with long bonds.

    Returns
    -------
    None

    Raises
    ------
    Exception
        If atoms overlap, or if the structure contains no metal.

    """
    # Much of this code parallels that in the beginning of the MOF_descriptors.get_MOF_descriptors function

    # Load the cif and build the Cartesian description of the cell.
    cpar, allatomtypes, fcoords = readcif(cif_path)
    cell_v = mkcell(cpar)
    cart_coords = fractional2cart(fcoords, cell_v)

    # The cif is assumed to carry no bonding information, so bonds are derived from distances.
    distance_mat = compute_distance_matrix3(cell_v, cart_coords)
    try:
        adj_matrix, _ = compute_adj_matrix(
            distance_mat, allatomtypes, wiggle_room=wiggle_room, handle_overlap=False
        )
    except NotImplementedError:
        # compute_adj_matrix signals overlapping atoms this way when handle_overlap is False.
        raise Exception("Failed due to atomic overlap")

    # Attach the bond information to a mol3D of the cell of the cif file.
    adj_matrix = sparse.csr_matrix(adj_matrix)
    molcif, _, _, _, _ = import_from_cif(cif_path, True)
    molcif.graph = adj_matrix.todense()

    # Finding the connected components
    n_components, labels_components = sparse.csgraph.connected_components(
        csgraph=adj_matrix, directed=False, return_labels=True
    )
    print(f'n_components: {n_components}')
    print(f'labels_components: {labels_components}')
    print(f'len is {len(labels_components)}')

    metal_indices = set(molcif.findMetal(transition_metals_only=False))  # the atom indices of the metals
    if len(metal_indices) == 0:
        raise Exception("No metal in the structure.")

    solvent_indices = []  # Collects the indices of all solvent atoms.
    for comp in range(n_components):
        members = [idx for idx, label in enumerate(labels_components) if label == comp]
        if metal_indices.isdisjoint(members):
            # A whole connected component without any metal, i.e. with no connection to any metal: solvent.
            solvent_indices.extend(members)

    # Removing the atoms corresponding to the solvent.
    allatomtypes, fcoords = remove_undesired_atoms(solvent_indices, allatomtypes, fcoords)

    # Writing the cif file
    write_cif(new_cif_path, cpar, fcoords, allatomtypes)
|
|
1231
|
+
|
|
1232
|
+
|
|
1233
|
+
|
|
1234
|
+
|
|
1235
|
+
|
|
1236
|
+
|
|
1237
|
+
##### Deprecated #####
|
|
1238
|
+
|
|
1239
|
+
# The functions compute_distance_matrix, compute_distance_matrix2, and compute_distance_matrix3 all do the same thing.
|
|
1240
|
+
# However, compute_distance_matrix3 is significantly faster than compute_distance_matrix2, which in turn is faster than compute_distance_matrix.
|
|
1241
|
+
# This is due to the use of for loops in compute_distance_matrix and compute_distance_matrix2, versus the vectorized (pre-compiled C code) numpy functions in compute_distance_matrix3.
|
|
1242
|
+
|
|
1243
|
+
def compute_distance_matrix(cell, cart_coords):
    """
    Build the matrix of minimum-image distances between all atom pairs of the crystal cell.

    First version of this function; every pair is evaluated with min_img_distance.

    Parameters
    ----------
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    cart_coords : numpy.ndarray
        The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).

    Returns
    -------
    distance_matrix : numpy.ndarray
        The distance of each atom to each other atom. Shape is (number of atoms, number of atoms).
        Symmetric, with zeros on the diagonal.

    """
    n_atoms = len(cart_coords)
    distance_matrix = np.zeros([n_atoms, n_atoms])
    for i, coords_i in enumerate(cart_coords):
        # Only pairs with j > i are computed; the result is mirrored across the diagonal.
        for j in range(i + 1, n_atoms):
            separation = min_img_distance(coords_i, cart_coords[j], cell)
            distance_matrix[i, j] = distance_matrix[j, i] = separation

    return distance_matrix
|
|
1268
|
+
|
|
1269
|
+
def min_img_distance(coords1, coords2, cell):
    """
    Return the minimum-image distance between two atoms.

    The minimum image distance is the shortest distance between the two atoms when the repeating
    periodic structure of the MOF is taken into account. Here it is obtained by rounding the
    difference of the fractional coordinates to the nearest whole cell shift.

    Parameters
    ----------
    coords1 : numpy.ndarray
        The Cartesian coordinates of the first atom under consideration. Shape is (3,).
    coords2 : numpy.ndarray
        The Cartesian coordinates of the second atom under consideration. Shape is (3,).
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).

    Returns
    -------
    np.linalg.norm(...) : numpy.float64
        The distance between the two atoms.

    """
    to_fractional = np.linalg.inv(cell)
    # Fractional coordinates of both atoms, wrapped into the cell by the modulo.
    frac_delta = np.dot(coords1, to_fractional) % 1 - np.dot(coords2, to_fractional) % 1
    # Whole-cell shift (entries -1, 0, or 1) that brings the two atoms closest together.
    cell_shift = np.around(frac_delta)
    # Back to Cartesian coordinates before taking the length.
    cart_delta = np.dot(frac_delta - cell_shift, cell)
    return np.linalg.norm(cart_delta)
|
|
1295
|
+
|
|
1296
|
+
def compute_distance_matrix2(cell, cart_coords):
    """
    Build the matrix of minimum-image distances between all atom pairs of the crystal cell.

    Second version of this function; every pair is evaluated with min_img_distance2.

    Parameters
    ----------
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).
    cart_coords : numpy.ndarray
        The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3).

    Returns
    -------
    distance_matrix : numpy.ndarray
        The distance of each atom to each other atom. Shape is (number of atoms, number of atoms).
        Symmetric, with zeros on the diagonal.

    """
    n_atoms = len(cart_coords)
    distance_matrix = np.zeros([n_atoms, n_atoms])
    for i, coords_i in enumerate(cart_coords):
        # Only pairs with j > i are computed; the result is mirrored across the diagonal.
        for j in range(i + 1, n_atoms):
            separation = min_img_distance2(coords_i, cart_coords[j], cell)
            distance_matrix[i, j] = distance_matrix[j, i] = separation

    return distance_matrix
|
|
1321
|
+
|
|
1322
|
+
def min_img_distance2(coords1, coords2, cell):
    """
    Return the minimum-image distance between two atoms.

    The minimum image distance is the shortest distance between the two atoms when the repeating
    periodic structure of the MOF is taken into account. Here it is found by brute force: the second
    atom is placed in the cell and in each of its 26 neighbouring cells, and the smallest distance
    to the first atom is returned.

    Parameters
    ----------
    coords1 : numpy.ndarray
        The Cartesian coordinates of the first atom under consideration. Shape is (3,).
    coords2 : numpy.ndarray
        The Cartesian coordinates of the second atom under consideration. Shape is (3,).
    cell : numpy.ndarray
        The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3).

    Returns
    -------
    np.amin(dists) : numpy.float64
        The distance between the two atoms.

    """
    to_fractional = np.linalg.inv(cell)
    frac2 = np.dot(coords2, to_fractional)  # Fractional coordinates of the second atom.

    # One image of the second atom per combination of -1/0/+1 shifts along the three cell vectors (27 in total).
    images = []
    for shift in itertools.product((-1, 0, 1), repeat=3):
        images.append(np.dot(frac2 + np.array(shift), cell))  # Back to Cartesian coordinates.
    images = np.array(images)

    dists = distance.cdist([coords1], images)  # Euclidean distance to every image.
    # The smallest one is the distance between the two atoms at their closest.
    return np.amin(dists)
|