molSimplify 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +224 -0
- molSimplify/Classes/__init__.py +6 -0
- molSimplify/Classes/atom3D.py +235 -0
- molSimplify/Classes/dft_obs.py +130 -0
- molSimplify/Classes/globalvars.py +827 -0
- molSimplify/Classes/helpers.py +161 -0
- molSimplify/Classes/ligand.py +2330 -0
- molSimplify/Classes/mGUI.py +2493 -0
- molSimplify/Classes/mWidgets.py +438 -0
- molSimplify/Classes/miniGUI.py +41 -0
- molSimplify/Classes/mol2D.py +260 -0
- molSimplify/Classes/mol3D.py +5846 -0
- molSimplify/Classes/monomer3D.py +253 -0
- molSimplify/Classes/partialcharges.py +226 -0
- molSimplify/Classes/protein3D.py +1178 -0
- molSimplify/Classes/rundiag.py +151 -0
- molSimplify/Data/ML.dat +212 -0
- molSimplify/Data/MLS_FSR_for_inter.dat +23 -0
- molSimplify/Data/MLS_FSR_for_inter2.dat +23 -0
- molSimplify/Data/MLS_angle_for_click.dat +8 -0
- molSimplify/Data/MLS_angle_for_inter.dat +23 -0
- molSimplify/Data/MLS_angle_for_inter2.dat +48 -0
- molSimplify/Data/MLS_angle_for_intra.dat +10 -0
- molSimplify/Data/MLS_angle_for_intra2.dat +6 -0
- molSimplify/Data/MLS_angle_for_oa.dat +18 -0
- molSimplify/Data/ML_FSR_for_inter.dat +112 -0
- molSimplify/Data/ML_FSR_for_inter2.dat +110 -0
- molSimplify/Data/ML_bond_for_cat.dat +8 -0
- molSimplify/Data/ML_bond_for_click.dat +8 -0
- molSimplify/Data/ML_bond_for_inter.dat +48 -0
- molSimplify/Data/ML_bond_for_inter2.dat +48 -0
- molSimplify/Data/ML_bond_for_intra.dat +10 -0
- molSimplify/Data/ML_bond_for_intra2.dat +6 -0
- molSimplify/Data/ML_bond_for_oa.dat +18 -0
- molSimplify/Data/bp1.dat +21 -0
- molSimplify/Data/li.dat +3 -0
- molSimplify/Data/no.dat +2 -0
- molSimplify/Data/oct.dat +7 -0
- molSimplify/Data/pbp.dat +8 -0
- molSimplify/Data/spy.dat +6 -0
- molSimplify/Data/sqap.dat +9 -0
- molSimplify/Data/sqp.dat +5 -0
- molSimplify/Data/tbp.dat +6 -0
- molSimplify/Data/tdhd.dat +9 -0
- molSimplify/Data/thd.dat +5 -0
- molSimplify/Data/tpl.dat +4 -0
- molSimplify/Data/tpr.dat +7 -0
- molSimplify/Informatics/HFXsensitivity/__init__.py +0 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +443 -0
- molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +346 -0
- molSimplify/Informatics/MOF/Linker_rotation.py +179 -0
- molSimplify/Informatics/MOF/MOF_descriptors.py +1299 -0
- molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +589 -0
- molSimplify/Informatics/MOF/MOF_functionalizer.py +1648 -0
- molSimplify/Informatics/MOF/PBC_functions.py +1347 -0
- molSimplify/Informatics/MOF/__init__.py +0 -0
- molSimplify/Informatics/MOF/atomic.py +267 -0
- molSimplify/Informatics/MOF/cluster_extraction.py +388 -0
- molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +895 -0
- molSimplify/Informatics/MOF/monofunctionalized_BDC/index_information.py +10 -0
- molSimplify/Informatics/Mol2Parser.py +46 -0
- molSimplify/Informatics/RACassemble.py +408 -0
- molSimplify/Informatics/__init__.py +0 -0
- molSimplify/Informatics/active_learning/__init__.py +0 -0
- molSimplify/Informatics/active_learning/expected_improvement.py +269 -0
- molSimplify/Informatics/autocorrelation.py +1930 -0
- molSimplify/Informatics/clean_autocorrelation.py +778 -0
- molSimplify/Informatics/coulomb_analyze.py +67 -0
- molSimplify/Informatics/decoration_manager.py +193 -0
- molSimplify/Informatics/geo_analyze.py +88 -0
- molSimplify/Informatics/geometrics.py +56 -0
- molSimplify/Informatics/graph_analyze.py +163 -0
- molSimplify/Informatics/graph_racs.py +288 -0
- molSimplify/Informatics/jupyter_vis.py +172 -0
- molSimplify/Informatics/lacRACAssemble.py +2192 -0
- molSimplify/Informatics/lacRACAssemble_bisdithiolenes.py +236 -0
- molSimplify/Informatics/misc_descriptors.py +198 -0
- molSimplify/Informatics/organic_fingerprints.py +61 -0
- molSimplify/Informatics/partialcharges.py +345 -0
- molSimplify/Informatics/protein/activesite.py +53 -0
- molSimplify/Informatics/protein/pymol_add_hs.py +33 -0
- molSimplify/Informatics/rac155_geo.py +48 -0
- molSimplify/Ligands/(1_methylbenzimidazol_2_yl)pyridine.xyz +45 -0
- molSimplify/Ligands/1-4-dimethyl-1-2-3-triazole.xyz +15 -0
- molSimplify/Ligands/12crown4.mol +62 -0
- molSimplify/Ligands/Antipyrine.mol +58 -0
- molSimplify/Ligands/BPAbipy.mol +106 -0
- molSimplify/Ligands/Hpyrrole.mol +26 -0
- molSimplify/Ligands/N-quinolinylbutyramidate.xyz +31 -0
- molSimplify/Ligands/N-quinolinylmethylmethinylacetamidate.xyz +30 -0
- molSimplify/Ligands/NMe2_-1.xyz +11 -0
- molSimplify/Ligands/PCy3.mol +111 -0
- molSimplify/Ligands/PMe3.xyz +15 -0
- molSimplify/Ligands/PPh3.mol +76 -0
- molSimplify/Ligands/Propyphenazone.mol +77 -0
- molSimplify/Ligands/acac.mol +33 -0
- molSimplify/Ligands/acacen.mol +76 -0
- molSimplify/Ligands/acetate.smi +1 -0
- molSimplify/Ligands/acetate.xyz +9 -0
- molSimplify/Ligands/aceticacidbipyridine.mol +70 -0
- molSimplify/Ligands/acetonitrile.mol +17 -0
- molSimplify/Ligands/alanine.mol +30 -0
- molSimplify/Ligands/alphabetizer.py +21 -0
- molSimplify/Ligands/amine.mol +11 -0
- molSimplify/Ligands/ammonia.mol +12 -0
- molSimplify/Ligands/arginine.mol +58 -0
- molSimplify/Ligands/asparagine.mol +38 -0
- molSimplify/Ligands/aspartic_acid.mol +35 -0
- molSimplify/Ligands/azide.mol +11 -0
- molSimplify/Ligands/benzene.mol +28 -0
- molSimplify/Ligands/benzene_pi.mol +30 -0
- molSimplify/Ligands/benzenedithiol.mol +30 -0
- molSimplify/Ligands/benzenethiol.mol +30 -0
- molSimplify/Ligands/benzylisocy.mol +38 -0
- molSimplify/Ligands/bidiazine.mol +42 -0
- molSimplify/Ligands/bidiazole.mol +38 -0
- molSimplify/Ligands/bifuran.mol +38 -0
- molSimplify/Ligands/bihydrodiazine.mol +58 -0
- molSimplify/Ligands/bihydrodiazole.mol +46 -0
- molSimplify/Ligands/bihydrooxazine.mol +54 -0
- molSimplify/Ligands/bihydrooxazole.mol +42 -0
- molSimplify/Ligands/bihydrothiazine.mol +54 -0
- molSimplify/Ligands/bihydrothiazole.mol +42 -0
- molSimplify/Ligands/biimidazole.mol +38 -0
- molSimplify/Ligands/bioxazole.mol +34 -0
- molSimplify/Ligands/bipy.mol +46 -0
- molSimplify/Ligands/bipyrazine.xyz +20 -0
- molSimplify/Ligands/bipyrimidine.mol +42 -0
- molSimplify/Ligands/bipyrrole.mol +42 -0
- molSimplify/Ligands/bisnapthyridylpyridine.mol +111 -0
- molSimplify/Ligands/bithiazole.mol +34 -0
- molSimplify/Ligands/bromide.mol +7 -0
- molSimplify/Ligands/bromide.smi +1 -0
- molSimplify/Ligands/c2.mol +9 -0
- molSimplify/Ligands/caprolactone.mol +41 -0
- molSimplify/Ligands/carbonyl.mol +8 -0
- molSimplify/Ligands/carboxyl.mol +13 -0
- molSimplify/Ligands/cat.mol +30 -0
- molSimplify/Ligands/chloride.mol +7 -0
- molSimplify/Ligands/chloride.smi +1 -0
- molSimplify/Ligands/chloropyridine.mol +27 -0
- molSimplify/Ligands/co2.mol +10 -0
- molSimplify/Ligands/corrolazine.mol +72 -0
- molSimplify/Ligands/cs.mol +8 -0
- molSimplify/Ligands/cyanate.xyz +5 -0
- molSimplify/Ligands/cyanide.mol +9 -0
- molSimplify/Ligands/cyanoaceticporphyrin.mol +114 -0
- molSimplify/Ligands/cyanopyridine.mol +29 -0
- molSimplify/Ligands/cyclam.mol +81 -0
- molSimplify/Ligands/cyclen.mol +69 -0
- molSimplify/Ligands/cyclopentadienyl.mol +26 -0
- molSimplify/Ligands/cysteine.mol +32 -0
- molSimplify/Ligands/diaminomethyl.mol +19 -0
- molSimplify/Ligands/diazine.mol +25 -0
- molSimplify/Ligands/diazole.mol +23 -0
- molSimplify/Ligands/dicyanamide.mol +15 -0
- molSimplify/Ligands/dihydrofuran.mol +27 -0
- molSimplify/Ligands/dmap.xyz +35 -0
- molSimplify/Ligands/dmf.mol +28 -0
- molSimplify/Ligands/dmi.mol +41 -0
- molSimplify/Ligands/dmpe.mol +52 -0
- molSimplify/Ligands/dpmu.mol +47 -0
- molSimplify/Ligands/dppe.mol +112 -0
- molSimplify/Ligands/edta.mol +69 -0
- molSimplify/Ligands/en.mol +28 -0
- molSimplify/Ligands/ethanethiol.mol +21 -0
- molSimplify/Ligands/ethanolamine.mol +26 -0
- molSimplify/Ligands/ethbipy.mol +70 -0
- molSimplify/Ligands/ethyl.mol +19 -0
- molSimplify/Ligands/ethylamine.mol +24 -0
- molSimplify/Ligands/ethylene.mol +16 -0
- molSimplify/Ligands/ethylesteracac.mol +57 -0
- molSimplify/Ligands/fluoride.mol +7 -0
- molSimplify/Ligands/fluoride.smi +1 -0
- molSimplify/Ligands/formaldehyde.mol +12 -0
- molSimplify/Ligands/formamidate.xyz +8 -0
- molSimplify/Ligands/formate.xyz +6 -0
- molSimplify/Ligands/furan.mol +23 -0
- molSimplify/Ligands/glutamic_acid.mol +42 -0
- molSimplify/Ligands/glutamine.mol +44 -0
- molSimplify/Ligands/glycinate.mol +23 -0
- molSimplify/Ligands/glycine.mol +24 -0
- molSimplify/Ligands/h2s.mol +10 -0
- molSimplify/Ligands/helium.mol +6 -0
- molSimplify/Ligands/histidine.mol +45 -0
- molSimplify/Ligands/hmpa.mol +62 -0
- molSimplify/Ligands/hs-.mol +9 -0
- molSimplify/Ligands/hydride.mol +7 -0
- molSimplify/Ligands/hydrocarboxyacetylide.xyz +8 -0
- molSimplify/Ligands/hydrocyanide.mol +10 -0
- molSimplify/Ligands/hydrodiazine.mol +33 -0
- molSimplify/Ligands/hydrodiazole.mol +27 -0
- molSimplify/Ligands/hydrogensulfide.mol +10 -0
- molSimplify/Ligands/hydroisocyanide.mol +11 -0
- molSimplify/Ligands/hydrooxazine.mol +31 -0
- molSimplify/Ligands/hydrooxazole.mol +25 -0
- molSimplify/Ligands/hydrothiazine.mol +31 -0
- molSimplify/Ligands/hydrothiazole.mol +25 -0
- molSimplify/Ligands/hydroxyl.mol +9 -0
- molSimplify/Ligands/imidazole.mol +23 -0
- molSimplify/Ligands/imidazolidinone.mol +29 -0
- molSimplify/Ligands/imine.mol +13 -0
- molSimplify/Ligands/iminodiacetic.mol +33 -0
- molSimplify/Ligands/iodide.mol +7 -0
- molSimplify/Ligands/iodobenzene.xyz +14 -0
- molSimplify/Ligands/isoleucine.mol +48 -0
- molSimplify/Ligands/isothiocyanate.mol +11 -0
- molSimplify/Ligands/leucine.mol +48 -0
- molSimplify/Ligands/ligands.dict +257 -0
- molSimplify/Ligands/lysine.mol +54 -0
- molSimplify/Ligands/mebenzenedithiol.mol +36 -0
- molSimplify/Ligands/mebim_py.xyz +29 -0
- molSimplify/Ligands/mebim_pz.xyz +28 -0
- molSimplify/Ligands/mebipy.mol +58 -0
- molSimplify/Ligands/mecat.mol +36 -0
- molSimplify/Ligands/methanal.mol +11 -0
- molSimplify/Ligands/methanethiol.mol +15 -0
- molSimplify/Ligands/methanol.mol +16 -0
- molSimplify/Ligands/methionine.mol +44 -0
- molSimplify/Ligands/methyl.mol +13 -0
- molSimplify/Ligands/methylacetylide.xyz +8 -0
- molSimplify/Ligands/methylamine.mol +19 -0
- molSimplify/Ligands/methylazide.xyz +9 -0
- molSimplify/Ligands/methylisocy.mol +17 -0
- molSimplify/Ligands/methylpyridine.mol +33 -0
- molSimplify/Ligands/n2.mol +8 -0
- molSimplify/Ligands/n4py.xyz +51 -0
- molSimplify/Ligands/nch.mol +10 -0
- molSimplify/Ligands/nco-.mol +11 -0
- molSimplify/Ligands/nethanolamine.mol +26 -0
- molSimplify/Ligands/nitrate.mol +14 -0
- molSimplify/Ligands/nitrite.mol +11 -0
- molSimplify/Ligands/nitro.mol +11 -0
- molSimplify/Ligands/nitrobipy.mol +54 -0
- molSimplify/Ligands/nitroso.mol +8 -0
- molSimplify/Ligands/nme3.mol +30 -0
- molSimplify/Ligands/no-.mol +10 -0
- molSimplify/Ligands/no2-.mol +11 -0
- molSimplify/Ligands/noxygen.mol +8 -0
- molSimplify/Ligands/ns-.mol +10 -0
- molSimplify/Ligands/o-pyridylbenzene.xyz +23 -0
- molSimplify/Ligands/o-pyridylphenylanion.xyz +22 -0
- molSimplify/Ligands/o2-.mol +9 -0
- molSimplify/Ligands/o2.xyz +4 -0
- molSimplify/Ligands/och2.mol +12 -0
- molSimplify/Ligands/oethanolamine.mol +26 -0
- molSimplify/Ligands/ome2.mol +22 -0
- molSimplify/Ligands/ooh.xyz +5 -0
- molSimplify/Ligands/oxalate.mol +17 -0
- molSimplify/Ligands/oxalate.smi +1 -0
- molSimplify/Ligands/oxygen.mol +7 -0
- molSimplify/Ligands/pentacyanocyclopentadienide.mol +36 -0
- molSimplify/Ligands/ph2-.mol +11 -0
- molSimplify/Ligands/ph3.mol +12 -0
- molSimplify/Ligands/phen.mol +51 -0
- molSimplify/Ligands/phenacac.mol +63 -0
- molSimplify/Ligands/phenalalanine.mol +51 -0
- molSimplify/Ligands/phendione.mol +51 -0
- molSimplify/Ligands/phenphen.mol +75 -0
- molSimplify/Ligands/phenylbenzoxazole.mol +54 -0
- molSimplify/Ligands/phenylcyc.mol +99 -0
- molSimplify/Ligands/phenylenediamine.mol +37 -0
- molSimplify/Ligands/phenylisocy.mol +32 -0
- molSimplify/Ligands/phosacidbipy.mol +66 -0
- molSimplify/Ligands/phosphine.mol +13 -0
- molSimplify/Ligands/phosphorine.mol +27 -0
- molSimplify/Ligands/phosphorustrifluoride.mol +12 -0
- molSimplify/Ligands/phthalocyanine.mol +126 -0
- molSimplify/Ligands/pme3o.mol +32 -0
- molSimplify/Ligands/porphyrin.mol +82 -0
- molSimplify/Ligands/pph3o.mol +77 -0
- molSimplify/Ligands/proline.mol +39 -0
- molSimplify/Ligands/propdiol.mol +21 -0
- molSimplify/Ligands/propylene.mol +23 -0
- molSimplify/Ligands/pyridine.mol +27 -0
- molSimplify/Ligands/pyrimidone.mol +27 -0
- molSimplify/Ligands/pyrrole.mol +24 -0
- molSimplify/Ligands/quinoxalinedithiol.mol +39 -0
- molSimplify/Ligands/s2-.mol +9 -0
- molSimplify/Ligands/salen.mol +75 -0
- molSimplify/Ligands/salphen.mol +84 -0
- molSimplify/Ligands/serine.mol +32 -0
- molSimplify/Ligands/simple_ligands.dict +14 -0
- molSimplify/Ligands/sulfacidbipy.mol +63 -0
- molSimplify/Ligands/tbucat.mol +54 -0
- molSimplify/Ligands/tbuphisocy.mol +56 -0
- molSimplify/Ligands/tbutylcyclen.mol +166 -0
- molSimplify/Ligands/tbutylisocy.mol +35 -0
- molSimplify/Ligands/tbutylthiol.mol +33 -0
- molSimplify/Ligands/tcnoet.mol +43 -0
- molSimplify/Ligands/tcnoetOH.mol +45 -0
- molSimplify/Ligands/terpy.mol +65 -0
- molSimplify/Ligands/tetrahydrofuran.mol +31 -0
- molSimplify/Ligands/thiane.mol +37 -0
- molSimplify/Ligands/thiazole.mol +21 -0
- molSimplify/Ligands/thiocyanate.mol +11 -0
- molSimplify/Ligands/thiol.mol +9 -0
- molSimplify/Ligands/thiophene.mol +23 -0
- molSimplify/Ligands/thiopyridine.mol +29 -0
- molSimplify/Ligands/threonine.mol +38 -0
- molSimplify/Ligands/tpp.mol +165 -0
- molSimplify/Ligands/tricyanomethyl.mol +19 -0
- molSimplify/Ligands/trifluoromethyl.mol +13 -0
- molSimplify/Ligands/tryptophan.mol +60 -0
- molSimplify/Ligands/tyrosine.mol +53 -0
- molSimplify/Ligands/uthiol.mol +11 -0
- molSimplify/Ligands/uthiolme2.mol +23 -0
- molSimplify/Ligands/valine.mol +42 -0
- molSimplify/Ligands/water.mol +10 -0
- molSimplify/Ligands/x.mol +6 -0
- molSimplify/Scripts/__init__.py +0 -0
- molSimplify/Scripts/addtodb.py +308 -0
- molSimplify/Scripts/cellbuilder.py +1592 -0
- molSimplify/Scripts/cellbuilder_tools.py +701 -0
- molSimplify/Scripts/chains.py +342 -0
- molSimplify/Scripts/convert_2to3.py +23 -0
- molSimplify/Scripts/dbinteract.py +631 -0
- molSimplify/Scripts/distgeom.py +617 -0
- molSimplify/Scripts/findcorrelations.py +287 -0
- molSimplify/Scripts/generator.py +267 -0
- molSimplify/Scripts/geometry.py +1224 -0
- molSimplify/Scripts/grabguivars.py +845 -0
- molSimplify/Scripts/in_b3lyp_usetc.py +141 -0
- molSimplify/Scripts/inparse.py +1673 -0
- molSimplify/Scripts/io.py +1149 -0
- molSimplify/Scripts/isomers.py +415 -0
- molSimplify/Scripts/jobgen.py +247 -0
- molSimplify/Scripts/krr_prep.py +1262 -0
- molSimplify/Scripts/molSimplify_io.py +18 -0
- molSimplify/Scripts/molden2psi4wfn.py +166 -0
- molSimplify/Scripts/namegen.py +32 -0
- molSimplify/Scripts/nn_prep.py +561 -0
- molSimplify/Scripts/oct_check_mols.py +782 -0
- molSimplify/Scripts/periodic_QE.py +97 -0
- molSimplify/Scripts/postmold.py +304 -0
- molSimplify/Scripts/postmwfn.py +709 -0
- molSimplify/Scripts/postparse.py +488 -0
- molSimplify/Scripts/postproc.py +139 -0
- molSimplify/Scripts/qcgen.py +1450 -0
- molSimplify/Scripts/rmsd.py +489 -0
- molSimplify/Scripts/rungen.py +670 -0
- molSimplify/Scripts/structgen.py +3040 -0
- molSimplify/Scripts/tf_nn_prep.py +894 -0
- molSimplify/Scripts/tsgen.py +295 -0
- molSimplify/Scripts/uq_calibration.py +69 -0
- molSimplify/__init__.py +0 -0
- molSimplify/__main__.py +197 -0
- molSimplify/icons/chemdb.png +0 -0
- molSimplify/icons/hjklogo.png +0 -0
- molSimplify/icons/icon.png +0 -0
- molSimplify/icons/logo.png +0 -0
- molSimplify/icons/logo_old.png +0 -0
- molSimplify/icons/petachem.png +0 -0
- molSimplify/icons/petachem2.png +0 -0
- molSimplify/icons/petachem_full.png +0 -0
- molSimplify/icons/pythonlogo.png +0 -0
- molSimplify/icons/sge copy.png +0 -0
- molSimplify/icons/sge.png +0 -0
- molSimplify/icons/slurm.png +0 -0
- molSimplify/icons/wft1.png +0 -0
- molSimplify/icons/wft2.png +0 -0
- molSimplify/icons/wft3.png +0 -0
- molSimplify/ml/__init__.py +0 -0
- molSimplify/ml/kernels.py +36 -0
- molSimplify/ml/layers.py +29 -0
- molSimplify/molscontrol/__init__.py +14 -0
- molSimplify/molscontrol/_version.py +521 -0
- molSimplify/molscontrol/clf_tools.py +144 -0
- molSimplify/molscontrol/data/README.md +21 -0
- molSimplify/molscontrol/data/look_and_say.dat +15 -0
- molSimplify/molscontrol/dynamic_classifier.py +514 -0
- molSimplify/molscontrol/io_tools.py +363 -0
- molSimplify/molscontrol/molscontrol.py +49 -0
- molSimplify/molscontrol/terachem/jobscript_control.sh +31 -0
- molSimplify/molscontrol/terachem/terachem_input +22 -0
- molSimplify/python_krr/X_train_TS.csv +535 -0
- molSimplify/python_krr/__init__.py +0 -0
- molSimplify/python_krr/hat2_X_mean_std.csv +3 -0
- molSimplify/python_krr/hat2_feature_names.csv +1 -0
- molSimplify/python_krr/hat2_y_mean_std.csv +2 -0
- molSimplify/python_krr/hat_X_mean_std.csv +6 -0
- molSimplify/python_krr/hat_feature_names.csv +1 -0
- molSimplify/python_krr/hat_krr_X_train.csv +5205 -0
- molSimplify/python_krr/hat_krr_dual_coef.csv +1 -0
- molSimplify/python_krr/hat_y_mean_std.csv +2 -0
- molSimplify/python_krr/sklearn_models.py +34 -0
- molSimplify/python_krr/y_train_TS.csv +535 -0
- molSimplify/python_nn/ANN.py +198 -0
- molSimplify/python_nn/__init__.py +0 -0
- molSimplify/python_nn/clf_analysis_tool.py +125 -0
- molSimplify/python_nn/dictionary_toolbox.py +49 -0
- molSimplify/python_nn/ensemble_test.py +309 -0
- molSimplify/python_nn/hs_center.csv +26 -0
- molSimplify/python_nn/hs_scale.csv +26 -0
- molSimplify/python_nn/ls_center.csv +26 -0
- molSimplify/python_nn/ls_scale.csv +26 -0
- molSimplify/python_nn/ms_hs_b1.csv +50 -0
- molSimplify/python_nn/ms_hs_b2.csv +50 -0
- molSimplify/python_nn/ms_hs_b3.csv +1 -0
- molSimplify/python_nn/ms_hs_w1.csv +50 -0
- molSimplify/python_nn/ms_hs_w2.csv +50 -0
- molSimplify/python_nn/ms_hs_w3.csv +1 -0
- molSimplify/python_nn/ms_ls_b1.csv +50 -0
- molSimplify/python_nn/ms_ls_b2.csv +50 -0
- molSimplify/python_nn/ms_ls_b3.csv +1 -0
- molSimplify/python_nn/ms_ls_w1.csv +50 -0
- molSimplify/python_nn/ms_ls_w2.csv +50 -0
- molSimplify/python_nn/ms_ls_w3.csv +1 -0
- molSimplify/python_nn/ms_slope_b1.csv +50 -0
- molSimplify/python_nn/ms_slope_b2.csv +50 -0
- molSimplify/python_nn/ms_slope_b3.csv +1 -0
- molSimplify/python_nn/ms_slope_w1.csv +50 -0
- molSimplify/python_nn/ms_slope_w2.csv +50 -0
- molSimplify/python_nn/ms_slope_w3.csv +1 -0
- molSimplify/python_nn/ms_split_b1.csv +50 -0
- molSimplify/python_nn/ms_split_b2.csv +50 -0
- molSimplify/python_nn/ms_split_b3.csv +1 -0
- molSimplify/python_nn/ms_split_w1.csv +50 -0
- molSimplify/python_nn/ms_split_w2.csv +50 -0
- molSimplify/python_nn/ms_split_w3.csv +1 -0
- molSimplify/python_nn/slope_center.csv +25 -0
- molSimplify/python_nn/slope_scale.csv +25 -0
- molSimplify/python_nn/split_center.csv +26 -0
- molSimplify/python_nn/split_scale.csv +26 -0
- molSimplify/python_nn/tf_ANN.py +762 -0
- molSimplify/python_nn/train_data.csv +1211 -0
- molSimplify/tf_nn/__init__.py +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_x.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_train_y.csv +2790 -0
- molSimplify/tf_nn/geo_static_clf/geo_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_ii_bl_x.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_bl_y.csv +1577 -0
- molSimplify/tf_nn/geos/hs_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_ii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/hs_iii_bl_x.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_bl_y.csv +1659 -0
- molSimplify/tf_nn/geos/hs_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/hs_iii_model.json +1 -0
- molSimplify/tf_nn/geos/hs_iii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_ii_bl_x.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_bl_y.csv +1374 -0
- molSimplify/tf_nn/geos/ls_ii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_ii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_ii_vars.csv +154 -0
- molSimplify/tf_nn/geos/ls_iii_bl_x.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_bl_y.csv +1364 -0
- molSimplify/tf_nn/geos/ls_iii_model.h5 +0 -0
- molSimplify/tf_nn/geos/ls_iii_model.json +1 -0
- molSimplify/tf_nn/geos/ls_iii_vars.csv +154 -0
- molSimplify/tf_nn/homolumo/gap_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/gap_model.json +1 -0
- molSimplify/tf_nn/homolumo/gap_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/gap_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/gap_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/gap_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/gap_vars.csv +153 -0
- molSimplify/tf_nn/homolumo/homo_model.h5 +0 -0
- molSimplify/tf_nn/homolumo/homo_model.json +126 -0
- molSimplify/tf_nn/homolumo/homo_test_names.csv +175 -0
- molSimplify/tf_nn/homolumo/homo_test_x.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_test_y.csv +176 -0
- molSimplify/tf_nn/homolumo/homo_train_names.csv +699 -0
- molSimplify/tf_nn/homolumo/homo_train_x.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_train_y.csv +700 -0
- molSimplify/tf_nn/homolumo/homo_vars.csv +153 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/homo_empty_vars.csv +155 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_info.json +7 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.h5 +0 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_model.json +1 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_x.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_test_y.csv +144 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_names.csv +513 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_x.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_train_y.csv +514 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_names.csv +143 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_x.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_val_y.csv +58 -0
- molSimplify/tf_nn/oxoandhomo/oxo20_vars.csv +154 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/hat_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_names.csv +419 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_x.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_test_y.csv +420 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_names.csv +1507 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_x.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_train_y.csv +1508 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_x.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_val_y.csv +169 -0
- molSimplify/tf_nn/oxocatalysis/hat_vars.csv +162 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.h5 +0 -0
- molSimplify/tf_nn/oxocatalysis/oxo_model.json +1 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_names.csv +527 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_x.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_test_y.csv +528 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_names.csv +1897 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_x.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_train_y.csv +1898 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_x.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_val_y.csv +212 -0
- molSimplify/tf_nn/oxocatalysis/oxo_vars.csv +162 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/gap_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/gap_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/geo_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hat_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/hat_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/homo_empty_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/homo_var_x.csv +153 -0
- molSimplify/tf_nn/rescaling_data/homo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/hs_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_ii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_mean_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/ls_iii_var_y.csv +3 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/oxo20_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_x.csv +162 -0
- molSimplify/tf_nn/rescaling_data/oxo_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_x.csv +154 -0
- molSimplify/tf_nn/rescaling_data/sc_static_clf_var_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_mean_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_mean_y.csv +1 -0
- molSimplify/tf_nn/rescaling_data/split_var_x.csv +155 -0
- molSimplify/tf_nn/rescaling_data/split_var_y.csv +1 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_model.h5 +0 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_name.csv +1591 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_x.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_train_y.csv +1592 -0
- molSimplify/tf_nn/sc_static_clf/sc_static_clf_vars.csv +154 -0
- molSimplify/tf_nn/split/split_model.h5 +0 -0
- molSimplify/tf_nn/split/split_model.json +1 -0
- molSimplify/tf_nn/split/split_vars.csv +155 -0
- molSimplify/tf_nn/split/split_x.csv +1902 -0
- molSimplify/tf_nn/split/split_y.csv +1902 -0
- molSimplify/tf_nn/split/train_names.csv +1901 -0
- molSimplify/utils/__init__.py +0 -0
- molSimplify/utils/decorators.py +16 -0
- molSimplify/utils/metaclasses.py +12 -0
- molSimplify/utils/tensorflow.py +23 -0
- molSimplify/utils/timer.py +16 -0
- molSimplify-1.7.4.dist-info/LICENSE +674 -0
- molSimplify-1.7.4.dist-info/METADATA +821 -0
- molSimplify-1.7.4.dist-info/RECORD +651 -0
- molSimplify-1.7.4.dist-info/WHEEL +5 -0
- molSimplify-1.7.4.dist-info/entry_points.txt +3 -0
- molSimplify-1.7.4.dist-info/top_level.txt +4 -0
- tests/generateTests.py +122 -0
- tests/helperFuncs.py +658 -0
- tests/informatics/test_MOF_descriptors.py +128 -0
- tests/informatics/test_active_learning.py +113 -0
- tests/informatics/test_coulomb_analyze.py +24 -0
- tests/informatics/test_graph_racs.py +193 -0
- tests/ml/test_kernels.py +20 -0
- tests/ml/test_layers.py +47 -0
- tests/runtest.py +10 -0
- tests/test_Mol2D.py +128 -0
- tests/test_basic_imports.py +62 -0
- tests/test_bidentate.py +25 -0
- tests/test_cli.py +20 -0
- tests/test_distgeom.py +106 -0
- tests/test_example_1.py +29 -0
- tests/test_example_3.py +31 -0
- tests/test_example_5.py +43 -0
- tests/test_example_7.py +28 -0
- tests/test_example_8.py +15 -0
- tests/test_example_tbp.py +15 -0
- tests/test_ff_xtb.py +111 -0
- tests/test_geocheck_oct.py +26 -0
- tests/test_geocheck_one_empty.py +15 -0
- tests/test_geometry.py +44 -0
- tests/test_inparse.py +76 -0
- tests/test_io.py +84 -0
- tests/test_jobgen.py +84 -0
- tests/test_joption_pythonic.py +27 -0
- tests/test_ligand_assign.py +58 -0
- tests/test_ligand_assign_consistent.py +60 -0
- tests/test_ligand_class.py +26 -0
- tests/test_ligand_from_mol_file.py +35 -0
- tests/test_ligands.py +86 -0
- tests/test_mol3D.py +337 -0
- tests/test_molcas_caspt2.py +15 -0
- tests/test_molcas_casscf.py +15 -0
- tests/test_old_ANNs.py +68 -0
- tests/test_orca_ccsdt.py +15 -0
- tests/test_orca_dft.py +15 -0
- tests/test_qcgen.py +50 -0
- tests/test_racs.py +124 -0
- tests/test_rmsd.py +68 -0
- tests/test_structgen_functions.py +198 -0
- tests/test_tetrahedral.py +29 -0
- tests/test_tutorial_10_part_one.py +16 -0
- tests/test_tutorial_10_part_two.py +15 -0
- tests/test_tutorial_2.py +11 -0
- tests/test_tutorial_3.py +15 -0
- tests/test_tutorial_4.py +57 -0
- tests/test_tutorial_6.py +10 -0
- tests/test_tutorial_8.py +29 -0
- tests/test_tutorial_9_part_one.py +15 -0
- tests/test_tutorial_9_part_two.py +15 -0
- tests/test_tutorial_qm9_part_one.py +6 -0
- tests/testresources/refs/racs/generate_references.py +85 -0
- workflows/NandyJACSAu2022/bridge_functionalizer.py +253 -0
- workflows/NandyJACSAu2022/frag_functionalizer.py +242 -0
- workflows/NandyJACSAu2022/fragment_classes.py +586 -0
- workflows/NandyJACSAu2022/macrocycle_synthesis.py +179 -0
|
@@ -0,0 +1,1178 @@
|
|
|
1
|
+
# @file protein3D.py
|
|
2
|
+
# Defines protein3D class and contains useful manipulation/retrieval routines.
|
|
3
|
+
#
|
|
4
|
+
# Written by HJK Group
|
|
5
|
+
#
|
|
6
|
+
# Dept. of Chemical Engineering, MIT
|
|
7
|
+
|
|
8
|
+
# imports
|
|
9
|
+
from molSimplify.Classes.monomer3D import monomer3D
|
|
10
|
+
from molSimplify.Classes.mol3D import mol3D
|
|
11
|
+
from molSimplify.Classes.atom3D import atom3D
|
|
12
|
+
from molSimplify.Classes.helpers import read_atom, makeMol
|
|
13
|
+
from molSimplify.Classes.globalvars import globalvars
|
|
14
|
+
import urllib.request
|
|
15
|
+
import urllib.error
|
|
16
|
+
import requests
|
|
17
|
+
from bs4 import BeautifulSoup
|
|
18
|
+
import pandas as pd
|
|
19
|
+
import subprocess
|
|
20
|
+
import shlex
|
|
21
|
+
import ast
|
|
22
|
+
import time
|
|
23
|
+
from scipy.spatial import ConvexHull
|
|
24
|
+
# from pymol import cmd, stored
|
|
25
|
+
# no GUI support for now
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class protein3D:
|
|
29
|
+
"""Holds information about a protein, used to do manipulations. Reads
|
|
30
|
+
information from structure file (pdb, cif) or is directly built from
|
|
31
|
+
molsimplify.
|
|
32
|
+
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
def __init__(self, pdbCode='undef'):
    """
    Create an empty protein3D container.

    Parameters
    ----------
        pdbCode : str, optional
            PDB code stored on the instance. Default is 'undef'.
    """
    # Counts of monomers, heteromolecules and chains
    self.naas, self.nhetmols, self.nchains = 0, 0, 0
    # Monomers, all atoms, and the atom -> index lookup
    self.aas, self.atoms, self.a_ids = {}, {}, {}
    # Heteromolecules and chains
    self.hetmols, self.chains = {}, {}
    # Missing atoms (dictionary) and missing monomers (list)
    self.missing_atoms, self.missing_aas = {}, []
    # Chain locations with more than one conformation
    self.conf = []
    # R and Rfree values (-1 until they are set)
    self.R, self.Rfree = -1, -1
    # PDB code
    self.pdbCode = pdbCode
    # Holder for metals (False until findMetal fills it)
    self.metals = False
    # Bonds
    self.bonds = {}
    # Validation data: completeness, RSRZ, TwinL and TwinL^2 scores
    self.DataCompleteness, self.RSRZ = 0, 100
    self.TwinL, self.TwinL2 = 0, 0
    # Center of mass, centroid and convex hull
    self.com, self.centroid, self.hull = [], [], []
|
|
82
|
+
|
|
83
|
+
def setAAs(self, aas):
    """
    Replace the monomers held by this protein3D instance.

    The monomer count ``naas`` is refreshed from the new dictionary.

    Parameters
    ----------
        aas : dictionary
            Keyed by chain and location
            Valued by monomer3D monomers (amino acids or nucleotides)
    """
    self.aas = aas
    # One entry per (chain, location) key.
    self.naas = len(self.aas)
|
|
95
|
+
|
|
96
|
+
def setAtoms(self, atoms):
    """
    Replace the atom dictionary of this protein3D instance.

    Parameters
    ----------
        atoms : dictionary
            Keyed by atom index
            Valued by atom3D atom that has that index
    """
    setattr(self, 'atoms', atoms)
|
|
107
|
+
|
|
108
|
+
def setIndices(self, a_ids):
    """
    Replace the atom -> index lookup of this protein3D instance.

    Parameters
    ----------
        a_ids : dictionary
            Keyed by atom3D atom
            Valued by its index
    """
    setattr(self, 'a_ids', a_ids)
|
|
118
|
+
|
|
119
|
+
def setHetmols(self, hetmols):
    """
    Replace the heteromolecules held by this protein3D instance.

    The heteromolecule count ``nhetmols`` is refreshed from the new
    dictionary's keys.

    Parameters
    ----------
        hetmols : dictionary
            Keyed by chain and location
            Valued by mol3D heteromolecules
    """
    self.hetmols = hetmols
    locations = self.hetmols.keys()
    self.nhetmols = len(locations)
|
|
131
|
+
|
|
132
|
+
def setChains(self, chains):
    """
    Replace the chains held by this protein3D instance.

    The chain count ``nchains`` is refreshed from the new dictionary's keys.

    Parameters
    ----------
        chains : dictionary
            Keyed by desired chain IDs.
            Valued by the list of molecules in the chain.
    """
    self.chains = chains
    chain_ids = self.chains.keys()
    self.nchains = len(chain_ids)
|
|
144
|
+
|
|
145
|
+
def setMissingAtoms(self, missing_atoms):
    """
    Replace the missing-atom dictionary of this protein3D instance.

    Parameters
    ----------
        missing_atoms : dictionary
            Keyed by amino acid residues / nucleotides of origin
            Valued by missing atoms
    """
    setattr(self, 'missing_atoms', missing_atoms)
|
|
156
|
+
|
|
157
|
+
def setMissingAAs(self, missing_aas):
    """
    Replace the list of missing amino acids of this protein3D instance.

    Parameters
    ----------
        missing_aas : list
            List of missing amino acids.
    """
    setattr(self, 'missing_aas', missing_aas)
|
|
167
|
+
|
|
168
|
+
def setConf(self, conf):
    """
    Replace the list of possible conformations of this protein3D instance.

    Parameters
    ----------
        conf : list
            List of possible conformations for applicable amino acids.
    """
    setattr(self, 'conf', conf)
|
|
178
|
+
|
|
179
|
+
def autoChooseConf(self):
    """
    Automatically choose the conformation of a protein3D class
    instance based first on what the greatest occupancy level is and then
    the first conformation in the alphabet with all else equal.

    For every location listed in ``self.conf`` the conformers that are not
    already part of the chain are discarded: their atoms that are not shared
    with a remaining conformer are stripped, and the conformer is removed
    from ``self.aas`` / ``self.hetmols``. A location with a single conformer
    has that conformer inserted into its chain. ``self.conf`` is emptied at
    the end.
    """
    for c in self.conf:
        lst = self.aas[c] if c in self.aas else self.hetmols[c]
        if len(lst) == 1:
            self.chains[c[0]].insert(c[1] - 1, lst[0])
            continue
        # Iterate over a snapshot: rejected conformers are removed from
        # ``lst`` below, and removing from a list while looping over it
        # silently skips the element that follows each removal.
        for li in list(lst):
            if li in self.chains[c[0]]:
                continue
            # Fresh id list per conformer, so this does not depend on
            # stripAtoms emptying the list it is given.
            c_ids = []
            for j in li.atoms:
                # Atoms shared with another remaining conformer are kept.
                if any(m != li and j in m.atoms for m in lst):
                    continue
                if isinstance(j, tuple):
                    # monomer atoms are stored as (index, atom) tuples
                    c_ids.append(j[0])
                else:
                    c_ids.append(self.getIndex(j))
            self.stripAtoms(c_ids)
            # stripAtoms may already have dropped an emptied conformer (or
            # the whole key), hence the membership checks.
            if c in self.aas and li in self.aas[c]:
                self.aas[c].remove(li)
            elif c in self.hetmols and li in self.hetmols[c]:
                self.hetmols[c].remove(li)
    self.setConf([])
|
|
213
|
+
|
|
214
|
+
def setR(self, R):
    """
    Store a new R value on this protein3D instance.

    Parameters
    ----------
        R : float
            The desired new R value.
    """
    setattr(self, 'R', R)
|
|
224
|
+
|
|
225
|
+
def setRfree(self, Rfree):
    """
    Store a new Rfree value on this protein3D instance.

    Parameters
    ----------
        Rfree : float
            The desired new Rfree value.
    """
    setattr(self, 'Rfree', Rfree)
|
|
235
|
+
|
|
236
|
+
def setRSRZ(self, RSRZ):
    """
    Store a new RSRZ score on this protein3D instance.

    Parameters
    ----------
        RSRZ : float
            The desired new RSRZ score.
    """
    setattr(self, 'RSRZ', RSRZ)
|
|
246
|
+
|
|
247
|
+
def getMissingAtoms(self):
    """
    Return the values of the missing-atom dictionary of a protein3D class.

    Returns
    -------
        missing : dict values view
            One entry per key of ``self.missing_atoms``.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1MH1') # Fetch a PDB
    fetched: 1MH1
    >>> missing_atoms = pdb_system.getMissingAtoms()

    List atoms in the first set of missing_atoms
    >>> [atom.sym for atom in list(missing_atoms)[0]]
    ['C', 'C', 'C', 'C', 'C', 'C', 'O']
    """
    missing = self.missing_atoms
    return missing.values()
|
|
263
|
+
|
|
264
|
+
def getMissingAAs(self):
    """
    Return the list of missing amino acid residues of a protein3D class.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1MH1') # Fetch a PDB
    fetched: 1MH1
    >>> pdb_system.getMissingAAs() # This gives a list of monomer3D objects
    [monomer3D(VAL, id=182), monomer3D(LYS, id=183), monomer3D(LYS, id=184)]
    """
    return getattr(self, 'missing_aas')
|
|
277
|
+
|
|
278
|
+
def countAAs(self):
    """
    Return the stored number of amino acid residues (``naas``) of a
    protein3D class.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB
    fetched: 1os7
    >>> pdb_system.countAAs() # This return the number of AAs in the PDB for all the chains.
    1121
    """
    return getattr(self, 'naas')
|
|
291
|
+
|
|
292
|
+
def findAtom(self, sym="X", aa=True):
    """
    Collect the indices of atoms with a given element symbol, looking either
    in the amino acids or in the heteromolecules.

    Parameters
    ----------
        sym : str
            element symbol, default as X.
        aa : boolean
            True if we want atoms contained in amino acids
            False if we want atoms contained in heteromolecules

    Returns
    -------
        inds: list
            a list of atom indices with the specified symbol.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB
    fetched: 1os7
    >>> pdb_system.findAtom(sym="S", aa=True) # Returns indices of sulphur atoms present in amino acids
    [2166, 4442, 6733, 9041]
    >>> pdb_system.findAtom(sym="S", aa=False) # Returns indices of sulphur atoms present in heteromolecules
    [9164, 9182, 9200]
    """
    groups = self.aas.values() if aa else self.hetmols.values()
    inds = []
    for conformers in groups:
        for mol in conformers:
            for entry in mol.atoms:
                if type(entry) is tuple:
                    # (index, atom) pair: the index is stored with the atom
                    idx, atom = entry[0], entry[1]
                else:
                    # bare atom: look its index up
                    idx, atom = self.getIndex(entry), entry
                if atom.symbol() == sym:
                    inds.append(idx)
    return inds
|
|
336
|
+
|
|
337
|
+
def findAA(self, three_lc="XAA"):
    """
    Find amino acids with a specific three-letter code.

    Only the first monomer stored at each location is inspected.

    Parameters
    ----------
        three_lc: str
            three-letter code, default as XAA.

    Returns
    -------
        inds: set
            a set of (chain, id) pairs of the matching amino acids.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB
    fetched: 1os7

    Return a set of pairs where each pair is a combination of the chain name and
    the index of the amino acid specified (in this case, 'MET')
    >>> aa_set = pdb_system.findAA(three_lc = 'MET')
    >>> sorted(aa_set) # Sorting for reproducible order in doctest
    [('A', 268), ('B', 268), ('C', 268), ('D', 268)]
    """
    return {
        (group[0].chain, group[0].id)
        for group in self.aas.values()
        if group[0].three_lc == three_lc
    }
|
|
368
|
+
|
|
369
|
+
def getChain(self, chain_id):
    """
    Takes a chain of interest and turns it into its own protein3D class instance.

    The new instance shares the monomer, heteromolecule and atom objects with
    this one; only the containers are rebuilt.

    Parameters
    ----------
        chain_id : string
            The letter name of the chain of interest

    Returns
    -------
        p : protein3D
            A protein3D instance consisting of just the chain of interest

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB
    fetched: 1os7
    >>> pdb_system.getChain('A') # doctest: +SKIP
    """
    p = protein3D()
    p.setPDBCode(self.pdbCode)
    p.setChains({chain_id: self.chains[chain_id]})
    p.setR(self.R)
    p.setRfree(self.Rfree)

    p.setMissingAAs([aa for aa in self.missing_aas if aa.chain == chain_id])
    p.setAAs({key: mols for key, mols in self.aas.items()
              if key[0] == chain_id})
    p.setMissingAtoms({key: gone for key, gone in self.missing_atoms.items()
                       if key[0] == chain_id})

    # Map every heteromolecule, including alternate conformers, to its
    # (chain, location) key. Mapping only the first conformer made the
    # lookup below fail with KeyError for atoms of the other conformers.
    het_keys = {mol: key
                for key, mols in self.hetmols.items()
                for mol in mols}
    atoms = {}
    a_ids = {}
    hets = {}
    for a_id, atom in self.atoms.items():
        mol = self.getMolecule(a_id)
        if type(mol) is monomer3D:
            if mol.chain != chain_id:
                continue
        else:
            het = het_keys.get(mol)
            if het is None:
                # No owning molecule could be identified; skip the atom
                # instead of failing on the dictionary lookup.
                print("warning: atom %s is not part of any known molecule; skipped." % a_id)
                continue
            if het[0] != chain_id:
                continue
            hets[het] = self.hetmols[het]
        atoms[a_id] = atom
        a_ids[atom] = a_id

    p.setHetmols(hets)
    p.setAtoms(atoms)

    # ``a_ids`` is keyed by exactly the atoms kept above, which gives O(1)
    # membership tests instead of scanning p.atoms.values() for each bond.
    bonds = {}
    for a, partners in self.bonds.items():
        if a in a_ids:
            bonds[a] = {b for b in partners if b in a_ids}
    p.setBonds(bonds)

    p.setIndices(a_ids)
    p.setConf([conf for conf in self.conf if conf[0] == chain_id])

    return p
|
|
451
|
+
|
|
452
|
+
def getMolecule(self, a_id, aas_only=False):
    """
    Finds the molecule that the atom is contained in.

    Monomers are searched first, then the missing-atom dictionary, and
    finally (unless ``aas_only`` is set) the heteromolecules.

    Parameters
    ----------
        a_id : int
            The index of the desired atom whose molecule we want to find
        aas_only : boolean
            True if we want to find atoms contained in amino acids only.
            False if we want atoms contained in all molecules. Default is False.

    Returns
    -------
        mol : monomer3D or mol3D
            The amino acid residue, nucleotide, or heteromolecule containing the atom.
            None if no molecule contains the atom.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB
    fetched: 1os7

    This returns an molSimplify.Classes.monomer3D object indicating that the atom is part of an amino acid or nucleotide:
    >>> pdb_system.getMolecule(a_id=2166)
    monomer3D(MET, id=268)

    This returns a mol3D object indicating that the atom is part of a molecule that is not an amino acid or nucleotide
    >>> pdb_system.getMolecule(a_id=9164)
    mol3D(S1O3N1C2)
    >>> pdb_system.getMolecule(a_id=9164).name # This prints the name of the molecule, in this case, it is 'TAU'
    'TAU'
    """
    # Monomers store their atoms as (index, atom) pairs.
    for conformers in self.aas.values():
        for residue in conformers:
            if (a_id, self.atoms[a_id]) in residue.atoms:
                return residue
    # Incomplete monomers recorded in the missing-atom dictionary.
    for incomplete, gone in self.missing_atoms.items():
        if (a_id, self.atoms[a_id]) in gone:
            return incomplete
    if aas_only:
        return None
    # Heteromolecules store bare atoms.
    for conformers in self.hetmols.values():
        for het in conformers:
            if self.atoms[a_id] in het.atoms:
                return het
    return None  # something is wrong
|
|
498
|
+
|
|
499
|
+
def stripAtoms(self, atoms_stripped):
    """
    Removes certain atoms from the protein3D class instance.

    Each listed atom is removed from the molecule that contains it, from the
    bond graph, and from the atom / index dictionaries. A molecule left
    without atoms is removed from ``self.aas`` / ``self.hetmols``, and a
    location left without molecules is deleted. The list passed in is not
    modified.

    Parameters
    ----------
        atoms_stripped : list
            List of atom3D indices that should be removed

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB
    fetched: 1os7
    >>> pdb_system.stripAtoms([2166, 4442, 6733, 2165]) # This removes the list of atoms with
    >>>                                                 # indices listed in the code
    """
    atoms = self.atoms
    a_ids = self.a_ids
    # Private set: O(1) membership tests, and the caller's list stays intact.
    pending = set(atoms_stripped)

    def drop_bonds(atom):
        """Remove ``atom`` and every reference to it from the bond graph."""
        if atom not in self.bonds:
            return
        for partner in self.bonds[atom]:
            if partner in self.bonds and atom in self.bonds[partner]:
                # Replace rather than mutate the partner's container.
                trimmed = self.bonds[partner].copy()
                trimmed.remove(atom)
                self.bonds[partner] = trimmed
        del self.bonds[atom]

    for tup in list(self.aas.keys()) + list(self.hetmols.keys()):
        if tup in self.aas:
            mol_set = self.aas[tup].copy()
        else:
            mol_set = self.hetmols[tup].copy()
        for elt in mol_set:
            # Snapshot of the atom list: entries are removed from
            # ``elt.atoms`` below, and removing while iterating the same
            # list skips the entry after each removal.
            for a in list(elt.atoms):
                # monomers hold (index, atom) tuples, heteromolecules bare atoms
                atom = a[1] if isinstance(a, tuple) else a
                if atom not in a_ids:
                    continue
                a_id = self.getIndex(atom)
                if a_id not in pending:
                    continue
                if (a_id, atom) in elt.atoms:
                    elt.atoms.remove((a_id, atom))
                    if atom in elt.c:
                        elt.c.remove(atom)
                    elif atom in elt.n:
                        elt.n.remove(atom)
                elif atom in elt.atoms:
                    elt.atoms.remove(atom)
                pending.discard(a_id)
                drop_bonds(atom)
                del atoms[a_id]
                del a_ids[atom]
                if len(elt.atoms) == 0:
                    if tup in self.aas:
                        self.aas[tup].remove(elt)
                        if len(self.aas[tup]) == 0:
                            del self.aas[tup]
                    else:
                        self.hetmols[tup].remove(elt)
                        if len(self.hetmols[tup]) == 0:
                            del self.hetmols[tup]

    # Indices that did not turn up in any molecule.
    for a_id in pending:
        if a_id not in atoms:
            continue
        atom = atoms[a_id]
        drop_bonds(atom)
        del atoms[a_id]
        del a_ids[atom]
    self.setAtoms(atoms)
    self.setIndices(a_ids)
|
|
579
|
+
|
|
580
|
+
def stripHetMol(self, hetmol):
    """
    Removes all heteroatoms part of the specified heteromolecule from
    the protein3D class instance.

    Parameters
    ----------
        hetmol : str
            String representing the name of a heteromolecule whose
            heteroatoms should be stripped from the protein3D class instance

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('3I40') # Fetch a PDB
    fetched: 3I40
    >>> pdb_system.stripHetMol('HOH')
    """
    # Shallow copy, so locations can be deleted from self.hetmols while
    # looping.
    snapshot = self.hetmols.copy()
    for key, mols in snapshot.items():
        if key not in self.hetmols:
            continue
        for mol in mols:
            if mol.name != hetmol:
                continue
            self.stripAtoms([self.a_ids[atom] for atom in mol.atoms])
            # RM 2023/04/22: probably unnecessary, as stripAtoms takes care
            # of deleting the hetmol; a missing key is ignored.
            self.hetmols.pop(key, None)
|
|
612
|
+
|
|
613
|
+
def findMetal(self, transition_metals_only=True):
    """
    Find metal(s) in a protein3D class.

    The result is cached in ``self.metals``. The cache is reused only when it
    was built with the same ``transition_metals_only`` setting, so switching
    the flag between calls returns a freshly computed list.

    Parameters
    ----------
        transition_metals_only : bool, optional
            Only find transition metals. Default is true.

    Returns
    -------
        metal_list : list
            List of indices of metal atoms in protein3D.

    Examples
    --------
    >>> pdb_system = protein3D()
    >>> pdb_system.fetch_pdb('1os7')
    fetched: 1os7
    >>> pdb_system.findMetal()
    [9160, 9178, 9196, 9214]
    """
    cached_for = getattr(self, '_metals_transition_only', None)
    if not self.metals or cached_for != transition_metals_only:
        metal_list = []
        for li in self.hetmols.values():  # no metals in AAs
            for m in li:
                for a in m.atoms:
                    if not a.ismetal(transition_metals_only=transition_metals_only):
                        continue
                    # keep fully occupied metals, or ones that have bonds
                    if a.occup == 1 or a in self.bonds:
                        metal_list.append(self.getIndex(a))
        self.metals = metal_list
        # Remember which setting produced the cached list.
        self._metals_transition_only = transition_metals_only
    return self.metals
|
|
645
|
+
|
|
646
|
+
def freezeatom(self, atomIdx):
    """
    Mark one atom as frozen by setting its ``frozen`` attribute to True.

    Parameters
    ----------
        atomIdx : int
            Index for atom to be frozen.
    """
    target = self.atoms[atomIdx]
    target.frozen = True
|
|
657
|
+
|
|
658
|
+
def freezeatoms(self, Alist):
    """
    Freeze a set of atoms given their indices, visiting the indices from
    largest to smallest.

    Parameters
    ----------
        Alist : list
            List of indices for atom3D instances to freeze.
    """
    descending = sorted(Alist, reverse=True)
    for idx in descending:
        self.freezeatom(idx)
|
|
671
|
+
|
|
672
|
+
def getAtom(self, idx):
    """
    Look up the atom stored under a given index.

    Parameters
    ----------
        idx : int
            Index of desired atom.

    Returns
    -------
        atom : atom3D
            atom3D class for element at given index.

    """
    atom = self.atoms[idx]
    return atom
|
|
688
|
+
|
|
689
|
+
def getIndex(self, atom):
    """
    Get index of a given atom.

    The atom -> index dictionary is consulted first; if the atom is not in
    it, the atom dictionary is searched by value.

    Parameters
    ----------
        atom : atom3D
            atom3D class for element at given index.

    Returns
    -------
        idx : int
            Index of desired atom.

    """
    if hasattr(self, 'a_ids') and atom in self.a_ids.keys():
        return self.a_ids[atom]
    # Fallback: positional match between the keys and values of self.atoms.
    indices = list(self.atoms.keys())
    position = list(self.atoms.values()).index(atom)
    return indices[position]
|
|
709
|
+
|
|
710
|
+
def getBoundMols(self, h_id, aas_only=False):
    """
    Get a list of molecules bound to a heteroatom, usually a metal.

    Parameters
    ----------
        h_id : int
            The index of the desired (hetero)atom origin
        aas_only : boolean
            Whether or not to only consider amino acids, defaults False

    Returns
    -------
        bound_mols : list
            List of monomer3D and/or mol3D instances of molecules bound to hetatm.
            None is returned when the origin atom has no entry in the bond
            dictionary; an empty list when the protein holds no atoms.
    """
    bound_mols = []
    if not self.atoms:
        return bound_mols
    # The origin atom and its partners do not change while scanning, so
    # resolve them once instead of on every iteration.
    origin = self.atoms[h_id]
    if origin not in self.bonds:
        return None
    partners = self.bonds[origin]
    for b_id, b in self.atoms.items():
        if b not in partners:
            continue
        # getMolecule scans every molecule; call it once per bonded atom.
        mol = self.getMolecule(b_id, aas_only)
        if mol is not None:
            bound_mols.append(mol)
    return bound_mols
|
|
735
|
+
|
|
736
|
+
def readfrompdb(self, text):
    """
    Read PDB into a protein3D class instance.

    The R / Rfree values, the missing residues and missing atoms tables, the
    ATOM / HETATM records and the CONECT records are parsed, and the results
    are stored through the instance's setters. For files with several
    MODEL ... ENDMDL sections only the last model (plus whatever follows the
    final ENDMDL) is read.

    Parameters
    ----------
        text : str
            String of path to PDB file. Path may be local or global.
            May also be the text of a PDB file from the internet.
    """
    # A '.pdb' substring marks ``text`` as a filename. Anything else is taken
    # to be file content whose line separator is the two-character sequence
    # backslash + 'n', which is what fetch_pdb passes in (str() of bytes).
    if '.pdb' in text:
        self.pdbfile = text
        fname = text.split('.pdb')[0]
        with open(fname + '.pdb', 'r') as f:
            text = f.read()
        enter = '\n'
    else:
        enter = "\\n"

    def split_remark_table(full_text, header):
        """Return (rows, remaining text) for the REMARK table after ``header``."""
        pieces = full_text.split(header)
        # What precedes the header on its own line is the row prefix.
        prefix = pieces[0].split(enter)[-1]
        rows = pieces[-1].split(prefix)
        # The last chunk holds the final row followed by the rest of the file.
        tail = rows[-1]
        last_row = tail.split(enter)[0]
        remaining = tail.replace(last_row, '')
        return [last_row if row == tail else row for row in rows], remaining

    # class attributes
    aas = {}
    hetmols = {}
    atoms = {}
    a_ids = {}
    chains = {}
    missing_atoms = {}
    missing_aas = []
    conf = []
    bonds = {}

    # get R and Rfree values (text is full file)
    # NOTE(review): the marker strings here and below are reproduced exactly
    # as found; PDB REMARK lines are column formatted, so confirm that their
    # spacing matches real files.
    R = -100
    Rfree = 100
    for marker, free_pos in (("R VALUE (WORKING SET)", 8),
                             ("R VALUE (WORKING SET, NO CUTOFF)", 10)):
        if marker in text:
            fields = text.split(marker)[-1].split()
            if fields[1] != 'NULL':
                R = float(fields[1])
            if fields[free_pos] != 'NULL':
                Rfree = float(fields[free_pos])
            break

    # start getting missing amino acids
    if "M RES C SSSEQI" in text:
        rows, text = split_remark_table(text, "M RES C SSSEQI")
        for row in rows:
            sp = row.split()
            if len(sp) > 2:
                res_num = int(sp[2])
                # Ignoring expression tags which are negative residues
                if res_num > 0:
                    missing_aas.append(monomer3D(sp[0], sp[1], sp[2]))

    # start getting missing atoms
    if "M RES CSSEQI ATOMS" in text:
        rows, text = split_remark_table(text, "M RES CSSEQI ATOMS")
        for row in rows:
            sp = row.split()
            if len(sp) > 2:
                key = (sp[1], sp[2])
                missing_atoms[key] = []
                for atom in sp[3:]:
                    if atom != enter and atom[0] in ['C', 'N', 'O', 'H']:
                        missing_atoms[key].append(
                            atom3D(Sym=atom[0], greek=atom))

    # start getting amino acids, nucleotides and heteroatoms
    pa_dict = {'AltLoc': ""}
    if "ENDMDL" in text:
        # Keep the last model and the trailer after the final ENDMDL. The
        # split result has to be kept: indexing the unsplit string would
        # reduce the whole file to its last two characters.
        models = text.split("ENDMDL")
        text = models[-2] + models[-1]
    records = text.split(enter)[1:]
    aa_names = None  # filled on the first ATOM/HETATM record
    for line in records:
        l_type = line[:6]
        if "ATOM" in l_type or "HETATM" in l_type:
            line = line.replace("\\'", "\'")
            a_dict = read_atom(line)
            if aa_names is None:
                aa_names = globalvars().getAllAAs()
            if a_dict['ResName'] in aa_names or "ATOM" in l_type:
                # have an amino acid or biomolecule monomer
                a, aas, conf, chains, pa_dict, bonds = makeMol(a_dict, aas, conf, chains, pa_dict, bonds)
            else:  # have a normal heteromolecule
                a, hetmols, conf, chains, pa_dict, bonds = makeMol(a_dict, hetmols, conf, chains, pa_dict, bonds, False)
            atoms[a_dict['SerialNum']] = a
            a_ids[a] = a_dict['SerialNum']

        elif "CONECT" in l_type:  # get extra connections
            fields = line[6:]  # remove type
            li = [fields[k:k + 5] for k in range(0, len(fields), 5)]
            src = int(li[0])
            origin = atoms.get(src)
            if origin is None:
                # The record refers to an atom that was not read.
                continue
            bonds.setdefault(origin, set())
            for field in li[1:]:
                try:
                    partner = atoms[int(field)]
                except (ValueError, KeyError):
                    # blank / non-numeric field, or an atom that was not read
                    continue
                bonds[origin].add(partner)
                if origin.loc != '':
                    # Copy the bond to a neighbouring serial number that
                    # carries the same atom label.
                    for j in (1, -1):
                        neighbor = atoms.get(src + j)
                        if neighbor is None or neighbor.greek != origin.greek:
                            continue
                        bonds.setdefault(neighbor, set()).add(partner)
                        bonds.setdefault(partner, set()).add(neighbor)

    # deal with conformations in chains
    for i in conf:
        if i in aas.keys():
            c = aas[i]
        else:
            c = hetmols[i]
        for j in range(len(c)):
            # pick chain with higher occupancy or the A chain if tie
            if type(c[j]) == mol3D:
                # NOTE(review): ``full`` is reset for every atom, so only the
                # last atom decides the outcome; kept as found - confirm
                # whether all atoms were meant to be checked.
                for a in c[j].atoms:
                    full = True
                    if a.occup <= 1/len(c):
                        full = False
                if full:
                    chains[i[0]].append(c[j])
                elif c[j].atoms[0].occup*100 == 100//len(c) and j == 0:
                    chains[i[0]].append(c[j])
            elif c[j].occup > 1/len(c):
                chains[i[0]].append(c[j])
            elif c[j].occup*100 == 100//len(c) and j == 0:
                chains[i[0]].append(c[j])
    self.setChains(chains)
    self.setAAs(aas)
    self.setAtoms(atoms)
    self.setIndices(a_ids)
    self.setHetmols(hetmols)
    self.setMissingAtoms(missing_atoms)
    self.setMissingAAs(missing_aas)
    self.setConf(conf)
    self.setR(R)
    self.setRfree(Rfree)
    self.setBonds(bonds)
|
|
916
|
+
|
|
917
|
+
def fetch_pdb(self, pdbCode):
    """
    API query to fetch a pdb and write it as a protein3D class instance

    The file is downloaded from files.rcsb.org, parsed with readfrompdb and
    the PDB code is stored on the instance. Failures are reported by printed
    messages, not by exceptions.

    Parameters
    ----------
        pdbCode : str
            Code for protein, e.g. 1os7
    """
    remoteCode = pdbCode.upper()
    url = 'https://files.rcsb.org/view/' + remoteCode + '.pdb'
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(url) as response:
            data = response.read()
    except urllib.error.URLError:
        print("warning: %s not found.\n" % pdbCode)
        return
    try:
        # str() of the bytes payload is intentional: readfrompdb splits
        # non-filename input on the literal backslash + 'n' sequence that
        # this representation contains.
        self.readfrompdb(str(data))
        self.setPDBCode(pdbCode)
        print("fetched: %s" % (pdbCode))
    except IOError:
        print('aborted')
    else:
        if len(data) == 0:
            print("warning: %s not valid.\n" % pdbCode)
|
|
943
|
+
|
|
944
|
+
def setBonds(self, bonds):
    """
    Store the bonded atoms of the protein.

    This is effectively the molecular graph.

    Parameters
    ----------
        bonds : dictionary
            Keyed by atom3D atoms in the protein
            Valued by a set consisting of bonded atoms
    """
    setattr(self, 'bonds', bonds)
|
|
957
|
+
|
|
958
|
+
def readMetaData(self):
    """
    API query to fetch XML data from a pdb and add its useful attributes
    to a protein3D class.

    The PDB code is taken from ``self.pdbCode``; the method has no
    parameters. The wwPDB validation report is downloaded and the
    ``DataCompleteness``, ``percent-RSRZ-outliers``, ``TwinL`` and
    ``TwinL2`` attributes of its first ``Entry`` element are stored through
    the matching setters. When an attribute is absent a warning is printed
    and a fallback value is stored instead (0, 100, 0 and 0 respectively).

    If the report cannot be downloaded (connection failure or HTTP error
    status) a warning is printed and nothing is set.
    """
    pdbCode = self.pdbCode
    start = 'https://files.rcsb.org/pub/pdb/validation_reports/' + pdbCode[1] + pdbCode[2]
    link = start + '/' + pdbCode + '/' + pdbCode + '_validation.xml'
    try:
        xml_doc = requests.get(link)
        # requests does not raise on 4xx/5xx by itself; without this an
        # unknown code would reach the parser and fail with an IndexError.
        xml_doc.raise_for_status()
    except (requests.exceptions.RequestException, urllib.error.URLError):
        # requests reports failures as RequestException; URLError is kept
        # only because the previous implementation named it.
        print("warning: %s not found.\n" % pdbCode)
        return

    try:
        # BeautifulSoup parses the XML document using the lxml XML reader.
        soup = BeautifulSoup(xml_doc.content, 'lxml-xml')
        body = soup.find_all('wwPDB-validation-information')
        entry = body[0].find_all("Entry")
        attrs = entry[0].attrs

        # (XML attribute, setter, fallback value, warning when missing)
        fields = (
            ("DataCompleteness", self.setDataCompleteness, 0,
             "warning: %s has no DataCompleteness."),
            ("percent-RSRZ-outliers", self.setRSRZ, 100,
             "warning: %s has no RSRZ.\n"),
            ("TwinL", self.setTwinL, 0,
             "warning: %s has no TwinL."),
            ("TwinL2", self.setTwinL2, 0,
             "warning: %s has no TwinL2."),
        )
        for key, setter, fallback, message in fields:
            if key in attrs:
                setter(float(attrs[key]))
            else:
                print(message % pdbCode)
                setter(fallback)
    except IOError:
        print('aborted')
|
|
1011
|
+
|
|
1012
|
+
def setDataCompleteness(self, DataCompleteness):
    """
    Store the DataCompleteness value on this protein3D instance.

    Parameters
    ----------
        DataCompleteness : float
            The desired new DataCompleteness value.
    """
    self.DataCompleteness = DataCompleteness
|
|
1022
|
+
|
|
1023
|
+
def setTwinL(self, TwinL):
    """
    Store the TwinL score on this protein3D instance.

    Parameters
    ----------
        TwinL : float
            The desired new TwinL score.
    """
    self.TwinL = TwinL
|
|
1033
|
+
|
|
1034
|
+
def setTwinL2(self, TwinL2):
    """
    Store the TwinL squared score on this protein3D instance.

    Parameters
    ----------
        TwinL2 : float
            The desired new TwinL squared score.
    """
    self.TwinL2 = TwinL2
|
|
1044
|
+
|
|
1045
|
+
def setEDIAScores(self):
    """
    Sets the EDIA score of the atoms of a protein3D class.

    The PDB code is taken from ``self.pdbCode``; the method has no
    parameters. A job is submitted to the proteins.plus EDIA REST endpoint
    with ``curl``, the job location is polled every few seconds for as long
    as the reply carries status code 202, and the CSV referenced by the
    final reply's ``atom_scores`` entry is read with pandas. Each row's
    ``EDIA`` value is stored on the atom whose key in ``self.atoms`` equals
    the row's ``Infile id``.
    """
    import json  # local import: only this method parses JSON replies

    def _curl_json(curl_args):
        # Run curl with the given arguments and parse its stdout as JSON.
        # communicate() alone both drains the pipes and waits for exit;
        # calling wait() first can deadlock once a pipe buffer fills up.
        proc = subprocess.Popen(['curl'] + curl_args,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, _ = proc.communicate()
        return json.loads(out.decode("UTF-8"))

    # Build the argument list directly so the PDB code never passes through
    # shell-style quoting.
    payload = json.dumps({"edia": {"pdbCode": self.pdbCode}})
    job = _curl_json(['-d', payload,
                      '-H', 'Accept: application/json',
                      '-H', 'Content-Type: application/json',
                      '-X', 'POST', 'https://proteins.plus/api/edia_rest',
                      '-k'])

    status_args = ['-k', job['location']]
    status = _curl_json(status_args)
    t = 5  # can change depending on how frequently to loop
    while status["status_code"] == 202:
        time.sleep(t)
        status = _curl_json(status_args)

    link = status["atom_scores"]
    df = pd.read_csv(link, on_bad_lines='skip')

    # All ids present in the score table, for constant-time membership tests.
    infile_ids = set(df["Infile id"].tolist())

    for _, row in df.iterrows():
        EDIA = row["EDIA"]
        index = row["Infile id"]
        # NOTE(review): indentation was not visible in the reviewed text;
        # this message is assumed to pair with the atom-index lookup --
        # confirm against the original file.
        if index not in self.atoms:
            print("OXT is missing")
            continue
        a = self.atoms[index]
        a.setEDIA(EDIA)
        # Occupancy below 1 means more than one conformation: if the next
        # id has no row of its own, copy this score onto a neighbouring atom.
        if a.occup < 1 and (index + 1) not in infile_ids:
            if index + 1 in self.atoms:
                self.atoms[index + 1].setEDIA(EDIA)
            elif index - 1 in self.atoms:
                self.atoms[index - 1].setEDIA(EDIA)
|
|
1097
|
+
|
|
1098
|
+
def setPDBCode(self, pdbCode):
    """
    Store the 4-letter PDB code on this protein3D class instance.

    Parameters
    ----------
        pdbCode : string
            Desired 4-letter PDB code
    """
    # Stored exactly as given; no case change or validation is applied.
    self.pdbCode = pdbCode
|
|
1108
|
+
|
|
1109
|
+
def centermass(self):
    """Compute the mass-weighted centre of the protein's atoms.

    The result is stored in ``self.com`` as a list ``[x, y, z]``. When the
    protein has no atoms an error message is printed and ``self.com`` is
    set to ``False``. Nothing is returned.
    """
    if not self.atoms:
        print(
            'ERROR: Center of mass calculation failed. Structure will be inaccurate.\n')
        self.com = False
        return

    weighted_sum = [0, 0, 0]  # mass-weighted coordinate totals (X, Y, Z)
    total_mass = 0
    for atom in self.atoms.values():
        position = atom.coords()
        for axis in range(3):
            weighted_sum[axis] += position[axis] * atom.mass
        total_mass += atom.mass

    # normalize by the total mass
    self.com = [component / total_mass for component in weighted_sum]
|
|
1134
|
+
|
|
1135
|
+
def setCentroid(self):
    """Compute the centroid (unweighted mean position) of the protein's atoms.

    The result is stored in ``self.centroid`` as a list ``[x, y, z]``. When
    the protein has no atoms an error message is printed and
    ``self.centroid`` is set to ``False``. Nothing is returned.
    """
    if not self.atoms:
        print(
            'ERROR: Centroid calculation failed. Structure will be inaccurate.\n')
        self.centroid = False
        return

    coordinate_sum = [0, 0, 0]  # coordinate totals (X, Y, Z)
    for atom in self.atoms.values():
        position = atom.coords()
        for axis in range(3):
            coordinate_sum[axis] += position[axis]

    # normalize by the number of atoms
    atom_count = len(self.atoms)
    self.centroid = [total / atom_count for total in coordinate_sum]
|
|
1158
|
+
|
|
1159
|
+
def convexhull(self):
    """
    Computes convex hull of protein.

    The coordinates of every atom in ``self.atoms`` are passed to
    ``ConvexHull`` and the resulting object is stored in ``self.hull``.
    When the protein has no atoms an error message is printed and
    ``self.hull`` is set to ``False``. Nothing is returned.
    """
    if not self.atoms:
        print(
            'ERROR: Convex hull calculation failed. Structure will be inaccurate.\n')
        self.hull = False
        return

    # one coordinate triple per atom in the protein
    coordinates = [atom.coords() for atom in self.atoms.values()]
    self.hull = ConvexHull(coordinates)
|