py2ls 0.2.4.5__py3-none-any.whl → 0.2.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.git/index +0 -0
- py2ls/bio.py +540 -47
- py2ls/ips.py +12 -3
- py2ls/mol.py +289 -0
- py2ls/plot.py +262 -100
- {py2ls-0.2.4.5.dist-info → py2ls-0.2.4.6.dist-info}/METADATA +1 -1
- {py2ls-0.2.4.5.dist-info → py2ls-0.2.4.6.dist-info}/RECORD +8 -7
- {py2ls-0.2.4.5.dist-info → py2ls-0.2.4.6.dist-info}/WHEEL +0 -0
py2ls/ips.py
CHANGED
@@ -60,6 +60,7 @@ except NameError:
|
|
60
60
|
def plt_font(dir_font: str = "/System/Library/Fonts/Hiragino Sans GB.ttc"):
|
61
61
|
"""
|
62
62
|
Add the Chinese (default) font to the font manager
|
63
|
+
show chinese
|
63
64
|
Args:
|
64
65
|
dir_font (str, optional): _description_. Defaults to "/System/Library/Fonts/Hiragino Sans GB.ttc".
|
65
66
|
"""
|
@@ -589,7 +590,7 @@ def flatten(nested: Any, unique_list=True, verbose=True):
|
|
589
590
|
else:
|
590
591
|
return flattened_list
|
591
592
|
|
592
|
-
def strcmp(search_term, candidates, ignore_case=True, verbose=False, scorer="WR"):
|
593
|
+
def strcmp(search_term, candidates, ignore_case=True,get_rank=False, verbose=False, scorer="WR"):
|
593
594
|
"""
|
594
595
|
Compares a search term with a list of candidate strings and finds the best match based on similarity score.
|
595
596
|
|
@@ -623,6 +624,11 @@ def strcmp(search_term, candidates, ignore_case=True, verbose=False, scorer="WR"
|
|
623
624
|
similarity_scores = [fuzz.ratio(str1_, word) for word in str2_]
|
624
625
|
else:
|
625
626
|
similarity_scores = [fuzz.WRatio(str1_, word) for word in str2_]
|
627
|
+
if get_rank:
|
628
|
+
idx = [similarity_scores.index(i) for i in sorted(similarity_scores,reverse=True)]
|
629
|
+
if verbose:
|
630
|
+
display([candidates[ii] for ii in idx])
|
631
|
+
return [candidates[ii] for ii in idx]
|
626
632
|
best_match_index = similarity_scores.index(max(similarity_scores))
|
627
633
|
best_match_score = similarity_scores[best_match_index]
|
628
634
|
else:
|
@@ -3058,8 +3064,11 @@ def figsave(*args, dpi=300):
|
|
3058
3064
|
|
3059
3065
|
def is_str_color(s):
    """Tell whether ``s`` should be treated as a colour value.

    Strings qualify only when they are a ``#`` followed by exactly six or
    eight hexadecimal digits. Any non-string value is reported as True
    without further inspection.
    """
    if not isinstance(s, str):
        return True

    # Regular expression pattern for hexadecimal color codes
    color_code_pattern = r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$"
    hit = re.match(color_code_pattern, s)
    return hit is not None
|
3063
3072
|
|
3064
3073
|
|
3065
3074
|
def is_num(s):
|
py2ls/mol.py
ADDED
@@ -0,0 +1,289 @@
|
|
1
|
+
import os
|
2
|
+
import subprocess
|
3
|
+
from rdkit import Chem
|
4
|
+
from rdkit.Chem import AllChem,Draw
|
5
|
+
from openbabel import openbabel
|
6
|
+
import matplotlib.pyplot as plt
|
7
|
+
# import pymol2  # use the PyMOL API to display molecules
|
8
|
+
|
9
|
+
from typing import Any, Dict, Union, List
|
10
|
+
|
11
|
+
def load_mol(fpath: str) -> Union[Dict[str, Any], None]:
    """
    Read a molecular structure file and return it as a plain dictionary.

    The backend is selected from the (lower-cased) file extension:

    - ``.mol`` / ``.sdf``: RDKit (for ``.sdf`` only the first record is read)
    - ``.pdb`` / ``.xyz``: Open Babel (pybel)
    - ``.gro`` / ``.xtc`` / ``.dcd`` / ``.trr``: MDAnalysis
    - ``.vasp`` / ``.cif``: ASE (atoms only, ``bonds`` stays empty)

    Parameters:
    - fpath (str): Path to the molecular file

    Returns:
    - mol_dict (Dict[str, Any]): Dictionary with molecule information:
        - 'atoms': List of dicts with keys 'element', 'coords', 'index', 'charge'
        - 'bonds': List of dicts with keys 'start_atom_idx', 'end_atom_idx', 'bond_type'
        - 'metadata': Dict with 'file_name' and 'format' (the extension)
    - None if the extension is unsupported or the backend fails; the error
      is printed instead of being raised.

    Note: the values are passed through from each backend unchanged, so their
    types differ (e.g. RDKit yields its own position object for 'coords',
    pybel reports ``atom.type`` as 'element' and the partial charge as
    'charge', MDAnalysis reports ``atom.name`` as 'element').
    """
    ext = os.path.splitext(fpath)[-1].lower()  # Get the file extension

    mol_dict = {
        "atoms": [],
        "bonds": [],
        "metadata": {
            "file_name": os.path.basename(fpath),
            "format": ext
        }
    }

    try:
        # Handling with RDKit (for .mol and .sdf)
        if ext in ('.mol', '.sdf'):
            from rdkit import Chem

            if ext == '.mol':
                mol = Chem.MolFromMolFile(fpath)
            else:
                mol = next(Chem.SDMolSupplier(fpath), None)
            if mol is None:
                raise ValueError(f"RDKit failed to parse the {ext} file.")

            mol_dict["atoms"] = [
                {
                    "element": atom.GetSymbol(),
                    "coords": mol.GetConformer().GetAtomPosition(atom.GetIdx()),
                    "index": atom.GetIdx(),
                    "charge": atom.GetFormalCharge()
                }
                for atom in mol.GetAtoms()
            ]
            mol_dict["bonds"] = [
                {
                    "start_atom_idx": bond.GetBeginAtomIdx(),
                    "end_atom_idx": bond.GetEndAtomIdx(),
                    "bond_type": bond.GetBondTypeAsDouble()
                }
                for bond in mol.GetBonds()
            ]

        # Handling with Pybel. '.mol' and '.sdf' are already consumed by the
        # RDKit branch above, so only '.pdb' and '.xyz' can reach this point.
        elif ext in ('.pdb', '.xyz'):
            from openbabel import openbabel, pybel

            mol = next(pybel.readfile(ext[1:], fpath), None)
            if mol is None:
                raise ValueError("Pybel failed to parse the file.")
            mol_dict["atoms"] = [
                {
                    "element": atom.type,
                    "coords": atom.coords,
                    "index": atom.idx,
                    "charge": atom.partialcharge
                }
                for atom in mol.atoms
            ]
            mol_dict["bonds"] = [
                {
                    "start_atom_idx": bond.GetBeginAtomIdx(),
                    "end_atom_idx": bond.GetEndAtomIdx(),
                    "bond_type": bond.GetBondOrder()
                }
                for bond in openbabel.OBMolBondIter(mol.OBMol)
            ]

        # Handling with MDAnalysis ('.pdb'/'.xyz' never reach this branch).
        # NOTE(review): trajectory-only formats (.xtc/.dcd/.trr) usually need
        # a topology file as well - confirm Universe(fpath) works for them.
        elif ext in ('.gro', '.xtc', '.dcd', '.trr'):
            import MDAnalysis as mda

            u = mda.Universe(fpath)
            mol_dict["atoms"] = [
                {
                    "element": atom.name,
                    "coords": atom.position,
                    "index": atom.id,
                    "charge": atom.charge if hasattr(atom, 'charge') else None
                }
                for atom in u.atoms
            ]
            # Topologies without bond records do not expose ``u.bonds``;
            # keep the atoms and leave the bond list empty in that case
            # instead of failing the whole load.
            bond_indices = u.bonds.indices if hasattr(u, 'bonds') else []
            mol_dict["bonds"] = [
                {"start_atom_idx": bond[0], "end_atom_idx": bond[1], "bond_type": 1}
                for bond in bond_indices
            ]

        # Handling with ASE ('.xyz'/'.pdb' never reach this branch).
        elif ext in ('.vasp', '.cif'):
            from ase.io import read as ase_read

            atoms = ase_read(fpath)
            mol_dict["atoms"] = [
                {
                    "element": atom.symbol,
                    "coords": atom.position,
                    "index": i,
                    "charge": None
                }
                for i, atom in enumerate(atoms)
            ]
            # No bond information is read through ASE, so bonds stay empty.

        else:
            raise ValueError(f"Unsupported file extension: {ext}")

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print(f"Error loading molecule from {fpath}: {e}")
        return None

    return mol_dict
|
154
|
+
|
155
|
+
class DockingConfig:
    """Bundle of settings for a docking run.

    Constructing an instance stores the given values as attributes and
    makes sure ``output_dir`` exists on disk.
    """

    def __init__(self, receptor_file, ligand_smiles_list, center=(0, 0, 0), size=(20, 20, 20), output_dir="docking_results"):
        # Receptor path and the SMILES strings of the ligands to dock.
        self.receptor_file, self.ligand_smiles_list = receptor_file, ligand_smiles_list
        # Search box: centre and edge lengths, three components each.
        self.center, self.size = center, size
        # Directory for results; created here if it is missing.
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
|
163
|
+
|
164
|
+
def mol_to_pdbqt(mol, output_file):
    """Converts an RDKit Mol object to PDBQT format.

    The molecule is serialised to a MOL block with RDKit, read into an
    Open Babel ``OBMol`` and written to ``output_file`` as PDBQT.

    Args:
        mol: RDKit molecule to convert.
        output_file (str): Destination path of the PDBQT file.

    Raises:
        ValueError: If Open Babel rejects the mol/pdbqt format pair or cannot
            read the MOL block.
        OSError: If Open Babel reports that the file could not be written.
    """
    obConversion = openbabel.OBConversion()
    # Each OBConversion call reports failure through its boolean result
    # rather than an exception, so every step is checked explicitly.
    if not obConversion.SetInAndOutFormats("mol", "pdbqt"):
        raise ValueError("Open Babel does not support the mol -> pdbqt conversion.")

    obMol = openbabel.OBMol()
    if not obConversion.ReadString(obMol, Chem.MolToMolBlock(mol)):
        raise ValueError("Open Babel failed to read the MOL block generated by RDKit.")

    if not obConversion.WriteFile(obMol, output_file):
        raise OSError(f"Open Babel failed to write PDBQT file: {output_file}")
|
171
|
+
|
172
|
+
def prepare_ligand(smiles, ligand_id):
    """Prepare the ligand file in PDBQT format.

    Builds a molecule from ``smiles``, adds hydrogens, embeds a 3D
    conformer, optimises it with UFF and writes it to
    ``ligand_<ligand_id>.pdbqt`` in the current working directory.

    Args:
        smiles (str): SMILES string of the ligand.
        ligand_id: Identifier used in the output file name.

    Returns:
        str: Name of the PDBQT file that was written.

    Raises:
        ValueError: If the SMILES string cannot be parsed or no 3D
            conformer can be embedded.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit signals an unparsable SMILES by returning None.
        raise ValueError(f"Invalid SMILES string for ligand {ligand_id}: {smiles!r}")

    mol = Chem.AddHs(mol)
    # EmbedMolecule returns -1 when no conformer could be generated; without
    # this check the optimisation below fails with an unrelated-looking error.
    if AllChem.EmbedMolecule(mol) == -1:
        raise ValueError(f"Could not embed a 3D conformer for ligand {ligand_id}: {smiles!r}")
    AllChem.UFFOptimizeMolecule(mol)

    ligand_file = f"ligand_{ligand_id}.pdbqt"
    mol_to_pdbqt(mol, ligand_file)
    return ligand_file
|
181
|
+
|
182
|
+
def run_docking(receptor_file, ligand_file, output_file, center, size):
    """Runs Vina docking using the receptor and ligand files.

    Args:
        receptor_file (str): Path passed to ``--receptor``.
        ligand_file (str): Path passed to ``--ligand``.
        output_file (str): Path passed to ``--out``; the ``--log`` path is
            derived from it by replacing ".pdbqt" with ".log".
        center: Three values for ``--center_x/y/z``.
        size: Three values for ``--size_x/y/z``.

    Returns:
        subprocess.CompletedProcess: The finished process, with captured
        stdout and stderr.

    Raises:
        RuntimeError: If ``vina`` exits with a non-zero status.
        FileNotFoundError: If the ``vina`` executable cannot be found.
    """
    vina_command = [
        "vina",
        "--receptor", receptor_file,
        "--ligand", ligand_file,
        "--center_x", str(center[0]),
        "--center_y", str(center[1]),
        "--center_z", str(center[2]),
        "--size_x", str(size[0]),
        "--size_y", str(size[1]),
        "--size_z", str(size[2]),
        "--out", output_file,
        # NOTE(review): newer AutoDock Vina releases may no longer accept
        # --log - confirm against the installed version.
        "--log", output_file.replace(".pdbqt", ".log")
    ]
    result = subprocess.run(vina_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        # Surface the failure here, with vina's own message, instead of
        # letting a later step trip over missing output files.
        stderr_text = result.stderr.decode(errors="replace").strip()
        raise RuntimeError(
            f"vina exited with code {result.returncode} for ligand {ligand_file}: {stderr_text}"
        )
    return result
|
198
|
+
|
199
|
+
def parse_vina_output(output_file):
    """Parses Vina output to extract docking scores.

    Scores are taken from lines starting with "REMARK VINA RESULT"; the
    fourth whitespace-separated token of each such line is read as a float.
    The docked PDBQT file (``output_file``) is scanned first, because that is
    where Vina writes these remarks. If it is missing or holds no such lines,
    the companion log file (".pdbqt" replaced by ".log") is scanned as a
    fallback, which was the only file inspected previously.

    Args:
        output_file (str): Path of the docked PDBQT file.

    Returns:
        list[float]: Scores in file order; empty if no result lines exist.

    Raises:
        FileNotFoundError: If neither the PDBQT file nor the log file exists.
    """
    log_file = output_file.replace(".pdbqt", ".log")
    candidates = [output_file]
    if log_file != output_file:
        candidates.append(log_file)

    found_any_file = False
    for path in candidates:
        if not os.path.isfile(path):
            continue
        found_any_file = True
        with open(path, 'r') as f:
            scores = [
                float(line.split()[3])
                for line in f
                if line.startswith("REMARK VINA RESULT")
            ]
        if scores:
            return scores

    if not found_any_file:
        raise FileNotFoundError(
            f"No Vina output found: tried {' and '.join(candidates)}"
        )
    return []
|
208
|
+
|
209
|
+
def docking_master_function(config: DockingConfig):
    """Master function to run molecular docking for multiple ligands.

    For every SMILES string in ``config.ligand_smiles_list`` this prepares a
    ligand file, runs the docking, parses and prints the scores and saves a
    2D visualisation. Afterwards the binding affinities are plotted to
    ``binding_affinities.png`` inside ``config.output_dir``.

    Args:
        config (DockingConfig): Receptor, ligands, search box and output
            directory of the run.

    Returns:
        dict: Maps each SMILES string to its list of docking scores. A
        SMILES string that occurs more than once keeps only the last result.
    """
    receptor_pdbqt = config.receptor_file
    results = {}

    for i, smiles in enumerate(config.ligand_smiles_list):
        ligand_file = prepare_ligand(smiles, ligand_id=i)
        output_file = os.path.join(config.output_dir, f"docked_ligand_{i}.pdbqt")

        try:
            # Run docking for each ligand
            run_docking(
                receptor_file=receptor_pdbqt,
                ligand_file=ligand_file,
                output_file=output_file,
                center=config.center,
                size=config.size
            )

            # Parse docking results and store them
            scores = parse_vina_output(output_file)
            results[smiles] = scores
            print(f"Ligand {i} (SMILES: {smiles}) docking scores: {scores}")

            # Visualize individual docking result
            visualize_docking(
                config.receptor_file,
                output_file,
                os.path.join(config.output_dir, f"ligand_{i}_visualization.png"),
            )
        finally:
            # Clean up the intermediate ligand file even when a step above
            # raises, so failed runs do not litter the working directory.
            if os.path.exists(ligand_file):
                os.remove(ligand_file)

    # Plot binding affinity distribution
    plot_binding_affinities(results, os.path.join(config.output_dir, "binding_affinities.png"))
    return results
|
241
|
+
|
242
|
+
def visualize_docking(receptor_file, ligand_file, dir_save):
    """Generates 2D depictions of the receptor and the ligand using RDKit.

    Two images are written next to ``dir_save``: the file name gets the
    suffix ``_receptor`` and ``_ligand`` respectively, placed before the
    extension (".png" is used when ``dir_save`` has no extension).

    Args:
        receptor_file (str): Receptor structure, read with RDKit's PDB reader.
        ligand_file (str): Ligand structure, read with RDKit's PDB reader.
        dir_save (str): Base path for the two images.

    Raises:
        ValueError: If RDKit cannot parse the receptor or the ligand file.
    """
    # NOTE(review): the caller in this module passes .pdbqt paths - confirm
    # that RDKit's PDB reader handles them as intended.
    receptor = Chem.MolFromPDBFile(receptor_file, removeHs=False)
    if receptor is None:
        raise ValueError(f"RDKit could not parse receptor file: {receptor_file}")
    ligand = Chem.MolFromPDBFile(ligand_file, removeHs=False)
    if ligand is None:
        raise ValueError(f"RDKit could not parse ligand file: {ligand_file}")

    # Split off the real extension rather than replacing every ".png" in the
    # path; this also keeps the two targets distinct for other extensions.
    root, ext = os.path.splitext(dir_save)
    ext = ext or ".png"
    receptor_path = f"{root}_receptor{ext}"
    ligand_path = f"{root}_ligand{ext}"

    # Draw and save the receptor and ligand
    Draw.MolToImage(receptor, size=(300, 300)).save(receptor_path)
    Draw.MolToImage(ligand, size=(300, 300)).save(ligand_path)

    print(f"Saved 2D visualizations to {receptor_path} and {ligand_path}")
|
257
|
+
|
258
|
+
|
259
|
+
def plot_binding_affinities(results, dir_save):
    """Plots binding affinities for all ligands.

    Draws a horizontal bar chart with one bar per ligand, using the minimum
    score of each ligand, saves it to ``dir_save`` and shows it.

    Args:
        results (dict): Maps a ligand label (SMILES) to its list of scores.
        dir_save (str): Path of the image file to write.

    Ligands whose score list is empty are left out of the chart and
    reported on stdout, since they have no minimum to plot.
    """
    # Minimum binding affinity per ligand, ignoring ligands without scores
    best_scores = {ligand: min(scores) for ligand, scores in results.items() if scores}
    skipped = [ligand for ligand, scores in results.items() if not scores]
    if skipped:
        print(f"Skipping {len(skipped)} ligand(s) without docking scores: {skipped}")

    ligands = list(best_scores)
    affinities = list(best_scores.values())

    fig = plt.figure(figsize=(10, 6))
    plt.barh(ligands, affinities, color="skyblue")
    plt.xlabel("Binding Affinity (kcal/mol)")
    plt.ylabel("Ligands (SMILES)")
    plt.title("Binding Affinities of Different Ligands")
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.savefig(dir_save)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print(f"Saved binding affinity plot to {dir_save}")
|
274
|
+
|
275
|
+
# Example usage
if __name__ == "__main__":
    # Configuration
    receptor_file = "receptor.pdbqt"
    ligand_smiles_list = ["CCO", "CCC", "CCN"]  # example list of ligand SMILES strings
    docking_config = DockingConfig(
        receptor_file=receptor_file,
        ligand_smiles_list=ligand_smiles_list,
        center=(10, 10, 10),  # assumed docking centre
        size=(20, 20, 20)  # assumed size of the docking region
    )

    # Run the master function
    docking_results = docking_master_function(docking_config)
    print("Final docking results:", docking_results)
|