labmate-mcp 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- labmate_mcp/__init__.py +4 -0
- labmate_mcp/__main__.py +3 -0
- labmate_mcp/apis.py +1744 -0
- labmate_mcp/bench.py +3392 -0
- labmate_mcp/chemistry.py +572 -0
- labmate_mcp/peptide.py +384 -0
- labmate_mcp/server.py +5116 -0
- labmate_mcp/writing.py +1488 -0
- labmate_mcp-7.0.0.dist-info/METADATA +495 -0
- labmate_mcp-7.0.0.dist-info/RECORD +14 -0
- labmate_mcp-7.0.0.dist-info/WHEEL +5 -0
- labmate_mcp-7.0.0.dist-info/entry_points.txt +2 -0
- labmate_mcp-7.0.0.dist-info/licenses/LICENSE +21 -0
- labmate_mcp-7.0.0.dist-info/top_level.txt +1 -0
labmate_mcp/peptide.py
ADDED
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
"""
|
|
2
|
+
labmate-mcp peptide chemistry module.
|
|
3
|
+
|
|
4
|
+
Wraps three backends:
|
|
5
|
+
- p2smi (local, RDKit) — sequence → SMILES, properties, synthesis check, modifications
|
|
6
|
+
- pichemist (local, AstraZeneca) — pI calculation with 8 pKa reference sets
|
|
7
|
+
- pep-calc.com (REST API) — extinction coefficient, MS peak assignment, ion series
|
|
8
|
+
|
|
9
|
+
All functions are designed for MCP tool wiring: dict/str in → dict/str out.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import logging
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
import httpx
|
|
19
|
+
|
|
20
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
# =============================================================================
|
|
23
|
+
# pep-calc.com REST API (free, no auth, HTTP)
|
|
24
|
+
# =============================================================================
|
|
25
|
+
|
|
26
|
+
# Base URL that pep-calc.com endpoint paths are appended to (plain HTTP).
PEP_CALC_BASE = "http://api.pep-calc.com"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def _pep_calc_get(endpoint: str, seq: str, n_term: str = "H", c_term: str = "OH") -> dict:
    """
    Issue a GET request to one pep-calc.com endpoint and decode the JSON reply.

    Args:
        endpoint: Path appended to PEP_CALC_BASE (for example "peptide/iso").
        seq: Peptide sequence, sent as the ``seq`` query parameter.
        n_term: Sent as the ``N_term`` query parameter.
        c_term: Sent as the ``C_term`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        httpx.HTTPStatusError: When the server answers with an error status
            (raised by ``raise_for_status``).
    """
    query = {"seq": seq, "N_term": n_term, "C_term": c_term}
    # A short-lived client per call; the request is bounded by a 15 s timeout.
    async with httpx.AsyncClient(timeout=15) as session:
        response = await session.get(f"{PEP_CALC_BASE}/{endpoint}", params=query)
        response.raise_for_status()
        return response.json()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
async def pep_calc_properties(seq: str, n_term: str = "H", c_term: str = "OH") -> dict:
    """
    Collect basic peptide properties from several pep-calc.com endpoints.

    Each endpoint is queried independently and on a best-effort basis: a
    failing lookup never aborts the others. Instead of being dropped
    silently, every failure is reported under its own ``*_error`` key so the
    caller can tell "value unavailable" apart from "lookup failed".

    Args:
        seq: Peptide sequence forwarded to pep-calc.com.
        n_term: N-terminal group forwarded to pep-calc.com.
        c_term: C-terminal group forwarded to pep-calc.com.

    Returns:
        A dict that may contain ``molecular_weight``, ``formula``,
        ``sequence_length``, ``pI``, ``acidic_residues``, ``basic_residues``,
        ``uncharged_residues``, ``extinction_280nm_oxidized`` and
        ``extinction_280nm_reduced``, plus ``basic_error``, ``pI_error``,
        ``charge_error`` and/or ``extinction_error`` (the exception text) for
        every lookup that failed.
    """
    # (endpoint, key used to report a failure, {result key: response field})
    lookups = (
        ("peptide", "basic_error", {
            "molecular_weight": "molecularWeight",
            "formula": "formula",
            "sequence_length": "seqLength",
        }),
        ("peptide/iso", "pI_error", {
            "pI": "pI",
        }),
        ("peptide/charge", "charge_error", {
            "acidic_residues": "acidicCount",
            "basic_residues": "basicCount",
            "uncharged_residues": "unchargedCount",
        }),
        ("peptide/ex", "extinction_error", {
            "extinction_280nm_oxidized": "oxidized",
            "extinction_280nm_reduced": "reduced",
        }),
    )

    results: dict = {}
    for endpoint, error_key, field_map in lookups:
        try:
            payload = await _pep_calc_get(endpoint, seq, n_term, c_term)
            for result_key, response_field in field_map.items():
                results[result_key] = payload.get(response_field)
        except Exception as exc:
            # Deliberately broad: this is a best-effort aggregation, so any
            # failure (network, HTTP status, malformed payload) is recorded
            # for this lookup and the remaining lookups still run.
            results[error_key] = str(exc)

    return results
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
async def pep_calc_ms_assign(seq: str, mz_values: list[float],
                             n_term: str = "H", c_term: str = "OH") -> dict:
    """
    Ask pep-calc.com to assign observed m/z peaks for a peptide.

    Args:
        seq: Peptide sequence, sent as the ``seq`` query parameter.
        mz_values: Observed m/z values; sent comma-joined as ``peaks``.
        n_term: Sent as the ``N_term`` query parameter.
        c_term: Sent as the ``C_term`` query parameter.

    Returns:
        The decoded JSON body of the ``peptide/assign`` response.

    Raises:
        httpx.HTTPStatusError: When the server answers with an error status
            (raised by ``raise_for_status``).
    """
    peak_list = ",".join(map(str, mz_values))
    query = {"seq": seq, "N_term": n_term, "C_term": c_term, "peaks": peak_list}
    assign_url = f"{PEP_CALC_BASE}/peptide/assign"

    async with httpx.AsyncClient(timeout=15) as session:
        response = await session.get(assign_url, params=query)
        response.raise_for_status()
        return response.json()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
async def pep_calc_ion_series(seq: str, n_term: str = "H", c_term: str = "OH") -> dict:
    """
    Fetch the peptide ion series from the pep-calc.com ``peptide/ions`` endpoint.

    The response is returned unmodified; it is intended for MS interpretation
    (b, y, a, c, z ions according to the original notes on this function).
    """
    ion_table = await _pep_calc_get("peptide/ions", seq, n_term, c_term)
    return ion_table
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# =============================================================================
|
|
96
|
+
# pichemist (AstraZeneca) — pI calculation
|
|
97
|
+
# =============================================================================
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def calculate_pi_from_sequence(sequence: str) -> dict:
    """
    Calculate the isoelectric point of a peptide sequence with pichemist.

    The sequence is submitted as a FASTA entry (no molecule object) and
    pichemist is run with the PKA_MATCHER method. Aggregate statistics are
    exposed under dedicated keys; every other entry of pichemist's "pI" and
    "QpH7" sub-dicts is passed through as a per-method value.

    Returns:
        Dict with ``sequence``, ``pI_mean``, ``pI_std``, ``pI_stderr``,
        ``pI_interval``, ``pI_interval_threshold``, ``charge_at_pH7_mean``,
        ``charge_at_pH7_std``, ``pI_by_method``, ``charge_at_pH7_by_method``
        and ``reference_pka_set``. Missing values are ``None``.
    """
    from pichemist.api import pichemist_from_dict, PKaMethod
    from pichemist.model import InputAttribute

    # Keys holding aggregate statistics rather than per-method values.
    pi_summary_keys = ("pI mean", "std", "err")
    charge_summary_keys = ("Q at pH7.4 mean", "std", "err")

    record = {
        InputAttribute.MOL_NAME.value: sequence,
        InputAttribute.MOL_OBJECT.value: None,
        "fasta": sequence,
    }
    output = pichemist_from_dict({1: record}, method=PKaMethod.PKA_MATCHER.value)
    # The single entry is looked up under "1" first, then under integer 1.
    fallback_entry = output.get(1, {})
    entry = output.get("1", fallback_entry)

    pi_stats = entry.get("pI", {})
    charge_stats = entry.get("QpH7", {})

    pi_per_method = {}
    for label, value in pi_stats.items():
        if label in pi_summary_keys:
            continue
        pi_per_method[label] = value

    charge_per_method = {}
    for label, value in charge_stats.items():
        if label in charge_summary_keys:
            continue
        charge_per_method[label] = value

    summary = {"sequence": sequence}
    summary["pI_mean"] = pi_stats.get("pI mean")
    summary["pI_std"] = pi_stats.get("std")
    summary["pI_stderr"] = pi_stats.get("err")
    summary["pI_interval"] = entry.get("pI_interval")
    summary["pI_interval_threshold"] = entry.get("pI_interval_threshold")
    summary["charge_at_pH7_mean"] = charge_stats.get("Q at pH7.4 mean")
    summary["charge_at_pH7_std"] = charge_stats.get("std")
    summary["pI_by_method"] = pi_per_method
    summary["charge_at_pH7_by_method"] = charge_per_method
    summary["reference_pka_set"] = entry.get("pKa_set")
    return summary
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def calculate_pi_from_smiles(smiles: str) -> dict:
    """
    Calculate the isoelectric point of a molecule given as SMILES.

    Intended for modified/noncanonical peptides. The SMILES is parsed with
    RDKit and the resulting molecule object (no FASTA) is handed to pichemist,
    which is run with the PKA_MATCHER method.

    Returns:
        ``{"error": ...}`` when RDKit cannot parse the SMILES; otherwise a
        dict with ``smiles``, ``pI_mean``, ``pI_std``, ``pI_interval``,
        ``charge_at_pH7_mean`` and ``pI_by_method``. Missing values are
        ``None``.
    """
    from pichemist.api import pichemist_from_dict, PKaMethod
    from pichemist.model import InputAttribute
    from rdkit import Chem

    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        # RDKit signals an unparsable SMILES by returning None.
        return {"error": f"Invalid SMILES: {smiles}"}

    record = {
        InputAttribute.MOL_NAME.value: smiles,
        InputAttribute.MOL_OBJECT.value: molecule,
        "fasta": None,
    }
    output = pichemist_from_dict({1: record}, method=PKaMethod.PKA_MATCHER.value)
    # The single entry is looked up under "1" first, then under integer 1.
    fallback_entry = output.get(1, {})
    entry = output.get("1", fallback_entry)

    pi_stats = entry.get("pI", {})
    charge_stats = entry.get("QpH7", {})

    # Everything that is not an aggregate statistic is a per-method value.
    pi_per_method = {}
    for label, value in pi_stats.items():
        if label in ("pI mean", "std", "err"):
            continue
        pi_per_method[label] = value

    summary = {"smiles": smiles}
    summary["pI_mean"] = pi_stats.get("pI mean")
    summary["pI_std"] = pi_stats.get("std")
    summary["pI_interval"] = entry.get("pI_interval")
    summary["charge_at_pH7_mean"] = charge_stats.get("Q at pH7.4 mean")
    summary["pI_by_method"] = pi_per_method
    return summary
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# =============================================================================
|
|
182
|
+
# p2smi — peptide SMILES generation, properties, synthesis, modifications
|
|
183
|
+
# =============================================================================
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def sequence_to_smiles(
    sequence: str,
    cyclization: str = "",
) -> dict:
    """
    Convert peptide sequence to SMILES string.

    Args:
        sequence: Amino acid sequence (1-letter codes). Supports 450+ amino acids
            including noncanonical (SwissSidechain).
        cyclization: One of '', 'SS', 'HT', 'SCNT', 'SCCT', 'SCSC', or
            a manual constraint pattern like 'SSXXXCXXXCX'. An empty,
            whitespace-only or 'none' value (any case) requests a linear
            peptide, matching how generate_peptides interprets 'none'.

    Returns:
        Dict with ``sequence``, ``cyclization`` (as passed in), ``smiles`` and
        ``type`` ("linear" or "cyclic"). For cyclic peptides
        ``applied_constraint`` is added when p2smi returns a tuple of at
        least three items.
    """
    from p2smi.utilities.smilesgen import (
        constrained_peptide_smiles,
        linear_peptide_smiles,
    )

    result: dict[str, Any] = {"sequence": sequence, "cyclization": cyclization}

    # Normalise so that None, "  " and "none" do not reach p2smi as a
    # (meaningless) constraint pattern.
    constraint = (cyclization or "").strip()
    if constraint.lower() in ("", "none"):
        result["smiles"] = linear_peptide_smiles(sequence)
        result["type"] = "linear"
        return result

    out = constrained_peptide_smiles(sequence, constraint)
    if isinstance(out, tuple) and len(out) >= 3:
        # Tuple layout used here: index 1 = applied constraint, index 2 = SMILES.
        result["smiles"] = out[2]
        result["applied_constraint"] = out[1]
    else:
        # Any other return shape is passed through as text.
        result["smiles"] = str(out)
    result["type"] = "cyclic"

    return result
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def get_cyclization_options(sequence: str) -> dict:
    """
    Report which cyclization types p2smi considers possible for a sequence.

    Each list/tuple item returned by ``what_constraints`` with at least two
    elements contributes one option; its second element is taken as the
    constraint pattern. The first two characters of the pattern select the
    type; for "SC" patterns the presence of "N" and/or "Z" in the remainder
    selects the sidechain subtype.

    Returns:
        Dict with ``sequence``, ``length``, ``cyclization_options`` (a list of
        ``{"type", "constraint_pattern"}`` dicts) and ``num_options``.
    """
    from p2smi.utilities.smilesgen import what_constraints

    labels = {
        "SS": "Disulfide",
        "HT": "Head-to-tail",
        "SC": "Sidechain",
    }
    # Sidechain subtype keyed by ("N" in mask, "Z" in mask).
    sidechain_labels = {
        (True, True): "SCSC (sidechain–sidechain)",
        (True, False): "SCNT (sidechain–N-terminus)",
        (False, True): "SCCT (sidechain–C-terminus)",
        (False, False): "SC (unspecified)",
    }

    def describe(pattern):
        prefix = pattern[:2].upper()
        mask = pattern[2:] if len(pattern) > 2 else ""
        if prefix != "SC":
            return labels.get(prefix, prefix)
        return sidechain_labels[("N" in mask, "Z" in mask)]

    options = [
        {"type": describe(candidate[1]), "constraint_pattern": candidate[1]}
        for candidate in what_constraints(sequence)
        if isinstance(candidate, (list, tuple)) and len(candidate) >= 2
    ]

    return {
        "sequence": sequence,
        "length": len(sequence),
        "cyclization_options": options,
        "num_options": len(options),
    }
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def generate_peptides(
    num_sequences: int = 10,
    min_length: int = 8,
    max_length: int = 20,
    noncanonical_percent: float = 0.0,
    dextro_percent: float = 0.0,
    cyclization: str = "none",
) -> dict:
    """
    Generate random peptide sequences with defined constraints.

    All numeric inputs are clamped before being handed to p2smi, and the
    returned ``parameters`` report the values that were actually used.

    Args:
        num_sequences: Number of sequences to generate; clamped to 0..100.
        min_length: Minimum sequence length; clamped to 2..100.
        max_length: Maximum sequence length; clamped to at most 100 and
            raised to the effective minimum length if it would fall below it.
        noncanonical_percent: Clamped to 0.0..1.0.
        dextro_percent: Clamped to 0.0..1.0.
        cyclization: 'none', 'all', or comma-separated types: 'SS,HT,SCSC,SCNT,SCCT'

    Returns:
        Dict with ``num_generated``, ``parameters`` (effective numeric values
        plus the ``cyclization`` string as passed) and ``sequences`` (a list
        of ``{"id", "sequence", "length"}`` dicts).
    """
    from p2smi.genPeps import generate_sequences

    mode = (cyclization or "").strip().lower()
    if mode == "all":
        constraints = ["SS", "HT", "SCNT", "SCCT", "SCSC"]
    elif mode in ("none", ""):
        constraints = []
    else:
        # Skip empty tokens left behind by stray commas ("SS,,HT", "SS,").
        constraints = [
            token.strip().upper()
            for token in cyclization.split(",")
            if token.strip()
        ]

    count = max(0, min(num_sequences, 100))
    shortest = min(max(min_length, 2), 100)
    # Keep the range valid even when the caller's bounds cross after clamping.
    longest = max(shortest, min(max_length, 100))
    noncanonical = max(0.0, min(1.0, noncanonical_percent))
    dextro = max(0.0, min(1.0, dextro_percent))

    sequences = generate_sequences(
        num_sequences=count,
        min_length=shortest,
        max_length=longest,
        noncanonical_percent=noncanonical,
        dextro_percent=dextro,
        constraints=constraints,
    )

    results = [
        {"id": name, "sequence": seq, "length": len(seq)}
        for name, seq in sequences.items()
    ]

    return {
        "num_generated": len(results),
        "parameters": {
            "min_length": shortest,
            "max_length": longest,
            "noncanonical_percent": noncanonical,
            "dextro_percent": dextro,
            "cyclization": cyclization,
        },
        "sequences": results,
    }
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def get_peptide_properties(smiles: str) -> dict:
    """
    Summarise molecular properties of a peptide given as SMILES.

    Delegates entirely to p2smi's ``molecule_summary`` and returns its result
    unchanged (per the original notes: MW, formula, logP, TPSA, HBD, HBA,
    rotatable bonds and a Lipinski evaluation).
    """
    from p2smi.chemProps import molecule_summary

    summary = molecule_summary(smiles)
    return summary
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def check_synthesis_feasibility(sequence: str) -> dict:
    """
    Judge whether a peptide sequence looks feasible for solid-phase synthesis (SPPS).

    The checks themselves are performed by p2smi's ``collect_synthesis_issues``;
    the original notes on this function list them as:
    - Forbidden motifs (consecutive Pro, DG/DP, N/Q at N-terminus)
    - Cysteine content (>2 is problematic)
    - Terminal residue issues (Pro/Cys at C-terminus)
    - Glycine runs (>4 consecutive)
    - Sequence length (>50 residues)
    - Hydrophobicity (logP check)
    - Charge distribution (need charged residue every 5 positions)

    Returns:
        Dict with ``sequence``, ``length``, ``feasible`` (True when no issues
        were reported), ``verdict`` ("PASS"/"FAIL"), ``issues`` and
        ``num_issues``.
    """
    from p2smi.synthRules import collect_synthesis_issues

    found = collect_synthesis_issues(sequence)
    issue_count = len(found)

    if not found:
        verdict = "PASS"
    else:
        verdict = "FAIL"

    report = {"sequence": sequence, "length": len(sequence)}
    report["feasible"] = issue_count == 0
    report["verdict"] = verdict
    report["issues"] = found
    report["num_issues"] = issue_count
    return report
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def modify_peptide_smiles(
    smiles: str,
    n_methylation: bool = False,
    pegylation: bool = False,
    methylation_fraction: float = 0.3,
) -> dict:
    """
    Apply chemical modifications to a peptide SMILES string via p2smi.

    Args:
        smiles: Input peptide SMILES
        n_methylation: Apply random N-methylation
        pegylation: Apply random PEGylation
        methylation_fraction: Fraction of amide sites to N-methylate; clamped
            to the range 0-1 before being passed to p2smi.

    Returns:
        Dict with ``original_smiles``, ``modified_smiles`` (``None`` when the
        result fails validation), ``modifications_applied``, ``valid_smiles``
        and ``error`` (``None`` on success).
    """
    from p2smi.chemMods import modify_sequence, is_valid_smiles

    fraction = max(0.0, min(1.0, methylation_fraction))
    new_smiles, applied = modify_sequence(
        smiles,
        do_methylate=n_methylation,
        do_pegylate=pegylation,
        nmeth_residues=fraction,
    )

    is_ok = is_valid_smiles(new_smiles)
    if is_ok:
        reported_smiles = new_smiles
        failure = None
    else:
        # Never hand back a SMILES string that did not validate.
        reported_smiles = None
        failure = "Modified SMILES failed RDKit validation"

    return {
        "original_smiles": smiles,
        "modified_smiles": reported_smiles,
        "modifications_applied": applied,
        "valid_smiles": is_ok,
        "error": failure,
    }
|