labmate-mcp 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
labmate_mcp/peptide.py ADDED
@@ -0,0 +1,384 @@
1
+ """
2
+ labmate-mcp peptide chemistry module.
3
+
4
+ Wraps three backends:
5
+ - p2smi (local, RDKit) — sequence → SMILES, properties, synthesis check, modifications
6
+ - pichemist (local, AstraZeneca) — pI calculation with 8 pKa reference sets
7
+ - pep-calc.com (REST API) — extinction coefficient, MS peak assignment, ion series
8
+
9
+ All functions are designed for MCP tool wiring: dict/str in → dict/str out.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import logging
16
+ from typing import Any
17
+
18
+ import httpx
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # =============================================================================
23
+ # pep-calc.com REST API (free, no auth, HTTP)
24
+ # =============================================================================
25
+
26
+ PEP_CALC_BASE = "http://api.pep-calc.com"
27
+
28
+
29
async def _pep_calc_get(endpoint: str, seq: str, n_term: str = "H", c_term: str = "OH") -> dict:
    """Send one GET request to pep-calc.com and return the decoded JSON body.

    Args:
        endpoint: Path appended to ``PEP_CALC_BASE`` (e.g. ``"peptide/iso"``).
        seq: Peptide sequence, sent as the ``seq`` query parameter.
        n_term: Sent as the ``N_term`` query parameter.
        c_term: Sent as the ``C_term`` query parameter.

    Raises:
        httpx.HTTPStatusError: Raised by ``raise_for_status()`` on an error response.
    """
    query = {"seq": seq, "N_term": n_term, "C_term": c_term}
    target = f"{PEP_CALC_BASE}/{endpoint}"
    # A short-lived client per call; the 15 s timeout applies to this request.
    async with httpx.AsyncClient(timeout=15) as http:
        response = await http.get(target, params=query)
        response.raise_for_status()
        return response.json()
37
+
38
+
39
async def pep_calc_properties(seq: str, n_term: str = "H", c_term: str = "OH") -> dict:
    """Collect peptide properties from four pep-calc.com endpoints.

    The endpoints are queried one after another and each lookup is
    best-effort: a failure in one section never prevents the others from
    being attempted. A failed section is logged and reported under its own
    ``*_error`` key instead of being dropped silently, so callers can tell
    "lookup failed" apart from "value not provided".

    Args:
        seq: Peptide sequence.
        n_term: N-terminal group passed through to the API.
        c_term: C-terminal group passed through to the API.

    Returns:
        A dict that may contain ``molecular_weight``, ``formula``,
        ``sequence_length``, ``pI``, ``acidic_residues``, ``basic_residues``,
        ``uncharged_residues``, ``extinction_280nm_oxidized`` and
        ``extinction_280nm_reduced``; plus ``basic_error``, ``pI_error``,
        ``charge_error`` or ``extinction_error`` (stringified exception) for
        any section whose lookup failed.
    """
    # (endpoint, key used to report a failure, {result key: response key})
    sections = (
        (
            "peptide",
            "basic_error",
            {
                "molecular_weight": "molecularWeight",
                "formula": "formula",
                "sequence_length": "seqLength",
            },
        ),
        ("peptide/iso", "pI_error", {"pI": "pI"}),
        (
            "peptide/charge",
            "charge_error",
            {
                "acidic_residues": "acidicCount",
                "basic_residues": "basicCount",
                "uncharged_residues": "unchargedCount",
            },
        ),
        (
            "peptide/ex",
            "extinction_error",
            {
                "extinction_280nm_oxidized": "oxidized",
                "extinction_280nm_reduced": "reduced",
            },
        ),
    )

    results: dict = {}
    for endpoint, error_key, field_map in sections:
        try:
            payload = await _pep_calc_get(endpoint, seq, n_term, c_term)
            for result_key, response_key in field_map.items():
                results[result_key] = payload.get(response_key)
        except Exception as exc:
            # Deliberately broad: network, HTTP-status and malformed-payload
            # errors are all treated as "this section is unavailable".
            logger.warning("pep-calc.com lookup '%s' failed: %s", endpoint, exc)
            results[error_key] = str(exc)

    return results
72
+
73
+
74
async def pep_calc_ms_assign(seq: str, mz_values: list[float],
                             n_term: str = "H", c_term: str = "OH") -> dict:
    """Ask pep-calc.com to assign observed m/z peaks for a peptide.

    Args:
        seq: Peptide sequence.
        mz_values: Observed m/z values; sent comma-joined as the ``peaks``
            query parameter.
        n_term: Sent as the ``N_term`` query parameter.
        c_term: Sent as the ``C_term`` query parameter.

    Returns:
        The decoded JSON response of the ``peptide/assign`` endpoint.

    Raises:
        httpx.HTTPStatusError: Raised by ``raise_for_status()`` on an error response.
    """
    peak_list = ",".join(map(str, mz_values))
    query = {"seq": seq, "N_term": n_term, "C_term": c_term, "peaks": peak_list}
    target = f"{PEP_CALC_BASE}/peptide/assign"
    async with httpx.AsyncClient(timeout=15) as http:
        response = await http.get(target, params=query)
        response.raise_for_status()
        return response.json()
88
+
89
+
90
async def pep_calc_ion_series(seq: str, n_term: str = "H", c_term: str = "OH") -> dict:
    """Fetch the peptide ion series (b, y, a, c, z ions) used for MS interpretation.

    Thin wrapper over the pep-calc.com ``peptide/ions`` endpoint; the decoded
    JSON response is returned unchanged.
    """
    ion_series = await _pep_calc_get("peptide/ions", seq, n_term, c_term)
    return ion_series
93
+
94
+
95
+ # =============================================================================
96
+ # pichemist (AstraZeneca) — pI calculation
97
+ # =============================================================================
98
+
99
+
100
def calculate_pi_from_sequence(sequence: str) -> dict:
    """
    Calculate the isoelectric point of a peptide sequence using pichemist (AstraZeneca).

    The sequence is handed to pichemist as its ``fasta`` input and evaluated
    with the ``PKA_MATCHER`` method. Summary statistics are returned next to
    the individual per-reference-set values.

    Args:
        sequence: Peptide sequence; it is also used as the molecule name.

    Returns:
        ``{"error": "Empty peptide sequence"}`` when ``sequence`` is empty or
        whitespace-only (pichemist is not invoked in that case). Otherwise a
        dict with ``sequence``, ``pI_mean``, ``pI_std``, ``pI_stderr``,
        ``pI_interval``, ``pI_interval_threshold``, ``charge_at_pH7_mean``,
        ``charge_at_pH7_std``, ``pI_by_method``, ``charge_at_pH7_by_method``
        and ``reference_pka_set``. Note that the ``charge_at_pH7_*`` values
        are read from pichemist's "Q at pH7.4" entries.
    """
    # Reject empty input before paying for the heavy imports; mirrors the
    # {"error": ...} convention used for invalid SMILES input.
    if not sequence or not sequence.strip():
        return {"error": "Empty peptide sequence"}

    from pichemist.api import pichemist_from_dict, PKaMethod
    from pichemist.model import InputAttribute

    # Keys inside pichemist's result dicts that hold summary statistics
    # rather than a value for one pKa reference set.
    pi_summary_keys = ("pI mean", "std", "err")
    charge_summary_keys = ("Q at pH7.4 mean", "std", "err")

    input_dict = {
        1: {
            InputAttribute.MOL_NAME.value: sequence,
            InputAttribute.MOL_OBJECT.value: None,
            "fasta": sequence,
        }
    }
    raw = pichemist_from_dict(input_dict, method=PKaMethod.PKA_MATCHER.value)
    # The single entry is looked up under the string key first, then the int key.
    entry = raw.get("1", raw.get(1, {}))

    pI_data = entry.get("pI", {})
    charge_data = entry.get("QpH7", {})

    return {
        "sequence": sequence,
        "pI_mean": pI_data.get("pI mean"),
        "pI_std": pI_data.get("std"),
        "pI_stderr": pI_data.get("err"),
        "pI_interval": entry.get("pI_interval"),
        "pI_interval_threshold": entry.get("pI_interval_threshold"),
        "charge_at_pH7_mean": charge_data.get("Q at pH7.4 mean"),
        "charge_at_pH7_std": charge_data.get("std"),
        "pI_by_method": {
            k: v for k, v in pI_data.items() if k not in pi_summary_keys
        },
        "charge_at_pH7_by_method": {
            k: v for k, v in charge_data.items() if k not in charge_summary_keys
        },
        "reference_pka_set": entry.get("pKa_set"),
    }
140
+
141
+
142
def calculate_pi_from_smiles(smiles: str) -> dict:
    """
    Calculate pI from a SMILES string (for modified/noncanonical peptides).

    The SMILES is parsed with RDKit and the resulting molecule is passed to
    pichemist with the ``PKA_MATCHER`` method. Per the original module notes,
    pichemist cuts amide bonds, matches known fragments and calculates pKas
    for unknown ones.

    Args:
        smiles: SMILES string of the peptide; also used as the molecule name.

    Returns:
        ``{"error": "Invalid SMILES: <input>"}`` when the input is empty,
        whitespace-only, or cannot be parsed by RDKit. Otherwise a dict with
        ``smiles``, ``pI_mean``, ``pI_std``, ``pI_interval``,
        ``charge_at_pH7_mean`` (read from pichemist's "Q at pH7.4 mean"
        entry) and ``pI_by_method``.
    """
    # RDKit parses an empty SMILES into an atom-less molecule instead of
    # returning None, so the None check below would not catch it.
    if not smiles or not smiles.strip():
        return {"error": f"Invalid SMILES: {smiles}"}

    from pichemist.api import pichemist_from_dict, PKaMethod
    from pichemist.model import InputAttribute
    from rdkit import Chem

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return {"error": f"Invalid SMILES: {smiles}"}

    input_dict = {
        1: {
            InputAttribute.MOL_NAME.value: smiles,
            InputAttribute.MOL_OBJECT.value: mol,
            "fasta": None,
        }
    }
    raw = pichemist_from_dict(input_dict, method=PKaMethod.PKA_MATCHER.value)
    # The single entry is looked up under the string key first, then the int key.
    entry = raw.get("1", raw.get(1, {}))

    pI_data = entry.get("pI", {})
    charge_data = entry.get("QpH7", {})

    # Summary statistics are reported separately from per-method values.
    pi_summary_keys = ("pI mean", "std", "err")

    return {
        "smiles": smiles,
        "pI_mean": pI_data.get("pI mean"),
        "pI_std": pI_data.get("std"),
        "pI_interval": entry.get("pI_interval"),
        "charge_at_pH7_mean": charge_data.get("Q at pH7.4 mean"),
        "pI_by_method": {
            k: v for k, v in pI_data.items() if k not in pi_summary_keys
        },
    }
179
+
180
+
181
+ # =============================================================================
182
+ # p2smi — peptide SMILES generation, properties, synthesis, modifications
183
+ # =============================================================================
184
+
185
+
186
def sequence_to_smiles(
    sequence: str,
    cyclization: str = "",
) -> dict:
    """
    Convert peptide sequence to SMILES string.

    Args:
        sequence: Amino acid sequence (1-letter codes). Supports 450+ amino acids
            including noncanonical (SwissSidechain).
        cyclization: One of '', 'SS', 'HT', 'SCNT', 'SCCT', 'SCSC', or
            a manual constraint pattern like 'SSXXXCXXXCX'. An empty value
            produces a linear peptide.

    Returns:
        A dict with ``sequence``, ``cyclization``, ``smiles`` and ``type``
        (``"linear"`` or ``"cyclic"``). For cyclic peptides where p2smi
        returns a tuple of at least three items, ``applied_constraint`` is
        included as well.
    """
    # Only the two generators are needed here; constraint discovery lives in
    # get_cyclization_options.
    from p2smi.utilities.smilesgen import (
        linear_peptide_smiles,
        constrained_peptide_smiles,
    )

    result: dict[str, Any] = {"sequence": sequence, "cyclization": cyclization}

    if not cyclization:
        result["smiles"] = linear_peptide_smiles(sequence)
        result["type"] = "linear"
        return result

    out = constrained_peptide_smiles(sequence, cyclization)
    if isinstance(out, tuple) and len(out) >= 3:
        # Tuple result: item 1 is reported as the applied constraint,
        # item 2 as the SMILES.
        result["smiles"] = out[2]
        result["applied_constraint"] = out[1]
    else:
        # Any other return shape is stringified as-is.
        result["smiles"] = str(out)
    result["type"] = "cyclic"

    return result
222
+
223
+
224
def get_cyclization_options(sequence: str) -> dict:
    """List the cyclization constraints p2smi reports for a peptide sequence.

    Each usable item returned by ``what_constraints`` is labelled from the
    first two characters of its constraint pattern. For ``SC`` patterns the
    remainder of the pattern is inspected for ``N`` and ``Z`` markers to pick
    the sidechain subtype.

    Returns:
        A dict with ``sequence``, ``length``, ``cyclization_options`` (list of
        ``{"type", "constraint_pattern"}`` dicts) and ``num_options``.
    """
    from p2smi.utilities.smilesgen import what_constraints

    prefix_labels = {
        "SS": "Disulfide",
        "HT": "Head-to-tail",
        "SC": "Sidechain",
    }
    # Keyed by (pattern tail contains "N", pattern tail contains "Z").
    sidechain_labels = {
        (True, True): "SCSC (sidechain–sidechain)",
        (True, False): "SCNT (sidechain–N-terminus)",
        (False, True): "SCCT (sidechain–C-terminus)",
        (False, False): "SC (unspecified)",
    }

    found = []
    for entry in what_constraints(sequence):
        # Skip anything that is not a sequence carrying a pattern at index 1.
        if not isinstance(entry, (list, tuple)) or len(entry) < 2:
            continue

        pattern = entry[1]
        prefix = pattern[:2].upper()
        if prefix == "SC":
            tail = pattern[2:]
            label = sidechain_labels["N" in tail, "Z" in tail]
        else:
            # Unknown prefixes are reported verbatim.
            label = prefix_labels.get(prefix, prefix)

        found.append({"type": label, "constraint_pattern": pattern})

    summary = {
        "sequence": sequence,
        "length": len(sequence),
        "cyclization_options": found,
        "num_options": len(found),
    }
    return summary
266
+
267
+
268
def generate_peptides(
    num_sequences: int = 10,
    min_length: int = 8,
    max_length: int = 20,
    noncanonical_percent: float = 0.0,
    dextro_percent: float = 0.0,
    cyclization: str = "none",
) -> dict:
    """
    Generate random peptide sequences with defined constraints.

    All numeric inputs are clamped before being passed to p2smi, and the
    ``parameters`` section of the result reports the values actually used.

    Args:
        num_sequences: Number of sequences to generate; clamped to 0..100.
        min_length: Minimum length; clamped to 2..100.
        max_length: Maximum length; clamped to ``min_length``..100, so a
            maximum below the minimum is raised to the minimum.
        noncanonical_percent: Clamped to 0.0..1.0.
        dextro_percent: Clamped to 0.0..1.0.
        cyclization: 'none', 'all', or comma-separated types: 'SS,HT,SCSC,SCNT,SCCT'.
            Surrounding whitespace is ignored and empty items are dropped.

    Returns:
        A dict with ``num_generated``, ``parameters`` and ``sequences`` (a
        list of ``{"id", "sequence", "length"}`` dicts).
    """
    from p2smi.genPeps import generate_sequences

    mode = cyclization.strip().lower()
    if mode == "all":
        constraints = ["SS", "HT", "SCNT", "SCCT", "SCSC"]
    elif mode in ("none", ""):
        constraints = []
    else:
        # Drop empty tokens so "SS," or "SS,,HT" do not yield "" constraints.
        constraints = [
            token.strip().upper()
            for token in cyclization.split(",")
            if token.strip()
        ]

    count = max(0, min(num_sequences, 100))
    shortest = min(max(min_length, 2), 100)
    # Keep the range well-formed: the upper bound may never fall below the lower.
    longest = min(max(max_length, shortest), 100)
    noncanonical = max(0.0, min(1.0, noncanonical_percent))
    dextro = max(0.0, min(1.0, dextro_percent))

    sequences = generate_sequences(
        num_sequences=count,
        min_length=shortest,
        max_length=longest,
        noncanonical_percent=noncanonical,
        dextro_percent=dextro,
        constraints=constraints,
    )

    results = [
        {"id": name, "sequence": seq, "length": len(seq)}
        for name, seq in sequences.items()
    ]

    return {
        "num_generated": len(results),
        "parameters": {
            "min_length": shortest,
            "max_length": longest,
            "noncanonical_percent": noncanonical,
            "dextro_percent": dextro,
            "cyclization": cyclization,
        },
        "sequences": results,
    }
315
+
316
+
317
def get_peptide_properties(smiles: str) -> dict:
    """
    Summarise molecular properties for a peptide SMILES string.

    Delegates to p2smi's ``molecule_summary`` and returns its result unchanged.
    Per the original module notes the summary covers MW, formula, logP, TPSA,
    HBD, HBA, rotatable bonds and a Lipinski evaluation.
    """
    from p2smi.chemProps import molecule_summary

    summary = molecule_summary(smiles)
    return summary
324
+
325
+
326
def check_synthesis_feasibility(sequence: str) -> dict:
    """
    Evaluate a peptide sequence for solid-phase peptide synthesis (SPPS) feasibility.

    The issues are collected by p2smi's ``collect_synthesis_issues``; a
    sequence passes only when that collection is empty. According to the
    original module notes, the p2smi rules cover:
    - Forbidden motifs (consecutive Pro, DG/DP, N/Q at N-terminus)
    - Cysteine content (>2 is problematic)
    - Terminal residue issues (Pro/Cys at C-terminus)
    - Glycine runs (>4 consecutive)
    - Sequence length (>50 residues)
    - Hydrophobicity (logP check)
    - Charge distribution (need charged residue every 5 positions)

    Returns:
        A dict with ``sequence``, ``length``, ``feasible``, ``verdict``
        (``"PASS"`` or ``"FAIL"``), ``issues`` and ``num_issues``.
    """
    from p2smi.synthRules import collect_synthesis_issues

    problems = collect_synthesis_issues(sequence)
    problem_count = len(problems)

    report = {"sequence": sequence, "length": len(sequence)}
    report["feasible"] = problem_count == 0
    report["verdict"] = "FAIL" if problems else "PASS"
    report["issues"] = problems
    report["num_issues"] = problem_count
    return report
350
+
351
+
352
def modify_peptide_smiles(
    smiles: str,
    n_methylation: bool = False,
    pegylation: bool = False,
    methylation_fraction: float = 0.3,
) -> dict:
    """
    Apply chemical modifications to a peptide SMILES string via p2smi.

    Args:
        smiles: Input peptide SMILES
        n_methylation: Forwarded to p2smi as ``do_methylate``
        pegylation: Forwarded to p2smi as ``do_pegylate``
        methylation_fraction: Forwarded as ``nmeth_residues`` after being
            clamped to the range 0-1

    Returns:
        A dict with ``original_smiles``, ``modified_smiles`` (``None`` when
        the result fails validation), ``modifications_applied``,
        ``valid_smiles`` and ``error``.
    """
    from p2smi.chemMods import modify_sequence, is_valid_smiles

    fraction = max(0.0, min(1.0, methylation_fraction))
    new_smiles, applied = modify_sequence(
        smiles,
        do_methylate=n_methylation,
        do_pegylate=pegylation,
        nmeth_residues=fraction,
    )

    passed = is_valid_smiles(new_smiles)
    # An invalid result is withheld and replaced by an explanatory message.
    if passed:
        reported_smiles, failure = new_smiles, None
    else:
        reported_smiles, failure = None, "Modified SMILES failed RDKit validation"

    return {
        "original_smiles": smiles,
        "modified_smiles": reported_smiles,
        "modifications_applied": applied,
        "valid_smiles": passed,
        "error": failure,
    }