labmate-mcp 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- labmate_mcp/__init__.py +4 -0
- labmate_mcp/__main__.py +3 -0
- labmate_mcp/apis.py +1744 -0
- labmate_mcp/bench.py +3392 -0
- labmate_mcp/chemistry.py +572 -0
- labmate_mcp/peptide.py +384 -0
- labmate_mcp/server.py +5116 -0
- labmate_mcp/writing.py +1488 -0
- labmate_mcp-7.0.0.dist-info/METADATA +495 -0
- labmate_mcp-7.0.0.dist-info/RECORD +14 -0
- labmate_mcp-7.0.0.dist-info/WHEEL +5 -0
- labmate_mcp-7.0.0.dist-info/entry_points.txt +2 -0
- labmate_mcp-7.0.0.dist-info/licenses/LICENSE +21 -0
- labmate_mcp-7.0.0.dist-info/top_level.txt +1 -0
labmate_mcp/chemistry.py
ADDED
|
@@ -0,0 +1,572 @@
|
|
|
1
|
+
"""
|
|
2
|
+
labmate-mcp chemistry utilities module.
|
|
3
|
+
|
|
4
|
+
Pure-computation tools:
|
|
5
|
+
- Isotope pattern calculation from molecular formula
|
|
6
|
+
- CAS registry number validation
|
|
7
|
+
- Scientific unit conversion
|
|
8
|
+
- Periodic table element lookup
|
|
9
|
+
- Buffer pH / Henderson-Hasselbalch calculations
|
|
10
|
+
|
|
11
|
+
No external API calls — all offline, zero latency.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import math
|
|
17
|
+
import re
|
|
18
|
+
from itertools import product as iter_product
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
# =============================================================================
|
|
22
|
+
# Periodic table — element data (partial table: Tc, La–Ta, Po–Pa and all elements above U are not listed)
|
|
23
|
+
# =============================================================================
|
|
24
|
+
|
|
25
|
+
ELEMENTS: dict[str, dict[str, Any]] = {
|
|
26
|
+
"H": {"Z": 1, "name": "Hydrogen", "mass": 1.008, "group": 1, "period": 1, "block": "s", "en": 2.20, "config": "1s1", "category": "nonmetal"},
|
|
27
|
+
"He": {"Z": 2, "name": "Helium", "mass": 4.003, "group": 18, "period": 1, "block": "s", "en": None, "config": "1s2", "category": "noble gas"},
|
|
28
|
+
"Li": {"Z": 3, "name": "Lithium", "mass": 6.941, "group": 1, "period": 2, "block": "s", "en": 0.98, "config": "[He] 2s1", "category": "alkali metal"},
|
|
29
|
+
"Be": {"Z": 4, "name": "Beryllium", "mass": 9.012, "group": 2, "period": 2, "block": "s", "en": 1.57, "config": "[He] 2s2", "category": "alkaline earth"},
|
|
30
|
+
"B": {"Z": 5, "name": "Boron", "mass": 10.81, "group": 13, "period": 2, "block": "p", "en": 2.04, "config": "[He] 2s2 2p1", "category": "metalloid"},
|
|
31
|
+
"C": {"Z": 6, "name": "Carbon", "mass": 12.011, "group": 14, "period": 2, "block": "p", "en": 2.55, "config": "[He] 2s2 2p2", "category": "nonmetal"},
|
|
32
|
+
"N": {"Z": 7, "name": "Nitrogen", "mass": 14.007, "group": 15, "period": 2, "block": "p", "en": 3.04, "config": "[He] 2s2 2p3", "category": "nonmetal"},
|
|
33
|
+
"O": {"Z": 8, "name": "Oxygen", "mass": 15.999, "group": 16, "period": 2, "block": "p", "en": 3.44, "config": "[He] 2s2 2p4", "category": "nonmetal"},
|
|
34
|
+
"F": {"Z": 9, "name": "Fluorine", "mass": 18.998, "group": 17, "period": 2, "block": "p", "en": 3.98, "config": "[He] 2s2 2p5", "category": "halogen"},
|
|
35
|
+
"Ne": {"Z": 10, "name": "Neon", "mass": 20.180, "group": 18, "period": 2, "block": "p", "en": None, "config": "[He] 2s2 2p6", "category": "noble gas"},
|
|
36
|
+
"Na": {"Z": 11, "name": "Sodium", "mass": 22.990, "group": 1, "period": 3, "block": "s", "en": 0.93, "config": "[Ne] 3s1", "category": "alkali metal"},
|
|
37
|
+
"Mg": {"Z": 12, "name": "Magnesium", "mass": 24.305, "group": 2, "period": 3, "block": "s", "en": 1.31, "config": "[Ne] 3s2", "category": "alkaline earth"},
|
|
38
|
+
"Al": {"Z": 13, "name": "Aluminium", "mass": 26.982, "group": 13, "period": 3, "block": "p", "en": 1.61, "config": "[Ne] 3s2 3p1", "category": "post-transition metal"},
|
|
39
|
+
"Si": {"Z": 14, "name": "Silicon", "mass": 28.086, "group": 14, "period": 3, "block": "p", "en": 1.90, "config": "[Ne] 3s2 3p2", "category": "metalloid"},
|
|
40
|
+
"P": {"Z": 15, "name": "Phosphorus", "mass": 30.974, "group": 15, "period": 3, "block": "p", "en": 2.19, "config": "[Ne] 3s2 3p3", "category": "nonmetal"},
|
|
41
|
+
"S": {"Z": 16, "name": "Sulfur", "mass": 32.06, "group": 16, "period": 3, "block": "p", "en": 2.58, "config": "[Ne] 3s2 3p4", "category": "nonmetal"},
|
|
42
|
+
"Cl": {"Z": 17, "name": "Chlorine", "mass": 35.45, "group": 17, "period": 3, "block": "p", "en": 3.16, "config": "[Ne] 3s2 3p5", "category": "halogen"},
|
|
43
|
+
"Ar": {"Z": 18, "name": "Argon", "mass": 39.948, "group": 18, "period": 3, "block": "p", "en": None, "config": "[Ne] 3s2 3p6", "category": "noble gas"},
|
|
44
|
+
"K": {"Z": 19, "name": "Potassium", "mass": 39.098, "group": 1, "period": 4, "block": "s", "en": 0.82, "config": "[Ar] 4s1", "category": "alkali metal"},
|
|
45
|
+
"Ca": {"Z": 20, "name": "Calcium", "mass": 40.078, "group": 2, "period": 4, "block": "s", "en": 1.00, "config": "[Ar] 4s2", "category": "alkaline earth"},
|
|
46
|
+
"Sc": {"Z": 21, "name": "Scandium", "mass": 44.956, "group": 3, "period": 4, "block": "d", "en": 1.36, "config": "[Ar] 3d1 4s2", "category": "transition metal"},
|
|
47
|
+
"Ti": {"Z": 22, "name": "Titanium", "mass": 47.867, "group": 4, "period": 4, "block": "d", "en": 1.54, "config": "[Ar] 3d2 4s2", "category": "transition metal"},
|
|
48
|
+
"V": {"Z": 23, "name": "Vanadium", "mass": 50.942, "group": 5, "period": 4, "block": "d", "en": 1.63, "config": "[Ar] 3d3 4s2", "category": "transition metal"},
|
|
49
|
+
"Cr": {"Z": 24, "name": "Chromium", "mass": 51.996, "group": 6, "period": 4, "block": "d", "en": 1.66, "config": "[Ar] 3d5 4s1", "category": "transition metal"},
|
|
50
|
+
"Mn": {"Z": 25, "name": "Manganese", "mass": 54.938, "group": 7, "period": 4, "block": "d", "en": 1.55, "config": "[Ar] 3d5 4s2", "category": "transition metal"},
|
|
51
|
+
"Fe": {"Z": 26, "name": "Iron", "mass": 55.845, "group": 8, "period": 4, "block": "d", "en": 1.83, "config": "[Ar] 3d6 4s2", "category": "transition metal"},
|
|
52
|
+
"Co": {"Z": 27, "name": "Cobalt", "mass": 58.933, "group": 9, "period": 4, "block": "d", "en": 1.88, "config": "[Ar] 3d7 4s2", "category": "transition metal"},
|
|
53
|
+
"Ni": {"Z": 28, "name": "Nickel", "mass": 58.693, "group": 10, "period": 4, "block": "d", "en": 1.91, "config": "[Ar] 3d8 4s2", "category": "transition metal"},
|
|
54
|
+
"Cu": {"Z": 29, "name": "Copper", "mass": 63.546, "group": 11, "period": 4, "block": "d", "en": 1.90, "config": "[Ar] 3d10 4s1", "category": "transition metal"},
|
|
55
|
+
"Zn": {"Z": 30, "name": "Zinc", "mass": 65.38, "group": 12, "period": 4, "block": "d", "en": 1.65, "config": "[Ar] 3d10 4s2", "category": "transition metal"},
|
|
56
|
+
"Ga": {"Z": 31, "name": "Gallium", "mass": 69.723, "group": 13, "period": 4, "block": "p", "en": 1.81, "config": "[Ar] 3d10 4s2 4p1", "category": "post-transition metal"},
|
|
57
|
+
"Ge": {"Z": 32, "name": "Germanium", "mass": 72.63, "group": 14, "period": 4, "block": "p", "en": 2.01, "config": "[Ar] 3d10 4s2 4p2", "category": "metalloid"},
|
|
58
|
+
"As": {"Z": 33, "name": "Arsenic", "mass": 74.922, "group": 15, "period": 4, "block": "p", "en": 2.18, "config": "[Ar] 3d10 4s2 4p3", "category": "metalloid"},
|
|
59
|
+
"Se": {"Z": 34, "name": "Selenium", "mass": 78.96, "group": 16, "period": 4, "block": "p", "en": 2.55, "config": "[Ar] 3d10 4s2 4p4", "category": "nonmetal"},
|
|
60
|
+
"Br": {"Z": 35, "name": "Bromine", "mass": 79.904, "group": 17, "period": 4, "block": "p", "en": 2.96, "config": "[Ar] 3d10 4s2 4p5", "category": "halogen"},
|
|
61
|
+
"Kr": {"Z": 36, "name": "Krypton", "mass": 83.798, "group": 18, "period": 4, "block": "p", "en": 3.00, "config": "[Ar] 3d10 4s2 4p6", "category": "noble gas"},
|
|
62
|
+
"Rb": {"Z": 37, "name": "Rubidium", "mass": 85.468, "group": 1, "period": 5, "block": "s", "en": 0.82, "config": "[Kr] 5s1", "category": "alkali metal"},
|
|
63
|
+
"Sr": {"Z": 38, "name": "Strontium", "mass": 87.62, "group": 2, "period": 5, "block": "s", "en": 0.95, "config": "[Kr] 5s2", "category": "alkaline earth"},
|
|
64
|
+
"Y": {"Z": 39, "name": "Yttrium", "mass": 88.906, "group": 3, "period": 5, "block": "d", "en": 1.22, "config": "[Kr] 4d1 5s2", "category": "transition metal"},
|
|
65
|
+
"Zr": {"Z": 40, "name": "Zirconium", "mass": 91.224, "group": 4, "period": 5, "block": "d", "en": 1.33, "config": "[Kr] 4d2 5s2", "category": "transition metal"},
|
|
66
|
+
"Nb": {"Z": 41, "name": "Niobium", "mass": 92.906, "group": 5, "period": 5, "block": "d", "en": 1.60, "config": "[Kr] 4d4 5s1", "category": "transition metal"},
|
|
67
|
+
"Mo": {"Z": 42, "name": "Molybdenum", "mass": 95.96, "group": 6, "period": 5, "block": "d", "en": 2.16, "config": "[Kr] 4d5 5s1", "category": "transition metal"},
|
|
68
|
+
"Ru": {"Z": 44, "name": "Ruthenium", "mass": 101.07, "group": 8, "period": 5, "block": "d", "en": 2.20, "config": "[Kr] 4d7 5s1", "category": "transition metal"},
|
|
69
|
+
"Rh": {"Z": 45, "name": "Rhodium", "mass": 102.906, "group": 9, "period": 5, "block": "d", "en": 2.28, "config": "[Kr] 4d8 5s1", "category": "transition metal"},
|
|
70
|
+
"Pd": {"Z": 46, "name": "Palladium", "mass": 106.42, "group": 10, "period": 5, "block": "d", "en": 2.20, "config": "[Kr] 4d10", "category": "transition metal"},
|
|
71
|
+
"Ag": {"Z": 47, "name": "Silver", "mass": 107.868, "group": 11, "period": 5, "block": "d", "en": 1.93, "config": "[Kr] 4d10 5s1", "category": "transition metal"},
|
|
72
|
+
"Cd": {"Z": 48, "name": "Cadmium", "mass": 112.411, "group": 12, "period": 5, "block": "d", "en": 1.69, "config": "[Kr] 4d10 5s2", "category": "transition metal"},
|
|
73
|
+
"In": {"Z": 49, "name": "Indium", "mass": 114.818, "group": 13, "period": 5, "block": "p", "en": 1.78, "config": "[Kr] 4d10 5s2 5p1", "category": "post-transition metal"},
|
|
74
|
+
"Sn": {"Z": 50, "name": "Tin", "mass": 118.710, "group": 14, "period": 5, "block": "p", "en": 1.96, "config": "[Kr] 4d10 5s2 5p2", "category": "post-transition metal"},
|
|
75
|
+
"Sb": {"Z": 51, "name": "Antimony", "mass": 121.760, "group": 15, "period": 5, "block": "p", "en": 2.05, "config": "[Kr] 4d10 5s2 5p3", "category": "metalloid"},
|
|
76
|
+
"Te": {"Z": 52, "name": "Tellurium", "mass": 127.60, "group": 16, "period": 5, "block": "p", "en": 2.10, "config": "[Kr] 4d10 5s2 5p4", "category": "metalloid"},
|
|
77
|
+
"I": {"Z": 53, "name": "Iodine", "mass": 126.904, "group": 17, "period": 5, "block": "p", "en": 2.66, "config": "[Kr] 4d10 5s2 5p5", "category": "halogen"},
|
|
78
|
+
"Xe": {"Z": 54, "name": "Xenon", "mass": 131.293, "group": 18, "period": 5, "block": "p", "en": 2.60, "config": "[Kr] 4d10 5s2 5p6", "category": "noble gas"},
|
|
79
|
+
"Cs": {"Z": 55, "name": "Cesium", "mass": 132.905, "group": 1, "period": 6, "block": "s", "en": 0.79, "config": "[Xe] 6s1", "category": "alkali metal"},
|
|
80
|
+
"Ba": {"Z": 56, "name": "Barium", "mass": 137.327, "group": 2, "period": 6, "block": "s", "en": 0.89, "config": "[Xe] 6s2", "category": "alkaline earth"},
|
|
81
|
+
"W": {"Z": 74, "name": "Tungsten", "mass": 183.84, "group": 6, "period": 6, "block": "d", "en": 2.36, "config": "[Xe] 4f14 5d4 6s2", "category": "transition metal"},
|
|
82
|
+
"Re": {"Z": 75, "name": "Rhenium", "mass": 186.207, "group": 7, "period": 6, "block": "d", "en": 1.90, "config": "[Xe] 4f14 5d5 6s2", "category": "transition metal"},
|
|
83
|
+
"Os": {"Z": 76, "name": "Osmium", "mass": 190.23, "group": 8, "period": 6, "block": "d", "en": 2.20, "config": "[Xe] 4f14 5d6 6s2", "category": "transition metal"},
|
|
84
|
+
"Ir": {"Z": 77, "name": "Iridium", "mass": 192.217, "group": 9, "period": 6, "block": "d", "en": 2.20, "config": "[Xe] 4f14 5d7 6s2", "category": "transition metal"},
|
|
85
|
+
"Pt": {"Z": 78, "name": "Platinum", "mass": 195.084, "group": 10, "period": 6, "block": "d", "en": 2.28, "config": "[Xe] 4f14 5d9 6s1", "category": "transition metal"},
|
|
86
|
+
"Au": {"Z": 79, "name": "Gold", "mass": 196.967, "group": 11, "period": 6, "block": "d", "en": 2.54, "config": "[Xe] 4f14 5d10 6s1", "category": "transition metal"},
|
|
87
|
+
"Hg": {"Z": 80, "name": "Mercury", "mass": 200.59, "group": 12, "period": 6, "block": "d", "en": 2.00, "config": "[Xe] 4f14 5d10 6s2", "category": "transition metal"},
|
|
88
|
+
"Tl": {"Z": 81, "name": "Thallium", "mass": 204.38, "group": 13, "period": 6, "block": "p", "en": 1.62, "config": "[Xe] 4f14 5d10 6s2 6p1", "category": "post-transition metal"},
|
|
89
|
+
"Pb": {"Z": 82, "name": "Lead", "mass": 207.2, "group": 14, "period": 6, "block": "p", "en": 2.33, "config": "[Xe] 4f14 5d10 6s2 6p2", "category": "post-transition metal"},
|
|
90
|
+
"Bi": {"Z": 83, "name": "Bismuth", "mass": 208.980, "group": 15, "period": 6, "block": "p", "en": 2.02, "config": "[Xe] 4f14 5d10 6s2 6p3", "category": "post-transition metal"},
|
|
91
|
+
"U": {"Z": 92, "name": "Uranium", "mass": 238.029, "group": None, "period": 7, "block": "f", "en": 1.38, "config": "[Rn] 5f3 6d1 7s2", "category": "actinide"},
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
# Reverse index: atomic number (int) -> element symbol. Built once from
# ELEMENTS when the module is loaded so lookups by Z do not scan the table.
_Z_TO_SYMBOL: dict[int, str] = {v["Z"]: k for k, v in ELEMENTS.items()}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def lookup_element(query: str) -> dict | list[dict] | None:
    """
    Look up an element by symbol, name, or atomic number.

    Resolution order: atomic number, exact symbol (case-insensitive for
    one- and two-character queries), exact name (case-insensitive), then a
    substring search over names and symbols.

    Args:
        query: Symbol ('Fe'), name ('iron'), or atomic number ('26').

    Returns:
        A dict holding the element data plus a 'symbol' key when exactly one
        element matches; a list of such dicts when the substring search
        matches several elements; None when nothing matches or the query is
        blank.
    """
    q = query.strip()
    if not q:
        # "" is a substring of every name, so without this guard a blank
        # query would fall through to the fuzzy search and match everything.
        return None

    # By atomic number. isdecimal() is used instead of isdigit() because
    # isdigit() also accepts characters (e.g. superscript digits) that int()
    # cannot parse, which would raise ValueError here.
    if q.isdecimal():
        sym = _Z_TO_SYMBOL.get(int(q))
        return {"symbol": sym, **ELEMENTS[sym]} if sym else None

    # By symbol (case-insensitive for short queries: 'fe' / 'FE' -> 'Fe')
    sym = q.capitalize() if len(q) <= 2 else q
    if sym in ELEMENTS:
        return {"symbol": sym, **ELEMENTS[sym]}

    # By exact name
    q_lower = q.lower()
    for sym, data in ELEMENTS.items():
        if data["name"].lower() == q_lower:
            return {"symbol": sym, **data}

    # Fuzzy (substring) match against names and symbols
    matches = [
        {"symbol": sym, **data}
        for sym, data in ELEMENTS.items()
        if q_lower in data["name"].lower() or q_lower in sym.lower()
    ]
    if not matches:
        return None
    return matches[0] if len(matches) == 1 else matches
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# =============================================================================
|
|
128
|
+
# CAS registry number validation
|
|
129
|
+
# =============================================================================
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def validate_cas(cas_string: str) -> dict:
    """
    Validate a CAS registry number (format: 2-7 digits, 2 digits, 1 check digit).

    Accepts the hyphenated form ('7732-18-5') or bare digits ('7732185');
    spaces are ignored. Only ASCII digits are accepted, and the first segment
    must be 2 to 7 digits long in both input forms.

    The check digit is the sum of (position * digit) modulo 10, where
    positions are counted from the right over all digits except the check
    digit itself.

    Returns dict with 'valid', 'cas', 'error' keys. For bare-digit input that
    passes the format check, 'cas' is the reconstructed hyphenated form;
    otherwise it is the whitespace-stripped input.
    """
    cas_string = cas_string.strip().replace(" ", "")
    bad_format = {"valid": False, "cas": cas_string, "error": "Invalid CAS format"}

    # Accept with or without hyphens
    if "-" in cas_string:
        parts = cas_string.split("-")
        if len(parts) != 3:
            return {"valid": False, "cas": cas_string, "error": "CAS must have format NNNNN-NN-N"}
        # str.isdigit() alone also accepts characters such as superscript
        # digits that int() rejects, so require ASCII explicitly.
        if not all(p.isascii() and p.isdigit() for p in parts):
            return bad_format
        part1, part2, check = parts
        if not 2 <= len(part1) <= 7 or len(part2) != 2 or len(check) != 1:
            return bad_format
        digits = part1 + part2
        check_digit = int(check)
    else:
        # 2-7 digits + 2 digits + 1 check digit -> 5 to 10 digits in total
        if not (cas_string.isascii() and cas_string.isdigit()) or not 5 <= len(cas_string) <= 10:
            return bad_format
        digits = cas_string[:-1]
        check_digit = int(cas_string[-1])
        # Reconstruct hyphenated form
        cas_string = f"{digits[:-2]}-{digits[-2:]}-{check_digit}"

    # Validate check digit: sum of (position * digit) mod 10, counting from right
    total = sum(pos * int(d) for pos, d in enumerate(reversed(digits), start=1))
    computed_check = total % 10

    if computed_check != check_digit:
        return {
            "valid": False,
            "cas": cas_string,
            "error": f"Check digit mismatch: expected {check_digit}, computed {computed_check}",
        }

    return {"valid": True, "cas": cas_string, "error": None}
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# =============================================================================
|
|
178
|
+
# Isotope pattern calculation
|
|
179
|
+
# =============================================================================
|
|
180
|
+
|
|
181
|
+
# Natural isotope abundances (mass, abundance) for common elements
|
|
182
|
+
_ISOTOPES: dict[str, list[tuple[float, float]]] = {
|
|
183
|
+
"H": [(1.00783, 0.999885), (2.01410, 0.000115)],
|
|
184
|
+
"C": [(12.00000, 0.9893), (13.00335, 0.0107)],
|
|
185
|
+
"N": [(14.00307, 0.99632), (15.00011, 0.00368)],
|
|
186
|
+
"O": [(15.99491, 0.99757), (16.99913, 0.00038), (17.99916, 0.00205)],
|
|
187
|
+
"S": [(31.97207, 0.9493), (32.97146, 0.0076), (33.96787, 0.0429), (35.96708, 0.0002)],
|
|
188
|
+
"P": [(30.97376, 1.0)],
|
|
189
|
+
"F": [(18.99840, 1.0)],
|
|
190
|
+
"Cl": [(34.96885, 0.7576), (36.96590, 0.2424)],
|
|
191
|
+
"Br": [(78.91834, 0.5069), (80.91629, 0.4931)],
|
|
192
|
+
"I": [(126.90447, 1.0)],
|
|
193
|
+
"Si": [(27.97693, 0.92223), (28.97649, 0.04685), (29.97377, 0.03092)],
|
|
194
|
+
"Se": [(73.92248, 0.0089), (75.91921, 0.0937), (76.91991, 0.0763),
|
|
195
|
+
(77.91731, 0.2377), (79.91652, 0.4961), (81.91670, 0.0873)],
|
|
196
|
+
"Na": [(22.98977, 1.0)],
|
|
197
|
+
"K": [(38.96371, 0.932581), (39.96400, 0.000117), (40.96183, 0.067302)],
|
|
198
|
+
"Fe": [(53.93961, 0.05845), (55.93494, 0.91754), (56.93540, 0.02119), (57.93328, 0.00282)],
|
|
199
|
+
"Cu": [(62.92960, 0.6915), (64.92779, 0.3085)],
|
|
200
|
+
"Zn": [(63.92914, 0.4863), (65.92603, 0.2790), (66.92713, 0.0410),
|
|
201
|
+
(67.92485, 0.1875), (69.92532, 0.0062)],
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
# Regex for molecular formula parsing: e.g., C14H19N3O4, Ca(OH)2, etc.
|
|
205
|
+
_FORMULA_RE = re.compile(r"([A-Z][a-z]?)(\d*)")
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _parse_formula_to_elements(formula: str) -> dict[str, int]:
    """Parse molecular formula string → {element: count}.

    Parenthesised groups are expanded first; afterwards every element token
    (capital letter, optional lowercase letter, optional count) is tallied.
    A token without a count contributes 1, and repeated symbols accumulate.
    """
    tally: dict[str, int] = {}
    flat = _expand_parentheses(formula)
    for symbol, digits in _FORMULA_RE.findall(flat):
        if not symbol:
            continue
        amount = int(digits) if digits else 1
        tally[symbol] = tally.get(symbol, 0) + amount
    return tally
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _expand_parentheses(formula: str) -> str:
|
|
223
|
+
"""Expand parenthesized groups: Ca(OH)2 → CaO2H2."""
|
|
224
|
+
while "(" in formula:
|
|
225
|
+
# Find innermost parentheses
|
|
226
|
+
m = re.search(r"\(([^()]+)\)(\d*)", formula)
|
|
227
|
+
if not m:
|
|
228
|
+
break
|
|
229
|
+
inner = m.group(1)
|
|
230
|
+
mult = int(m.group(2)) if m.group(2) else 1
|
|
231
|
+
# Multiply each element count
|
|
232
|
+
expanded = ""
|
|
233
|
+
for em in _FORMULA_RE.finditer(inner):
|
|
234
|
+
elem = em.group(1)
|
|
235
|
+
count = int(em.group(2)) if em.group(2) else 1
|
|
236
|
+
if elem:
|
|
237
|
+
expanded += f"{elem}{count * mult}"
|
|
238
|
+
formula = formula[:m.start()] + expanded + formula[m.end():]
|
|
239
|
+
return formula
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def calculate_isotope_pattern(
    formula: str = "",
    smiles: str = "",
    charge: int = 1,
    top_n: int = 10,
    min_abundance: float = 0.001,
) -> dict:
    """
    Calculate isotope distribution from molecular formula or SMILES.

    Args:
        formula: Molecular formula (e.g., 'C9H8O4')
        smiles: SMILES string (alternative to formula, uses RDKit)
        charge: Charge state for m/z calculation (default 1). m/z is simply
            mass / |charge|; a charge of 0 reports masses unchanged.
        top_n: Max number of peaks to return. Peaks are taken in ascending
            mass order, so this keeps the first top_n peaks that pass
            min_abundance.
        min_abundance: Minimum abundance relative to the most intense peak
            (0-1) for a peak to be included

    Returns dict with formula, charge, monoisotopic_mass, monoisotopic_mz,
    average_mass and pattern (list of {mz, abundance, relative}), where
    'abundance' is percent of the total distribution and 'relative' is
    percent of the most intense peak. On failure returns {"error": message}.
    """
    if smiles and not formula:
        try:
            from rdkit import Chem
            from rdkit.Chem import rdMolDescriptors
        except ImportError:
            return {"error": "RDKit not available; provide formula instead"}
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return {"error": f"Invalid SMILES: {smiles}"}
        formula = rdMolDescriptors.CalcMolFormula(mol)

    if not formula:
        return {"error": "Provide either formula or smiles"}

    elements = _parse_formula_to_elements(formula)
    if not elements:
        return {"error": f"Could not parse formula: {formula}"}

    # Reject symbols that are in neither table; treating them as mass 0
    # would silently produce a wrong distribution.
    unknown = [e for e in elements if e not in _ISOTOPES and e not in ELEMENTS]
    if unknown:
        return {"error": f"Unknown element(s) in formula {formula}: {', '.join(unknown)}"}

    # Calculate pattern using polynomial multiplication
    pattern = [(0.0, 1.0)]  # (mass, probability)
    mono_mass = 0.0

    for elem, count in elements.items():
        # Elements without isotope data are treated as a single species at
        # their tabulated (average) atomic mass.
        isotopes = _ISOTOPES.get(elem) or [(ELEMENTS[elem]["mass"], 1.0)]

        # Monoisotopic mass uses the most abundant isotope of each element,
        # which is not always the lightest one (e.g. Fe, Se).
        mono_mass += count * max(isotopes, key=lambda iso: iso[1])[0]

        # For each atom of this element, convolve with isotope distribution
        for _ in range(count):
            new_pattern: dict[float, float] = {}
            for mass_a, prob_a in pattern:
                for mass_iso, prob_iso in isotopes:
                    combined_mass = round(mass_a + mass_iso, 5)
                    new_pattern[combined_mass] = new_pattern.get(combined_mass, 0.0) + prob_a * prob_iso
            # Prune low-abundance peaks for efficiency
            pattern = [(m, p) for m, p in new_pattern.items() if p > 1e-8]

    pattern.sort(key=lambda x: x[0])

    # Aggregate peaks within 0.01 Da of the running merged peak, keeping the
    # probability-weighted mean mass
    aggregated: list[tuple[float, float]] = []
    for mass, prob in pattern:
        if aggregated and abs(mass - aggregated[-1][0]) < 0.01:
            old_m, old_p = aggregated[-1]
            total_p = old_p + prob
            avg_m = (old_m * old_p + mass * prob) / total_p
            aggregated[-1] = (avg_m, total_p)
        else:
            aggregated.append((mass, prob))

    max_prob = max(p for _, p in aggregated) if aggregated else 1.0

    result_peaks = []
    for mass, prob in aggregated:
        rel_abundance = prob / max_prob
        if rel_abundance >= min_abundance and len(result_peaks) < top_n:
            mz = mass / abs(charge) if charge != 0 else mass
            result_peaks.append({
                "mz": round(mz, 4),
                "abundance": round(prob * 100, 4),
                "relative": round(rel_abundance * 100, 2),
            })

    avg_mass = sum(m * p for m, p in aggregated) / sum(p for _, p in aggregated) if aggregated else 0.0

    return {
        "formula": formula,
        "charge": charge,
        "monoisotopic_mass": round(mono_mass, 4),
        "monoisotopic_mz": round(mono_mass / abs(charge), 4) if charge != 0 else round(mono_mass, 4),
        "average_mass": round(avg_mass, 4),
        "pattern": result_peaks,
    }
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
# =============================================================================
|
|
342
|
+
# Scientific unit conversion
|
|
343
|
+
# =============================================================================
|
|
344
|
+
|
|
345
|
+
# Conversion factors to each category's reference unit (kg, L, m, J, Pa, s, mol)
|
|
346
|
+
_MASS_TO_KG = {
|
|
347
|
+
"kg": 1, "g": 1e-3, "mg": 1e-6, "μg": 1e-9, "ug": 1e-9, "ng": 1e-12,
|
|
348
|
+
"pg": 1e-15, "fg": 1e-18, "lb": 0.453592, "oz": 0.0283495,
|
|
349
|
+
"Da": 1.66054e-27, "kDa": 1.66054e-24, "amu": 1.66054e-27,
|
|
350
|
+
}
|
|
351
|
+
_VOLUME_TO_L = {
|
|
352
|
+
"L": 1, "l": 1, "mL": 1e-3, "ml": 1e-3, "μL": 1e-6, "uL": 1e-6,
|
|
353
|
+
"nL": 1e-9, "pL": 1e-12, "dL": 0.1, "kL": 1000,
|
|
354
|
+
"gal": 3.78541, "qt": 0.946353, "pt": 0.473176, "fl_oz": 0.0295735,
|
|
355
|
+
"cm3": 1e-3, "m3": 1000, "mm3": 1e-6,
|
|
356
|
+
}
|
|
357
|
+
_LENGTH_TO_M = {
|
|
358
|
+
"m": 1, "km": 1000, "cm": 0.01, "mm": 1e-3, "μm": 1e-6, "um": 1e-6,
|
|
359
|
+
"nm": 1e-9, "pm": 1e-12, "Å": 1e-10, "A": 1e-10,
|
|
360
|
+
"in": 0.0254, "ft": 0.3048, "mi": 1609.34,
|
|
361
|
+
}
|
|
362
|
+
_ENERGY_TO_J = {
|
|
363
|
+
"J": 1, "kJ": 1000, "cal": 4.184, "kcal": 4184, "eV": 1.60218e-19,
|
|
364
|
+
"keV": 1.60218e-16, "MeV": 1.60218e-13,
|
|
365
|
+
"kJ/mol": 1000/6.02214e23, "kcal/mol": 4184/6.02214e23,
|
|
366
|
+
"Eh": 4.35975e-18, "hartree": 4.35975e-18,
|
|
367
|
+
"cm-1": 1.98645e-23, "wavenumber": 1.98645e-23,
|
|
368
|
+
}
|
|
369
|
+
_PRESSURE_TO_PA = {
|
|
370
|
+
"Pa": 1, "kPa": 1000, "MPa": 1e6, "bar": 1e5, "mbar": 100,
|
|
371
|
+
"atm": 101325, "torr": 133.322, "mmHg": 133.322, "psi": 6894.76,
|
|
372
|
+
}
|
|
373
|
+
_TIME_TO_S = {
|
|
374
|
+
"s": 1, "ms": 1e-3, "μs": 1e-6, "us": 1e-6, "ns": 1e-9, "ps": 1e-12,
|
|
375
|
+
"fs": 1e-15, "min": 60, "h": 3600, "hr": 3600, "d": 86400, "day": 86400,
|
|
376
|
+
}
|
|
377
|
+
_AMOUNT_TO_MOL = {
|
|
378
|
+
"mol": 1, "mmol": 1e-3, "μmol": 1e-6, "umol": 1e-6,
|
|
379
|
+
"nmol": 1e-9, "pmol": 1e-12, "fmol": 1e-15, "kmol": 1000,
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
_UNIT_CATEGORIES = {
|
|
383
|
+
"mass": _MASS_TO_KG,
|
|
384
|
+
"volume": _VOLUME_TO_L,
|
|
385
|
+
"length": _LENGTH_TO_M,
|
|
386
|
+
"energy": _ENERGY_TO_J,
|
|
387
|
+
"pressure": _PRESSURE_TO_PA,
|
|
388
|
+
"time": _TIME_TO_S,
|
|
389
|
+
"amount": _AMOUNT_TO_MOL,
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def _find_category(unit: str) -> tuple[str, dict] | None:
    """Return (category name, conversion table) for the first category whose
    table lists *unit* exactly, or None when no table contains it."""
    return next(
        ((name, factors) for name, factors in _UNIT_CATEGORIES.items() if unit in factors),
        None,
    )
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _lookup_unit(unit: str) -> tuple[str, tuple[str, dict] | None]:
    """Resolve *unit* to (table key, category); category is None if unknown.

    The micro sign (U+00B5) and Greek small mu (U+03BC) look identical but
    are different code points, so when the exact spelling is not found the
    other spelling is tried as well.
    """
    found = _find_category(unit)
    if found:
        return unit, found
    for old, new in (("\u00b5", "\u03bc"), ("\u03bc", "\u00b5")):
        alias = unit.replace(old, new)
        if alias != unit:
            found = _find_category(alias)
            if found:
                return alias, found
    return unit, None


def convert_units(value: float, from_unit: str, to_unit: str) -> dict:
    """
    Convert between scientific units.

    Supports: mass (g↔kg↔mg↔Da), volume (L↔mL↔μL), length (m↔nm↔Å),
    energy (J↔kJ↔kcal↔eV↔cm⁻¹↔hartree), pressure (Pa↔atm↔bar↔torr),
    time (s↔ms↔min↔h), amount (mol↔mmol↔μmol), and temperature (°C↔°F↔K).

    Unit names are case-sensitive and surrounding whitespace is ignored.
    Micro-prefixed units may be written with either the micro sign or the
    Greek letter mu.

    Returns a dict with 'value', 'from_unit', 'result', 'to_unit' and
    'category' (the unit strings are echoed back as passed in), or
    {"error": message} for unknown or incompatible units. Temperature
    results are rounded to 6 decimals; other results are not rounded.
    """
    # Temperature special case
    temp_units = {"C", "°C", "F", "°F", "K"}
    fu = from_unit.strip()
    tu = to_unit.strip()

    if fu in temp_units or tu in temp_units:
        fu_norm = fu.replace("°", "")
        tu_norm = tu.replace("°", "")
        # Convert to Kelvin first
        if fu_norm == "C":
            k = value + 273.15
        elif fu_norm == "F":
            k = (value - 32) * 5/9 + 273.15
        elif fu_norm == "K":
            k = value
        else:
            return {"error": f"Unknown temperature unit: {fu}"}
        # Convert from Kelvin
        if tu_norm == "C":
            result = k - 273.15
        elif tu_norm == "F":
            result = (k - 273.15) * 9/5 + 32
        elif tu_norm == "K":
            result = k
        else:
            return {"error": f"Unknown temperature unit: {tu}"}
        return {
            "value": value, "from_unit": from_unit,
            "result": round(result, 6), "to_unit": to_unit,
            "category": "temperature",
        }

    from_key, cat_from = _lookup_unit(fu)
    to_key, cat_to = _lookup_unit(tu)

    if not cat_from or not cat_to:
        supported = ", ".join(u for table in _UNIT_CATEGORIES.values() for u in table)
        # Report the source unit first when both are unknown
        missing = fu if not cat_from else tu
        return {"error": f"Unknown unit: {missing}. Supported: {supported}"}

    if cat_from[0] != cat_to[0]:
        return {"error": f"Cannot convert between {cat_from[0]} ({fu}) and {cat_to[0]} ({tu})"}

    # Convert via the category's reference unit: value * from_factor / to_factor
    si_value = value * cat_from[1][from_key]
    result = si_value / cat_to[1][to_key]

    return {
        "value": value, "from_unit": from_unit,
        "result": result, "to_unit": to_unit,
        "category": cat_from[0],
    }
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
# =============================================================================
|
|
464
|
+
# Buffer pH / Henderson-Hasselbalch
|
|
465
|
+
# =============================================================================
|
|
466
|
+
|
|
467
|
+
# Common buffer pKa values at 25°C
|
|
468
|
+
BUFFER_PKA: dict[str, dict] = {
|
|
469
|
+
"phosphate_1": {"name": "Phosphoric acid (pKa₁)", "pKa": 2.15, "species": "H₃PO₄ / H₂PO₄⁻"},
|
|
470
|
+
"citrate_1": {"name": "Citric acid (pKa₁)", "pKa": 3.13, "species": "H₃Cit / H₂Cit⁻"},
|
|
471
|
+
"formate": {"name": "Formic acid", "pKa": 3.75, "species": "HCOOH / HCOO⁻"},
|
|
472
|
+
"citrate_2": {"name": "Citric acid (pKa₂)", "pKa": 4.76, "species": "H₂Cit⁻ / HCit²⁻"},
|
|
473
|
+
"acetate": {"name": "Acetic acid", "pKa": 4.76, "species": "CH₃COOH / CH₃COO⁻"},
|
|
474
|
+
"citrate_3": {"name": "Citric acid (pKa₃)", "pKa": 6.40, "species": "HCit²⁻ / Cit³⁻"},
|
|
475
|
+
"MES": {"name": "MES", "pKa": 6.15, "species": "MES-H / MES"},
|
|
476
|
+
"PIPES": {"name": "PIPES", "pKa": 6.76, "species": "PIPES-H / PIPES"},
|
|
477
|
+
"phosphate_2": {"name": "Phosphoric acid (pKa₂)", "pKa": 7.20, "species": "H₂PO₄⁻ / HPO₄²⁻"},
|
|
478
|
+
"MOPS": {"name": "MOPS", "pKa": 7.20, "species": "MOPS-H / MOPS"},
|
|
479
|
+
"HEPES": {"name": "HEPES", "pKa": 7.55, "species": "HEPES-H / HEPES"},
|
|
480
|
+
"imidazole": {"name": "Imidazole", "pKa": 6.99, "species": "ImH⁺ / Im"},
|
|
481
|
+
"Tris": {"name": "Tris", "pKa": 8.07, "species": "TrisH⁺ / Tris"},
|
|
482
|
+
"TAPS": {"name": "TAPS", "pKa": 8.44, "species": "TAPS-H / TAPS"},
|
|
483
|
+
"borate": {"name": "Boric acid", "pKa": 9.24, "species": "B(OH)₃ / B(OH)₄⁻"},
|
|
484
|
+
"CHES": {"name": "CHES", "pKa": 9.50, "species": "CHES-H / CHES"},
|
|
485
|
+
"glycine_amino": {"name": "Glycine (amino)", "pKa": 9.60, "species": "Gly⁺ / Gly⁻"},
|
|
486
|
+
"CAPS": {"name": "CAPS", "pKa": 10.40, "species": "CAPS-H / CAPS"},
|
|
487
|
+
"carbonate_1": {"name": "Carbonic acid (pKa₁)", "pKa": 6.35, "species": "H₂CO₃ / HCO₃⁻"},
|
|
488
|
+
"carbonate_2": {"name": "Carbonic acid (pKa₂)", "pKa": 10.33, "species": "HCO₃⁻ / CO₃²⁻"},
|
|
489
|
+
"phosphate_3": {"name": "Phosphoric acid (pKa₃)", "pKa": 12.32, "species": "HPO₄²⁻ / PO₄³⁻"},
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def calculate_buffer_ph(
    pKa: float | None = None,
    buffer_name: str | None = None,
    acid_conc: float | None = None,
    base_conc: float | None = None,
    ratio_base_acid: float | None = None,
    target_ph: float | None = None,
) -> dict:
    """
    Henderson-Hasselbalch calculations.

    Mode 1: Given pKa + concentrations/ratio → calculate pH
    Mode 2: Given pKa + target pH → calculate required ratio
    Mode 2 takes precedence whenever target_ph is supplied.

    Args:
        pKa: pKa of the buffer (or provide buffer_name to look up). An
            explicit pKa wins over buffer_name.
        buffer_name: Look up pKa from known buffers (e.g., 'Tris', 'HEPES', 'phosphate_2').
            An exact key is tried first, then a case-insensitive substring
            search over keys and display names; the first hit in table order
            is used.
        acid_conc: Concentration of acid form (any consistent unit), must be > 0
        base_conc: Concentration of base form
        ratio_base_acid: [A⁻]/[HA] ratio (alternative to concentrations)
        target_ph: Target pH to calculate required ratio

    Returns a result dict (pKa, buffer, species, buffer_range plus the
    mode-specific fields and an optional 'warning'), or {"error": message}.
    """
    # Resolve pKa
    species = ""
    if buffer_name and pKa is None:
        buf = BUFFER_PKA.get(buffer_name)
        if not buf:
            # Fuzzy search
            q = buffer_name.lower()
            buf = next(
                (v for k, v in BUFFER_PKA.items() if q in k.lower() or q in v["name"].lower()),
                None,
            )
            if buf is None:
                avail = ", ".join(BUFFER_PKA.keys())
                return {"error": f"Buffer '{buffer_name}' not found. Available: {avail}"}
        pKa = buf["pKa"]
        species = buf.get("species", "")

    if pKa is None:
        return {"error": "Provide pKa or buffer_name"}

    result: dict[str, Any] = {
        "pKa": pKa,
        "buffer": buffer_name,
        "species": species,
        "buffer_range": f"{pKa - 1:.1f} – {pKa + 1:.1f}",
    }

    # Mode 2: target pH → ratio
    if target_ph is not None:
        ratio = 10 ** (target_ph - pKa)
        result["target_pH"] = target_ph
        result["required_ratio_base_acid"] = round(ratio, 4)
        result["note"] = f"Mix [A⁻]/[HA] = {ratio:.4f} to achieve pH {target_ph}"
        if abs(target_ph - pKa) > 1:
            result["warning"] = f"Target pH {target_ph} is outside buffer range ({pKa-1:.1f}–{pKa+1:.1f}). Buffer capacity will be poor."
        return result

    # Mode 1: concentrations/ratio → pH
    if ratio_base_acid is None:
        if acid_conc is None or base_conc is None:
            return {"error": "Provide acid_conc + base_conc, or ratio_base_acid, or target_ph"}
        if acid_conc <= 0:
            # Both concentrations were supplied, so "provide ..." would be
            # misleading; the real problem is a non-positive denominator.
            return {"error": "acid_conc must be > 0"}
        ratio_base_acid = base_conc / acid_conc

    if ratio_base_acid <= 0:
        return {"error": "Ratio [base]/[acid] must be > 0"}

    ph = pKa + math.log10(ratio_base_acid)
    result["ratio_base_acid"] = round(ratio_base_acid, 4)
    result["calculated_pH"] = round(ph, 2)
    if acid_conc is not None:
        result["acid_concentration"] = acid_conc
    if base_conc is not None:
        result["base_concentration"] = base_conc
    if abs(ph - pKa) > 1:
        result["warning"] = f"pH {ph:.2f} is outside optimal buffer range ({pKa-1:.1f}–{pKa+1:.1f})"

    return result
|